| #include <algorithm> |
| #include <sstream> |
| #include <unordered_map> |
| #include <vector> |
| #include "caffe2/core/blob_serialization.h" |
| #include "caffe2/core/operator.h" |
| #include "caffe2/core/tensor.h" |
| #include "caffe2/utils/eigen_utils.h" |
| #include "caffe2/utils/math.h" |
| |
| namespace caffe2 { |
| namespace { |
| |
// Treats `mat` as a D x N column-major matrix (element (i, j) lives at
// mat[j * D + i]) and, for each of the D rows, writes the maximum value to
// rowMax[i] and the index of the first column attaining it to argMax[i].
void RowwiseMaxAndArg(
    const float* mat,
    int32_t N,
    int32_t D,
    float* rowMax,
    int32_t* argMax) {
  for (int32_t i = 0; i < D; i++) {
    float best = mat[i];
    int32_t bestIdx = 0;
    // Scan row i across all N columns; strict '>' keeps the first maximum.
    for (int32_t j = 1; j < N; j++) {
      const float candidate = mat[j * D + i];
      if (candidate > best) {
        best = candidate;
        bestIdx = j;
      }
    }
    rowMax[i] = best;
    argMax[i] = bestIdx;
  }
}
// Treats `mat` as a D x N column-major matrix, so column i is the contiguous
// range mat[i * D, (i + 1) * D). For each of the N columns, writes the maximum
// value to colMax[i] and the index of the first row attaining it to argMax[i].
void ColwiseMaxAndArg(
    const float* mat,
    int32_t N,
    int32_t D,
    float* colMax,
    int32_t* argMax) {
  for (int32_t i = 0; i < N; i++) {
    const float* column = mat + i * D;
    float best = column[0];
    int32_t bestIdx = 0;
    // Strict '>' keeps the first maximum, matching Eigen's maxCoeff visitor.
    for (int32_t j = 1; j < D; j++) {
      if (column[j] > best) {
        best = column[j];
        bestIdx = j;
      }
    }
    colMax[i] = best;
    argMax[i] = bestIdx;
  }
}
| |
| class ViterbiPathOp : public Operator<CPUContext> { |
| public: |
| template <class... Args> |
| explicit ViterbiPathOp(Args&&... args) |
| : Operator(std::forward<Args>(args)...) {} |
| |
| void GatherRow( |
| const TensorCPU& data, |
| int32_t rowIndex, |
| int32_t block_size, |
| int32_t block_bytesize, |
| TensorCPU* outRow) { |
| CAFFE_ENFORCE( |
| 0 <= rowIndex && rowIndex < data.size(0), |
| "rowIndex is out of DATA bounds"); |
| auto out = static_cast<char*>(outRow->raw_mutable_data(data.dtype())); |
| auto src_base = static_cast<const char*>(data.raw_data()); |
| auto src = src_base + rowIndex * block_bytesize; |
| context_.CopyItemsSameDevice(data.dtype(), block_size, src, out); |
| } |
| |
| void |
| AddColToMat(const TensorCPU& mat, const TensorCPU& col, TensorCPU* result) { |
| float* resultData = result->template mutable_data<float>(); |
| const float* colData = col.template data<float>(); |
| // Initialize the columns of the result to be = the input col |
| for (auto i = 0; i < result->dim32(1); i++) { |
| for (auto j = 0; j < result->dim32(0); j++) { |
| resultData[i * result->dim32(0) + j] = colData[i]; |
| } |
| } |
| // Element-wise add of the result and the input matrix |
| math::Add<float, CPUContext>( |
| mat.numel(), |
| resultData, |
| mat.template data<float>(), |
| resultData, |
| &context_); |
| } |
| |
| bool RunOnDevice() override { |
| auto& predictions = Input(0); |
| auto& transitions = Input(1); |
| |
| CAFFE_ENFORCE( |
| predictions.dim() == 2 && transitions.dim() == 2, |
| "Predictions and transitions hould 2D matrices"); |
| |
| CAFFE_ENFORCE( |
| predictions.size(1) == transitions.size(0), |
| "Predictions and transitions dimensions not matching"); |
| |
| auto seqLen = predictions.dim32(0); |
| |
| auto* viterbiPath = Output(0, {seqLen}, at::dtype<int32_t>()); |
| auto block_size = predictions.numel() / predictions.size(0); |
| auto block_bytesize = |
| predictions.size_from_dim(1) * predictions.dtype().itemsize(); |
| Tensor backpointers(CPU); |
| backpointers.ResizeLike(predictions); |
| |
| Tensor trellis(std::vector<int64_t>{block_size}, CPU); |
| Tensor dpMat(CPU); |
| dpMat.ResizeLike(transitions); |
| Tensor dpMax(std::vector<int64_t>{block_size}, CPU); |
| GatherRow(predictions, 0, block_size, block_bytesize, &trellis); |
| for (auto i = 1; i < seqLen; i++) { |
| AddColToMat(transitions, trellis, &dpMat); |
| RowwiseMaxAndArg( |
| dpMat.template data<float>(), |
| dpMat.size(0), |
| dpMat.size(1), |
| dpMax.template mutable_data<float>(), |
| backpointers.template mutable_data<int32_t>() + (i * block_size)); |
| |
| GatherRow(predictions, i, block_size, block_bytesize, &trellis); |
| math::Add<float, CPUContext>( |
| trellis.numel(), |
| trellis.template data<float>(), |
| dpMax.template data<float>(), |
| trellis.template mutable_data<float>(), |
| &context_); |
| } |
| |
| Tensor tMax(std::vector<int64_t>{1}, CPU); |
| Tensor tArgMax(std::vector<int64_t>{1}, CPU); |
| ColwiseMaxAndArg( |
| trellis.template data<float>(), |
| 1, |
| trellis.numel(), |
| tMax.template mutable_data<float>(), |
| tArgMax.template mutable_data<int32_t>()); |
| |
| std::vector<int32_t> viterbiVec; |
| viterbiVec.push_back(tArgMax.template data<int32_t>()[0]); |
| Tensor bpEntry(std::vector<int64_t>{block_size}, CPU); |
| block_bytesize = |
| backpointers.size_from_dim(1) * backpointers.dtype().itemsize(); |
| for (auto i = seqLen - 1; i > 0; i--) { |
| GatherRow(backpointers, i, block_size, block_bytesize, &bpEntry); |
| viterbiVec.push_back(bpEntry.template data<int32_t>()[viterbiVec.back()]); |
| } |
| std::reverse_copy( |
| viterbiVec.begin(), |
| viterbiVec.end(), |
| viterbiPath->template mutable_data<int32_t>()); |
| return true; |
| } |
| }; |
| class SwapBestPathOp : public Operator<CPUContext> { |
| public: |
| template <class... Args> |
| explicit SwapBestPathOp(Args&&... args) |
| : Operator(std::forward<Args>(args)...) {} |
| bool RunOnDevice() override { |
| auto& data = Input(0); |
| auto& newBestIdicies = Input(1); |
| |
| CAFFE_ENFORCE( |
| data.dim() == 2 && newBestIdicies.dim() == 1, |
| "predictions should be a 2D matrix and bestPath should be 1D vector"); |
| |
| CAFFE_ENFORCE( |
| data.size(0) == newBestIdicies.size(0), |
| "predictions and bestPath dimensions not matching"); |
| |
| auto* updatedData = Output(0, data.sizes(), at::dtype<float>()); |
| float* outData = updatedData->template mutable_data<float>(); |
| context_.CopyItemsSameDevice( |
| data.dtype(), data.numel(), data.template data<float>(), outData); |
| |
| Tensor bestScores(CPU); |
| bestScores.ResizeLike(newBestIdicies); |
| Tensor oldBestIndices(CPU); |
| oldBestIndices.ResizeLike(newBestIdicies); |
| |
| ColwiseMaxAndArg( |
| data.template data<float>(), |
| data.size(0), |
| data.size(1), |
| bestScores.template mutable_data<float>(), |
| oldBestIndices.template mutable_data<int32_t>()); |
| |
| auto block_size = data.numel() / data.size(0); |
| |
| const int32_t* oldBestIdx = oldBestIndices.template data<int32_t>(); |
| const int32_t* newIdx = newBestIdicies.template data<int32_t>(); |
| |
| for (auto i = 0; i < data.dim32(0); i++) { |
| std::swap( |
| outData[i * block_size + newIdx[i]], |
| outData[i * block_size + oldBestIdx[i]]); |
| } |
| return true; |
| } |
| }; |
| REGISTER_CPU_OPERATOR(ViterbiPath, ViterbiPathOp); |
| OPERATOR_SCHEMA(ViterbiPath) |
| .NumInputs(2) |
| .NumOutputs(1) |
| .SetDoc(R"DOC( |
| Given a predictions matrix and a transitions matrix, get the path with the best |
| score |
| )DOC") |
| .Input(0, "predictions", "N*D predictions matrix") |
| .Input(1, "transitions", "D*D transitions matrix") |
| .Output(0, "viterbi_path", "N*1 vector holds the best path indices"); |
| NO_GRADIENT(ViterbiPath); |
| REGISTER_CPU_OPERATOR(SwapBestPath, SwapBestPathOp); |
| OPERATOR_SCHEMA(SwapBestPath) |
| .NumInputs(2) |
| .NumOutputs(1) |
| .SetDoc(R"DOC( |
| Given a sequence of indices and a matrix, enforce that these indices have the |
| best columnwise scores |
| score |
| )DOC") |
| .Input(0, "predictions", "N*D predictions matrix") |
| .Input(1, "bestPath", "N*1 vector holds the best path indices ") |
| .Output(0, "new_predictions", "N*D updated predictions matrix"); |
| NO_GRADIENT(SwapBestPath); |
| } // namespace |
| } // namespace caffe2 |