| #include "caffe2/operators/reduction_ops.h" |
| |
| namespace caffe2 { |
| |
| REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>); |
| REGISTER_CPU_OPERATOR(SumElementsInt, SumElementsIntOp<int, CPUContext>); |
| REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>); |
| |
| REGISTER_CPU_OPERATOR( |
| SumElementsGradient, |
| SumElementsGradientOp<float, CPUContext>); |
| |
| REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>); |
| REGISTER_CPU_OPERATOR( |
| RowwiseMaxGradient, |
| MaxReductionGradientOp<float, CPUContext, true>); |
| REGISTER_CPU_OPERATOR( |
| ColwiseMaxGradient, |
| MaxReductionGradientOp<float, CPUContext, false>); |
| REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>); |
| |
| OPERATOR_SCHEMA(SumElements) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .ScalarType(TensorProto::FLOAT) |
| .SetDoc(R"DOC( |
| Sums the elements of the input tensor. Tensor type must be float32. |
| |
| Github Links: |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc |
| |
| <details> |
| |
| <summary> <b>Example</b> </summary> |
| |
| **Code** |
| |
| ``` |
| |
| workspace.ResetWorkspace() |
| |
| sum_op = core.CreateOperator( |
| "SumElements", |
| ["X"], |
| ["Y"] |
| ) |
| |
| avg_op = core.CreateOperator( |
| "SumElements", |
| ["X"], |
| ["Y"], |
| average=True |
| ) |
| |
| workspace.FeedBlob("X", np.random.randint(10, size=(3,3)).astype(np.float32)) |
| print("X:\n", workspace.FetchBlob("X")) |
| workspace.RunOperatorOnce(sum_op) |
| print("Y (sum_op):", workspace.FetchBlob("Y")) |
| workspace.RunOperatorOnce(avg_op) |
| print("Y (avg_op):", workspace.FetchBlob("Y")) |
| |
| ``` |
| |
| **Result** |
| |
| ``` |
| |
| X: |
| [[7. 2. 5.] |
| [9. 4. 2.] |
| [1. 2. 5.]] |
| Y (sum_op): 37.0 |
| Y (avg_op): 4.111111 |
| |
| ``` |
| |
| </details> |
| |
| )DOC") |
| .Arg("average", "(*bool*): set to True to compute the average of the elements rather than the sum") |
| .Input(0, "X", "(*Tensor`<float>`*): blob pointing to an instance of a counter") |
| .Output(0, "sum", "(*Tensor`<float>`*): Scalar tensor containing the sum (or average)"); |
| |
| OPERATOR_SCHEMA(SumElementsInt) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .ScalarType(TensorProto::INT32) |
| .SetDoc("Sums the integer elements of the input tensor.") |
| .Input(0, "X", "Tensor to sum up") |
| .Output(0, "sum", "Scalar sum"); |
| SHOULD_NOT_DO_GRADIENT(SumElementsInt); |
| |
| OPERATOR_SCHEMA(SumSqrElements) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .ScalarType(TensorProto::FLOAT) |
| .SetDoc("Sums the squares elements of the input tensor.") |
| .Arg("average", "whether to average or not") |
| .Input(0, "X", "Tensor to sum up") |
| .Output(0, "sum", "Scalar sum of squares"); |
| |
| OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1); |
| |
| class GetSumElementsGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| vector<OperatorDef> GetGradientDefs() override { |
| return SingleGradientDef( |
| "SumElementsGradient", |
| "", |
| vector<string>{I(0), GO(0)}, |
| vector<string>{GI(0)}); |
| } |
| }; |
| REGISTER_GRADIENT(SumElements, GetSumElementsGradient); |
| |
| OPERATOR_SCHEMA(RowwiseMax) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .SetDoc(R"DOC( |
| Compute row-wise max reduction of the input tensor. This op takes one input, $X$, of shape $BxMxN$, where $B$ is the batch size, $M$ is number of rows, and $N$ is number of columns. The output of this op, $Y$, is a matrix of shape $BxM$, with one row for each element of the batch, and the same number of columns as the number of rows of the input tensor. |
| |
| Github Links: |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.h |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc |
| |
| <details> |
| |
| <summary> <b>Example</b> </summary> |
| |
| **Code** |
| |
| ``` |
| |
| workspace.ResetWorkspace() |
| |
| op = core.CreateOperator( |
| "RowwiseMax", |
| ["X"], |
| ["Y"] |
| ) |
| |
| // Create X, simulating a batch of 2, 4x4 matricies |
| X = np.random.randint(0,high=20,size=(2,4,4)) |
| print("X:\n",X) |
| |
| // Feed X into workspace |
| workspace.FeedBlob("X", X.astype(np.float32)) |
| |
| // Run op |
| workspace.RunOperatorOnce(op) |
| |
| // Collect Output |
| print("Y:\n", workspace.FetchBlob("Y")) |
| |
| ``` |
| |
| **Result** |
| |
| ``` |
| |
| X: |
| [[[ 5 12 10 1] |
| [ 4 16 2 15] |
| [ 5 11 12 15] |
| [15 4 17 19]] |
| |
| [[16 5 5 13] |
| [17 2 1 17] |
| [18 3 19 5] |
| [14 16 10 16]]] |
| Y: |
| [[12. 16. 15. 19.] |
| [16. 17. 19. 16.]] |
| |
| ``` |
| |
| </details> |
| |
| )DOC") |
| .Input( |
| 0, |
| "X", |
| "A tensor of dimensions $B x M x N$ to compute rowwise-max. Here, $B$ is batch size, and $M$ and $N$ are the number of rows and columns of each element of the batch, respectively.") |
| .Output( |
| 0, |
| "Y", |
| "The output tensor of shape $B x M$, where each row represents the row-wise maximums for that element of the input batch."); |
| |
| OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1); |
| class GetRowwiseMaxGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| vector<OperatorDef> GetGradientDefs() override { |
| return SingleGradientDef( |
| "RowwiseMaxGradient", |
| "", |
| vector<string>{I(0), O(0), GO(0)}, |
| vector<string>{GI(0)}); |
| } |
| }; |
| REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient); |
| |
| OPERATOR_SCHEMA(ColwiseMaxGradient); |
| |
| OPERATOR_SCHEMA(ColwiseMax) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .SetDoc(R"DOC( |
| Compute column-wise max reduction of the input tensor. This op takes one input, $X$, of shape $BxMxN$, where $B$ is the batch size, $M$ is number of rows, and $N$ is number of columns. The output of this op, $Y$, is a matrix of shape $BxN$, with one row for each element of the batch, and the same number of columns as the input tensor. |
| |
| Github Links: |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.h |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc |
| |
| <details> |
| |
| <summary> <b>Example</b> </summary> |
| |
| **Code** |
| |
| ``` |
| workspace.ResetWorkspace() |
| |
| op = core.CreateOperator( |
| "ColwiseMax", |
| ["X"], |
| ["Y"] |
| ) |
| |
| // Create X, simulating a batch of 2, 4x4 matricies |
| X = np.random.randint(0,high=20,size=(2,4,4)) |
| print("X:\n",X) |
| |
| // Feed X into workspace |
| workspace.FeedBlob("X", X.astype(np.float32)) |
| |
| // Run op |
| workspace.RunOperatorOnce(op) |
| |
| // Collect Output |
| print("Y:\n", workspace.FetchBlob("Y")) |
| |
| ``` |
| |
| **Result** |
| |
| ``` |
| |
| X: |
| [[[17 15 2 6] |
| [ 8 12 6 0] |
| [ 6 9 7 3] |
| [ 4 13 16 13]] |
| |
| [[ 0 3 4 12] |
| [18 1 17 12] |
| [ 7 17 13 14] |
| [12 17 2 1]]] |
| Y: |
| [[17. 15. 16. 13.] |
| [18. 17. 17. 14.]] |
| |
| ``` |
| |
| </details> |
| |
| )DOC") |
| .TensorInferenceFunction([](const OperatorDef& /*unused*/, |
| const std::vector<TensorShape>& in) { |
| vector<int64_t> output_dims = {in[0].dims()[0], in[0].dims()[2]}; |
| return vector<TensorShape>{ |
| CreateTensorShape(vector<int64_t>{output_dims}, in[0].data_type())}; |
| }) |
| .Input( |
| 0, |
| "X", |
| "A tensor of dimensions $B x M x N$ to compute columnwise-max. Here, $B$ is batch size, and $M$ and $N$ are the number of rows and columns of each element of the batch, respectively.") |
| .Output( |
| 0, |
| "Y", |
| "The output tensor of shape $B x N$, where each row represents the column-wise maximums for that element of the input batch."); |
| |
| OPERATOR_SCHEMA(ColumnMaxGradient).NumInputs(3).NumOutputs(1); |
| class GetColwiseMaxGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| vector<OperatorDef> GetGradientDefs() override { |
| return SingleGradientDef( |
| "ColwiseMaxGradient", |
| "", |
| vector<string>{I(0), O(0), GO(0)}, |
| vector<string>{GI(0)}); |
| } |
| }; |
| REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient); |
| |
| template <typename T, class Context> |
| bool SumElementsGradientOp<T, Context>::RunOnDevice() |
| // TODO: T21635077 fix float-divide-by-zero undefined behavior |
| #if defined(__has_feature) |
| #if __has_feature(__address_sanitizer__) |
| __attribute__((__no_sanitize__("float-divide-by-zero"))) |
| #endif |
| #endif |
| { |
| auto& X = Input(0); |
| Tensor sum_grad(Input(1), CPU); |
| |
| auto* dX = Output(0, X.sizes(), at::dtype<T>()); |
| TORCH_DCHECK_EQ(sum_grad.numel(), 1); |
| math::Set<T, Context>( |
| dX->numel(), |
| static_cast<T>( |
| sum_grad.template data<T>()[0] * (average_ ? 1.0 / X.numel() : 1)), |
| dX->template mutable_data<T>(), |
| &context_); |
| return true; |
| } |
| |
| template <typename T, class Context, bool ROWWISE> |
| bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() { |
| auto& X = Input(0); |
| auto& Y = Input(1); |
| auto& dY = Input(2); |
| |
| auto* dX = Output(0, X.sizes(), at::dtype<T>()); |
| |
| CAFFE_ENFORCE_EQ(X.dim(), 3); |
| |
| const int batch_size = X.dim32(0); |
| const int M = X.dim32(1); |
| const int N = X.dim32(2); |
| |
| const T* Xdata = X.template data<T>(); |
| const T* Ydata = Y.template data<T>(); |
| const T* dYdata = dY.template data<T>(); |
| T* dXdata = dX->template mutable_data<T>(); |
| |
| const int input_size = M * N; |
| for (int i = 0; i < batch_size; ++i) { |
| const T* Xdata_i = Xdata + i * input_size; |
| T* dXdata_i = dXdata + i * input_size; |
| if (ROWWISE) { |
| const T* Ydata_i = Ydata + i * M; |
| const T* dYdata_i = dYdata + i * M; |
| for (int m = 0; m < M; ++m) { |
| const T* Xdata_m = Xdata_i + m * N; |
| T* dXdata_m = dXdata_i + m * N; |
| for (int n = 0; n < N; ++n) { |
| if (Xdata_m[n] == Ydata_i[m]) { |
| dXdata_m[n] = dYdata_i[m]; |
| } else { |
| dXdata_m[n] = static_cast<T>(0); |
| } |
| } |
| } |
| } else { |
| const T* Ydata_i = Ydata + i * N; |
| const T* dYdata_i = dYdata + i * N; |
| for (int n = 0; n < N; ++n) { |
| for (int m = 0; m < M; ++m) { |
| const T* Xdata_m = Xdata_i + m * N; |
| T* dXdata_m = dXdata_i + m * N; |
| if (Xdata_m[n] == Ydata_i[n]) { |
| dXdata_m[n] = dYdata_i[n]; |
| } else { |
| dXdata_m[n] = static_cast<T>(0); |
| } |
| } |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| } // namespace caffe2 |