blob: 85fd84fa7655c4d20ee2a98d2bd761ecc3e84e6e [file] [log] [blame]
#include "caffe2/operators/reduce_ops.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vector>

#include <c10/util/accumulate.h>

#include "caffe2/utils/math.h"
namespace caffe2 {
namespace {
// Shared backward pass for ReduceMin and ReduceMax.
//
// The gradient of a min/max reduction flows only to the input positions
// whose value equals the reduced output; every other position gets zero.
// (If several elements tie for the extremum, each tied element receives
// the full output gradient.)
//
// dY_dims: dims of the reduced gradient tensor (reduced axes already 1).
// dX_dims: dims of the input/input-gradient tensor.
// dY_data/X_data/Y_data: output gradient, original input, reduced output.
// dX_data: input gradient, written for every element.
template <typename T>
void ComputeReduceMinMaxGradient(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data) {
  const std::int64_t dX_size =
      c10::multiply_integers(dX_dims.cbegin(), dX_dims.cend());
  const int ndim = dX_dims.size();
  // Multi-dimensional index into dX, advanced lexicographically below.
  std::vector<int> index(ndim, 0);
  // 64-bit loop counter: an int counter overflows (undefined behavior)
  // for tensors with more than 2^31 - 1 elements.
  for (std::int64_t dX_index = 0; dX_index < dX_size; ++dX_index) {
    // Map the dX coordinate onto dY, where reduced axes have extent 1.
    const int dY_index =
        math::utils::GetIndexFromDims(ndim, dY_dims.data(), index.data());
    dX_data[dX_index] =
        Y_data[dY_index] == X_data[dX_index] ? dY_data[dY_index] : T(0);
    math::utils::IncreaseIndexInDims(ndim, dX_dims.data(), index.data());
  }
}
// Shape inference for the Reduce* operators: every dimension listed in
// the "axes" argument is either collapsed to 1 (keepdims, the default)
// or dropped entirely. Axes that never match a dimension id (e.g. out
// of range or negative) cause the output shape to be marked unknown.
std::vector<TensorShape> ReduceShapeInference(
    const OperatorDef& def,
    const std::vector<TensorShape>& in) {
  if (in.size() != 1) {
    // Unexpected arity: report a single undefined shape.
    return std::vector<TensorShape>{
        CreateTensorShape({}, TensorProto_DataType_UNDEFINED)};
  }
  const auto& input_dims = in.front().dims();
  ArgumentHelper helper(def);
  auto reduce_axes = helper.GetRepeatedArgument<int32_t>("axes");
  // Sort so a single forward cursor can match axes against dimension ids.
  std::sort(reduce_axes.begin(), reduce_axes.end());
  const auto keep_dims = helper.GetSingleArgument<bool>("keepdims", true);
  std::vector<TensorShape> out;
  out.emplace_back();
  auto& shape = out.back();
  size_t next_axis = 0;
  int32_t dim_id = 0;
  for (const auto dim : input_dims) {
    const bool is_reduced =
        next_axis < reduce_axes.size() && dim_id == reduce_axes[next_axis];
    if (is_reduced) {
      if (keep_dims) {
        // A zero-sized dimension stays zero; anything else collapses to 1.
        shape.add_dims(dim == 0 ? 0 : 1);
      }
      ++next_axis;
    } else {
      shape.add_dims(dim);
    }
    ++dim_id;
  }
  // Reducing away every dimension of a non-scalar leaves a 1-element tensor.
  if (shape.dims_size() == 0 && input_dims.size() != 0) {
    shape.add_dims(1);
  }
  // Some axes were never matched: the requested reduction is not
  // resolvable here, so the shape is unknown.
  if (next_axis != reduce_axes.size()) {
    shape.set_unknown_shape(true);
  }
  shape.set_data_type(in.front().data_type());
  return out;
}
} // namespace
// CPU backward pass for ReduceMin: delegates to the shared min/max
// gradient kernel, which routes dY to the input element(s) equal to the
// reduced minimum and zeros the rest. Always returns true.
template <>
template <typename T>
bool MinReducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data,
    CPUContext* /* context */) const {
  ComputeReduceMinMaxGradient(
      dY_dims, dX_dims, dY_data, X_data, Y_data, dX_data);
  return true;
}
// CPU backward pass for ReduceMax: delegates to the shared min/max
// gradient kernel, which routes dY to the input element(s) equal to the
// reduced maximum and zeros the rest. Always returns true.
template <>
template <typename T>
bool MaxReducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data,
    CPUContext* /* context */) const {
  ComputeReduceMinMaxGradient(
      dY_dims, dX_dims, dY_data, X_data, Y_data, dX_data);
  return true;
}
// ReduceMin: minimum reduction along the given axes, on CPU, for
// int32/int64/float/double tensors.
REGISTER_CPU_OPERATOR(
    ReduceMin,
    ReduceOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MinReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceMinGradient,
    ReduceGradientOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MinReducer<CPUContext>>);
// NOTE(review): unlike ReduceSum/ReduceMean below, this schema registers
// no TensorInferenceFunction — confirm whether ReduceShapeInference
// should apply here as well.
OPERATOR_SCHEMA(ReduceMin)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the min of the input tensor's element along the provided axes.
The resulted tensor has the same rank as the input if keepdims equal True.
If keepdims equal false, then the resulted tensor have the reduced dimension
pruned.
)DOC")
    .Arg("axes", "A list of integers, along which to reduce.")
    .Arg(
        "keepdims",
        "Keep the reduced dimension(s) or not, default True keeps the reduced "
        "dimension(s).")
    .Input(0, "data", "An input tensor.")
    .Output(0, "reduced", "Reduced output tensor.");
// Gradient consumes (dY, X, Y) and produces dX.
OPERATOR_SCHEMA(ReduceMinGradient).NumInputs(3).NumOutputs(1);
// ReduceMax: maximum reduction along the given axes, on CPU, for
// int32/int64/float/double tensors.
REGISTER_CPU_OPERATOR(
    ReduceMax,
    ReduceOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MaxReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceMaxGradient,
    ReduceGradientOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        MaxReducer<CPUContext>>);
OPERATOR_SCHEMA(ReduceMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the max of the input tensor's element along the provided axes.
The resulted tensor has the same rank as the input if keepdims equal True.
If keepdims equal false, then the resulted tensor have the reduced dimension
pruned.
)DOC")
    .Arg("axes", "A list of integers, along which to reduce.")
    .Arg(
        "keepdims",
        "Keep the reduced dimension(s) or not, default True keeps the reduced "
        "dimension(s).")
    .Input(0, "data", "An input tensor.")
    .Output(0, "reduced", "Reduced output tensor.");
// Gradient consumes (dY, X, Y) and produces dX.
OPERATOR_SCHEMA(ReduceMaxGradient).NumInputs(3).NumOutputs(1);
// ReduceSum: sum reduction along the given axes, on CPU, for
// int32/int64/float/double tensors.
REGISTER_CPU_OPERATOR(
    ReduceSum,
    ReduceOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        SumReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceSumGradient,
    ReduceGradientOp<
        TensorTypes<std::int32_t, std::int64_t, float, double>,
        CPUContext,
        SumReducer<CPUContext>>);
OPERATOR_SCHEMA(ReduceSum)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **sum** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ReduceSum",
["X"],
["Y"],
axes=(0,1),
keepdims=0
)
workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[[[5. 3. 7. 9. 5.]
[4. 5. 1. 8. 3.]
[1. 0. 9. 7. 6.]
[7. 5. 0. 3. 1.]
[6. 4. 4. 8. 3.]]
[[8. 9. 6. 7. 7.]
[5. 5. 4. 7. 0.]
[9. 7. 6. 6. 7.]
[7. 5. 2. 4. 2.]
[4. 5. 1. 9. 4.]]]]
Y:
[[13. 12. 13. 16. 12.]
[ 9. 10. 5. 15. 3.]
[10. 7. 15. 13. 13.]
[14. 10. 2. 7. 3.]
[10. 9. 5. 17. 7.]]
```
</details>
)DOC")
    .TensorInferenceFunction(ReduceShapeInference)
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor");
// Gradient consumes (dY, X, Y) and produces dX.
OPERATOR_SCHEMA(ReduceSumGradient).NumInputs(3).NumOutputs(1);
// ReduceMean: arithmetic-mean reduction along the given axes, on CPU.
// Registered for float only (narrower than ReduceSum above).
REGISTER_CPU_OPERATOR(
    ReduceMean,
    ReduceOp<TensorTypes<float>, CPUContext, MeanReducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceMeanGradient,
    ReduceGradientOp<TensorTypes<float>, CPUContext, MeanReducer<CPUContext>>);
OPERATOR_SCHEMA(ReduceMean)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **mean** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ReduceMean",
["X"],
["Y"],
axes=(0,1),
keepdims=0
)
workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[[[9. 0. 3. 6. 0.]
[3. 4. 5. 0. 9.]
[6. 9. 1. 1. 5.]
[6. 2. 3. 7. 7.]
[3. 1. 1. 0. 1.]]
[[4. 3. 9. 8. 1.]
[8. 2. 0. 4. 0.]
[8. 9. 9. 0. 2.]
[7. 2. 5. 8. 9.]
[5. 9. 1. 9. 0.]]]]
Y:
[[6.5 1.5 6. 7. 0.5]
[5.5 3. 2.5 2. 4.5]
[7. 9. 5. 0.5 3.5]
[6.5 2. 4. 7.5 8. ]
[4. 5. 1. 4.5 0.5]]
```
</details>
)DOC")
    .TensorInferenceFunction(ReduceShapeInference)
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor");
// Gradient consumes (dY, X, Y) and produces dX.
OPERATOR_SCHEMA(ReduceMeanGradient).NumInputs(3).NumOutputs(1);
// CPU backward pass for ReduceL1. The subgradient of |x| is sign(x), so
// each input-gradient element is +/-dY depending on the sign of the
// corresponding input; inputs within kEps of zero get a zero gradient
// (a valid subgradient at the non-differentiable point). Y is not needed.
// Always returns true.
template <>
template <typename T>
bool L1Reducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* /* Y_data */,
    T* dX_data,
    CPUContext* /* context */) const {
  const float kEps = 1e-12f;
  const std::int64_t dX_size =
      c10::multiply_integers(dX_dims.cbegin(), dX_dims.cend());
  const int ndim = dX_dims.size();
  std::vector<int> index(ndim, 0);
  // 64-bit loop counter: an int counter overflows (undefined behavior)
  // for tensors with more than 2^31 - 1 elements.
  for (std::int64_t dX_index = 0; dX_index < dX_size; ++dX_index) {
    const int dY_index =
        math::utils::GetIndexFromDims(ndim, dY_dims.data(), index.data());
    const float temp = X_data[dX_index];
    if (temp < -kEps) {
      dX_data[dX_index] = -dY_data[dY_index];
    } else if (temp > kEps) {
      dX_data[dX_index] = dY_data[dY_index];
    } else {
      dX_data[dX_index] = T(0);
    }
    math::utils::IncreaseIndexInDims(ndim, dX_dims.data(), index.data());
  }
  return true;
}
// CPU backward pass for ReduceL2: d||x||_2/dx_i = x_i / ||x||_2, so each
// input-gradient element is dY * x / norm, where norm is the reduced L2
// value already computed in Y. When the norm is (near) zero the quotient
// is undefined; the gradient falls back to dY itself for those elements.
// Always returns true.
template <>
template <typename T>
bool L2Reducer<CPUContext>::Backward(
    const std::vector<int>& dY_dims,
    const std::vector<int>& dX_dims,
    const T* dY_data,
    const T* X_data,
    const T* Y_data,
    T* dX_data,
    CPUContext* /* context */) const {
  const float kEps = 1e-12f;
  const std::int64_t dX_size =
      c10::multiply_integers(dX_dims.cbegin(), dX_dims.cend());
  const int ndim = dX_dims.size();
  std::vector<int> index(ndim, 0);
  // 64-bit loop counter: an int counter overflows (undefined behavior)
  // for tensors with more than 2^31 - 1 elements.
  for (std::int64_t dX_index = 0; dX_index < dX_size; ++dX_index) {
    const int dY_index =
        math::utils::GetIndexFromDims(ndim, dY_dims.data(), index.data());
    const T norm = Y_data[dY_index];
    if (norm < kEps) {
      // Degenerate norm: avoid dividing by ~0.
      dX_data[dX_index] = dY_data[dY_index];
    } else {
      dX_data[dX_index] = dY_data[dY_index] * X_data[dX_index] / norm;
    }
    math::utils::IncreaseIndexInDims(ndim, dX_dims.data(), index.data());
  }
  return true;
}
// ReduceL1: L1-norm (sum of absolute values) reduction along the given
// axes, on CPU, float only.
REGISTER_CPU_OPERATOR(
    ReduceL1,
    ReduceOp<TensorTypes<float>, CPUContext, L1Reducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceL1Gradient,
    ReduceGradientOp<TensorTypes<float>, CPUContext, L1Reducer<CPUContext>>);
// NOTE(review): no TensorInferenceFunction here, unlike ReduceSum/Mean —
// confirm whether ReduceShapeInference should apply.
OPERATOR_SCHEMA(ReduceL1)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **L1 norm** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ReduceL1",
["X"],
["Y"],
axes=(0,1),
keepdims=0
)
workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[[[ 2. 7. 6. 4. 5.]
[ 2. 1. 9. 8. 7.]
[ 4. 9. 1. 0. 0.]
[ 6. 4. 0. 8. 1.]
[ 1. 7. 1. 0. 2.]]
[[ 5. 8. 1. 7. 7.]
[ 4. 5. 6. 5. 4.]
[ 1. 9. 6. 6. 3.]
[ 6. 6. 8. 8. 4.]
[ 2. 3. 5. 8. 1.]]]]
Y:
[[ 7. 15. 7. 11. 12.]
[ 6. 6. 15. 13. 11.]
[ 5. 18. 7. 6. 3.]
[ 12. 10. 8. 16. 5.]
[ 3. 10. 6. 8. 3.]]
```
</details>
)DOC")
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor");
// Gradient consumes (dY, X, Y) and produces dX.
OPERATOR_SCHEMA(ReduceL1Gradient).NumInputs(3).NumOutputs(1);
// ReduceL2: L2-norm (Euclidean norm) reduction along the given axes,
// on CPU, float only.
REGISTER_CPU_OPERATOR(
    ReduceL2,
    ReduceOp<TensorTypes<float>, CPUContext, L2Reducer<CPUContext>>);
REGISTER_CPU_OPERATOR(
    ReduceL2Gradient,
    ReduceGradientOp<TensorTypes<float>, CPUContext, L2Reducer<CPUContext>>);
OPERATOR_SCHEMA(ReduceL2)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes the **L2 norm** of the input tensor's elements along the provided `axes`. The resulting tensor has the same rank as the input if the `keepdims` argument equals 1 (default). If `keepdims` is set to 0, then the `axes` dimensions are pruned.
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduce_ops.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"ReduceL2",
["X"],
["Y"],
axes=(0,1),
keepdims=0
)
workspace.FeedBlob("X", np.random.randint(10, size=(1,2,5,5)).astype(np.float32))
print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[[[ 8. 0. 2. 5. 1.]
[ 1. 3. 0. 4. 0.]
[ 1. 3. 6. 7. 7.]
[ 6. 9. 8. 4. 6.]
[ 6. 1. 5. 7. 3.]]
[[ 2. 4. 6. 2. 8.]
[ 1. 1. 8. 0. 8.]
[ 5. 9. 0. 3. 2.]
[ 1. 7. 3. 7. 3.]
[ 6. 8. 9. 8. 7.]]]]
Y:
[[ 8.24621105 4. 6.3245554 5.38516474 8.06225777]
[ 1.41421354 3.1622777 8. 4. 8. ]
[ 5.09901953 9.48683262 6. 7.6157732 7.28010988]
[ 6.08276272 11.40175438 8.54400349 8.06225777 6.70820379]
[ 8.48528099 8.06225777 10.29563046 10.63014603 7.6157732 ]]
```
</details>
)DOC")
    .Arg("axes", "(*Tuple(int)*): list of axes to reduce")
    .Arg(
        "keepdims",
        "(*int*): set to 1 to keep the reduced dimension(s) (default=1), else set to 0 to not keep the reduced dimension(s)")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): reduced tensor")
    // NOTE(review): this maps ReduceL2 onto the ONNX "ReduceMean" schema,
    // but ONNX defines a ReduceL2 operator — confirm this mapping is
    // intentional before relying on ONNX export of this op.
    .InheritOnnxSchema("ReduceMean");
// Gradient consumes (dY, X, Y) and produces dX.
OPERATOR_SCHEMA(ReduceL2Gradient).NumInputs(3).NumOutputs(1);
namespace {
class GetReduceGradient final : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
std::vector<OperatorDef> GetGradientDefs() override {
return SingleGradientDef(
def_.type() + "Gradient",
"",
std::vector<string>{GO(0), I(0), O(0)},
std::vector<string>{GI(0)});
}
};
} // namespace
// Every Reduce* op uses the same gradient wiring:
// "<Op>Gradient"(dY, X, Y) -> dX.
REGISTER_GRADIENT(ReduceMin, GetReduceGradient);
REGISTER_GRADIENT(ReduceMax, GetReduceGradient);
REGISTER_GRADIENT(ReduceSum, GetReduceGradient);
REGISTER_GRADIENT(ReduceMean, GetReduceGradient);
REGISTER_GRADIENT(ReduceL1, GetReduceGradient);
REGISTER_GRADIENT(ReduceL2, GetReduceGradient);
} // namespace caffe2