#ifndef CAFFE2_OPERATORS_TILE_OP_H_
#define CAFFE2_OPERATORS_TILE_OP_H_

#include <array>
#include <string>
#include <type_traits>
#include <vector>

#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

// Tile the input tensor along a given axis: the output is `tiles` copies of
// the input concatenated along `axis`.
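// For example, a 2x2 input [[1, 2], [3, 4]] with tiles = 2 and axis = 1
// yields the 2x4 output [[1, 2, 1, 2], [3, 4, 3, 4]].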
template <class Context>
class TileOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit TileOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(std::int32_t, "tiles", tiles_, 1),
        OP_SINGLE_ARG(std::int32_t, "axis", axis_, 0) {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<std::int32_t, std::int64_t, float, double>>::
        call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    if (InputSize() > 1) {
      // `tiles` and/or `axis` may also be passed as inputs; we check for
      // them in that order. That is:
      //   InputSize() == 2: `tiles` is given as an input.
      //   InputSize() == 3: both `tiles` and `axis` are given as inputs.
      // Any value passed as an input overrides the corresponding argument.
      CAFFE_ENFORCE(
          Input(1).dim() == 1 && Input(1).numel() == 1,
          "Input `tiles` should be a vector of size 1.");
      tiles_ = GetArgFromTensor(Input(1));

      // Due to a bug in the original code, this block is kept temporarily
      // for backward compatibility: an int64 `tiles` input forces axis_ to 0.
      // TODO(yangxm): Remove this once the production runtime has been
      // upgraded with the fixed model config.
      if (Input(1).template IsType<std::int64_t>()) {
        axis_ = 0;
      }

      if (InputSize() > 2) {
        CAFFE_ENFORCE(
            Input(2).dim() == 1 && Input(2).numel() == 1,
            "Input `axis` should be a vector of size 1.");
        axis_ = GetArgFromTensor(Input(2));
      } else {
        CAFFE_ENFORCE(
            OperatorBase::HasArgument("axis"),
            "Argument `axis` is missing and was not specified as input.");
      }
    } else {
      CAFFE_ENFORCE(
          OperatorBase::HasArgument("tiles"),
          "Argument `tiles` is missing and was not specified as input.");
      CAFFE_ENFORCE(
          OperatorBase::HasArgument("axis"),
          "Argument `axis` is missing and was not specified as input.");
    }

    const auto& X = Input(0);
    auto* Y = Output(0);
    const int axis = X.canonical_axis_index(axis_);

    // reshape output to be input tiled along the axis
    std::vector<std::int64_t> Y_dims = X.sizes().vec();
    Y_dims[axis] *= tiles_;
    Y->Resize(Y_dims);

    // size up to (and not including) axis
    const int outer_size = X.size_to_dim(axis);
    // size from axis onward (including axis)
    const int inner_size = X.size_from_dim(axis);
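    // For example, for an X of shape (2, 3, 4) and axis = 1: outer_size = 2,
    // inner_size = 3 * 4 = 12, and Y has shape (2, 3 * tiles_, 4).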

    const T* X_data = X.template data<T>();
    T* Y_data = Y->template mutable_data<T>();
    return DoTile<T>(outer_size, inner_size, X_data, Y_data);
  }

 private:
  std::int32_t GetArgFromTensor(const Tensor& tensor) {
    CAFFE_ENFORCE(
        tensor.IsType<std::int32_t>() || tensor.IsType<std::int64_t>());
    std::int32_t val = -1;
    if (tensor.IsType<std::int32_t>()) {
      context_.template CopyToCPU<std::int32_t>(
          1, tensor.data<std::int32_t>(), &val);
    } else if (tensor.IsType<std::int64_t>()) {
      std::int64_t val_int64;
      context_.template CopyToCPU<std::int64_t>(
          1, tensor.data<std::int64_t>(), &val_int64);
      val = static_cast<std::int32_t>(val_int64);
    }
    return val;
  }

  template <typename T>
  bool DoTile(const int outer_size, const int inner_size, const T* X, T* Y) {
    if (inner_size == 1) {
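      // Fast path: each element of X becomes a run of tiles_ identical
      // values in Y. Viewing Y as a (tiles_ x outer_size) column-major
      // array, column i is filled with the scalar X[i].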
      EigenArrayMap<T> Y_arr(Y, tiles_, outer_size);
      for (const auto i : c10::irange(outer_size)) {
        Y_arr.col(i) = X[i];
      }
    } else {
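      // General case: for each outer index i, broadcast the inner block of X
      // (column i of X_arr, inner_size elements) into tiles_ consecutive
      // columns of the corresponding block of Y.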
      ConstEigenArrayMap<T> X_arr(X, inner_size, outer_size);
      for (const auto i : c10::irange(outer_size)) {
        EigenArrayMap<T>(Y + i * tiles_ * inner_size, inner_size, tiles_)
            .colwise() = X_arr.col(i);
      }
    }
    return true;
  }

  std::int32_t tiles_;
  std::int32_t axis_;
};

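// Gradient of TileOp: sums the tiles_ copies of the output gradient back into
// the shape of the original (untiled) input.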
template <class Context>
class TileGradientOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit TileGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(std::int32_t, "tiles", tiles_, 1),
        OP_SINGLE_ARG(std::int32_t, "axis", axis_, 0) {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<std::int32_t, std::int64_t, float, double>>::
        call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    if (InputSize() > 1) {
      // `tiles` and/or `axis` may also be passed as inputs; we check for
      // them in that order. That is:
      //   InputSize() == 2: `tiles` is given as an input.
      //   InputSize() == 3: both `tiles` and `axis` are given as inputs.
      // Any value passed as an input overrides the corresponding argument.
      CAFFE_ENFORCE(
          Input(1).dim() == 1 && Input(1).numel() == 1,
          "Input `tiles` should be a vector of size 1.");
      tiles_ = GetArgFromTensor(Input(1));
      if (InputSize() > 2) {
        CAFFE_ENFORCE(
            Input(2).dim() == 1 && Input(2).numel() == 1,
            "Input `axis` should be a vector of size 1.");
        axis_ = GetArgFromTensor(Input(2));
      } else {
        CAFFE_ENFORCE(
            OperatorBase::HasArgument("axis"),
            "Argument `axis` is missing and was not specified as input.");
      }
    } else {
      CAFFE_ENFORCE(
          OperatorBase::HasArgument("tiles"),
          "Argument `tiles` is missing and was not specified as input.");
      CAFFE_ENFORCE(
          OperatorBase::HasArgument("axis"),
          "Argument `axis` is missing and was not specified as input.");
    }

    const auto& dY = Input(0);
    auto* dX = Output(0);
    const int axis = dY.canonical_axis_index(axis_);

    // reshape output to be input "untiled" along the axis
    std::vector<std::int64_t> X_dims = dY.sizes().vec();
    CAFFE_ENFORCE_EQ(X_dims[axis] % tiles_, 0);
    X_dims[axis] /= tiles_;
    dX->Resize(X_dims);

    // size up to (and not including) axis
    const int outer_size = dX->size_to_dim(axis);
    // size from axis onward (including axis)
    const int inner_size = dX->size_from_dim(axis);

    /**
     * How this works:
     * Consider a 2D tensor (matrix) of size 3x10 tiled 2 times along axis 1
     * (columns), producing a 3x20 output. Tiling concatenates copies of the
     * input, which is equivalent to taking the Kronecker product of a row
     * vector of ones with the input. The gradient of that operation sums the
     * copies back together: each input element receives the sum of the
     * gradients of the tiles_ output elements it was copied to.
     */
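    // Concretely, for the 3x10 example above: dY has shape 3x20, dX has
    // shape 3x10, and dX[i][j] = dY[i][j] + dY[i][j + 10].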
    const T* dY_data = dY.template data<T>();
    T* dX_data = dX->template mutable_data<T>();
    return DoTileGradient<T>(outer_size, inner_size, dY_data, dX_data);
  }

 private:
  std::int32_t GetArgFromTensor(const Tensor& tensor) {
    CAFFE_ENFORCE(
        tensor.IsType<std::int32_t>() || tensor.IsType<std::int64_t>());
    std::int32_t val = -1;
    if (tensor.IsType<std::int32_t>()) {
      context_.template CopyToCPU<std::int32_t>(
          1, tensor.data<std::int32_t>(), &val);
    } else if (tensor.IsType<std::int64_t>()) {
      std::int64_t val_int64;
      context_.template CopyToCPU<std::int64_t>(
          1, tensor.data<std::int64_t>(), &val_int64);
      val = static_cast<std::int32_t>(val_int64);
    }
    return val;
  }

  template <typename T>
  bool DoTileGradient(
      const int outer_size,
      const int inner_size,
      const T* dY,
      T* dX) {
    if (inner_size == 1) {
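      // Fast path: view dY as an (outer_size x tiles_) array and sum over
      // the tiles dimension, producing one value per outer element.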
      const std::array<int, 2> dY_dims = {outer_size, tiles_};
      const std::array<int, 2> dX_dims = {outer_size, 1};
      math::ReduceSum<T, Context>(
          2, dY_dims.data(), dX_dims.data(), T(1), dY, dX, &context_);
    } else {
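      // General case: copy the first tile of every outer slice into dX, then
      // accumulate the remaining tiles_ - 1 tiles into it.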
      math::CopyMatrix<T, Context>(
          outer_size,
          inner_size,
          dY,
          inner_size * tiles_,
          dX,
          inner_size,
          &context_);
      for (const auto i : c10::irange(outer_size)) {
        const T* dY_ptr = dY + i * tiles_ * inner_size;
        T* dX_ptr = dX + i * inner_size;
        for (const auto j : c10::irange(1, tiles_)) {
          math::Add<T, Context>(
              inner_size, dX_ptr, dY_ptr + j * inner_size, dX_ptr, &context_);
        }
      }
    }
    return true;
  }

  std::int32_t tiles_;
  std::int32_t axis_;

  Tensor ones_;
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_TILE_OP_H_