// caffe2/operators/numpy_tile_op.h - platform/external/pytorch - Git at Google

 #ifndef CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
 #define CAFFE2_OPERATORS_NUMPY_TILE_OP_H_

 #include "caffe2/core/common_omp.h"
 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/utils/math.h"
 #include "c10/util/irange.h"

 namespace caffe2 {

 // Tiles a tensor along every one of its axes (named after numpy.tile).
 //
 // Inputs:
 //   0: the tensor to tile.
 //   1: `repeats`, a 1-d int64 tensor with exactly one non-negative repeat
 //      count per axis of input 0.
 // Output:
 //   0: input 0 with axis `i` repeated `repeats[i]` times.
 template <class Context>
 class NumpyTileOp : public Operator<Context> {
  public:
   USE_OPERATOR_CONTEXT_FUNCTIONS;
   template <class... Args>
   explicit NumpyTileOp(Args&&... args)
       : Operator<Context>(std::forward<Args>(args)...) {}
   ~NumpyTileOp() override = default;

   // Validates `repeats`, then tiles the input one axis at a time.
   // Returns true on success; validation failures are raised through the
   // CAFFE_ENFORCE_* macros.
   bool RunOnDevice() override {
     const auto& input = Input(0);
     const auto& repeats = Input(1);

     // `repeats` must be a vector with one entry per axis of `input`, and
     // every entry must be non-negative.
     CAFFE_ENFORCE_EQ(repeats.dim(), 1, "repeats input must be a 1-d tensor");
     CAFFE_ENFORCE_EQ(
         repeats.numel(),
         input.dim(),
         "repeats input must have the same"
         " number of elements as `input` has dimensions.");
     const int64_t* repeats_data = repeats.template data<int64_t>();
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
     for (const auto i : c10::irange(repeats.numel())) {
       CAFFE_ENFORCE_GE(
           repeats_data[i], 0, "repeats entries must be non-negative");
     }

     auto* output = Output(0);

     // Ping-pong between two tensors: the scratch `buffer` and the real
     // output. Each tiled axis reads from `src`, writes into `dst`, and then
     // the two roles are swapped.
     Tensor* src = &buffer;
     Tensor* dst = output;
     src->CopyFrom(input);
     vector<int64_t> output_dims(input.sizes().vec());
     for (const auto i : c10::irange(repeats.numel())) {
       if (repeats_data[i] == 1) {
         // Repeating once leaves this axis unchanged.
         continue;
       }
       // size up to (and not including) axis
       const int64_t outer_dim = src->size_to_dim(i);
       // size from axis up
       const int64_t inner_dim = src->size_from_dim(i);

       // Temporarily view dst as 2-d; the real shape is restored below.
       dst->Resize(outer_dim, inner_dim * repeats_data[i]);

       /**
        * How this works:
        * Imagine a 2D tensor (matrix) of size 3x10, tiled 2 times.
        * - Tiling along axis 0 (row) means copying the entire 3x10 Matrix 2
        * times. outer_dim = 1 (empty product), inner_dim = 30.
        * - Tiling along axis 1 (column) means copying each row 2 times, then
        * proceed to the next row, until the end. outer_dim = 3, inner_dim = 10.
        */
       const char* src_data = static_cast<const char*>(src->raw_data());
       char* dst_data = static_cast<char*>(dst->raw_mutable_data(src->dtype()));

       DoTile(
           src->dtype(),
           src->itemsize(),
           outer_dim,
           inner_dim,
           repeats_data[i],
           src_data,
           dst_data);

       output_dims[i] *= repeats_data[i];
       dst->Reshape(output_dims);

       std::swap(src, dst);
     }

     // NB: because we have the swap at the end of the above loop, our real
     // result tensor is going to live in *src when we reach this line
     // whether we entered the loop or not :)
     if (output != src) {
       output->CopyFrom(*src);
     }

     return true;
   }

  private:
   // Writes each of the `outer_dim` consecutive chunks of `inner_dim` items
   // from `input_data` to `output_data` `num_tiles` times in a row.
   //
   // meta:        element type, forwarded to the context's copy routine.
   // item_size:   size of one element in bytes.
   // outer_dim:   number of chunks.
   // inner_dim:   number of items per chunk.
   // num_tiles:   how many times each chunk is emitted.
   // input_data:  start of the source bytes.
   // output_data: start of the destination bytes.
   //
   // Sizes are 64-bit so that large tensors are not truncated and the byte
   // stride is not computed in `int`.
   void DoTile(
       const TypeMeta meta,
       size_t item_size,
       int64_t outer_dim,
       int64_t inner_dim,
       int64_t num_tiles,
       const char* input_data,
       char* output_data) {
     const size_t chunk_bytes = static_cast<size_t>(inner_dim) * item_size;
     for (const auto i : c10::irange(outer_dim)) {
       (void)i; // Suppress unused variable warning
       for (const auto t : c10::irange(num_tiles)) {
         (void)t; // Suppress unused variable warning
         context_.CopyItemsSameDevice(meta, inner_dim, input_data, output_data);
         output_data += chunk_bytes;
       }
       input_data += chunk_bytes;
     }
   }

   // Scratch tensor used as the second half of the ping-pong in RunOnDevice.
   Tensor buffer{Context::GetDeviceType()};
 };

 } // namespace caffe2

 #endif // CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
	#ifndef CAFFE2_OPERATORS_NUMPY_TILE_OP_H_
	#define CAFFE2_OPERATORS_NUMPY_TILE_OP_H_

	#include "caffe2/core/common_omp.h"
	#include "caffe2/core/context.h"
	#include "caffe2/core/logging.h"
	#include "caffe2/core/operator.h"
	#include "caffe2/utils/math.h"
	#include "c10/util/irange.h"

	namespace caffe2 {

	// Tiles a tensor along every one of its axes (named after numpy.tile).
	//
	// Inputs:
	//   0: the tensor to tile.
	//   1: `repeats`, a 1-d int64 tensor with exactly one non-negative repeat
	//      count per axis of input 0.
	// Output:
	//   0: input 0 with axis `i` repeated `repeats[i]` times.
	template <class Context>
	class NumpyTileOp : public Operator<Context> {
	 public:
	  USE_OPERATOR_CONTEXT_FUNCTIONS;
	  template <class... Args>
	  explicit NumpyTileOp(Args&&... args)
	      : Operator<Context>(std::forward<Args>(args)...) {}
	  ~NumpyTileOp() override = default;

	  // Validates `repeats`, then tiles the input one axis at a time.
	  // Returns true on success; validation failures are raised through the
	  // CAFFE_ENFORCE_* macros.
	  bool RunOnDevice() override {
	    const auto& input = Input(0);
	    const auto& repeats = Input(1);

	    // `repeats` must be a vector with one entry per axis of `input`, and
	    // every entry must be non-negative.
	    CAFFE_ENFORCE_EQ(repeats.dim(), 1, "repeats input must be a 1-d tensor");
	    CAFFE_ENFORCE_EQ(
	        repeats.numel(),
	        input.dim(),
	        "repeats input must have the same"
	        " number of elements as `input` has dimensions.");
	    const int64_t* repeats_data = repeats.template data<int64_t>();
	    // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
	    for (const auto i : c10::irange(repeats.numel())) {
	      CAFFE_ENFORCE_GE(
	          repeats_data[i], 0, "repeats entries must be non-negative");
	    }

	    auto* output = Output(0);

	    // Ping-pong between two tensors: the scratch `buffer` and the real
	    // output. Each tiled axis reads from `src`, writes into `dst`, and then
	    // the two roles are swapped. Both must be pointers for the swap and the
	    // final identity comparison against `output` to work.
	    Tensor* src = &buffer;
	    Tensor* dst = output;
	    src->CopyFrom(input);
	    vector<int64_t> output_dims(input.sizes().vec());
	    for (const auto i : c10::irange(repeats.numel())) {
	      if (repeats_data[i] == 1) {
	        // Repeating once leaves this axis unchanged.
	        continue;
	      }
	      // size up to (and not including) axis
	      const int64_t outer_dim = src->size_to_dim(i);
	      // size from axis up
	      const int64_t inner_dim = src->size_from_dim(i);

	      // Temporarily view dst as 2-d; the real shape is restored below.
	      dst->Resize(outer_dim, inner_dim * repeats_data[i]);

	      /**
	       * How this works:
	       * Imagine a 2D tensor (matrix) of size 3x10, tiled 2 times.
	       * - Tiling along axis 0 (row) means copying the entire 3x10 Matrix 2
	       * times. outer_dim = 1 (empty product), inner_dim = 30.
	       * - Tiling along axis 1 (column) means copying each row 2 times, then
	       * proceed to the next row, until the end. outer_dim = 3, inner_dim = 10.
	       */
	      const char* src_data = static_cast<const char*>(src->raw_data());
	      char* dst_data = static_cast<char*>(dst->raw_mutable_data(src->dtype()));

	      DoTile(
	          src->dtype(),
	          src->itemsize(),
	          outer_dim,
	          inner_dim,
	          repeats_data[i],
	          src_data,
	          dst_data);

	      output_dims[i] *= repeats_data[i];
	      dst->Reshape(output_dims);

	      std::swap(src, dst);
	    }

	    // NB: because we have the swap at the end of the above loop, our real
	    // result tensor is going to live in *src when we reach this line
	    // whether we entered the loop or not :)
	    if (output != src) {
	      output->CopyFrom(*src);
	    }

	    return true;
	  }

	 private:
	  // Writes each of the `outer_dim` consecutive chunks of `inner_dim` items
	  // from `input_data` to `output_data` `num_tiles` times in a row.
	  //
	  // meta:        element type, forwarded to the context's copy routine.
	  // item_size:   size of one element in bytes.
	  // outer_dim:   number of chunks.
	  // inner_dim:   number of items per chunk.
	  // num_tiles:   how many times each chunk is emitted.
	  // input_data:  start of the source bytes.
	  // output_data: start of the destination bytes.
	  //
	  // Sizes are 64-bit so that large tensors are not truncated and the byte
	  // stride is not computed in `int`.
	  void DoTile(
	      const TypeMeta meta,
	      size_t item_size,
	      int64_t outer_dim,
	      int64_t inner_dim,
	      int64_t num_tiles,
	      const char* input_data,
	      char* output_data) {
	    const size_t chunk_bytes = static_cast<size_t>(inner_dim) * item_size;
	    for (const auto i : c10::irange(outer_dim)) {
	      (void)i; // Suppress unused variable warning
	      for (const auto t : c10::irange(num_tiles)) {
	        (void)t; // Suppress unused variable warning
	        context_.CopyItemsSameDevice(meta, inner_dim, input_data, output_data);
	        output_data += chunk_bytes;
	      }
	      input_data += chunk_bytes;
	    }
	  }

	  // Scratch tensor used as the second half of the ping-pong in RunOnDevice.
	  Tensor buffer{Context::GetDeviceType()};
	};

	} // namespace caffe2

	#endif // CAFFE2_OPERATORS_NUMPY_TILE_OP_H_