caffe2/operators/batch_permutation_op.cc - platform/external/pytorch - Git at Google

 #include "caffe2/operators/batch_permutation_op.h"

 #include <cstring>
 #include <vector>

 #ifdef USE_MKLDNN
 #include <caffe2/ideep/operators/operator_fallback_ideep.h>
 #include <caffe2/ideep/utils/ideep_operator.h>
 #endif

 namespace caffe2 {

 // Copies N rows of K floats each from `src` into `dst`, reordered by
 // `indices`.
 //
 //   forwards == true : dst row n = src row indices[n]
 //   forwards == false: dst row n = src row i, where indices[i] == n
 //                      (i.e. the inverse mapping is applied)
 //
 // Every indices[n] must lie in [0, N); this is NOT checked here. In the
 // backward direction a row n that no index maps to falls back to src row 0,
 // because the inverse table is zero-initialized. `src` and `dst` must not
 // overlap, since rows are copied with memcpy.
 template <bool forwards>
 void batch_permutation_loop(
     const int N,
     const int K,
     const float* src,
     const int* indices,
     float* dst) {
   // Nothing to copy. Also keeps a negative N away from the vector constructor.
   if (N <= 0 || K <= 0) {
     return;
   }
   // Offsets are computed in size_t: `row * K` in int overflows as soon as the
   // tensor holds more than INT_MAX elements.
   const std::size_t rowLen = static_cast<std::size_t>(K);
   const std::size_t numBytes = rowLen * sizeof(float);
   if (forwards) {
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
     for (int n = 0; n < N; n++) {
       const std::size_t dstRow = static_cast<std::size_t>(n);
       const std::size_t srcRow = static_cast<std::size_t>(indices[n]);
       std::memcpy(dst + dstRow * rowLen, src + srcRow * rowLen, numBytes);
     }
   } else {
     // Build the inverse mapping: inverse[indices[i]] == i.
     std::vector<int> inverse(N);
     for (int i = 0; i < N; ++i) {
       inverse[indices[i]] = i;
     }
     for (int n = 0; n < N; n++) {
       const std::size_t dstRow = static_cast<std::size_t>(n);
       const std::size_t srcRow = static_cast<std::size_t>(inverse[n]);
       std::memcpy(dst + dstRow * rowLen, src + srcRow * rowLen, numBytes);
     }
   }
 }

 // CPU forward pass: Y row n = X row indices[n] along the first (batch)
 // dimension.
 //
 // Inputs:  0 = X (float), 1 = indices (1-d, int).
 // Output:  0 = Y, same sizes as X.
 // Fails a CAFFE_ENFORCE when X has no batch dimension, indices is not 1-d,
 // the batch sizes disagree, or any index lies outside [0, batch size).
 template <>
 bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
   auto& X = Input(0);
   auto& indices = Input(1);

   CAFFE_ENFORCE(X.dim() >= 1, "X must have at least one dimension");
   CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
   CAFFE_ENFORCE(
       X.dim32(0) == indices.dim32(0),
       "X.dim32(0) must be equal to indices.dim32(0)",
       "(",
       X.dim32(0),
       " vs. ",
       indices.dim32(0),
       ")");

   auto* Y = Output(0, X.sizes(), at::dtype<float>());

   const int N = X.dim32(0);
   if (N > 0) {
     const int* indices_data = indices.data<int>();
     // The copy loop trusts the indices, so reject anything that would make
     // it read outside X.
     for (int n = 0; n < N; ++n) {
       CAFFE_ENFORCE(
           indices_data[n] >= 0 && indices_data[n] < N,
           "indices[",
           n,
           "] = ",
           indices_data[n],
           " is out of range [0, ",
           N,
           ")");
     }
     batch_permutation_loop<true>(
         N,
         // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
         X.numel() / N,
         X.data<float>(),
         indices_data,
         Y->mutable_data<float>());
   }
   return true;
 }

 // CPU backward pass: routes each row of dY back to the row of X it was
 // gathered from, i.e. dX row indices[i] = dY row i.
 //
 // Inputs:  0 = indices (1-d, int), 1 = dY (float).
 // Output:  0 = dX, same sizes as dY.
 // Performs the same input validation as the forward operator; without it a
 // short or out-of-range indices tensor would be read and written out of
 // bounds while the inverse mapping is built.
 template <>
 bool BatchPermutationGradientOp<float, CPUContext>::RunOnDevice() {
   auto& indices = Input(0);
   auto& dY = Input(1);

   CAFFE_ENFORCE(dY.dim() >= 1, "dY must have at least one dimension");
   CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
   CAFFE_ENFORCE(
       dY.dim32(0) == indices.dim32(0),
       "dY.dim32(0) must be equal to indices.dim32(0)",
       "(",
       dY.dim32(0),
       " vs. ",
       indices.dim32(0),
       ")");

   auto* dX = Output(0, dY.sizes(), at::dtype<float>());

   const int N = dY.dim32(0);
   if (N > 0) {
     const int* indices_data = indices.data<int>();
     for (int n = 0; n < N; ++n) {
       CAFFE_ENFORCE(
           indices_data[n] >= 0 && indices_data[n] < N,
           "indices[",
           n,
           "] = ",
           indices_data[n],
           " is out of range [0, ",
           N,
           ")");
     }
     batch_permutation_loop<false>(
         N,
         // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
         dY.numel() / N,
         dY.data<float>(),
         indices_data,
         dX->mutable_data<float>());
   }
   return true;
 }

 // Only when USE_MKLDNN is defined: register BatchPermutation for IDEEP as the
 // CPU float implementation wrapped in IDEEPFallbackOp.
 #ifdef USE_MKLDNN
 REGISTER_IDEEP_OPERATOR(
     BatchPermutation,
     IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
 #endif

 // CPU registrations of the forward operator and of its gradient operator,
 // both using the float/CPUContext specializations defined in this file.
 REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(
     BatchPermutationGradient,
     BatchPermutationGradientOp<float, CPUContext>);

 // Schema of the forward operator.
 // Input: X, indices; Output: Y
 OPERATOR_SCHEMA(BatchPermutation)
     .NumInputs(2)
     .NumOutputs(1)
     .SetDoc(R"DOC(
 Batch permutation of an input tensor X given input indices. First dimension of
 X equals batch size N. The indices tensor stores a permutation of [0, N).
 The output Y is a tensor of same shape as X, with data re-ordered according to
 the indices within the batch size: Y[i] = X[indices[i]].

 Example of batch permutation on a 2-D tensor with batch size 4:
   X = [
     [1, 5, 2, 3, 4, 6, 0],
     [4, 3, 3, 5, 2, 3, 1],
     [2, 2, 3, 6, 0, 0, 1],
     [0, 0, 1, 1, 2, 2, 3]
   ]
   indices = [2, 0, 1, 3]
   Y = [
     [2, 2, 3, 6, 0, 0, 1],
     [1, 5, 2, 3, 4, 6, 0],
     [4, 3, 3, 5, 2, 3, 1],
     [0, 0, 1, 1, 2, 2, 3]
   ]

 Example of batch permutation on a 3-D tensor with batch size 4:
   X = [
     [[1, 5, 2], [3, 4, 6]],
     [[4, 3, 3], [5, 2, 3]],
     [[2, 2, 3], [6, 0, 0]],
     [[0, 0, 1], [1, 2, 2]]
   ]
   indices = [2, 0, 1, 3]
   Y = [
     [[2, 2, 3], [6, 0, 0]],
     [[1, 5, 2], [3, 4, 6]],
     [[4, 3, 3], [5, 2, 3]],
     [[0, 0, 1], [1, 2, 2]]
   ]
 )DOC")
     .Input(0, "X", "Input tensor, where 1st dimension equals batch size")
     .Input(1, "indices", "Input indices of batch to permute")
     .Output(0, "Y", "Output permuted tensor");
 // Schema of the gradient operator.
 // Input: indices, dY (aka "gradOutput"); Output: dX (aka "gradInput")
 OPERATOR_SCHEMA(BatchPermutationGradient)
     .NumInputs(2)
     .NumOutputs(1)
     .Input(0, "indices", "Input indices that were given to BatchPermutation")
     .Input(1, "dY", "Gradient with respect to the output Y")
     .Output(0, "dX", "Gradient with respect to the input X");

 // Gradient maker for BatchPermutation. It emits one BatchPermutationGradient
 // operator whose inputs are the forward op's second input (indices) and the
 // gradient of the forward output, and whose output is the gradient of the
 // forward op's first input.
 class GetBatchPermutationGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
   vector<OperatorDef> GetGradientDefs() override {
     const vector<string> grad_op_inputs{I(1), GO(0)};
     const vector<string> grad_op_outputs{GI(0)};
     return SingleGradientDef(
         "BatchPermutationGradient", "", grad_op_inputs, grad_op_outputs);
   }
 };

 // Attach the gradient maker above to the BatchPermutation operator.
 REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient);

 } // namespace caffe2

 // Comma-free alias for the CPU float operator type; presumably introduced so
 // the type can be handed to the macro below as a single argument.
 using BatchPermutationOpFloatCPU =
     caffe2::BatchPermutationOp<float, caffe2::CPUContext>;

 // Exports the CPU BatchPermutation operator under the function schema string
 // given as the second argument (two Tensor inputs, one Tensor result).
 C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
     BatchPermutation,
     "_caffe2::BatchPermutation(Tensor X, Tensor indices) -> Tensor",
     BatchPermutationOpFloatCPU);
	#include "caffe2/operators/batch_permutation_op.h"

	#include <cstring>
	#include <vector>

	#ifdef USE_MKLDNN
	#include <caffe2/ideep/operators/operator_fallback_ideep.h>
	#include <caffe2/ideep/utils/ideep_operator.h>
	#endif

	namespace caffe2 {

	// Copies N rows of K floats each from `src` into `dst`, reordered by
	// `indices`.
	//
	//   forwards == true : dst row n = src row indices[n]
	//   forwards == false: dst row n = src row i, where indices[i] == n
	//                      (i.e. the inverse mapping is applied)
	//
	// Every indices[n] must lie in [0, N); this is NOT checked here. In the
	// backward direction a row n that no index maps to falls back to src row 0,
	// because the inverse table is zero-initialized. `src` and `dst` must not
	// overlap, since rows are copied with memcpy.
	template <bool forwards>
	void batch_permutation_loop(
	    const int N,
	    const int K,
	    const float* src,
	    const int* indices,
	    float* dst) {
	  // Nothing to copy. Also keeps a negative N away from the vector constructor.
	  if (N <= 0 || K <= 0) {
	    return;
	  }
	  // Offsets are computed in size_t: `row * K` in int overflows as soon as the
	  // tensor holds more than INT_MAX elements.
	  const std::size_t rowLen = static_cast<std::size_t>(K);
	  const std::size_t numBytes = rowLen * sizeof(float);
	  if (forwards) {
	#ifdef _OPENMP
	#pragma omp parallel for
	#endif
	    for (int n = 0; n < N; n++) {
	      const std::size_t dstRow = static_cast<std::size_t>(n);
	      const std::size_t srcRow = static_cast<std::size_t>(indices[n]);
	      std::memcpy(dst + dstRow * rowLen, src + srcRow * rowLen, numBytes);
	    }
	  } else {
	    // Build the inverse mapping: inverse[indices[i]] == i.
	    std::vector<int> inverse(N);
	    for (int i = 0; i < N; ++i) {
	      inverse[indices[i]] = i;
	    }
	    for (int n = 0; n < N; n++) {
	      const std::size_t dstRow = static_cast<std::size_t>(n);
	      const std::size_t srcRow = static_cast<std::size_t>(inverse[n]);
	      std::memcpy(dst + dstRow * rowLen, src + srcRow * rowLen, numBytes);
	    }
	  }
	}

	// CPU forward pass: Y row n = X row indices[n] along the first (batch)
	// dimension.
	//
	// Inputs:  0 = X (float), 1 = indices (1-d, int).
	// Output:  0 = Y, same sizes as X.
	// Fails a CAFFE_ENFORCE when X has no batch dimension, indices is not 1-d,
	// the batch sizes disagree, or any index lies outside [0, batch size).
	template <>
	bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
	  auto& X = Input(0);
	  auto& indices = Input(1);

	  CAFFE_ENFORCE(X.dim() >= 1, "X must have at least one dimension");
	  CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
	  CAFFE_ENFORCE(
	      X.dim32(0) == indices.dim32(0),
	      "X.dim32(0) must be equal to indices.dim32(0)",
	      "(",
	      X.dim32(0),
	      " vs. ",
	      indices.dim32(0),
	      ")");

	  auto* Y = Output(0, X.sizes(), at::dtype<float>());

	  const int N = X.dim32(0);
	  if (N > 0) {
	    const int* indices_data = indices.data<int>();
	    // The copy loop trusts the indices, so reject anything that would make
	    // it read outside X.
	    for (int n = 0; n < N; ++n) {
	      CAFFE_ENFORCE(
	          indices_data[n] >= 0 && indices_data[n] < N,
	          "indices[",
	          n,
	          "] = ",
	          indices_data[n],
	          " is out of range [0, ",
	          N,
	          ")");
	    }
	    batch_permutation_loop<true>(
	        N,
	        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
	        X.numel() / N,
	        X.data<float>(),
	        indices_data,
	        Y->mutable_data<float>());
	  }
	  return true;
	}

	// CPU backward pass: routes each row of dY back to the row of X it was
	// gathered from, i.e. dX row indices[i] = dY row i.
	//
	// Inputs:  0 = indices (1-d, int), 1 = dY (float).
	// Output:  0 = dX, same sizes as dY.
	// Performs the same input validation as the forward operator; without it a
	// short or out-of-range indices tensor would be read and written out of
	// bounds while the inverse mapping is built.
	template <>
	bool BatchPermutationGradientOp<float, CPUContext>::RunOnDevice() {
	  auto& indices = Input(0);
	  auto& dY = Input(1);

	  CAFFE_ENFORCE(dY.dim() >= 1, "dY must have at least one dimension");
	  CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
	  CAFFE_ENFORCE(
	      dY.dim32(0) == indices.dim32(0),
	      "dY.dim32(0) must be equal to indices.dim32(0)",
	      "(",
	      dY.dim32(0),
	      " vs. ",
	      indices.dim32(0),
	      ")");

	  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

	  const int N = dY.dim32(0);
	  if (N > 0) {
	    const int* indices_data = indices.data<int>();
	    for (int n = 0; n < N; ++n) {
	      CAFFE_ENFORCE(
	          indices_data[n] >= 0 && indices_data[n] < N,
	          "indices[",
	          n,
	          "] = ",
	          indices_data[n],
	          " is out of range [0, ",
	          N,
	          ")");
	    }
	    batch_permutation_loop<false>(
	        N,
	        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
	        dY.numel() / N,
	        dY.data<float>(),
	        indices_data,
	        dX->mutable_data<float>());
	  }
	  return true;
	}

	// Only when USE_MKLDNN is defined: register BatchPermutation for IDEEP as the
	// CPU float implementation wrapped in IDEEPFallbackOp.
	#ifdef USE_MKLDNN
	REGISTER_IDEEP_OPERATOR(
	BatchPermutation,
	IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
	#endif

	// CPU registrations of the forward operator and of its gradient operator,
	// both using the float/CPUContext specializations defined in this file.
	REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
	REGISTER_CPU_OPERATOR(
	BatchPermutationGradient,
	BatchPermutationGradientOp<float, CPUContext>);

	// Schema of the forward operator.
	// Input: X, indices; Output: Y
	OPERATOR_SCHEMA(BatchPermutation)
	    .NumInputs(2)
	    .NumOutputs(1)
	    .SetDoc(R"DOC(
	Batch permutation of an input tensor X given input indices. First dimension of
	X equals batch size N. The indices tensor stores a permutation of [0, N).
	The output Y is a tensor of same shape as X, with data re-ordered according to
	the indices within the batch size: Y[i] = X[indices[i]].

	Example of batch permutation on a 2-D tensor with batch size 4:
	  X = [
	    [1, 5, 2, 3, 4, 6, 0],
	    [4, 3, 3, 5, 2, 3, 1],
	    [2, 2, 3, 6, 0, 0, 1],
	    [0, 0, 1, 1, 2, 2, 3]
	  ]
	  indices = [2, 0, 1, 3]
	  Y = [
	    [2, 2, 3, 6, 0, 0, 1],
	    [1, 5, 2, 3, 4, 6, 0],
	    [4, 3, 3, 5, 2, 3, 1],
	    [0, 0, 1, 1, 2, 2, 3]
	  ]

	Example of batch permutation on a 3-D tensor with batch size 4:
	  X = [
	    [[1, 5, 2], [3, 4, 6]],
	    [[4, 3, 3], [5, 2, 3]],
	    [[2, 2, 3], [6, 0, 0]],
	    [[0, 0, 1], [1, 2, 2]]
	  ]
	  indices = [2, 0, 1, 3]
	  Y = [
	    [[2, 2, 3], [6, 0, 0]],
	    [[1, 5, 2], [3, 4, 6]],
	    [[4, 3, 3], [5, 2, 3]],
	    [[0, 0, 1], [1, 2, 2]]
	  ]
	)DOC")
	    .Input(0, "X", "Input tensor, where 1st dimension equals batch size")
	    .Input(1, "indices", "Input indices of batch to permute")
	    .Output(0, "Y", "Output permuted tensor");
	// Schema of the gradient operator.
	// Input: indices, dY (aka "gradOutput"); Output: dX (aka "gradInput")
	OPERATOR_SCHEMA(BatchPermutationGradient)
	    .NumInputs(2)
	    .NumOutputs(1)
	    .Input(0, "indices", "Input indices that were given to BatchPermutation")
	    .Input(1, "dY", "Gradient with respect to the output Y")
	    .Output(0, "dX", "Gradient with respect to the input X");

	// Gradient maker for BatchPermutation. It emits one BatchPermutationGradient
	// operator whose inputs are the forward op's second input (indices) and the
	// gradient of the forward output, and whose output is the gradient of the
	// forward op's first input.
	class GetBatchPermutationGradient : public GradientMakerBase {
	  using GradientMakerBase::GradientMakerBase;
	  vector<OperatorDef> GetGradientDefs() override {
	    const vector<string> grad_op_inputs{I(1), GO(0)};
	    const vector<string> grad_op_outputs{GI(0)};
	    return SingleGradientDef(
	        "BatchPermutationGradient", "", grad_op_inputs, grad_op_outputs);
	  }
	};

	// Attach the gradient maker above to the BatchPermutation operator.
	REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient);

	} // namespace caffe2

	// Comma-free alias for the CPU float operator type; presumably introduced so
	// the type can be handed to the macro below as a single argument.
	using BatchPermutationOpFloatCPU =
	caffe2::BatchPermutationOp<float, caffe2::CPUContext>;

	// Exports the CPU BatchPermutation operator under the function schema string
	// given as the second argument (two Tensor inputs, one Tensor result).
	C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
	BatchPermutation,
	"_caffe2::BatchPermutation(Tensor X, Tensor indices) -> Tensor",
	BatchPermutationOpFloatCPU);