#ifndef CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_
#define CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_

#include <vector>

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/conv_op_shared.h"
#include "caffe2/operators/conv_pool_op_base.h"
#include "caffe2/operators/locally_connected_op_util.h"

namespace caffe2 {

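// LocallyConnectedOp computes a locally connected layer: it shares the
// sliding-window geometry of a convolution, but the filter weights are
// untied, so every output location owns its own filter bank. Judging by
// the members below, the forward pass unfolds the input into a column
// buffer and performs a separate GEMM per output location.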
template <typename T, class Context>
class LocallyConnectedOp final : public ConvPoolOpBase<Context> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(Context);

  template <class... Args>
  explicit LocallyConnectedOp(Args&&... args)
      : ConvPoolOpBase<Context>(std::forward<Args>(args)...) {
    // Since this is the default locally connected implementation, we will
    // use CAFFE_ENFORCE instead of OPERATOR_NEEDS_FEATURE.
    CAFFE_ENFORCE(
        group_ == 1 || order_ == StorageOrder::NCHW,
        "Group locally connected only supports NCHW order right now.");
  }

  ~LocallyConnectedOp() = default;

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
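  // Order-specific forward implementations. A reasonable reading of the
  // signatures: X is unfolded into *column_buffer, transposed into
  // *column_transposed_buffer so each output location's columns are
  // contiguous for the per-location GEMMs, and the results are written to
  // Y_data; the buffers are passed in as pointers so they can be shared
  // across operators (see conv_op_shared.h).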
  void RunOnDeviceWithOrderNCHWImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* bias_data,
      T* Y_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* output_buffer);

  void RunOnDeviceWithOrderNHWCImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* bias_data,
      T* Y_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* Y_transposed_buffer);

  // Vector of ones used to add the bias via GEMM.
  Tensor bias_multiplier_{Context::GetDeviceType()};

  // Scratch buffers for the column unfold and layout transposes.
  Tensor column_buffer_{Context::GetDeviceType()};
  Tensor column_transposed_buffer_{Context::GetDeviceType()};
  Tensor Y_transposed_buffer_{Context::GetDeviceType()};

  // Input: X, W, b
  // Output: Y
  INPUT_TAGS(INPUT, FILTER, BIAS);
};

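// LocallyConnectedGradientOp computes the gradients of the locally
// connected layer: the filter gradient dW, the bias gradient db (unless
// no_bias is set), and optionally the input gradient dX.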
template <typename T, class Context>
class LocallyConnectedGradientOp final : public ConvPoolOpBase<Context> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(Context);

  template <class... Args>
  explicit LocallyConnectedGradientOp(Args&&... args)
      : ConvPoolOpBase<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(bool, "no_bias", no_bias_, false) {
    CAFFE_ENFORCE(
        !(no_bias_ && OutputSize() == 3),
        "If bias is not present, you should not have 3 grad outputs.");
    CAFFE_ENFORCE(
        group_ == 1 || order_ == StorageOrder::NCHW,
        "Group locally connected only supports NCHW order right now.");
  }

  ~LocallyConnectedGradientOp() = default;

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
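  // Order-specific backward implementations: presumably these re-unfold X
  // into the column buffers to compute dfilter, accumulate dbias from dY
  // via bias_multiplier_, and, when dX is requested, fold the per-location
  // GEMM results back into the input layout.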
  void RunOnDeviceWithOrderNCHWImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* dY_data,
      T* dfilter_data,
      T* dX_data,
      T* dbias_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* dY_transposed_buffer);

  void RunOnDeviceWithOrderNHWCImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* dY_data,
      T* dfilter_data,
      T* dX_data,
      T* dbias_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* dY_transposed_buffer);

  const bool no_bias_;

  // Vector of ones used to reduce dY into dbias via GEMM.
  Tensor bias_multiplier_{Context::GetDeviceType()};

  // Scratch buffers for the column unfold and layout transposes.
  Tensor column_buffer_{Context::GetDeviceType()};
  Tensor column_transposed_buffer_{Context::GetDeviceType()};
  Tensor dY_transposed_buffer_{Context::GetDeviceType()};

  // Input: X, W, dY
  // Output: dW, db (or dX when no_bias is set), and optionally dX
  INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
  OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_