// caffe2/operators/tt_linear_op.cc - platform/external/pytorch - Git at Google

 #include "caffe2/operators/tt_linear_op.h"

 namespace caffe2 {
 namespace {

 // Register the float/CPUContext instantiations of the TT operator and of its
 // TTLinearGradient companion under their operator names.
 REGISTER_CPU_OPERATOR(TT, TTLinearOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(TTLinearGradient, TTLinearGradientOp<float, CPUContext>);

 // Schema for the TT operator: three inputs (X, b, cores), one output (Y), and
 // three repeated-int arguments (inp_sizes, out_sizes, tt_ranks) describing the
 // shape of each core. The TT-layer is a low-rank decomposition of a fully
 // connected layer, so it uses far fewer parameters than the dense weight
 // matrix it replaces.
 OPERATOR_SCHEMA(TT)
     .NumInputs(3)
     .NumOutputs(1)
     .SetDoc(R"DOC(
 The TT-layer serves as a low-rank decomposition of a fully connected layer. The
 inputs are the same as to a fully connected layer, but the number of parameters
 are greatly reduced and forward computation time can be drastically reduced
 especially for layers with large weight matrices. The multiplication is computed
 as a product of the input vector with each of the cores that make up the TT
 layer. Given the input sizes (inp_sizes), output sizes(out_sizes), and the ranks
 of each of the cores (tt_ranks), the ith core will have size:

     inp_sizes[i] * tt_ranks[i] * tt_ranks[i + 1] * out_sizes[i].

 The complexity of the computation is dictated by the sizes of inp_sizes,
 out_sizes, and tt_ranks, where there is the trade off between accuracy of the
 low-rank decomposition and the speed of the computation.
 )DOC")
     .Arg(
         "inp_sizes",
         "(int[]) Input sizes of cores. Indicates the input size of "
         "the individual cores; the size of the input vector X must match the "
         "product of the inp_sizes array.")
     .Arg(
         "out_sizes",
         "(int[]) Output sizes of cores. Indicates the output size "
         "of the individual cores; the size of the output vector Y must match "
         "the product of the out_sizes array.")
     .Arg(
         "tt_ranks",
         "(int[]) Ranks of cores. Indicates the ranks of the "
         "individual cores; lower rank means larger compression, faster "
         "computation but reduce accuracy.")
     .Input(
         0,
         "X",
         "Input tensor from previous layer with size (M x K), where "
         "M is the batch size and K is the input size.")
     .Input(1, "b", "1D blob containing the bias vector")
     .Input(
         2,
         "cores",
         "1D blob containing each individual cores with sizes "
         "specified above.")
     // Y is produced by this operator, so it is described as this layer's
     // output rather than as something coming "from previous layer".
     .Output(
         0,
         "Y",
         "Output tensor of this layer with size (M x N), "
         "where M is the batch size and N is the output size.");

 // The gradient operator is declared with an empty schema: no input/output
 // counts, arguments, or documentation are specified here.
 OPERATOR_SCHEMA(TTLinearGradient);

 // As the macro name states, no gradient is wired up for TT yet.
 GRADIENT_NOT_IMPLEMENTED_YET(TT);
 } // namespace
 } // namespace caffe2
	#include "caffe2/operators/tt_linear_op.h"

	namespace caffe2 {
	namespace {

	// NOTE(review): this namespace repeats the TT / TTLinearGradient
	// registrations and schema that already appear earlier in this file.
	// It looks like an extraction duplicate; registering the same operator
	// name twice is presumably an error - confirm and keep only one copy.

	// Register the float/CPUContext instantiations of the TT operator and of its
	// TTLinearGradient companion under their operator names.
	REGISTER_CPU_OPERATOR(TT, TTLinearOp<float, CPUContext>);
	REGISTER_CPU_OPERATOR(TTLinearGradient, TTLinearGradientOp<float, CPUContext>);

	// Schema for the TT operator: three inputs (X, b, cores), one output (Y), and
	// three repeated-int arguments (inp_sizes, out_sizes, tt_ranks) describing the
	// shape of each core. The TT-layer is a low-rank decomposition of a fully
	// connected layer, so it uses far fewer parameters than the dense weight
	// matrix it replaces.
	OPERATOR_SCHEMA(TT)
	.NumInputs(3)
	.NumOutputs(1)
	.SetDoc(R"DOC(
	The TT-layer serves as a low-rank decomposition of a fully connected layer. The
	inputs are the same as to a fully connected layer, but the number of parameters
	are greatly reduced and forward computation time can be drastically reduced
	especially for layers with large weight matrices. The multiplication is computed
	as a product of the input vector with each of the cores that make up the TT
	layer. Given the input sizes (inp_sizes), output sizes(out_sizes), and the ranks
	of each of the cores (tt_ranks), the ith core will have size:

	inp_sizes[i] * tt_ranks[i] * tt_ranks[i + 1] * out_sizes[i].

	The complexity of the computation is dictated by the sizes of inp_sizes,
	out_sizes, and tt_ranks, where there is the trade off between accuracy of the
	low-rank decomposition and the speed of the computation.
	)DOC")
	.Arg(
	"inp_sizes",
	"(int[]) Input sizes of cores. Indicates the input size of "
	"the individual cores; the size of the input vector X must match the "
	"product of the inp_sizes array.")
	.Arg(
	"out_sizes",
	"(int[]) Output sizes of cores. Indicates the output size "
	"of the individual cores; the size of the output vector Y must match "
	"the product of the out_sizes array.")
	.Arg(
	"tt_ranks",
	"(int[]) Ranks of cores. Indicates the ranks of the "
	"individual cores; lower rank means larger compression, faster "
	"computation but reduce accuracy.")
	.Input(
	0,
	"X",
	"Input tensor from previous layer with size (M x K), where "
	"M is the batch size and K is the input size.")
	.Input(1, "b", "1D blob containing the bias vector")
	.Input(
	2,
	"cores",
	"1D blob containing each individual cores with sizes "
	"specified above.")
	// Y is produced by this operator, so it is described as this layer's
	// output rather than as something coming "from previous layer".
	.Output(
	0,
	"Y",
	"Output tensor of this layer with size (M x N), "
	"where M is the batch size and N is the output size.");

	// The gradient operator is declared with an empty schema: no input/output
	// counts, arguments, or documentation are specified here.
	OPERATOR_SCHEMA(TTLinearGradient);

	// As the macro name states, no gradient is wired up for TT yet.
	GRADIENT_NOT_IMPLEMENTED_YET(TT);
	} // namespace
	} // namespace caffe2