| #include "caffe2/operators/tt_linear_op.h" |
| |
| namespace caffe2 { |
| namespace { |
| |
| // Register the float/CPU operator classes under their operator names: |
| // "TT" maps to TTLinearOp and "TTLinearGradient" maps to TTLinearGradientOp. |
| REGISTER_CPU_OPERATOR(TT, TTLinearOp<float, CPUContext>); |
| REGISTER_CPU_OPERATOR(TTLinearGradient, TTLinearGradientOp<float, CPUContext>); |
| |
| // Schema for the TT operator. It declares exactly three inputs (X, b, cores), |
| // one output (Y), and three integer-array arguments (inp_sizes, out_sizes, |
| // tt_ranks) that describe how the cores are laid out. |
| OPERATOR_SCHEMA(TT) |
| .NumInputs(3) |
| .NumOutputs(1) |
| .SetDoc(R"DOC( |
| The TT-layer serves as a low-rank decomposition of a fully connected layer. The |
| inputs are the same as to a fully connected layer, but the number of parameters |
| are greatly reduced and forward computation time can be drastically reduced |
| especially for layers with large weight matrices. The multiplication is computed |
| as a product of the input vector with each of the cores that make up the TT |
| layer. Given the input sizes (inp_sizes), output sizes(out_sizes), and the ranks |
| of each of the cores (tt_ranks), the ith core will have size: |
| |
| inp_sizes[i] * tt_ranks[i] * tt_ranks[i + 1] * out_sizes[i]. |
| |
| The complexity of the computation is dictated by the sizes of inp_sizes, |
| out_sizes, and tt_ranks, where there is the trade off between accuracy of the |
| low-rank decomposition and the speed of the computation. |
| )DOC") |
| .Arg( |
| "inp_sizes", |
| "(int[]) Input sizes of cores. Indicates the input size of " |
| "the individual cores; the size of the input vector X must match the " |
| "product of the inp_sizes array.") |
| .Arg( |
| "out_sizes", |
| "(int[]) Output sizes of cores. Indicates the output size " |
| "of the individual cores; the size of the output vector Y must match " |
| "the product of the out_sizes array.") |
| .Arg( |
| "tt_ranks", |
| "(int[]) Ranks of cores. Indicates the ranks of the " |
| "individual cores; lower rank means larger compression, faster " |
| "computation but reduce accuracy.") |
| .Input( |
| 0, |
| "X", |
| "Input tensor from previous layer with size (M x K), where " |
| "M is the batch size and K is the input size.") |
| .Input(1, "b", "1D blob containing the bias vector") |
| .Input( |
| 2, |
| "cores", |
| "1D blob containing each individual cores with sizes " |
| "specified above.") |
| // Y is produced by this operator, so it is described as this layer's |
| // output rather than as a tensor coming from the previous layer. |
| .Output( |
| 0, |
| "Y", |
| "Output tensor of this layer with size (M x N), " |
| "where M is the batch size and N is the output size."); |
| |
| // The gradient operator gets a bare schema: no input/output counts, arguments |
| // or documentation are declared for it here. |
| OPERATOR_SCHEMA(TTLinearGradient); |
| |
| // TT is explicitly marked as having no gradient implemented yet, even though |
| // a TTLinearGradient operator is registered above. |
| GRADIENT_NOT_IMPLEMENTED_YET(TT); |
| } // namespace |
| } // namespace caffe2 |