| #include "caffe2/operators/sequence_ops.h" |
| #include "caffe2/core/operator.h" |
| #include "caffe2/core/tensor.h" |
| |
| namespace caffe2 { |
| |
| vector<TensorShape> TensorInferenceForAddPadding( |
| const OperatorDef& def, |
| const vector<TensorShape>& in) { |
| ArgumentHelper helper(def); |
| const int padding_width = helper.GetSingleArgument<int>("padding_width", 1); |
  const int end_padding_width =
      helper.GetSingleArgument<int>("end_padding_width", padding_width);
| CAFFE_ENFORCE_GT(in.size(), 0); |
| CAFFE_ENFORCE_GE(in[0].dims_size(), 1); |
| if (in.size() > 1) { |
| CAFFE_ENFORCE_EQ(in[1].dims_size(), 1); |
| } |
| |
| const auto num_paddings = (in.size() == 1 ? 1 : in[1].dims(0)); |
| vector<int> out_shape(in[0].dims().begin(), in[0].dims().end()); |
| out_shape[0] += (padding_width + end_padding_width) * num_paddings; |
| |
| if (def.output_size() == 1) { |
| return vector<TensorShape>{CreateTensorShape(out_shape, in[0].data_type())}; |
| } else { |
| return vector<TensorShape>{ |
| CreateTensorShape(out_shape, in[0].data_type()), |
| CreateTensorShape(vector<int>(1, num_paddings), TensorProto::INT32)}; |
| } |
| } |
| |
| template <> |
| template <typename T> |
| void GatherPaddingOp<CPUContext>::GatherPadding( |
| const int outer_size, |
| const int lengths_size, |
| const int block_size, |
| const int pad_width, |
| const T* in_ptr, |
| const int* lengths_ptr, |
| T* padding_start_ptr, |
| T* padding_end_ptr) { |
| CAFFE_ENFORCE( |
| (!std::is_same<bool, T>::value), |
| "GatherPadding should not be executed on an input of type bool, as " |
| "addition is not properly defined with booleans."); |
| int64_t total_length = 0; |
| for (int i = 0; i < lengths_size; ++i) { |
| // check total length consistency |
| const auto length = lengths_ptr[i]; |
| total_length += length; |
| CAFFE_ENFORCE_LE(total_length, outer_size); |
| // accumulate start paddings |
| for (int j = 0; j < startPaddingWidth_; ++j) { |
| for (int k = 0; k < block_size; ++k) { |
| // Note: MSVC warns about unsafe use of type bool in operation. |
| // This is now guarded by a CAFFE_ENFORCE so we can suppress it. |
| #pragma warning(suppress: 4804) |
| padding_start_ptr[k] += in_ptr[k]; |
| } |
| in_ptr += block_size; |
| } |
| in_ptr += block_size * (length - pad_width); |
| // accumulate end paddings |
| for (int j = 0; j < endPaddingWidth_; ++j) { |
| for (int k = 0; k < block_size; ++k) { |
| #pragma warning(suppress: 4804) |
| padding_end_ptr[k] += in_ptr[k]; |
| } |
| in_ptr += block_size; |
| } |
| } |
| } |
| |
| template <> |
| template <typename T> |
| bool RemovePaddingOp<CPUContext>::DoRunWithType() { |
| const auto& in = Input(0); |
| CAFFE_ENFORCE_GE(in.dim(), 1); |
| const int32_t outer_size = in.sizes()[0]; |
| const auto block_size = std::accumulate( |
| // NOLINTNEXTLINE(modernize-use-transparent-functors) |
| in.sizes().begin() + 1, in.sizes().end(), 1, std::multiplies<int64_t>()); |
| const auto pad_width = startPaddingWidth_ + endPaddingWidth_; |
| |
  // If no lengths input is provided, assume the data is a single full-span entry.
| const int32_t* lengths_ptr = &outer_size; |
| int64_t lengths_size = 1; |
| if (InputSize() > 1) { |
| const auto& lengths = Input(1); |
| lengths_ptr = lengths.data<int32_t>(); |
| lengths_size = lengths.numel(); |
| } |
| |
| auto out_dims = in.sizes().vec(); |
| out_dims[0] -= pad_width * lengths_size; |
| auto* out = Output(0, std::move(out_dims), at::dtype<T>()); |
| |
| const auto* in_ptr = in.template data<T>(); |
| auto* out_ptr = out->template mutable_data<T>(); |
| int64_t total_length = 0; |
| for (int i = 0; i < lengths_size; ++i) { |
| // check that total length is consistent |
| const auto length = lengths_ptr[i]; |
| total_length += length; |
| CAFFE_ENFORCE_LE(total_length, outer_size); |
| std::copy( |
| in_ptr + block_size * startPaddingWidth_, |
| in_ptr + block_size * (length - endPaddingWidth_), |
| out_ptr); |
| in_ptr += block_size * length; |
| out_ptr += block_size * (length - pad_width); |
| } |
| if (OutputSize() == 1) { |
| return true; |
| } |
| |
| auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>()); |
| std::transform( |
| lengths_ptr, |
| lengths_ptr + lengths_size, |
| lengths_out->template mutable_data<int32_t>(), |
| [pad_width](int32_t x) { return x - pad_width; }); |
| return true; |
| } |
| |
| template <> |
| template <typename T> |
| bool AddPaddingOp<CPUContext>::MakePadding( |
| const T* in_ptr, |
| T* out_ptr, |
| const int32_t* lengths_ptr, |
| int32_t lengths_size, |
| int32_t outer_size, |
| const T* padding_start_ptr, |
| const T* padding_end_ptr, |
| int64_t block_size) { |
| if (!lengths_ptr) { |
| lengths_ptr = &outer_size; |
| } |
| |
| int64_t total_length = 0; |
| for (int i = 0; i < lengths_size; ++i) { |
| // check that total length is consistent |
| const auto length = lengths_ptr[i]; |
| total_length += length; |
| CAFFE_ENFORCE_LE(total_length, outer_size); |
| // copy padding before |
| if (!padding_start_ptr) { |
| memset(out_ptr, 0, block_size * startPaddingWidth_ * sizeof(T)); |
| out_ptr += block_size * startPaddingWidth_; |
| } else { |
| for (int j = 0; j < startPaddingWidth_; ++j) { |
| std::copy(padding_start_ptr, padding_start_ptr + block_size, out_ptr); |
| out_ptr += block_size; |
| } |
| } |
| // copy payload |
| const auto num_elems = block_size * length; |
| std::copy(in_ptr, in_ptr + num_elems, out_ptr); |
| in_ptr += num_elems; |
| out_ptr += num_elems; |
| // copy padding after |
| if (!padding_end_ptr) { |
| memset(out_ptr, 0, block_size * endPaddingWidth_ * sizeof(T)); |
| out_ptr += block_size * endPaddingWidth_; |
| } else { |
| for (int j = 0; j < endPaddingWidth_; ++j) { |
| std::copy(padding_end_ptr, padding_end_ptr + block_size, out_ptr); |
| out_ptr += block_size; |
| } |
| } |
| } |
| if (OutputSize() == 1) { |
| return true; |
| } |
| |
| auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>()); |
| const auto pad_width = startPaddingWidth_ + endPaddingWidth_; |
| std::transform( |
| lengths_ptr, |
| lengths_ptr + lengths_size, |
| lengths_out->template mutable_data<int32_t>(), |
| [pad_width](int32_t x) { return x + pad_width; }); |
| return true; |
| } |
| |
| template <> |
| bool PadEmptySamplesOp<CPUContext>::RunOnDevice() { |
  CAFFE_ENFORCE(InputSize() >= 1, "Input size must be no less than 1");
  auto& lengths = Input(0);
  CAFFE_ENFORCE(lengths.dim() == 1, "LENGTH should be 1-D");
  auto* lengthsPtr = lengths.template data<int32_t>();
| |
| int needPadding = 0; |
| int sumLen = 0; |
| for (int i = 0; i < lengths.numel(); ++i) { |
| if (lengthsPtr[i] == 0) { |
| needPadding++; |
| } |
| sumLen += lengthsPtr[i]; |
| } |
| |
| auto* out_lengths = Output(0, {lengths.numel()}, at::dtype<int32_t>()); |
| auto* outLengthsPtr = out_lengths->template mutable_data<int32_t>(); |
| for (int i = 0; i < lengths.numel(); ++i) { |
| if (lengthsPtr[i] == 0) { |
| outLengthsPtr[i] = 1; |
| } else { |
| outLengthsPtr[i] = lengthsPtr[i]; |
| } |
| } |
| |
| for (int k = 0; k < InputSize() - 1; k++) { |
| auto& features = Input(1 + k); |
    CAFFE_ENFORCE(features.dim() >= 1, "FEATURE should be at least 1-D");
| CAFFE_ENFORCE( |
| features.size(0) == sumLen, "FEATURE and LENGTH should be consistent"); |
| const auto block_size = features.size_from_dim(1); |
| |
| auto* out_features = Output(1 + k); |
| auto outDim = features.sizes().vec(); |
| outDim.at(0) += needPadding; |
| out_features->Resize(outDim); |
| auto dst = |
| static_cast<char*>(out_features->raw_mutable_data(features.dtype())); |
| auto src_base = static_cast<const char*>(features.raw_data()); |
| // copy data and add padding index as zero |
| Tensor zero{CPU}; |
| zero.Resize(block_size); |
| auto zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.dtype())); |
| // TODO Handle other composite types, such as vector<...> |
| if (!features.dtype().Match<std::string>()) { |
| memset(zeroPtr, 0, zero.nbytes()); |
| } |
| int start_dest = 0; |
| int start_src = 0; |
| for (int i = 0; i < lengths.numel(); ++i) { |
| if (lengthsPtr[i] == 0) { |
| context_.CopyItemsSameDevice( |
| features.dtype(), |
| block_size, |
| zeroPtr, |
| dst + start_dest * features.dtype().itemsize()); |
| start_dest += block_size; |
| } else { |
| auto src = src_base + start_src * features.dtype().itemsize(); |
| context_.CopyItemsSameDevice( |
| features.dtype(), |
| lengthsPtr[i] * block_size, |
| src, |
| dst + start_dest * features.dtype().itemsize()); |
| // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) |
| start_src += lengthsPtr[i] * block_size; |
| // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) |
| start_dest += lengthsPtr[i] * block_size; |
| } |
| } |
| } |
| return true; |
| } |
| |
| REGISTER_CPU_OPERATOR(AddPadding, AddPaddingOp<CPUContext>); |
| REGISTER_CPU_OPERATOR(RemovePadding, RemovePaddingOp<CPUContext>); |
| REGISTER_CPU_OPERATOR(GatherPadding, GatherPaddingOp<CPUContext>); |
| REGISTER_CPU_OPERATOR(PadEmptySamples, PadEmptySamplesOp<CPUContext>); |
| |
| struct GetAddPaddingGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| vector<OperatorDef> GetGradientDefs() override { |
| // whether to provide lengths as input to gradient |
| vector<std::string> g_inputs{GO(0)}; |
| if (Def().input_size() > 1) { |
| CAFFE_ENFORCE(Def().output_size() > 1); |
| g_inputs.push_back(O(1)); |
| } |
| |
| vector<OperatorDef> ops; |
| // gradient on the data |
| ops.push_back(CreateOperatorDef( |
| "RemovePadding", "", g_inputs, vector<string>{GI(0)})); |
| // gradient on the start_padding (and end_padding) |
| if (Def().input_size() >= 3) { |
| std::vector<string> padding_grads{GI(2)}; |
| if (Def().input_size() == 4) { |
| padding_grads.push_back(GI(3)); |
| } |
| // NOLINTNEXTLINE(performance-unnecessary-copy-initialization) |
| auto g_inputs2 = g_inputs; |
| ops.push_back( |
| CreateOperatorDef("GatherPadding", "", g_inputs2, padding_grads)); |
| } |
| return ops; |
| } |
| }; |
| REGISTER_GRADIENT(AddPadding, GetAddPaddingGradient); |
| |
| struct GetRemovePaddingGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| vector<OperatorDef> GetGradientDefs() override { |
| // whether to provide lengths as input to gradient |
| vector<std::string> g_inputs{GO(0)}; |
| if (Def().input_size() > 1) { |
| CAFFE_ENFORCE(Def().output_size() > 1); |
| g_inputs.push_back(O(1)); |
| } |
| |
| return SingleGradientDef("AddPadding", "", g_inputs, vector<string>{GI(0)}); |
| } |
| }; |
| REGISTER_GRADIENT(RemovePadding, GetRemovePaddingGradient); |
| |
| OPERATOR_SCHEMA(AddPadding) |
| .NumInputs(1, 4) |
| .NumOutputs(1, 2) |
| .TensorInferenceFunction( |
| OpSchema::NeedsAllInputShapes(TensorInferenceForAddPadding)) |
| .SetDoc(R"DOC( |
Given a partitioned tensor $T<N, D_1, ..., D_n>$, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension $N$,
return a tensor $T<N + num\_ranges * (padding\_width + end\_padding\_width),
D_1, ..., D_n>$ with paddings added to the start and end of each range.

Optionally, different paddings can be provided for the start and the end.
Padding tensors must have shape $T<D_1, ..., D_n>$. If no padding is
provided, zero padding is used. If no lengths vector is provided, the input
is treated as a single range, so padding is added only once, at the start
and end of the data.
| |
| Github Links: |
| |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc |
| |
| <details> |
| |
| <summary> <b>Example</b> </summary> |
| |
| **Code** |
| |
| ``` |
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths"],
    ["Y", "lengths_out"],
    padding_width=1
)

workspace.FeedBlob("X", np.random.rand(3, 2, 2).astype(np.float32))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))
| |
| print("X:", workspace.FetchBlob("X")) |
| workspace.RunOperatorOnce(op) |
| print("Y:", workspace.FetchBlob("Y")) |
| print("lengths_out:", workspace.FetchBlob("lengths_out")) |
| ``` |
| |
| **Result** |
| |
| ``` |
| X: [[[0.2531572 0.4588472 ] |
| [0.45140603 0.61161053]] |
| |
| [[0.92500854 0.8045306 ] |
| [0.03356671 0.30233648]] |
| |
| [[0.4660227 0.6287745 ] |
| [0.79372746 0.08609265]]] |
| Y: [[[0. 0. ] |
| [0. 0. ]] |
| |
| [[0.2531572 0.4588472 ] |
| [0.45140603 0.61161053]] |
| |
| [[0.92500854 0.8045306 ] |
| [0.03356671 0.30233648]] |
| |
| [[0.4660227 0.6287745 ] |
| [0.79372746 0.08609265]] |
| |
| [[0. 0. ] |
| [0. 0. ]]] |
| lengths_out: [5] |
| ``` |
| |
| </details> |
| |
| )DOC") |
| .Arg( |
| "padding_width", |
| "*(type: int)* Number of copies of padding to add around each range.") |
    .Arg(
        "end_padding_width",
        "*(type: int)* [OPTIONAL] Specifies a different end-padding width. "
        "If not set, `padding_width` is used.")
| .Input( |
| 0, |
| "data_in", |
| "*(type: Tensor)* Input data ($T<N, D_1, ..., D_n>$).") |
| .Input( |
| 1, |
| "lengths", |
| "*(type: Tensor`<int>`)* Number of elements in each range. " |
| "sum(lengths) = N.") |
    .Input(
        2,
        "start_padding",
        "*(type: Tensor)* [OPTIONAL] Padding data for range start, with the "
        "same type as `data_in` ($T<D_1, ..., D_n>$).")
    .Input(
        3,
        "end_padding",
        "*(type: Tensor)* [OPTIONAL] Padding for range end. If not "
        "provided, `start_padding` is used ($T<D_1, ..., D_n>$).")
| .Output( |
| 0, |
| "data_out", |
| "*(type: Tensor)* Padded data tensor ($T<N + 2*padding_width, " |
| "D_1, ..., D_n>$).") |
| .Output( |
| 1, |
| "lengths_out", |
| "*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each padded range."); |
| |
| OPERATOR_SCHEMA(RemovePadding) |
| .NumInputs(1, 2) |
| .NumOutputs(1, 2) |
| .SetDoc(R"DOC( |
| Remove padding around the edges of each segment of the input data. This is the |
| reverse operation of **AddPadding**, and uses the same arguments and conventions |
| for input and output data format. |
| |
| Github Links: |
| |
| - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc |
| |
| <details> |
| |
| <summary> <b>Example</b> </summary> |
| |
| **Code** |
| |
| ``` |
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

addpad_op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths_add"],
    ["Y", "lengths_out_add"],
    padding_width=1
)

rmpad_op = core.CreateOperator(
    "RemovePadding",
    ["Y", "lengths_rm"],
    ["Z", "lengths_out_rm"],
    padding_width=1
)

workspace.FeedBlob("X", np.random.randint(20, size=(3, 5)))
workspace.FeedBlob("lengths_add", np.array([3]).astype(np.int32))
workspace.FeedBlob("lengths_rm", np.array([5]).astype(np.int32))
| |
| print("X:", workspace.FetchBlob("X")) |
| workspace.RunOperatorOnce(addpad_op) |
| print("Y:", workspace.FetchBlob("Y")) |
| print("lengths_out_add:", workspace.FetchBlob("lengths_out_add")) |
| |
| workspace.RunOperatorOnce(rmpad_op) |
| print("Z:", workspace.FetchBlob("Z")) |
| print("lengths_out_rm:", workspace.FetchBlob("lengths_out_rm")) |
| ``` |
| |
| **Result** |
| |
| ``` |
| X: [[17 19 1 9 1] |
| [19 3 5 19 1] |
| [16 0 0 0 4]] |
| Y: [[ 0 0 0 0 0] |
| [17 19 1 9 1] |
| [19 3 5 19 1] |
| [16 0 0 0 4] |
| [ 0 0 0 0 0]] |
| lengths_out_add: [5] |
| Z: [[17 19 1 9 1] |
| [19 3 5 19 1] |
| [16 0 0 0 4]] |
| lengths_out_rm: [3] |
| ``` |
| |
| </details> |
| |
| )DOC") |
| .Arg( |
| "padding_width", |
| "*(type: int)* Outer-size of padding to remove around each range.") |
    .Arg(
        "end_padding_width",
        "*(type: int)* [OPTIONAL] Specifies a different end-padding width. "
        "If not set, `padding_width` is used.")
| .Input( |
| 0, |
| "data_in", |
| "Input tensor ($T<N, D_1, ..., D_n>$).") |
| .Input( |
| 1, |
| "lengths", |
| "*(type: Tensor`<int>`)* Number of elements in each range. " |
| "sum(lengths) = N. If not provided, considers all data as a single " |
| "segment.") |
    .Output(
        0,
        "data_out",
        "*(type: Tensor)* Unpadded data tensor "
        "($T<N - 2*padding_width, D_1, ..., D_n>$).")
    .Output(
        1,
        "lengths_out",
        "*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each unpadded range.");
| |
| OPERATOR_SCHEMA(GatherPadding) |
| .NumInputs(2) |
| .NumOutputs(1, 2) |
| .SetDoc(R"DOC( |
Gather the sum of the start and end paddings in a padded input sequence. Used
to compute the gradients of AddPadding w.r.t. the padding tensors.
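
<details>

<summary> <b>Example</b> </summary>

A minimal sketch, following the style of the `AddPadding` example above; the
input below is assumed to be a single padded range with `padding_width=1`.
With a single output, the start- and end-padding sums are accumulated
together, so the expected result is `[1. 2.] + [5. 6.] = [6. 8.]`.

**Code**

```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "GatherPadding",
    ["Y", "lengths"],
    ["padding_sum"],
    padding_width=1
)

# "Y" holds one padded range: row 0 is the start padding, row 2 the end padding
workspace.FeedBlob("Y", np.array([[1., 2.], [3., 4.], [5., 6.]], dtype=np.float32))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))

workspace.RunOperatorOnce(op)
print("padding_sum:", workspace.FetchBlob("padding_sum"))
```

**Result**

```
padding_sum: [6. 8.]
```

</details>
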
| )DOC") |
| .Arg("padding_width", "Outer-size of padding present around each range.") |
| .Arg( |
| "end_padding_width", |
| "(Optional) Specifies a different end-padding width.") |
| .Input(0, "data_in", "T<N, D1..., Dn> Padded input data") |
| .Input( |
| 1, |
| "lengths", |
| "(i64) Num of elements in each range. sum(lengths) = N. " |
| "If not provided, considers all data as a single segment.") |
| .Output( |
| 0, |
| "padding_sum", |
| "Sum of all start paddings, or of all " |
| "paddings if end_padding_sum is not provided.") |
| .Output( |
| 1, |
| "end_padding_sum", |
| "T<D1..., Dn> Sum of all end paddings, if provided."); |
| |
| OPERATOR_SCHEMA(PadEmptySamples) |
| .NumInputs(1, INT_MAX) |
| .NumOutputs(1, INT_MAX) |
| .SetDoc(R"DOC( |
Pad empty samples, given their lengths and feature tensors.

Input(0) is a blob containing the lengths of the samples in one batch;
[Input(1), ..., Input(num_fields)] is a list of tensors containing the data
for each field of the features. A sample with length 0 is padded with a
single zero-filled entry, and its length is set to 1.
| |
| PadEmptySamples is thread safe. |
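
<details>

<summary> <b>Example</b> </summary>

A minimal sketch of the behavior described above: the empty second sample
receives a single zero-filled row, and its length becomes 1.

**Code**

```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "PadEmptySamples",
    ["lengths", "feat"],
    ["out_lengths", "out_feat"],
)

workspace.FeedBlob("lengths", np.array([2, 0, 1]).astype(np.int32))
workspace.FeedBlob("feat", np.array([[1., 2.], [3., 4.], [5., 6.]], dtype=np.float32))

workspace.RunOperatorOnce(op)
print("out_lengths:", workspace.FetchBlob("out_lengths"))
print("out_feat:", workspace.FetchBlob("out_feat"))
```

**Result**

```
out_lengths: [2 1 1]
out_feat: [[1. 2.]
 [3. 4.]
 [0. 0.]
 [5. 6.]]
```

</details>
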
| )DOC") |
| .Input(0, "lengths", "A blob containing a pointer to the lengths.") |
| .Output( |
| 0, |
| "out_lengths", |
| "Tensor containing lengths with empty sample padded."); |
| |
| } // namespace caffe2 |