#ifndef CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_
#define CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "c10/util/irange.h"

namespace caffe2 {

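// Merges a dense (numExamples x numFeatures) tensor and its boolean presence
// mask into the multi-feature representation: per-example lengths, keys
// (taken from the "feature_ids" argument), and values.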
template <class Context>
class MergeDenseFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeDenseFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  ~MergeDenseFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& dense_data = Input(0);
    int numExamples = dense_data.size(0);
    int numFeatures = dense_data.size(1);

    const bool* inPresenceData = Input(1).template data<bool>();
    int totalNumFeatures = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      for (const auto inputIndex : c10::irange(numFeatures)) {
        if (inPresenceData[exampleIndex * numFeatures + inputIndex]) {
          ++totalNumFeatures;
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    T* outValuesData = outValues->template mutable_data<T>();
    const T* inData = dense_data.template data<T>();

    int keysOffset = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      auto offset = exampleIndex * numFeatures;
      for (const auto inputIndex : c10::irange(numFeatures)) {
        if (inPresenceData[offset]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesData[keysOffset] = inData[offset];
          ++keysOffset;
        }
        offset++;
      }
    }
    return true;
  }

 private:
  std::vector<int64_t> featureIDs_;
};

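// Merges N single-feature scalar tensors (each given as a value tensor plus a
// boolean presence tensor) into one multi-feature tensor with per-example
// lengths, keys (from "feature_ids"), and values.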
template <class Context>
class MergeSingleScalarFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleScalarFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  ~MergeSingleScalarFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
      for (const auto exampleIndex : c10::irange(numExamples)) {
        if (inPresenceData[exampleIndex]) {
          ++totalNumFeatures;
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    T* outValuesData = outValues->template mutable_data<T>();

    int keysOffset = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      for (const auto inputIndex : c10::irange(numInputs_)) {
        const T* inData =
            Input(kNumTensorsPerInput * inputIndex).template data<T>();
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesData[keysOffset] = inData[exampleIndex];
          ++keysOffset;
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 2;
  int numInputs_;
  std::vector<int64_t> featureIDs_;
};

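// Gradient of MergeSingleScalarFeatureTensorsOp: takes the per-feature
// presence tensors followed by the merged values gradient, and scatters that
// gradient back into one tensor per feature (absent entries get T()).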
template <class Context>
class MergeSingleScalarFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleScalarFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = InputSize() - 1; // Everything other than values_grad
  }
  ~MergeSingleScalarFeatureTensorsGradientOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
      Output(inputIndex)->ResizeLike(Input(inputIndex));
    }

    const T* inValuesGradData = Input(InputSize() - 1).template data<T>();

    T default_value = T();
    int valuesOffset = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
        const bool* inPresenceData = Input(inputIndex).template data<bool>();
        T* outFeatureData = Output(inputIndex)->template mutable_data<T>();
        if (inPresenceData[exampleIndex]) {
          outFeatureData[exampleIndex] = inValuesGradData[valuesOffset];
          ++valuesOffset;
        } else {
          outFeatureData[exampleIndex] = default_value;
        }
      }
    }
    return true;
  }

 private:
  int numFeatureInputs_;
};

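// Merges N single-feature list tensors (each given as .lengths, .values, and
// a boolean presence tensor) into one multi-feature list tensor: per-example
// lengths, keys (from "feature_ids"), values.lengths, and values.values.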
template <class Context>
class MergeSingleListFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleListFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inValuesOffset_.resize(numInputs_);
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  ~MergeSingleListFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 2).template data<bool>();
      for (const auto exampleIndex : c10::irange(numExamples)) {
        if (inPresenceData[exampleIndex]) {
          ++totalNumFeatures;
          totalNumValues += inLengthsData[exampleIndex];
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesValues = Output(3, {totalNumValues}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    T* outValuesValuesData = outValuesValues->template mutable_data<T>();

    int keysOffset = 0;
    int valuesOffset = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      inValuesOffset_[inputIndex] = 0;
    }
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      for (const auto inputIndex : c10::irange(numInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 1);
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 2).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex];
          context_.CopyItemsSameDevice(
              inValues.dtype(),
              inLengthsData[exampleIndex],
              &inValues.template data<T>()[inValuesOffset_[inputIndex]],
              &outValuesValuesData[valuesOffset]);
          valuesOffset += inLengthsData[exampleIndex];
          inValuesOffset_[inputIndex] += inLengthsData[exampleIndex];
          ++keysOffset;
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 3;
  int numInputs_;
  std::vector<int> inValuesOffset_;
  std::vector<int64_t> featureIDs_;
};

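// Gradient shared by MergeSingleListFeatureTensorsOp and
// MergeSingleMapFeatureTensorsOp: takes (.lengths, presence) pairs per
// feature followed by the merged values gradient, and splits that gradient
// back into one values tensor per feature.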
template <class Context>
class MergeSingleListOrMapFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleListOrMapFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
  }
  ~MergeSingleListOrMapFeatureTensorsGradientOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    std::vector<int> outValuesOffset(numFeatureInputs_);
    for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
      int inputNumValues = 0;
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
      for (const auto exampleIndex : c10::irange(numExamples)) {
        if (inPresenceData[exampleIndex]) {
          inputNumValues += inLengthsData[exampleIndex];
        }
      }
      Output(inputIndex)->Resize(inputNumValues);
    }

    const auto& inValuesValuesGrad = Input(InputSize() - 1);
    const T* inValuesValuesGradData = inValuesValuesGrad.template data<T>();

    int inValuesValuesOffset = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
          context_.CopyItemsSameDevice(
              inValuesValuesGrad.dtype(),
              inLengthsData[exampleIndex],
              &inValuesValuesGradData[inValuesValuesOffset],
              &outFeatureValues[outValuesOffset[inputIndex]]);
          outValuesOffset[inputIndex] += inLengthsData[exampleIndex];
          inValuesValuesOffset += inLengthsData[exampleIndex];
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 2;
  int numFeatureInputs_;
};

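// Merges N single-feature map tensors (each given as .lengths, .keys,
// .values, and a boolean presence tensor) into one multi-feature map tensor:
// per-example lengths, keys (from "feature_ids"), values.lengths,
// values.keys, and values.values.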
template <class Context>
class MergeSingleMapFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleMapFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inValuesOffset_.resize(numInputs_);
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  ~MergeSingleMapFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(1));
  }

  template <typename K>
  bool DoRunWithType() {
    return DispatchHelper<
        TensorTypes2<bool, int32_t, int64_t, float, double, std::string>,
        K>::call(this, Input(2));
  }

  template <typename K, typename V>
  bool DoRunWithType2() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 3).template data<bool>();
      for (const auto exampleIndex : c10::irange(numExamples)) {
        if (inPresenceData[exampleIndex]) {
          ++totalNumFeatures;
          totalNumValues += inLengthsData[exampleIndex];
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype<K>());
    auto* outValuesValues = Output(4, {totalNumValues}, at::dtype<V>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    K* outValuesKeysData = outValuesKeys->template mutable_data<K>();
    V* outValuesValuesData = outValuesValues->template mutable_data<V>();

    int keysOffset = 0;
    int valuesOffset = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      inValuesOffset_[inputIndex] = 0;
    }
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      for (const auto inputIndex : c10::irange(numInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const auto& inKeys = Input(kNumTensorsPerInput * inputIndex + 1);
        const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 2);
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 3).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex];
          context_.CopyItemsSameDevice(
              inKeys.dtype(),
              inLengthsData[exampleIndex],
              &inKeys.template data<K>()[inValuesOffset_[inputIndex]],
              &outValuesKeysData[valuesOffset]);
          context_.CopyItemsSameDevice(
              inValues.dtype(),
              inLengthsData[exampleIndex],
              &inValues.template data<V>()[inValuesOffset_[inputIndex]],
              &outValuesValuesData[valuesOffset]);
          valuesOffset += inLengthsData[exampleIndex];
          inValuesOffset_[inputIndex] += inLengthsData[exampleIndex];
          ++keysOffset;
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 4;
  int numInputs_;
  std::vector<int> inValuesOffset_;
  std::vector<int64_t> featureIDs_;
};

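// Merges N multi-feature scalar tensors (each given as .lengths, .keys,
// .values) into one multi-feature tensor by concatenating, per example, the
// features of all inputs.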
template <class Context>
class MergeMultiScalarFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiScalarFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inKeysOffset_.resize(numInputs_);
  }
  ~MergeMultiScalarFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(2));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    T* outValuesData = outValues->template mutable_data<T>();

    int outKeysOffset = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      inKeysOffset_[inputIndex] = 0;
    }
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      for (const auto inputIndex : c10::irange(numInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        auto inputKeysBlobIdx = kNumTensorsPerInput * inputIndex + 1;
        const int64_t* inKeysData =
            Input(inputKeysBlobIdx).template data<int64_t>();
        const T* inValuesData =
            Input(kNumTensorsPerInput * inputIndex + 2).template data<T>();
        outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
        for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
             ++featureIndex) {
          CAFFE_ENFORCE_LT(outKeysOffset, totalNumFeatures);
          CAFFE_ENFORCE_LT(
              inKeysOffset_[inputIndex], Input(inputKeysBlobIdx).numel());
          outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
          outValuesData[outKeysOffset] =
              inValuesData[inKeysOffset_[inputIndex]];
          ++outKeysOffset;
          ++inKeysOffset_[inputIndex];
        }
      }
    }

    return true;
  }

 private:
  const int kNumTensorsPerInput = 3;
  int numInputs_;
  std::vector<int> inKeysOffset_;
};

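// Gradient of MergeMultiScalarFeatureTensorsOp: takes the per-input .lengths
// tensors followed by the merged values gradient, and splits that gradient
// back into one values tensor per input.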
template <class Context>
class MergeMultiScalarFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiScalarFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
  }
  ~MergeMultiScalarFeatureTensorsGradientOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    std::vector<int> outValuesOffset(numFeatureInputs_);
    for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
      int inputNumValues = 0;
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      for (const auto exampleIndex : c10::irange(numExamples)) {
        inputNumValues += inLengthsData[exampleIndex];
      }
      Output(inputIndex)->Resize(inputNumValues);
    }

    const auto& inValuesGrad = Input(InputSize() - 1);
    const T* inValuesGradData = inValuesGrad.template data<T>();

    int inValuesOffset = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        if (inLengthsData[exampleIndex] > 0) {
          T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
          context_.CopyItemsSameDevice(
              inValuesGrad.dtype(),
              inLengthsData[exampleIndex],
              &inValuesGradData[inValuesOffset],
              &outFeatureValues[outValuesOffset[inputIndex]]);
          outValuesOffset[inputIndex] += inLengthsData[exampleIndex];
          inValuesOffset += inLengthsData[exampleIndex];
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 1;
  int numFeatureInputs_;
};

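// Merges N multi-feature list tensors (each given as .lengths, .keys,
// .values.lengths, .values.values) into one multi-feature list tensor by
// concatenating, per example, the features of all inputs.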
template <class Context>
class MergeMultiListFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiListFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inKeysOffset_.resize(numInputs_);
    inValuesValuesOffset_.resize(numInputs_);
  }
  ~MergeMultiListFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(3));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
      totalNumValues += Input(kNumTensorsPerInput * inputIndex + 3).numel();
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesValues = Output(3, {totalNumValues}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    T* outValuesValuesData = outValuesValues->template mutable_data<T>();

    int outKeysOffset = 0;
    int outValuesValuesOffset = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      inKeysOffset_[inputIndex] = 0;
      inValuesValuesOffset_[inputIndex] = 0;
    }
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      for (const auto inputIndex : c10::irange(numInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
                                        .template data<int64_t>();
        const int32_t* inValuesLengthsData =
            Input(kNumTensorsPerInput * inputIndex + 2)
                .template data<int32_t>();
        const auto& inValuesValues =
            Input(kNumTensorsPerInput * inputIndex + 3);
        outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
        for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
             ++featureIndex) {
          outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
          outValuesLengthsData[outKeysOffset] =
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          context_.CopyItemsSameDevice(
              inValuesValues.dtype(),
              inValuesLengthsData[inKeysOffset_[inputIndex]],
              &inValuesValues
                   .template data<T>()[inValuesValuesOffset_[inputIndex]],
              &outValuesValuesData[outValuesValuesOffset]);
          outValuesValuesOffset +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          inValuesValuesOffset_[inputIndex] +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          ++outKeysOffset;
          ++inKeysOffset_[inputIndex];
        }
      }
    }

    return true;
  }

 private:
  const int kNumTensorsPerInput = 4;
  int numInputs_;
  std::vector<int> inKeysOffset_;
  std::vector<int> inValuesValuesOffset_;
};

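// Merges N multi-feature map tensors (each given as .lengths, .keys,
// .values.lengths, .values.keys, .values.values) into one multi-feature map
// tensor by concatenating, per example, the features of all inputs.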
template <class Context>
class MergeMultiMapFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiMapFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inKeysOffset_.resize(numInputs_);
    inValuesValuesOffset_.resize(numInputs_);
  }
  ~MergeMultiMapFeatureTensorsOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(3));
  }

  template <typename K>
  bool DoRunWithType() {
    return DispatchHelper<
        TensorTypes2<bool, int32_t, int64_t, float, double, std::string>,
        K>::call(this, Input(4));
  }

  template <typename K, typename V>
  bool DoRunWithType2() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
      totalNumValues += Input(kNumTensorsPerInput * inputIndex + 4).numel();
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype<K>());
    auto* outValuesValues = Output(4, {totalNumValues}, at::dtype<V>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    K* outValuesKeysData = outValuesKeys->template mutable_data<K>();
    V* outValuesValuesData = outValuesValues->template mutable_data<V>();

    int outKeysOffset = 0;
    int outValuesValuesOffset = 0;
    for (const auto inputIndex : c10::irange(numInputs_)) {
      inKeysOffset_[inputIndex] = 0;
      inValuesValuesOffset_[inputIndex] = 0;
    }
    for (const auto exampleIndex : c10::irange(numExamples)) {
      outLengthsData[exampleIndex] = 0;
      for (const auto inputIndex : c10::irange(numInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
                                        .template data<int64_t>();
        const int32_t* inValuesLengthsData =
            Input(kNumTensorsPerInput * inputIndex + 2)
                .template data<int32_t>();
        const auto& inValuesKeys = Input(kNumTensorsPerInput * inputIndex + 3);
        const auto& inValuesValues =
            Input(kNumTensorsPerInput * inputIndex + 4);
        outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
        for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
             ++featureIndex) {
          outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
          outValuesLengthsData[outKeysOffset] =
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          context_.CopyItemsSameDevice(
              inValuesKeys.dtype(),
              inValuesLengthsData[inKeysOffset_[inputIndex]],
              &inValuesKeys
                   .template data<K>()[inValuesValuesOffset_[inputIndex]],
              &outValuesKeysData[outValuesValuesOffset]);
          context_.CopyItemsSameDevice(
              inValuesValues.dtype(),
              inValuesLengthsData[inKeysOffset_[inputIndex]],
              &inValuesValues
                   .template data<V>()[inValuesValuesOffset_[inputIndex]],
              &outValuesValuesData[outValuesValuesOffset]);
          outValuesValuesOffset +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          inValuesValuesOffset_[inputIndex] +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          ++outKeysOffset;
          ++inKeysOffset_[inputIndex];
        }
      }
    }

    return true;
  }

 private:
  const int kNumTensorsPerInput = 5;
  int numInputs_;
  std::vector<int> inKeysOffset_;
  std::vector<int> inValuesValuesOffset_;
};

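// Gradient shared by MergeMultiListFeatureTensorsOp and
// MergeMultiMapFeatureTensorsOp: takes (.lengths, .values.lengths) pairs per
// input followed by the merged values gradient, and splits that gradient back
// into one values tensor per input.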
template <class Context>
class MergeMultiListOrMapFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiListOrMapFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
  }
  ~MergeMultiListOrMapFeatureTensorsGradientOp() noexcept override {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    std::vector<int> outValuesLengthOffset(numFeatureInputs_);
    std::vector<int> outValuesValuesOffset(numFeatureInputs_);
    for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
      int inputNumValues = 0;
      auto& inValuesLength = Input(kNumTensorsPerInput * inputIndex + 1);
      const int32_t* inValuesLengthsData =
          inValuesLength.template data<int32_t>();
      for (const auto valuesIndex : c10::irange(inValuesLength.numel())) {
        inputNumValues += inValuesLengthsData[valuesIndex];
      }
      Output(inputIndex)->Resize(inputNumValues);
    }

    const auto& inValuesValuesGrad = Input(InputSize() - 1);
    const T* inValuesValuesGradData = inValuesValuesGrad.template data<T>();

    int inValuesValuesOffset = 0;
    for (const auto exampleIndex : c10::irange(numExamples)) {
      for (const auto inputIndex : c10::irange(numFeatureInputs_)) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int32_t* inValuesLengthsData =
            Input(kNumTensorsPerInput * inputIndex + 1)
                .template data<int32_t>();
        int valuesLengthCopy = 0;
        for (int valuesLengthIndex = 0;
             valuesLengthIndex < inLengthsData[exampleIndex];
             ++valuesLengthIndex) {
          valuesLengthCopy += inValuesLengthsData
              [outValuesLengthOffset[inputIndex] + valuesLengthIndex];
        }
        if (valuesLengthCopy > 0) {
          T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
          context_.CopyItemsSameDevice(
              inValuesValuesGrad.dtype(),
              valuesLengthCopy,
              &inValuesValuesGradData[inValuesValuesOffset],
              &outFeatureValues[outValuesValuesOffset[inputIndex]]);
        }
        outValuesLengthOffset[inputIndex] += inLengthsData[exampleIndex];
        outValuesValuesOffset[inputIndex] += valuesLengthCopy;
        inValuesValuesOffset += valuesLengthCopy;
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 2;
  int numFeatureInputs_;
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_