| #include "caffe2/operators/copy_op.h" |
| |
| namespace caffe2 { |
| |
| // CPU registrations for the copy operators. Copy and CopyFromCPUInput are |
| // both backed by the same CopyOp instantiation; CopyOnDeviceLike is backed |
| // by CopyOnDeviceLikeOp. Every template argument is CPUContext here. |
| REGISTER_CPU_OPERATOR(Copy, CopyOp<CPUContext, CPUContext, CPUContext>); |
| REGISTER_CPU_OPERATOR( |
|     CopyFromCPUInput, |
|     CopyOp<CPUContext, CPUContext, CPUContext>); |
| REGISTER_CPU_OPERATOR( |
|     CopyOnDeviceLike, |
|     CopyOnDeviceLikeOp<CPUContext, CPUContext, CPUContext>); |
| |
| // Schema for the Copy operator: exactly one input and one output, marked |
| // with IdenticalTypeAndShape and InputsCanCrossDevices, and inheriting the |
| // ONNX schema named "Identity". The long doc string below is the |
| // user-facing operator documentation. |
| OPERATOR_SCHEMA(Copy) |
|     .NumInputs(1) |
|     .NumOutputs(1) |
|     .InputsCanCrossDevices() |
|     .IdenticalTypeAndShape() |
|     .InheritOnnxSchema("Identity") |
|     .SetDoc(R"DOC( |
| Copy input tensor into output, potentially across devices. |
| |
| Github Links: |
| |
| - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/copy_op.cc |
| - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/copy_op.h |
| |
| |
| <details> |
| |
| <summary> <b>Example</b> </summary> |
| |
| **Code** |
| |
| ``` |
| |
| workspace.ResetWorkspace() |
| |
| op = core.CreateOperator( |
| "Copy", |
| ["input"], |
| ["output"] |
| ) |
| |
| workspace.FeedBlob("input", np.random.rand(3,3)) |
| print("input:", workspace.FetchBlob("input")) |
| workspace.RunOperatorOnce(op) |
| print("output:", workspace.FetchBlob("output")) |
| |
| ``` |
| |
| **Result** |
| |
| ``` |
| |
| input: |
| [[0.16826761 0.68168217 0.55196001] |
| [0.19735483 0.34837823 0.69015595] |
| [0.09448514 0.57390828 0.37097193]] |
| output: |
| [[0.16826761 0.68168217 0.55196001] |
| [0.19735483 0.34837823 0.69015595] |
| [0.09448514 0.57390828 0.37097193]] |
| |
| ``` |
| |
| </details> |
| |
| )DOC") |
|     .Input(0, "input", "(*Tensor*): input tensor to copy") |
|     .Output(0, "output", "(*Tensor*): copy of input tensor"); |
| |
| // Schema for CopyGPUToCPU: one input, one output. |
| // Device inference reports every input on the operator's own device option |
| // and every output on a default-constructed DeviceOption (cpu_option). |
| OPERATOR_SCHEMA(CopyGPUToCPU) |
|     .NumInputs(1) |
|     .NumOutputs(1) |
|     .IdenticalTypeAndShape() |
|     .InputsCanCrossDevices() |
|     .DeviceInferenceFunction([](const OperatorDef& def) { |
|       // The op's device option is the only source of the input placement, |
|       // so inference is refused when the def does not carry one. |
|       CAFFE_ENFORCE( |
|           def.has_device_option(), |
|           "CopyGPUToCPU op should have cuda device option."); |
|       const auto& cuda_option = def.device_option(); |
|       const auto cpu_option = DeviceOption(); |
|       vector<DeviceOption> in_dev(def.input_size(), cuda_option); |
|       vector<DeviceOption> out_dev(def.output_size(), cpu_option); |
|       return std::make_pair(in_dev, out_dev); |
|     }) |
|     .SetDoc(R"DOC( |
| Copy tensor from GPU to CPU context. Must be run under GPU device option. |
| )DOC") |
|     .Input(0, "input", "The input tensor.") |
|     .Output(0, "output", "Tensor that will contain a copy of the input."); |
| |
| // Schema for CopyCPUToGPU: one input, one output. |
| // Device inference reports every input on a default-constructed |
| // DeviceOption (cpu_option) and every output on the operator's own device |
| // option. |
| OPERATOR_SCHEMA(CopyCPUToGPU) |
|     .NumInputs(1) |
|     .NumOutputs(1) |
|     .IdenticalTypeAndShape() |
|     .InputsCanCrossDevices() |
|     .DeviceInferenceFunction([](const OperatorDef& def) { |
|       // The op's device option is the only source of the output placement, |
|       // so inference is refused when the def does not carry one. |
|       CAFFE_ENFORCE( |
|           def.has_device_option(), |
|           "CopyCPUToGPU op should have cuda device option."); |
|       const auto& cuda_option = def.device_option(); |
|       const auto cpu_option = DeviceOption(); |
|       vector<DeviceOption> in_dev(def.input_size(), cpu_option); |
|       vector<DeviceOption> out_dev(def.output_size(), cuda_option); |
|       return std::make_pair(in_dev, out_dev); |
|     }) |
|     .SetDoc(R"DOC( |
| Copy tensor from CPU to GPU context. Must be run under GPU device option. |
| )DOC") |
|     .Input(0, "input", "The input tensor.") |
|     .Output(0, "output", "Tensor that will contain a copy of the input."); |
| |
| // Schema for CopyFromCPUInput: one input, one output. |
| // Device inference reports every input on a default-constructed |
| // DeviceOption (cpu_option) and every output on the operator's device |
| // option, falling back to a default-constructed DeviceOption when the def |
| // carries none. Unlike the GPU<->CPU schemas in this file, a missing device |
| // option is not treated as an error here. |
| OPERATOR_SCHEMA(CopyFromCPUInput) |
|     .NumInputs(1) |
|     .NumOutputs(1) |
|     .IdenticalTypeAndShape() |
|     .InputsCanCrossDevices() |
|     .DeviceInferenceFunction([](const OperatorDef& def) { |
|       const auto op_device = |
|           def.has_device_option() ? def.device_option() : DeviceOption(); |
|       const auto cpu_option = DeviceOption(); |
|       vector<DeviceOption> in_dev(def.input_size(), cpu_option); |
|       vector<DeviceOption> out_dev(def.output_size(), op_device); |
|       return std::make_pair(in_dev, out_dev); |
|     }) |
|     .SetDoc(R"DOC( |
| Take a CPU input tensor and copy it to an output in the current |
| Context (GPU or CPU). This may involve cross-device MemCpy. |
| )DOC") |
|     .Input(0, "input", "The input CPU tensor.") |
|     .Output(0, "output", "either a TensorCUDA or a TensorCPU"); |
| |
| // Schema for CopyOnDeviceLike: two inputs ("input" and "dst") and a single |
| // output. |
| OPERATOR_SCHEMA(CopyOnDeviceLike) |
|     .NumInputs(2) |
|     .NumOutputs(1) |
|     .Input(0, "input", "The input tensor.") |
|     .Input(1, "dst", "Tensor, on which device the copy will be performed.") |
|     .Output(0, "output", "Tensor that will contain a copy of the input.") |
|     .SetDoc("Copy input tensor into output to the specific device."); |
| |
| // Gradient maker registered for Copy. It emits a single CopyOnDeviceLike |
| // op whose inputs are the output gradient GO(0) and the forward input I(0), |
| // and whose output is the input gradient GI(0). |
| struct GetCopyGradient : public GradientMakerBase { |
|   using GradientMakerBase::GradientMakerBase; |
| |
|   vector<OperatorDef> GetGradientDefs() override { |
|     const vector<string> grad_op_inputs{GO(0), I(0)}; |
|     const vector<string> grad_op_outputs{GI(0)}; |
|     return SingleGradientDef( |
|         "CopyOnDeviceLike", "", grad_op_inputs, grad_op_outputs); |
|   } |
| }; |
| REGISTER_GRADIENT(Copy, GetCopyGradient); |
| |
| // Gradient maker registered for CopyGPUToCPU. The gradient travels through |
| // CopyCPUToGPU: a single op when the output gradient is dense, otherwise |
| // two ops, one for the GO_I/GI_I pair and one for the GO_V/GI_V pair. |
| struct GetGPUToCPUGradient : public GradientMakerBase { |
|   using GradientMakerBase::GradientMakerBase; |
| |
|   vector<OperatorDef> GetGradientDefs() override { |
|     if (!g_output_[0].IsDense()) { |
|       // Non-dense gradient: the two component blobs are copied separately, |
|       // indices op first, values op second. |
|       const OperatorDef copy_indices = CreateOperatorDef( |
|           "CopyCPUToGPU", |
|           "", |
|           std::vector<string>{GO_I(0)}, |
|           std::vector<string>{GI_I(0)}); |
|       const OperatorDef copy_values = CreateOperatorDef( |
|           "CopyCPUToGPU", |
|           "", |
|           std::vector<string>{GO_V(0)}, |
|           std::vector<string>{GI_V(0)}); |
|       return vector<OperatorDef>{copy_indices, copy_values}; |
|     } |
|     return SingleGradientDef( |
|         "CopyCPUToGPU", "", vector<string>{GO(0)}, vector<string>{GI(0)}); |
|   } |
| }; |
| REGISTER_GRADIENT(CopyGPUToCPU, GetGPUToCPUGradient); |
| |
| // Gradient maker registered for CopyCPUToGPU. The gradient travels through |
| // CopyGPUToCPU: a single op when the output gradient is dense, otherwise |
| // two ops, one for the GO_I/GI_I pair and one for the GO_V/GI_V pair. |
| struct GetCPUToGPUGradient : public GradientMakerBase { |
|   using GradientMakerBase::GradientMakerBase; |
| |
|   vector<OperatorDef> GetGradientDefs() override { |
|     if (!g_output_[0].IsDense()) { |
|       // Non-dense gradient: the two component blobs are copied separately, |
|       // indices op first, values op second. |
|       const OperatorDef copy_indices = CreateOperatorDef( |
|           "CopyGPUToCPU", |
|           "", |
|           std::vector<string>{GO_I(0)}, |
|           std::vector<string>{GI_I(0)}); |
|       const OperatorDef copy_values = CreateOperatorDef( |
|           "CopyGPUToCPU", |
|           "", |
|           std::vector<string>{GO_V(0)}, |
|           std::vector<string>{GI_V(0)}); |
|       return vector<OperatorDef>{copy_indices, copy_values}; |
|     } |
|     return SingleGradientDef( |
|         "CopyGPUToCPU", "", vector<string>{GO(0)}, vector<string>{GI(0)}); |
|   } |
| }; |
| REGISTER_GRADIENT(CopyCPUToGPU, GetCPUToGPUGradient); |
| |
| } // namespace caffe2 |
| |
| // c10 exports for the two cross-device copy ops, placed outside the caffe2 |
| // namespace. Each declares a one-Tensor-in, one-Tensor-out signature and |
| // passes c10::nullopt for the optional alias analysis kind. |
| C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY( |
|     CopyGPUToCPU, |
|     "_caffe2::CopyGPUToCPU(Tensor input) -> Tensor", |
|     /*optional_alias_analysis_kind=*/c10::nullopt); |
| |
| C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY( |
|     CopyCPUToGPU, |
|     "_caffe2::CopyCPUToGPU(Tensor input) -> Tensor", |
|     /*optional_alias_analysis_kind=*/c10::nullopt); |