| #include "caffe2/operators/swish_op.h" |
| |
| #include <string> |
| #include <vector> |
| |
| #include "caffe2/core/types.h" |
| #include "caffe2/utils/eigen_utils.h" |
| #include "caffe2/utils/math.h" |
| |
| namespace caffe2 { |
| |
| template <> |
| template <typename T> |
| bool SwishFunctor<CPUContext>:: |
| operator()(const int N, const T* X, T* Y, CPUContext* /* context */) const { |
| ConstEigenVectorArrayMap<T> X_arr(X, N); |
| EigenVectorArrayMap<T>(Y, N) = X_arr / (T(1) + (-X_arr).exp()); |
| return true; |
| } |
| |
| template <> |
| template <typename T> |
| bool SwishGradientOp<CPUContext>::DoRunWithType() { |
| auto& Xin = Input(X); |
| auto& Yin = Input(Y); |
| auto& DYin = Input(DY); |
| |
| CAFFE_ENFORCE_EQ(Xin.numel(), Yin.numel()); |
| CAFFE_ENFORCE_EQ(DYin.numel(), Yin.numel()); |
| auto* DXout = Output(DX, Yin.sizes(), at::dtype<float>()); |
| |
| const float* Xdata = Xin.template data<float>(); |
| const float* Ydata = Yin.template data<float>(); |
| const float* dYdata = DYin.template data<float>(); |
| float* dXdata = DXout->template mutable_data<float>(); |
| |
| EigenVectorArrayMap<float> dXvec(dXdata, DXout->numel()); |
| ConstEigenVectorArrayMap<float> Xvec(Xdata, Xin.numel()); |
| ConstEigenVectorArrayMap<float> Yvec(Ydata, Yin.numel()); |
| ConstEigenVectorArrayMap<float> dYvec(dYdata, DYin.numel()); |
| |
| // dx = dy * (y + sigmoid(x)*(1-y)) |
| dXvec = dYvec * (Yvec + (T(1) / (T(1) + (-Xvec).exp())) * (T(1) - Yvec)); |
| return true; |
| } |
| |
| REGISTER_CPU_OPERATOR( |
| Swish, |
| UnaryElementwiseOp< |
| TensorTypes<float>, |
| CPUContext, |
| SwishFunctor<CPUContext>>); |
| REGISTER_CPU_OPERATOR(SwishGradient, SwishGradientOp<CPUContext>); |
| |
| // Input: X, output: Y |
| OPERATOR_SCHEMA(Swish) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .IdenticalTypeAndShape() |
| .SetDoc(R"DOC( |
| Swish takes one input data (Tensor) and produces one output data |
| (Tensor) where the swish function, y = x / (1 + exp(-x)), is applied to the |
| tensor elementwise. |
| )DOC") |
| .Input(0, "X", "1D input tensor") |
| .Output(0, "Y", "1D output tensor"); |
| // Input: X, Y, dY, output: dX |
| OPERATOR_SCHEMA(SwishGradient) |
| .NumInputs(3) |
| .NumOutputs(1) |
| .AllowInplace({{2, 0}}) |
| .SetDoc(R"DOC( |
| SwishGradient takes X, Y and dY and uses this to update dX according to the |
| chain rule and derivatives of the swish function. |
| )DOC"); |
| |
| namespace { |
| |
| class GetSwishGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| std::vector<OperatorDef> GetGradientDefs() override { |
| return SingleGradientDef( |
| "SwishGradient", |
| "", |
| std::vector<std::string>{I(0), O(0), GO(0)}, |
| std::vector<std::string>{GI(0)}); |
| } |
| }; |
| |
| } // namespace |
| |
| REGISTER_GRADIENT(Swish, GetSwishGradient); |
| |
| } // namespace caffe2 |