| #include "caffe2/operators/selu_op.h" |
| |
| #include "caffe2/utils/eigen_utils.h" |
| #include "caffe2/utils/math.h" |
| |
| namespace caffe2 { |
| |
| template <> |
| bool SeluOp<float, CPUContext>::RunOnDevice() { |
| auto& X = Input(0); |
| |
| auto* Y = Output(0, X.sizes(), at::dtype<float>()); |
| |
| ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.numel()); |
| EigenVectorArrayMap<float> Yvec( |
| Y->template mutable_data<float>(), Y->numel()); |
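  // Element-wise SELU: y = lambda * x for x > 0, and
  // y = lambda * (alpha * e^x - alpha) otherwise.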
  Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
  return true;
}

template <>
bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);

  CAFFE_ENFORCE_EQ(dY.numel(), Y.numel());
  auto* dX = Output(0, Y.sizes(), at::dtype<float>());

  ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.numel());
  ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.numel());
  EigenVectorArrayMap<float> dXvec(
      dX->template mutable_data<float>(), dX->numel());

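  // The gradient is expressed in terms of the output: for x > 0,
  // dy/dx = lambda; for x <= 0, dy/dx = lambda * alpha * e^x, which
  // equals y + lambda * alpha.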
  const float la = lambda_ * alpha_;
  dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
  return true;
}

REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);

// Input: X; output: Y
OPERATOR_SCHEMA(Selu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(

The *Selu* op takes one input tensor $X$, an argument $alpha$, an argument $scale$, and produces one output tensor $Y$ of the same shape as $X$. The op performs the element-wise *Selu* operation, defined as

$$y = selu(x) = \begin{cases} scale * (\alpha e^{x} - \alpha) & x < 0 \\ scale * x & otherwise \end{cases}$$

The default value of *alpha* is 1.6732632423543772848170429916717 and the default value of *scale* is 1.0507009873554804934193349852946. See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) for more information.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.cc


<details>

<summary> <b>Example</b> </summary>

**Code**

```

from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Selu",
    ["X"],
    ["Y"],
)

workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")

workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
[[ 1.1613879 -0.27111396 -1.2076733 ]
[ 1.3442237 -1.0701777 1.2070968 ]
[ 0.23810555 0.9740916 -1.7872391 ]]

Y:
[[ 1.2202715 -0.4174965 -1.2326177 ]
[ 1.4123772 -1.1551634 1.2682979 ]
[ 0.25017774 1.023479 -1.4637551 ]]

```

</details>

)DOC")
    .Arg(
        "alpha",
        "*(type: float; default: 1.673263~)* Alpha constant in equation.")
    .Arg(
        "scale",
        "*(type: float; default: 1.050700~; must be > 1.0)* Scale constant in equation.")
    .Input(0, "X", "Input tensor of data to be operated on.")
    .Output(0, "Y", "Output tensor with same shape as input.")
    .InheritOnnxSchema();

// Input: Y, dY; output: dX
OPERATOR_SCHEMA(SeluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
SeluGradient takes both Y and dY and computes dX from them according to the
chain rule and the derivative of the selu function.
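
Written in terms of the output Y, as in the CPU implementation above, the gradient is

$$dX = \begin{cases} scale * dY & Y > 0 \\ dY * (Y + scale * \alpha) & otherwise \end{cases}$$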
| )DOC") |
| .Arg( |
| "alpha", |
| "(float) default to 1.6732~; affects the activation function itself." |
| "This should go with the weight initialization in the paper. " |
| " See https://arxiv.org/abs/1706.02515 ") |
| .Arg( |
| "scale", |
| "(float) default to 1.0507~; affects the activation function itself.") |
| .Input(0, "Y", "input tensor") |
| .Input(1, "dY", "input tensor"); |
| |
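// The gradient maker maps the forward op's output Y (O(0)) and its gradient
// dY (GO(0)) to the inputs of SeluGradient, which writes into the gradient
// of X (GI(0)).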
class GetSeluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(Selu, GetSeluGradient);

} // namespace caffe2