#include "caffe2/operators/selu_op.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
template <>
bool SeluOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.numel());
  EigenVectorArrayMap<float> Yvec(
      Y->template mutable_data<float>(), Y->numel());
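  // SELU forward, applied elementwise:
  //   y = lambda * x                      if x > 0
  //   y = lambda * (alpha * e^x - alpha)  otherwise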
  Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
  return true;
}
template <>
bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);
  CAFFE_ENFORCE_EQ(dY.numel(), Y.numel());

  auto* dX = Output(0, Y.sizes(), at::dtype<float>());

  ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.numel());
  ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.numel());
  EigenVectorArrayMap<float> dXvec(
      dX->template mutable_data<float>(), dX->numel());
  const float la = lambda_ * alpha_;
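  // For x > 0, dy/dx = lambda. For x <= 0, y = lambda * alpha * (e^x - 1),
  // so dy/dx = lambda * alpha * e^x = y + lambda * alpha; the derivative
  // can therefore be computed from the saved output Y alone.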
  dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
  return true;
}
REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);
// Input: X; output: Y
OPERATOR_SCHEMA(Selu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
The *Selu* op takes one input tensor $X$ and two arguments, $alpha$ and $scale$, and produces one output tensor $Y$ of the same shape as $X$. The op performs the elementwise *Selu* operation, defined as
$$y = selu(x) = \begin{cases} scale \cdot (\alpha e^{x} - \alpha) & x < 0 \\ scale \cdot x & otherwise \end{cases}$$
The default value of *alpha* is 1.6732632423543772848170429916717 and the default value of *scale* is 1.0507009873554804934193349852946. See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) for more information.
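For reference, here is a minimal NumPy sketch of the same formula (illustrative only; `selu_ref` is a hypothetical helper, not part of the operator API):
```
import numpy as np

alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946

def selu_ref(x):
    # scale * x for x > 0, scale * (alpha * e^x - alpha) otherwise
    return np.where(x > 0, scale * x, scale * (alpha * np.exp(x) - alpha))
```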
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Selu",
    ["X"],
    ["Y"],
)
workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[ 1.1613879 -0.27111396 -1.2076733 ]
[ 1.3442237 -1.0701777 1.2070968 ]
[ 0.23810555 0.9740916 -1.7872391 ]]
Y:
[[ 1.2202715 -0.4174965 -1.2326177 ]
[ 1.4123772 -1.1551634 1.2682979 ]
[ 0.25017774 1.023479 -1.4637551 ]]
```
</details>
)DOC")
    .Arg(
        "alpha",
        "*(type: float; default: 1.673263~)* Alpha constant in equation.")
    .Arg(
        "scale",
        "*(type: float; default: 1.050700~; must be > 1.0)* Scale constant in equation.")
    .Input(0, "X", "Input tensor of data to be operated on.")
    .Output(0, "Y", "Output tensor with same shape as input.")
    .InheritOnnxSchema();
// Input: Y, dY; output: dX
OPERATOR_SCHEMA(SeluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
SeluGradient takes both Y and dY and computes dX from them according to the
chain rule and the derivative of the selu function.
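Because $y = scale \cdot (\alpha e^{x} - \alpha)$ for $x < 0$, the derivative there equals $scale \cdot \alpha e^{x} = y + scale \cdot \alpha$, so the gradient can be computed from the saved output alone:
$$dX = \begin{cases} dY \cdot (Y + scale \cdot \alpha) & Y \leq 0 \\ scale \cdot dY & otherwise \end{cases}$$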
)DOC")
    .Arg(
        "alpha",
        "(float) default to 1.6732~; affects the activation function itself. "
        "This should go with the weight initialization in the paper. "
        "See https://arxiv.org/abs/1706.02515")
    .Arg(
        "scale",
        "(float) default to 1.0507~; affects the activation function itself.")
    .Input(0, "Y", "Output tensor of the forward *Selu* op.")
    .Input(1, "dY", "Gradient of the output Y.");
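// The gradient op consumes the forward output Y (O(0)) and the output
// gradient dY (GO(0)), and produces the input gradient dX (GI(0)).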
class GetSeluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(Selu, GetSeluGradient);
} // namespace caffe2