| #include "caffe2/utils/math/reduce.h" |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <functional> |
| #include <numeric> |
| #include <vector> |
| |
| #ifdef CAFFE2_USE_ACCELERATE |
| #include <Accelerate/Accelerate.h> |
| #endif // CAFFE2_USE_ACCELERATE |
| |
| #ifdef CAFFE2_USE_MKL |
| #include <mkl.h> |
| #endif // CAFFE2_USE_MKL |
| |
| #include <c10/util/accumulate.h> |
| #include "caffe2/core/context.h" |
| #include "caffe2/utils/eigen_utils.h" |
| #include "caffe2/utils/math.h" |
| #include "caffe2/utils/math/broadcast.h" |
| #include "caffe2/utils/math/elementwise.h" |
| #include "caffe2/utils/math/utils.h" |
| |
namespace caffe2 {
namespace math {

namespace {

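// Rowwise reduction: X is a row-major rows x cols matrix reduced along its
// second dimension, one output per row. Mapping X column-major as cols x rows
// makes each original row a contiguous Eigen column; colwise() applies the
// reduction per column, and the transpose writes the result back into Y.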
#define DELEGATE_ROWWISE_REDUCE_FUNCTION(Func, EigenFunc) \
  template <typename T> \
  void Rowwise##Func( \
      const int rows, \
      const int cols, \
      const T alpha, \
      const T* X, \
      T* Y, \
      CPUContext* /* context */) { \
    EigenVectorMap<T>(Y, rows) = ConstEigenMatrixMap<T>(X, cols, rows) \
                                     .colwise() \
                                     .EigenFunc() \
                                     .transpose() * \
        alpha; \
  }
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceMin, minCoeff)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceMax, maxCoeff)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceSum, sum)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceMean, mean)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceL1, template lpNorm<1>)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceL2, norm)
#undef DELEGATE_ROWWISE_REDUCE_FUNCTION

#ifndef CAFFE2_USE_EIGEN_FOR_BLAS

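// When a real BLAS backend is available (i.e., not the Eigen fallback), the
// rowwise L1/L2 reductions for float and double delegate to cblas_?asum /
// cblas_?nrm2, one call per contiguous row.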
#define DELEGATE_ROWWISE_REDUCE_FUNCTION(T, Func, BLASFunc) \
  template <> \
  void Rowwise##Func( \
      const int rows, \
      const int cols, \
      const T alpha, \
      const T* X, \
      T* Y, \
      CPUContext* /* context */) { \
    for (int i = 0; i < rows; ++i) { \
      Y[i] = BLASFunc(cols, X + i * cols, 1) * alpha; \
    } \
  }
DELEGATE_ROWWISE_REDUCE_FUNCTION(float, ReduceL1, cblas_sasum)
DELEGATE_ROWWISE_REDUCE_FUNCTION(double, ReduceL1, cblas_dasum)
DELEGATE_ROWWISE_REDUCE_FUNCTION(float, ReduceL2, cblas_snrm2)
DELEGATE_ROWWISE_REDUCE_FUNCTION(double, ReduceL2, cblas_dnrm2)
#undef DELEGATE_ROWWISE_REDUCE_FUNCTION

#endif // CAFFE2_USE_EIGEN_FOR_BLAS

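// Colwise reduction: Y starts as a copy of the first row of X, the remaining
// rows are folded in with the element-wise Min/Max/Add primitive, and the
// result is scaled by alpha once at the end.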
#define DELEGATE_COLWISE_REDUCE_FUNCTION(Func, MathFunc) \
  template <typename T> \
  void Colwise##Func( \
      const int rows, \
      const int cols, \
      const T alpha, \
      const T* X, \
      T* Y, \
      CPUContext* context) { \
    std::memcpy(Y, X, sizeof(T) * cols); \
    for (int i = 1; i < rows; ++i) { \
      MathFunc<T, CPUContext>(cols, Y, X + i * cols, Y, context); \
    } \
    Scale<T, T, CPUContext>(cols, alpha, Y, Y, context); \
  }
DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceMin, Min)
DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceMax, Max)
DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceSum, Add)
#undef DELEGATE_COLWISE_REDUCE_FUNCTION

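// Mean reuses the colwise sum by folding the 1 / rows factor into alpha.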
template <typename T>
void ColwiseReduceMean(
    const int rows,
    const int cols,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  ColwiseReduceSum<T>(rows, cols, alpha / static_cast<T>(rows), X, Y, context);
}

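// L1 norm per column: accumulate absolute values row by row, then scale once.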
template <typename T>
void ColwiseReduceL1(
    const int rows,
    const int cols,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  ConstEigenArrayMap<T> X_arr(X, cols, rows);
  EigenVectorArrayMap<T> Y_arr(Y, cols);
  Y_arr = X_arr.col(0).abs();
  for (int i = 1; i < rows; ++i) {
    Y_arr += X_arr.col(i).abs();
  }
  Scale<T, T, CPUContext>(cols, alpha, Y, Y, context);
}

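// L2 norm per column: accumulate squares, then take the square root. Note
// that alpha is applied after the sqrt, so it scales the norm itself rather
// than the squared sum.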
template <typename T>
void ColwiseReduceL2(
    const int rows,
    const int cols,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* /* context */) {
  ConstEigenArrayMap<T> X_arr(X, cols, rows);
  EigenVectorArrayMap<T> Y_arr(Y, cols);
  Y_arr = X_arr.col(0).square();
  for (int i = 1; i < rows; ++i) {
    Y_arr += X_arr.col(i).square();
  }
  Y_arr = Y_arr.sqrt() * alpha;
}

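// "Both ends" reduction: X is viewed as an M x N x K tensor and reduced over
// the outer (M) and inner (K) dimensions, leaving N outputs. Each M-slice is
// mapped column-major as K x N, so column j holds the K innermost elements
// belonging to output j.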
template <typename T>
void BothEndsReduceMin(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorArrayMap<T> Y_arr(Y, N);
  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().minCoeff();
  for (int i = 1; i < M; ++i) {
    ConstEigenArrayMap<T> X_arr(X + i * N * K, K, N);
    for (int j = 0; j < N; ++j) {
      Y[j] = std::min(Y[j], X_arr.col(j).minCoeff());
    }
  }
  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
}

template <typename T>
void BothEndsReduceMax(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorArrayMap<T> Y_arr(Y, N);
  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().maxCoeff();
  for (int i = 1; i < M; ++i) {
    ConstEigenArrayMap<T> X_arr(X + i * N * K, K, N);
    for (int j = 0; j < N; ++j) {
      Y[j] = std::max(Y[j], X_arr.col(j).maxCoeff());
    }
  }
  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
}

template <typename T>
void BothEndsReduceSum(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorArrayMap<T> Y_arr(Y, N);
  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().sum();
  for (int i = 1; i < M; ++i) {
    Y_arr +=
        ConstEigenArrayMap<T>(X + i * N * K, K, N).colwise().sum().transpose();
  }
  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
}

template <typename T>
void BothEndsReduceMean(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorArrayMap<T> Y_arr(Y, N);
  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().sum();
  for (int i = 1; i < M; ++i) {
    Y_arr +=
        ConstEigenArrayMap<T>(X + i * N * K, K, N).colwise().sum().transpose();
  }
  Scale<T, T, CPUContext>(N, alpha / static_cast<T>(M * K), Y, Y, context);
}

template <typename T>
void BothEndsReduceL1(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorMap<T> Y_vec(Y, N);
  Y_vec = ConstEigenMatrixMap<T>(X, K, N).colwise().template lpNorm<1>();
  for (int i = 1; i < M; ++i) {
    Y_vec += ConstEigenMatrixMap<T>(X + i * N * K, K, N)
                 .colwise()
                 .template lpNorm<1>()
                 .transpose();
  }
  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
}

template <typename T>
void BothEndsReduceL2(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* /* context */) {
  ConstEigenArrayMap<T> X0_arr(X, K, N);
  EigenVectorArrayMap<T> Y_arr(Y, N);
  for (int i = 0; i < N; ++i) {
    Y_arr(i) = X0_arr.col(i).square().sum();
  }
  for (int i = 1; i < M; ++i) {
    ConstEigenArrayMap<T> Xi_arr(X + i * N * K, K, N);
    for (int j = 0; j < N; ++j) {
      Y_arr(j) += Xi_arr.col(j).square().sum();
    }
  }
  Y_arr = Y_arr.sqrt() * alpha;
}

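// Fast path for the generic reduction, gated by can_use_broadcast_fastpath
// (defined in broadcast.h): for layouts where the reduced dimensions are the
// leading ones, X's linear index modulo Y_size is exactly the output index,
// so a single pass with a wrapping output index replaces the per-element
// multi-index arithmetic of ReduceTensorImpl below.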
template <typename T, class Reducer>
void ReduceTensorImplFastpath(
    const int X_size,
    const int Y_size,
    const Reducer& reducer,
    const T* X,
    T* Y) {
  int Y_index = 0;
  for (int X_index = 0; X_index < X_size; ++X_index) {
    Y[Y_index] = reducer(Y[Y_index], X[X_index]);
    Y_index++;
    if (Y_index >= Y_size) {
      Y_index = 0;
    }
  }
}

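// Generic reduction: Y is initialized to the reducer's identity element and
// every X element is folded into the output slot obtained by projecting the
// running multi-index onto Y_dims (reduced dimensions have Y_dims[i] == 1).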
template <typename T, class Reducer>
void ReduceTensorImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const Reducer& reducer,
    const T init,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  const auto X_size = c10::multiply_integers(X_dims, X_dims + ndim);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  Set<T, CPUContext>(Y_size, init, Y, context);
  if (allow_broadcast_fastpath && can_use_broadcast_fastpath(ndim, Y_dims)) {
    ReduceTensorImplFastpath(X_size, Y_size, reducer, X, Y);
    return;
  }
  std::vector<int> index(ndim, 0);
  for (int X_index = 0; X_index < X_size; ++X_index) {
    const int Y_index = utils::GetIndexFromDims(ndim, Y_dims, index.data());
    Y[Y_index] = reducer(Y[Y_index], X[X_index]);
    utils::IncreaseIndexInDims(ndim, X_dims, index.data());
  }
}

template <typename T>
void ReduceMinImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  ReduceTensorImpl(
      ndim,
      X_dims,
      Y_dims,
      [](const T a, const T b) { return std::min(a, b); },
      std::numeric_limits<T>::max(),
      X,
      Y,
      context,
      allow_broadcast_fastpath);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
}

template <typename T>
void ReduceMaxImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  ReduceTensorImpl(
      ndim,
      X_dims,
      Y_dims,
      [](const T a, const T b) { return std::max(a, b); },
      std::numeric_limits<T>::lowest(),
      X,
      Y,
      context,
      allow_broadcast_fastpath);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
}

template <typename T>
void ReduceSumImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  ReduceTensorImpl(
      ndim,
      X_dims,
      Y_dims,
      std::plus<T>(),
      T(0),
      X,
      Y,
      context,
      allow_broadcast_fastpath);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
}

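// Mean: reduce as a sum, then rescale. X_size / Y_size elements fold into
// each output, so multiplying by Y_size / X_size turns the sum into a mean.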
template <typename T>
void ReduceMeanImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  ReduceTensorImpl(
      ndim,
      X_dims,
      Y_dims,
      std::plus<T>(),
      T(0),
      X,
      Y,
      context,
      allow_broadcast_fastpath);
  const auto X_size = c10::multiply_integers(X_dims, X_dims + ndim);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  Scale<T, T, CPUContext>(
      Y_size,
      alpha * static_cast<T>(Y_size) / static_cast<T>(X_size),
      Y,
      Y,
      context);
}

template <typename T>
void ReduceL1Impl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  ReduceTensorImpl(
      ndim,
      X_dims,
      Y_dims,
      [](const T a, const T b) { return a + std::abs(b); },
      T(0),
      X,
      Y,
      context,
      allow_broadcast_fastpath);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
}

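// L2: fold in squares, then take an element-wise square root; as in the
// colwise case, alpha scales the final norm, not the squared sum.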
template <typename T>
void ReduceL2Impl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context,
    bool allow_broadcast_fastpath) {
  ReduceTensorImpl(
      ndim,
      X_dims,
      Y_dims,
      [](const T a, const T b) { return a + b * b; },
      T(0),
      X,
      Y,
      context,
      allow_broadcast_fastpath);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  EigenVectorArrayMap<T> Y_arr(Y, Y_size);
  Y_arr = Y_arr.sqrt() * alpha;
}

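// Rowwise moments: with X mapped column-major as cols x rows, each Eigen
// column is one row of X; its mean and population variance are computed in
// two passes over that column.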
template <typename T>
void RowwiseMoments(
    const int rows,
    const int cols,
    const T* X,
    T* mean,
    T* var) {
  ConstEigenArrayMap<T> X_arr(X, cols, rows);
  for (int i = 0; i < rows; ++i) {
    const T m = X_arr.col(i).mean();
    mean[i] = m;
    var[i] = (X_arr.col(i) - m).square().mean();
  }
}

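// Colwise moments via Welford's online update: the running mean and the sum
// of squared deviations are updated one row at a time, which is more
// numerically stable than accumulating raw sums and sums of squares.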
template <typename T>
void ColwiseMoments(
    const int rows,
    const int cols,
    const T* X,
    T* mean,
    T* var) {
  ConstEigenArrayMap<T> X_arr(X, cols, rows);
  EigenVectorArrayMap<T> mean_arr(mean, cols);
  EigenVectorArrayMap<T> var_arr(var, cols);
  EArrXt<T> delta_arr(cols);
  mean_arr.setZero();
  var_arr.setZero();
  for (int i = 0; i < rows; ++i) {
    delta_arr = X_arr.col(i) - mean_arr;
    mean_arr += delta_arr / static_cast<T>(i + 1);
    var_arr += delta_arr * (X_arr.col(i) - mean_arr);
  }
  var_arr /= static_cast<T>(rows);
}

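// Both-ends moments: accumulate per-output sums and sums of squares over the
// M and K dimensions, then apply var = E[x^2] - E[x]^2.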
template <typename T>
void BothEndsMoments(
    const int M,
    const int N,
    const int K,
    const T* X,
    T* mean,
    T* var) {
  ConstEigenArrayMap<T> X_arr(X, K, M * N);
  EigenVectorArrayMap<T> mean_arr(mean, N);
  EigenVectorArrayMap<T> var_arr(var, N);
  for (int i = 0; i < N; ++i) {
    mean_arr(i) = X_arr.col(i).sum();
    var_arr(i) = X_arr.col(i).square().sum();
  }
  for (int i = 1; i < M; ++i) {
    for (int j = 0; j < N; ++j) {
      const int c = i * N + j;
      mean_arr(j) += X_arr.col(c).sum();
      var_arr(j) += X_arr.col(c).square().sum();
    }
  }
  const T scale = T(1) / static_cast<T>(M * K);
  mean_arr *= scale;
  var_arr = var_arr * scale - mean_arr.square();
}

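// Moments dispatch: handle the empty and identity reductions first, then the
// rowwise / colwise / both-ends specializations, and fall back to a generic
// multi-index accumulation of sums and sums of squares.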
template <typename T>
void MomentsImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T* X,
    T* mean,
    T* var,
    CPUContext* /* context */,
    bool allow_broadcast_fastpath) {
  const auto X_size = c10::multiply_integers(X_dims, X_dims + ndim);
  const auto Y_size = c10::multiply_integers(Y_dims, Y_dims + ndim);
  if (X_size == 0) {
    std::memset(mean, 0, sizeof(T) * Y_size);
    std::memset(var, 0, sizeof(T) * Y_size);
    return;
  }
  if (std::equal(X_dims, X_dims + ndim, Y_dims)) {
    std::memcpy(mean, X, sizeof(T) * Y_size);
    std::memset(var, 0, sizeof(T) * Y_size);
    return;
  }
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int rows;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int cols;
  if (utils::IsRowwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) {
    RowwiseMoments<T>(rows, cols, X, mean, var);
    return;
  }
  if (utils::IsColwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) {
    ColwiseMoments<T>(rows, cols, X, mean, var);
    return;
  }
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int pre;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int mid;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int nxt;
  if (utils::IsBothEndsReduce(ndim, X_dims, Y_dims, &pre, &mid, &nxt)) {
    BothEndsMoments<T>(pre, mid, nxt, X, mean, var);
    return;
  }
  std::memset(mean, 0, sizeof(T) * Y_size);
  std::memset(var, 0, sizeof(T) * Y_size);
  std::vector<int> index(ndim, 0);
  for (int X_index = 0; X_index < X_size; ++X_index) {
    const int Y_index = utils::GetIndexFromDims(ndim, Y_dims, index.data());
    mean[Y_index] += X[X_index];
    var[Y_index] += X[X_index] * X[X_index];
    utils::IncreaseIndexInDims(ndim, X_dims, index.data());
  }
  const T scale = static_cast<T>(Y_size) / static_cast<T>(X_size);
  EigenVectorArrayMap<T> mean_arr(mean, Y_size);
  EigenVectorArrayMap<T> var_arr(var, Y_size);
  mean_arr *= scale;
  var_arr = var_arr * scale - mean_arr.square();
}

} // namespace

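// Full reductions over all N elements of X, writing the scalar result to *Y.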
#define DELEGATE_GLOBAL_REDUCE_FUNCTION(T, Func, EigenFunc) \
  template <> \
  C10_EXPORT void Func<T, CPUContext>( \
      const int N, \
      const T* X, \
      T* Y, \
      Tensor* /* scratch_ptr */, \
      CPUContext* /* context */) { \
    *Y = ConstEigenVectorArrayMap<T>(X, N).EigenFunc(); \
  }
DELEGATE_GLOBAL_REDUCE_FUNCTION(float, ReduceMin, minCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int32_t, ReduceMin, minCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int64_t, ReduceMin, minCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(float, ReduceMax, maxCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int32_t, ReduceMax, maxCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int64_t, ReduceMax, maxCoeff)
#undef DELEGATE_GLOBAL_REDUCE_FUNCTION

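// General N-dimensional reduction entry point. Trivial cases (empty input,
// alpha == 0, no reduced dimensions) are handled inline; everything else is
// dispatched to the rowwise / colwise / both-ends specializations, with
// Func##Impl as the generic fallback. kIsNorm marks L1/L2, whose identity
// (X_dims == Y_dims) reduction is |x| * alpha rather than x * alpha.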
#define DELEGATE_REDUCE_FUNCTION(T, Func, kInit, kIsNorm) \
  template <> \
  C10_EXPORT void Func<T, CPUContext>( \
      const int ndim, \
      const int* X_dims, \
      const int* Y_dims, \
      const T alpha, \
      const T* X, \
      T* Y, \
      CPUContext* context, \
      bool allow_broadcast_fastpath) { \
    const int X_size = \
        std::accumulate(X_dims, X_dims + ndim, 1, std::multiplies<int>()); \
    const int Y_size = \
        std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>()); \
    if (X_size == 0) { \
      Set<T, CPUContext>(Y_size, alpha * kInit, Y, context); \
      return; \
    } \
    if (alpha == T(0)) { \
      std::memset(Y, 0, sizeof(T) * Y_size); \
      return; \
    } \
    if (std::equal(X_dims, X_dims + ndim, Y_dims)) { \
      if (kIsNorm) { \
        EigenVectorArrayMap<T>(Y, Y_size) = \
            ConstEigenVectorArrayMap<T>(X, X_size).abs() * alpha; \
      } else { \
        Scale<T, T, CPUContext>(Y_size, alpha, X, Y, context); \
      } \
      return; \
    } \
    int rows; \
    int cols; \
    if (utils::IsRowwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) { \
      Rowwise##Func<T>(rows, cols, alpha, X, Y, context); \
      return; \
    } \
    if (utils::IsColwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) { \
      Colwise##Func<T>(rows, cols, alpha, X, Y, context); \
      return; \
    } \
    int M; \
    int N; \
    int K; \
    if (utils::IsBothEndsReduce(ndim, X_dims, Y_dims, &M, &N, &K)) { \
      BothEnds##Func<T>(M, N, K, alpha, X, Y, context); \
      return; \
    } \
    Func##Impl<T>(ndim, X_dims, Y_dims, alpha, X, Y, \
                  context, allow_broadcast_fastpath); \
  }
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    float,
    ReduceMin,
    std::numeric_limits<float>::max(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    double,
    ReduceMin,
    std::numeric_limits<double>::max(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    std::int32_t,
    ReduceMin,
    std::numeric_limits<std::int32_t>::max(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    std::int64_t,
    ReduceMin,
    std::numeric_limits<std::int64_t>::max(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    float,
    ReduceMax,
    std::numeric_limits<float>::lowest(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    double,
    ReduceMax,
    std::numeric_limits<double>::lowest(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    std::int32_t,
    ReduceMax,
    std::numeric_limits<std::int32_t>::lowest(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(
    std::int64_t,
    ReduceMax,
    std::numeric_limits<std::int64_t>::lowest(),
    false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(float, ReduceSum, 0.0f, false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(double, ReduceSum, 0.0, false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(std::int32_t, ReduceSum, 0, false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(std::int64_t, ReduceSum, 0LL, false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(float, ReduceMean, 0.0f, false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(double, ReduceMean, 0.0, false)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(float, ReduceL1, 0.0f, true)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(double, ReduceL1, 0.0, true)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(std::int32_t, ReduceL1, 0, true)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(std::int64_t, ReduceL1, 0LL, true)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(float, ReduceL2, 0.0f, true)
// NOLINTNEXTLINE(modernize-use-transparent-functors)
DELEGATE_REDUCE_FUNCTION(double, ReduceL2, 0.0, true)
#undef DELEGATE_REDUCE_FUNCTION

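// Moments over an N-dimensional tensor: the mean and population variance of
// the reduced elements for each output position.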
#define CAFFE2_SPECIALIZED_MOMENTS(T) \
  template <> \
  C10_EXPORT void Moments<T, CPUContext>( \
      const int ndim, \
      const int* X_dims, \
      const int* Y_dims, \
      const T* X, \
      T* mean, \
      T* var, \
      CPUContext* context, \
      bool allow_broadcast_fastpath) { \
    MomentsImpl<T>(ndim, X_dims, Y_dims, X, mean, var, \
                   context, allow_broadcast_fastpath); \
  }
CAFFE2_SPECIALIZED_MOMENTS(float)
CAFFE2_SPECIALIZED_MOMENTS(double)
#undef CAFFE2_SPECIALIZED_MOMENTS

} // namespace math
} // namespace caffe2