| #include <array> |
| #include <memory> |
| #include <vector> |
| |
| #include <gtest/gtest.h> |
| |
| #include "caffe2/core/blob.h" |
| #include "caffe2/core/context.h" |
| #include "caffe2/core/tensor.h" |
| #include "caffe2/proto/caffe2_pb.h" |
| #include "caffe2/utils/conversions.h" |
| #include "caffe2/utils/math.h" |
| |
| #include <c10/util/irange.h> |
| |
| namespace caffe2 { |
| |
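// Y = alpha * X * W + beta * Y with a 5x10 all-ones X and a 10x6 all-ones W,
// so every entry of the 5x6 product equals K = 10. The calls below cover the
// plain product (beta = 0) and both accumulation paths.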
| TEST(MathTest, GemmNoTransNoTrans) { |
| DeviceOption option; |
| CPUContext cpu_context(option); |
| Tensor X(std::vector<int>{5, 10}, CPU); |
| Tensor W(std::vector<int>{10, 6}, CPU); |
| Tensor Y(std::vector<int>{5, 6}, CPU); |
| EXPECT_EQ(X.numel(), 50); |
| EXPECT_EQ(W.numel(), 60); |
| math::Set<float, CPUContext>( |
| X.numel(), 1, X.mutable_data<float>(), &cpu_context); |
| math::Set<float, CPUContext>( |
| W.numel(), 1, W.mutable_data<float>(), &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(X.numel())) {
    TORCH_CHECK_EQ(X.data<float>()[i], 1);
  }
  for (const auto i : c10::irange(W.numel())) {
    TORCH_CHECK_EQ(W.data<float>()[i], 1);
  }
| |
| const float kOne = 1.0; |
| const float kPointFive = 0.5; |
| const float kZero = 0.0; |
| math::Gemm<float, CPUContext>( |
| CblasNoTrans, |
| CblasNoTrans, |
| 5, |
| 6, |
| 10, |
| kOne, |
| X.data<float>(), |
| W.data<float>(), |
| kZero, |
| Y.mutable_data<float>(), |
| &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 10) << i;
  }
  // Test accumulate: beta = 0.5 halves the existing output before adding the
  // new product (10 * 0.5 + 10 = 15).
| math::Gemm<float, CPUContext>( |
| CblasNoTrans, |
| CblasNoTrans, |
| 5, |
| 6, |
| 10, |
| kOne, |
| X.data<float>(), |
| W.data<float>(), |
| kPointFive, |
| Y.mutable_data<float>(), |
| &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 15) << i;
  }
  // Test accumulate: alpha = 0.5 halves the new product before adding it to
  // the existing output (15 + 10 * 0.5 = 20).
| math::Gemm<float, CPUContext>( |
| CblasNoTrans, |
| CblasNoTrans, |
| 5, |
| 6, |
| 10, |
| kPointFive, |
| X.data<float>(), |
| W.data<float>(), |
| kOne, |
| Y.mutable_data<float>(), |
| &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 20) << i;
  }
| } |
| |
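// Same product as above, but W is stored as its 6x10 transpose and passed
// with CblasTrans; the all-ones operands make the expected values identical.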
| TEST(MathTest, GemmNoTransTrans) { |
| DeviceOption option; |
| CPUContext cpu_context(option); |
| Tensor X(std::vector<int>{5, 10}, CPU); |
| Tensor W(std::vector<int>{6, 10}, CPU); |
| Tensor Y(std::vector<int>{5, 6}, CPU); |
| EXPECT_EQ(X.numel(), 50); |
| EXPECT_EQ(W.numel(), 60); |
| math::Set<float, CPUContext>( |
| X.numel(), 1, X.mutable_data<float>(), &cpu_context); |
| math::Set<float, CPUContext>( |
| W.numel(), 1, W.mutable_data<float>(), &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(X.numel())) {
    TORCH_CHECK_EQ(X.data<float>()[i], 1);
  }
  for (const auto i : c10::irange(W.numel())) {
    TORCH_CHECK_EQ(W.data<float>()[i], 1);
  }
| |
| const float kOne = 1.0; |
| const float kPointFive = 0.5; |
| const float kZero = 0.0; |
| math::Gemm<float, CPUContext>( |
| CblasNoTrans, |
| CblasTrans, |
| 5, |
| 6, |
| 10, |
| kOne, |
| X.data<float>(), |
| W.data<float>(), |
| kZero, |
| Y.mutable_data<float>(), |
| &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 10) << i;
  }
  // Test accumulate: beta = 0.5 (10 * 0.5 + 10 = 15).
| math::Gemm<float, CPUContext>( |
| CblasNoTrans, |
| CblasTrans, |
| 5, |
| 6, |
| 10, |
| kOne, |
| X.data<float>(), |
| W.data<float>(), |
| kPointFive, |
| Y.mutable_data<float>(), |
| &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 15) << i;
  }
| math::Gemm<float, CPUContext>( |
| CblasNoTrans, |
| CblasTrans, |
| 5, |
| 6, |
| 10, |
| kPointFive, |
| X.data<float>(), |
| W.data<float>(), |
| kOne, |
| Y.mutable_data<float>(), |
| &cpu_context); |
| EXPECT_EQ(Y.numel(), 30); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 20) << i;
  }
| } |
| |
| namespace { |
| |
| constexpr float kEps = 1e-5; |
| |
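// Fixture parameterized over (trans_X, trans_W). Each of the 3 batch items
// multiplies an all-ones 5x10 slice of X_ by an all-ones 6x10 slice of W_;
// since every operand entry is 1, the expected output (K = 10 per element)
// is the same for all four transpose combinations.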
| // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) |
| class GemmBatchedTest |
| : public testing::TestWithParam<testing::tuple<bool, bool>> { |
| protected: |
| void SetUp() override { |
    cpu_context_ = std::make_unique<CPUContext>(option_);
| ReinitializeTensor( |
| &X_, std::vector<int64_t>{3, 5, 10}, at::dtype<float>().device(CPU)); |
| ReinitializeTensor( |
| &W_, std::vector<int64_t>{3, 6, 10}, at::dtype<float>().device(CPU)); |
| ReinitializeTensor( |
| &Y_, std::vector<int64_t>{3, 5, 6}, at::dtype<float>().device(CPU)); |
| math::Set<float, CPUContext>( |
| X_.numel(), 1, X_.mutable_data<float>(), cpu_context_.get()); |
| math::Set<float, CPUContext>( |
| W_.numel(), 1, W_.mutable_data<float>(), cpu_context_.get()); |
| trans_X_ = std::get<0>(GetParam()); |
| trans_W_ = std::get<1>(GetParam()); |
| } |
| |
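  // Pointer-array form of GemmBatched: each batch item is addressed by an
  // explicit pointer into the contiguous X_, W_, and Y_ buffers.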
| void RunGemmBatched(const float alpha, const float beta) { |
| const float* X_data = X_.template data<float>(); |
| const float* W_data = W_.template data<float>(); |
| float* Y_data = Y_.template mutable_data<float>(); |
| const int X_stride = 5 * 10; |
| const int W_stride = 6 * 10; |
| const int Y_stride = 5 * 6; |
| std::array<const float*, 3> X_array = { |
| X_data, X_data + X_stride, X_data + 2 * X_stride}; |
| std::array<const float*, 3> W_array = { |
| W_data, W_data + W_stride, W_data + 2 * W_stride}; |
| std::array<float*, 3> Y_array = { |
| Y_data, Y_data + Y_stride, Y_data + 2 * Y_stride}; |
    math::GemmBatched<float, CPUContext>(
| trans_X_ ? CblasTrans : CblasNoTrans, |
| trans_W_ ? CblasTrans : CblasNoTrans, |
| 3, |
| 5, |
| 6, |
| 10, |
| alpha, |
| X_array.data(), |
| W_array.data(), |
| beta, |
| Y_array.data(), |
| cpu_context_.get()); |
| } |
| |
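  // Strided form: one base pointer per operand plus a fixed element stride
  // between consecutive batch items, matching the contiguous layout above.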
| void RunGemmStridedBatched(const float alpha, const float beta) { |
| const float* X_data = X_.template data<float>(); |
| const float* W_data = W_.template data<float>(); |
| float* Y_data = Y_.template mutable_data<float>(); |
| const int X_stride = 5 * 10; |
| const int W_stride = 6 * 10; |
| const int Y_stride = 5 * 6; |
| math::GemmStridedBatched<float, CPUContext>( |
| trans_X_ ? CblasTrans : CblasNoTrans, |
| trans_W_ ? CblasTrans : CblasNoTrans, |
| 3, |
| 5, |
| 6, |
| 10, |
| alpha, |
| X_data, |
| X_stride, |
| W_data, |
| W_stride, |
| beta, |
| Y_data, |
| Y_stride, |
| cpu_context_.get()); |
| } |
| |
| void VerifyOutput(const float value) const { |
    for (const auto i : c10::irange(Y_.numel())) {
      EXPECT_FLOAT_EQ(value, Y_.template data<float>()[i]);
    }
| } |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| DeviceOption option_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| std::unique_ptr<CPUContext> cpu_context_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| Tensor X_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| Tensor W_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| Tensor Y_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| bool trans_X_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| bool trans_W_; |
| }; |
| |
| TEST_P(GemmBatchedTest, GemmBatchedFloatTest) { |
| RunGemmBatched(1.0f, 0.0f); |
| VerifyOutput(10.0f); |
| RunGemmBatched(1.0f, 0.5f); |
| VerifyOutput(15.0f); |
| RunGemmBatched(0.5f, 1.0f); |
| VerifyOutput(20.0f); |
| } |
| |
| TEST_P(GemmBatchedTest, GemmStridedBatchedFloatTest) { |
| RunGemmStridedBatched(1.0f, 0.0f); |
| VerifyOutput(10.0f); |
| RunGemmStridedBatched(1.0f, 0.5f); |
| VerifyOutput(15.0f); |
| RunGemmStridedBatched(0.5f, 1.0f); |
| VerifyOutput(20.0f); |
| } |
| |
INSTANTIATE_TEST_SUITE_P(
| GemmBatchedTrans, |
| GemmBatchedTest, |
| testing::Combine(testing::Bool(), testing::Bool())); |
| |
| } // namespace |
| |
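// y = alpha * A * x + beta * y with a 5x10 all-ones A and an all-ones x of
// length 10, so each of the 5 outputs is 10 before accumulation.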
| TEST(MathTest, GemvNoTrans) { |
| DeviceOption option; |
| CPUContext cpu_context(option); |
| Tensor A(std::vector<int>{5, 10}, CPU); |
| Tensor X(std::vector<int>{10}, CPU); |
| Tensor Y(std::vector<int>{5}, CPU); |
| EXPECT_EQ(A.numel(), 50); |
| EXPECT_EQ(X.numel(), 10); |
| math::Set<float, CPUContext>( |
| A.numel(), 1, A.mutable_data<float>(), &cpu_context); |
| math::Set<float, CPUContext>( |
| X.numel(), 1, X.mutable_data<float>(), &cpu_context); |
| EXPECT_EQ(Y.numel(), 5); |
  for (const auto i : c10::irange(A.numel())) {
    TORCH_CHECK_EQ(A.data<float>()[i], 1);
  }
  for (const auto i : c10::irange(X.numel())) {
    TORCH_CHECK_EQ(X.data<float>()[i], 1);
  }
| |
| const float kOne = 1.0; |
| const float kPointFive = 0.5; |
| const float kZero = 0.0; |
| math::Gemv<float, CPUContext>( |
| CblasNoTrans, |
| 5, |
| 10, |
| kOne, |
| A.data<float>(), |
| X.data<float>(), |
| kZero, |
| Y.mutable_data<float>(), |
| &cpu_context); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 10) << i;
  }
  // Test accumulate: beta = 0.5 (10 * 0.5 + 10 = 15).
| math::Gemv<float, CPUContext>( |
| CblasNoTrans, |
| 5, |
| 10, |
| kOne, |
| A.data<float>(), |
| X.data<float>(), |
| kPointFive, |
| Y.mutable_data<float>(), |
| &cpu_context); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 15) << i;
  }
  // Test accumulate: alpha = 0.5 (15 + 10 * 0.5 = 20).
| math::Gemv<float, CPUContext>( |
| CblasNoTrans, |
| 5, |
| 10, |
| kPointFive, |
| A.data<float>(), |
| X.data<float>(), |
| kOne, |
| Y.mutable_data<float>(), |
| &cpu_context); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 20) << i;
  }
| } |
| |
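// y = alpha * A^T * x + beta * y with a 6x10 all-ones A and an all-ones x of
// length 6, so each of the 10 outputs is 6 before accumulation.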
| TEST(MathTest, GemvTrans) { |
| DeviceOption option; |
| CPUContext cpu_context(option); |
| Tensor A(std::vector<int>{6, 10}, CPU); |
| Tensor X(std::vector<int>{6}, CPU); |
| Tensor Y(std::vector<int>{10}, CPU); |
| EXPECT_EQ(A.numel(), 60); |
| EXPECT_EQ(X.numel(), 6); |
| math::Set<float, CPUContext>( |
| A.numel(), 1, A.mutable_data<float>(), &cpu_context); |
| math::Set<float, CPUContext>( |
| X.numel(), 1, X.mutable_data<float>(), &cpu_context); |
| EXPECT_EQ(Y.numel(), 10); |
  for (const auto i : c10::irange(A.numel())) {
    TORCH_CHECK_EQ(A.data<float>()[i], 1);
  }
  for (const auto i : c10::irange(X.numel())) {
    TORCH_CHECK_EQ(X.data<float>()[i], 1);
  }
| |
| const float kOne = 1.0; |
| const float kPointFive = 0.5; |
| const float kZero = 0.0; |
| math::Gemv<float, CPUContext>( |
| CblasTrans, |
| 6, |
| 10, |
| kOne, |
| A.data<float>(), |
| X.data<float>(), |
| kZero, |
| Y.mutable_data<float>(), |
| &cpu_context); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 6) << i;
  }
  // Test accumulate: beta = 0.5 (6 * 0.5 + 6 = 9).
| math::Gemv<float, CPUContext>( |
| CblasTrans, |
| 6, |
| 10, |
| kOne, |
| A.data<float>(), |
| X.data<float>(), |
| kPointFive, |
| Y.mutable_data<float>(), |
| &cpu_context); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 9) << i;
  }
  // Test accumulate: alpha = 0.5 (9 + 6 * 0.5 = 12).
| math::Gemv<float, CPUContext>( |
| CblasTrans, |
| 6, |
| 10, |
| kPointFive, |
| A.data<float>(), |
| X.data<float>(), |
| kOne, |
| Y.mutable_data<float>(), |
| &cpu_context); |
  for (const auto i : c10::irange(Y.numel())) {
    TORCH_CHECK_EQ(Y.data<float>()[i], 12) << i;
  }
| } |
| |
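// 1.0, 1.75, and 128.125 are exactly representable in IEEE half precision
// (each mantissa fits in 10 bits), so the float -> Half -> float round trip
// must be lossless.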
| TEST(MathTest, FloatToHalfConversion) { |
| float a = 1.0f; |
| float b = 1.75f; |
| float c = 128.125f; |
| |
| float converted_a = static_cast<float>(at::Half(a)); |
| float converted_b = static_cast<float>(at::Half(b)); |
| float converted_c = static_cast<float>(at::Half(c)); |
| |
| TORCH_CHECK_EQ(a, converted_a); |
| TORCH_CHECK_EQ(b, converted_b); |
| TORCH_CHECK_EQ(c, converted_c); |
| } |
| |
| namespace { |
| |
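// Broadcasts X of shape X_dims to Y of shape Y_dims with trailing dimensions
// aligned (numpy-style) and checks the result element-wise, exercising both
// the generic path and the fast path.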
| class BroadcastTest : public testing::Test { |
| protected: |
| void SetUp() override { |
    cpu_context_ = std::make_unique<CPUContext>(option_);
| } |
| |
| void RunBroadcastTest( |
| const std::vector<int>& X_dims, |
| const std::vector<int>& Y_dims, |
| const std::vector<float>& X_data, |
| const std::vector<float>& Y_data) { |
| std::vector<int64_t> X_dims_64; |
| std::vector<int64_t> Y_dims_64; |
| std::copy(X_dims.cbegin(), X_dims.cend(), std::back_inserter(X_dims_64)); |
| std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64)); |
| ReinitializeTensor(&X_, X_dims_64, at::dtype<float>().device(CPU)); |
| ReinitializeTensor(&Y_, Y_dims_64, at::dtype<float>().device(CPU)); |
| ASSERT_EQ(X_data.size(), X_.numel()); |
| cpu_context_->CopyFromCPU<float>( |
| X_data.size(), X_data.data(), X_.mutable_data<float>()); |
| for (bool allow_broadcast_fastpath : {false, true}) { |
| math::Broadcast<float, CPUContext>( |
| X_dims.size(), |
| X_dims.data(), |
| Y_dims.size(), |
| Y_dims.data(), |
| 1.0f, |
| X_.data<float>(), |
| Y_.mutable_data<float>(), |
| cpu_context_.get(), |
| allow_broadcast_fastpath); |
| ASSERT_EQ(Y_data.size(), Y_.numel()); |
| for (const auto i : c10::irange(Y_data.size())) { |
| EXPECT_FLOAT_EQ(Y_data[i], Y_.data<float>()[i]); |
| } |
| } |
| } |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| DeviceOption option_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| std::unique_ptr<CPUContext> cpu_context_; |
| |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| Tensor X_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| Tensor Y_; |
| }; |
| |
| TEST_F(BroadcastTest, BroadcastFloatTest) { |
| RunBroadcastTest({2}, {2}, {1.0f, 2.0f}, {1.0f, 2.0f}); |
| RunBroadcastTest({1}, {2}, {1.0f}, {1.0f, 1.0f}); |
| RunBroadcastTest({1}, {2, 2}, {1.0f}, {1.0f, 1.0f, 1.0f, 1.0f}); |
| RunBroadcastTest({2, 1}, {2, 2}, {1.0f, 2.0f}, {1.0f, 1.0f, 2.0f, 2.0f}); |
| RunBroadcastTest({1, 2}, {2, 2}, {1.0f, 2.0f}, {1.0f, 2.0f, 1.0f, 2.0f}); |
| RunBroadcastTest( |
| {2, 1}, |
| {2, 2, 2}, |
| {1.0f, 2.0f}, |
| {1.0f, 1.0f, 2.0f, 2.0f, 1.0f, 1.0f, 2.0f, 2.0f}); |
| RunBroadcastTest( |
| {1, 2}, |
| {2, 2, 2}, |
| {1.0f, 2.0f}, |
| {1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}); |
| } |
| |
| class RandFixedSumTest : public testing::Test { |
| protected: |
| void SetUp() override { |
    cpu_context_ = std::make_unique<CPUContext>(option_);
| } |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| DeviceOption option_; |
| // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) |
| std::unique_ptr<CPUContext> cpu_context_; |
| }; |
| |
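// Requests 20 integers in [1, 1000] summing to exactly 1000, stressing the
// boundary where the requested sum equals the per-element upper bound.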
| TEST_F(RandFixedSumTest, UpperBound) { |
| std::vector<int> l(20); |
| math::RandFixedSum<int, CPUContext>( |
| 20, 1, 1000, 1000, l.data(), cpu_context_.get()); |
| } |
| |
| } // namespace |
| |
| } // namespace caffe2 |