| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <android-base/logging.h> |
| #include <gmock/gmock.h> |
| #include <gtest/gtest.h> |
| |
| #include <sstream> |
| #include <string> |
| #include <vector> |
| |
| #include "LSTM.h" |
| #include "NeuralNetworksWrapper.h" |
| |
| namespace android { |
| namespace nn { |
| namespace wrapper { |
| |
| using ::testing::Each; |
| using ::testing::FloatNear; |
| using ::testing::Matcher; |
| |
| namespace { |
| |
| std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float>& values, |
| float max_abs_error = 1.e-6) { |
| std::vector<Matcher<float>> matchers; |
| matchers.reserve(values.size()); |
| for (const float& v : values) { |
| matchers.emplace_back(FloatNear(v, max_abs_error)); |
| } |
| return matchers; |
| } |
| |
| } // anonymous namespace |
| |
// X-macro applying ACTION(Name) to every data tensor the LSTM op consumes.
// The order here is load-bearing: it must match both the operand order of
// ANEURALNETWORKS_LSTM and the shape list passed to the LayerNormLSTMOpModel
// constructor (the LSTMCell::k<Name>Tensor indices used in Invoke() follow
// the same order).
#define FOR_ALL_INPUT_AND_WEIGHT_TENSORS(ACTION) \
    ACTION(Input)                                \
    ACTION(InputToInputWeights)                  \
    ACTION(InputToCellWeights)                   \
    ACTION(InputToForgetWeights)                 \
    ACTION(InputToOutputWeights)                 \
    ACTION(RecurrentToInputWeights)              \
    ACTION(RecurrentToCellWeights)               \
    ACTION(RecurrentToForgetWeights)             \
    ACTION(RecurrentToOutputWeights)             \
    ACTION(CellToInputWeights)                   \
    ACTION(CellToForgetWeights)                  \
    ACTION(CellToOutputWeights)                  \
    ACTION(InputGateBias)                        \
    ACTION(CellGateBias)                         \
    ACTION(ForgetGateBias)                       \
    ACTION(OutputGateBias)                       \
    ACTION(ProjectionWeights)                    \
    ACTION(ProjectionBias)                       \
    ACTION(OutputStateIn)                        \
    ACTION(CellStateIn)

// The four per-gate layer normalization weight tensors; these trail the
// scalar parameters in the operand list (see the constructor below).
#define FOR_ALL_LAYER_NORM_WEIGHTS(ACTION) \
    ACTION(InputLayerNormWeights)          \
    ACTION(ForgetLayerNormWeights)         \
    ACTION(CellLayerNormWeights)           \
    ACTION(OutputLayerNormWeights)

// For all output and intermediate states
#define FOR_ALL_OUTPUT_TENSORS(ACTION) \
    ACTION(ScratchBuffer)              \
    ACTION(OutputStateOut)             \
    ACTION(CellStateOut)               \
    ACTION(Output)
| |
| class LayerNormLSTMOpModel { |
| public: |
| LayerNormLSTMOpModel(uint32_t n_batch, uint32_t n_input, uint32_t n_cell, uint32_t n_output, |
| bool use_cifg, bool use_peephole, bool use_projection_weights, |
| bool use_projection_bias, float cell_clip, float proj_clip, |
| const std::vector<std::vector<uint32_t>>& input_shapes0) |
| : n_input_(n_input), |
| n_output_(n_output), |
| use_cifg_(use_cifg), |
| use_peephole_(use_peephole), |
| use_projection_weights_(use_projection_weights), |
| use_projection_bias_(use_projection_bias), |
| activation_(ActivationFn::kActivationTanh), |
| cell_clip_(cell_clip), |
| proj_clip_(proj_clip) { |
| std::vector<uint32_t> inputs; |
| std::vector<std::vector<uint32_t>> input_shapes(input_shapes0); |
| |
| auto it = input_shapes.begin(); |
| |
| // Input and weights |
| #define AddInput(X) \ |
| CHECK(it != input_shapes.end()); \ |
| OperandType X##OpndTy(Type::TENSOR_FLOAT32, *it++); \ |
| inputs.push_back(model_.addOperand(&X##OpndTy)); |
| |
| FOR_ALL_INPUT_AND_WEIGHT_TENSORS(AddInput); |
| |
| // Parameters |
| OperandType ActivationOpndTy(Type::INT32, {}); |
| inputs.push_back(model_.addOperand(&ActivationOpndTy)); |
| OperandType CellClipOpndTy(Type::FLOAT32, {}); |
| inputs.push_back(model_.addOperand(&CellClipOpndTy)); |
| OperandType ProjClipOpndTy(Type::FLOAT32, {}); |
| inputs.push_back(model_.addOperand(&ProjClipOpndTy)); |
| |
| FOR_ALL_LAYER_NORM_WEIGHTS(AddInput); |
| |
| #undef AddOperand |
| |
| // Output and other intermediate state |
| std::vector<std::vector<uint32_t>> output_shapes{ |
| {n_batch, n_cell * (use_cifg ? 3 : 4)}, |
| {n_batch, n_output}, |
| {n_batch, n_cell}, |
| {n_batch, n_output}, |
| }; |
| std::vector<uint32_t> outputs; |
| |
| auto it2 = output_shapes.begin(); |
| |
| #define AddOutput(X) \ |
| CHECK(it2 != output_shapes.end()); \ |
| OperandType X##OpndTy(Type::TENSOR_FLOAT32, *it2++); \ |
| outputs.push_back(model_.addOperand(&X##OpndTy)); |
| |
| FOR_ALL_OUTPUT_TENSORS(AddOutput); |
| |
| #undef AddOutput |
| |
| model_.addOperation(ANEURALNETWORKS_LSTM, inputs, outputs); |
| model_.identifyInputsAndOutputs(inputs, outputs); |
| |
| Input_.insert(Input_.end(), n_batch * n_input, 0.f); |
| OutputStateIn_.insert(OutputStateIn_.end(), n_batch * n_output, 0.f); |
| CellStateIn_.insert(CellStateIn_.end(), n_batch * n_cell, 0.f); |
| |
| auto multiAll = [](const std::vector<uint32_t>& dims) -> uint32_t { |
| uint32_t sz = 1; |
| for (uint32_t d : dims) { |
| sz *= d; |
| } |
| return sz; |
| }; |
| |
| it2 = output_shapes.begin(); |
| |
| #define ReserveOutput(X) X##_.insert(X##_.end(), multiAll(*it2++), 0.f); |
| |
| FOR_ALL_OUTPUT_TENSORS(ReserveOutput); |
| |
| #undef ReserveOutput |
| |
| model_.finish(); |
| } |
| |
| #define DefineSetter(X) \ |
| void Set##X(const std::vector<float>& f) { X##_.insert(X##_.end(), f.begin(), f.end()); } |
| |
| FOR_ALL_INPUT_AND_WEIGHT_TENSORS(DefineSetter); |
| FOR_ALL_LAYER_NORM_WEIGHTS(DefineSetter); |
| |
| #undef DefineSetter |
| |
| void ResetOutputState() { |
| std::fill(OutputStateIn_.begin(), OutputStateIn_.end(), 0.f); |
| std::fill(OutputStateOut_.begin(), OutputStateOut_.end(), 0.f); |
| } |
| |
| void ResetCellState() { |
| std::fill(CellStateIn_.begin(), CellStateIn_.end(), 0.f); |
| std::fill(CellStateOut_.begin(), CellStateOut_.end(), 0.f); |
| } |
| |
| void SetInput(int offset, const float* begin, const float* end) { |
| for (; begin != end; begin++, offset++) { |
| Input_[offset] = *begin; |
| } |
| } |
| |
| uint32_t num_inputs() const { return n_input_; } |
| uint32_t num_outputs() const { return n_output_; } |
| |
| const std::vector<float>& GetOutput() const { return Output_; } |
| |
| void Invoke() { |
| ASSERT_TRUE(model_.isValid()); |
| |
| OutputStateIn_.swap(OutputStateOut_); |
| CellStateIn_.swap(CellStateOut_); |
| |
| Compilation compilation(&model_); |
| compilation.finish(); |
| Execution execution(&compilation); |
| #define SetInputOrWeight(X) \ |
| ASSERT_EQ( \ |
| execution.setInput(LSTMCell::k##X##Tensor, X##_.data(), sizeof(float) * X##_.size()), \ |
| Result::NO_ERROR); |
| |
| FOR_ALL_INPUT_AND_WEIGHT_TENSORS(SetInputOrWeight); |
| FOR_ALL_LAYER_NORM_WEIGHTS(SetInputOrWeight); |
| |
| #undef SetInputOrWeight |
| |
| #define SetOutput(X) \ |
| ASSERT_EQ( \ |
| execution.setOutput(LSTMCell::k##X##Tensor, X##_.data(), sizeof(float) * X##_.size()), \ |
| Result::NO_ERROR); |
| |
| FOR_ALL_OUTPUT_TENSORS(SetOutput); |
| |
| #undef SetOutput |
| |
| if (use_cifg_) { |
| execution.setInput(LSTMCell::kInputToInputWeightsTensor, nullptr, 0); |
| execution.setInput(LSTMCell::kRecurrentToInputWeightsTensor, nullptr, 0); |
| } |
| |
| if (use_peephole_) { |
| if (use_cifg_) { |
| execution.setInput(LSTMCell::kCellToInputWeightsTensor, nullptr, 0); |
| } |
| } else { |
| execution.setInput(LSTMCell::kCellToInputWeightsTensor, nullptr, 0); |
| execution.setInput(LSTMCell::kCellToForgetWeightsTensor, nullptr, 0); |
| execution.setInput(LSTMCell::kCellToOutputWeightsTensor, nullptr, 0); |
| } |
| |
| if (use_projection_weights_) { |
| if (!use_projection_bias_) { |
| execution.setInput(LSTMCell::kProjectionBiasTensor, nullptr, 0); |
| } |
| } else { |
| execution.setInput(LSTMCell::kProjectionWeightsTensor, nullptr, 0); |
| execution.setInput(LSTMCell::kProjectionBiasTensor, nullptr, 0); |
| } |
| |
| ASSERT_EQ(execution.setInput(LSTMCell::kActivationParam, &activation_, sizeof(activation_)), |
| Result::NO_ERROR); |
| ASSERT_EQ(execution.setInput(LSTMCell::kCellClipParam, &cell_clip_, sizeof(cell_clip_)), |
| Result::NO_ERROR); |
| ASSERT_EQ(execution.setInput(LSTMCell::kProjClipParam, &proj_clip_, sizeof(proj_clip_)), |
| Result::NO_ERROR); |
| |
| ASSERT_EQ(execution.compute(), Result::NO_ERROR); |
| } |
| |
| private: |
| Model model_; |
| // Execution execution_; |
| const uint32_t n_input_; |
| const uint32_t n_output_; |
| |
| const bool use_cifg_; |
| const bool use_peephole_; |
| const bool use_projection_weights_; |
| const bool use_projection_bias_; |
| |
| const int activation_; |
| const float cell_clip_; |
| const float proj_clip_; |
| |
| #define DefineTensor(X) std::vector<float> X##_; |
| |
| FOR_ALL_INPUT_AND_WEIGHT_TENSORS(DefineTensor); |
| FOR_ALL_LAYER_NORM_WEIGHTS(DefineTensor); |
| FOR_ALL_OUTPUT_TENSORS(DefineTensor); |
| |
| #undef DefineTensor |
| }; |
| |
// Golden-value test: no CIFG, with peephole connections, with a projection
// layer (no projection bias), and no cell/projection clipping.  Feeds a
// 3-step sequence for 2 batches and checks the projected output per step.
TEST(LSTMOpTest, LayerNormNoCifgPeepholeProjectionNoClipping) {
    const int n_batch = 2;
    const int n_input = 5;
    // The projection layer maps n_cell -> n_output, so the two may differ
    // (without projection they would have to be equal).
    const int n_cell = 4;
    const int n_output = 3;

    // The shape list below must follow FOR_ALL_INPUT_AND_WEIGHT_TENSORS
    // order, then FOR_ALL_LAYER_NORM_WEIGHTS order.
    LayerNormLSTMOpModel lstm(n_batch, n_input, n_cell, n_output,
                              /*use_cifg=*/false, /*use_peephole=*/true,
                              /*use_projection_weights=*/true,
                              /*use_projection_bias=*/false,
                              /*cell_clip=*/0.0, /*proj_clip=*/0.0,
                              {
                                      {n_batch, n_input},  // input tensor

                                      {n_cell, n_input},  // input_to_input_weights tensor
                                      {n_cell, n_input},  // input_to_cell_weights tensor
                                      {n_cell, n_input},  // input_to_forget_weights tensor
                                      {n_cell, n_input},  // input_to_output_weights tensor

                                      {n_cell, n_output},  // recurrent_to_input_weights tensor
                                      {n_cell, n_output},  // recurrent_to_cell_weights tensor
                                      {n_cell, n_output},  // recurrent_to_forget_weights tensor
                                      {n_cell, n_output},  // recurrent_to_output_weights tensor

                                      {n_cell},  // cell_to_input_weights tensor
                                      {n_cell},  // cell_to_forget_weights tensor
                                      {n_cell},  // cell_to_output_weights tensor

                                      {n_cell},  // input_gate_bias tensor
                                      {n_cell},  // cell_gate_bias tensor
                                      {n_cell},  // forget_gate_bias tensor
                                      {n_cell},  // output_gate_bias tensor

                                      {n_output, n_cell},  // projection_weights tensor
                                      {0},                 // projection_bias tensor (omitted)

                                      {n_batch, n_output},  // output_state_in tensor
                                      {n_batch, n_cell},    // cell_state_in tensor

                                      {n_cell},  // input_layer_norm_weights tensor
                                      {n_cell},  // forget_layer_norm_weights tensor
                                      {n_cell},  // cell_layer_norm_weights tensor
                                      {n_cell},  // output_layer_norm_weights tensor
                              });

    lstm.SetInputToInputWeights({0.5, 0.6, 0.7, -0.8, -0.9, 0.1, 0.2, 0.3, -0.4, 0.5,
                                 -0.8, 0.7, -0.6, 0.5, -0.4, -0.5, -0.4, -0.3, -0.2, -0.1});

    lstm.SetInputToForgetWeights({-0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, -0.4, 0.3, -0.8,
                                  -0.4, 0.3, -0.5, -0.4, -0.6, 0.3, -0.4, -0.6, -0.5, -0.5});

    lstm.SetInputToCellWeights({-0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, -0.3, -0.2, -0.6,
                                0.6, -0.1, -0.4, -0.3, -0.7, 0.7, -0.9, -0.5, 0.8, 0.6});

    lstm.SetInputToOutputWeights({-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, -0.3, -0.8, -0.2,
                                  0.6, -0.2, 0.4, -0.7, -0.3, -0.5, 0.1, 0.5, -0.6, -0.4});

    lstm.SetInputGateBias({0.03, 0.15, 0.22, 0.38});

    lstm.SetForgetGateBias({0.1, -0.3, -0.2, 0.1});

    lstm.SetCellGateBias({-0.05, 0.72, 0.25, 0.08});

    lstm.SetOutputGateBias({0.05, -0.01, 0.2, 0.1});

    lstm.SetRecurrentToInputWeights(
            {-0.2, -0.3, 0.4, 0.1, -0.5, 0.9, -0.2, -0.3, -0.7, 0.05, -0.2, -0.6});

    lstm.SetRecurrentToCellWeights(
            {-0.3, 0.2, 0.1, -0.3, 0.8, -0.08, -0.2, 0.3, 0.8, -0.6, -0.1, 0.2});

    lstm.SetRecurrentToForgetWeights(
            {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4, 0.9, 0.3, -0.1, 0.2, 0.5, 0.2});

    lstm.SetRecurrentToOutputWeights(
            {0.3, -0.1, 0.1, -0.2, -0.5, -0.7, -0.2, -0.6, -0.1, -0.4, -0.7, -0.2});

    // Peephole (cell-to-gate) weights, one per cell.
    lstm.SetCellToInputWeights({0.05, 0.1, 0.25, 0.15});
    lstm.SetCellToForgetWeights({-0.02, -0.15, -0.25, -0.03});
    lstm.SetCellToOutputWeights({0.1, -0.1, -0.5, 0.05});

    lstm.SetProjectionWeights({-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2});

    // Per-gate layer normalization weights.
    lstm.SetInputLayerNormWeights({0.1, 0.2, 0.3, 0.5});
    lstm.SetForgetLayerNormWeights({0.2, 0.2, 0.4, 0.3});
    lstm.SetCellLayerNormWeights({0.7, 0.2, 0.3, 0.8});
    lstm.SetOutputLayerNormWeights({0.6, 0.2, 0.2, 0.5});

    const std::vector<std::vector<float>> lstm_input = {
            { // Batch0: 3 (input_sequence_size) * 5 (n_input)
             0.7, 0.8, 0.1, 0.2, 0.3,   // seq 0
             0.8, 0.1, 0.2, 0.4, 0.5,   // seq 1
             0.2, 0.7, 0.7, 0.1, 0.7},  // seq 2

            { // Batch1: 3 (input_sequence_size) * 5 (n_input)
             0.3, 0.2, 0.9, 0.8, 0.1,   // seq 0
             0.1, 0.5, 0.2, 0.4, 0.2,   // seq 1
             0.6, 0.9, 0.2, 0.5, 0.7},  // seq 2
    };

    const std::vector<std::vector<float>> lstm_golden_output = {
            {
                    // Batch0: 3 (input_sequence_size) * 3 (n_output)
                    0.0244077, 0.128027, -0.00170918,  // seq 0
                    0.0137642, 0.140751, 0.0395835,    // seq 1
                    -0.00459231, 0.155278, 0.0837377,  // seq 2
            },
            {
                    // Batch1: 3 (input_sequence_size) * 3 (n_output)
                    -0.00692428, 0.0848741, 0.063445,  // seq 0
                    -0.00403912, 0.139963, 0.072681,   // seq 1
                    0.00752706, 0.161903, 0.0561371,   // seq 2
            }};

    // Resetting cell_state and output_state
    lstm.ResetCellState();
    lstm.ResetOutputState();

    // Run one Invoke() per time step; the model carries cell/output state
    // across steps internally (via the swap in Invoke()).
    const int input_sequence_size = lstm_input[0].size() / n_input;
    for (int i = 0; i < input_sequence_size; i++) {
        for (int b = 0; b < n_batch; ++b) {
            const float* batch_start = lstm_input[b].data() + i * n_input;
            const float* batch_end = batch_start + n_input;

            lstm.SetInput(b * n_input, batch_start, batch_end);
        }

        lstm.Invoke();

        // Expected output for this step: the golden rows for every batch,
        // concatenated in batch order.
        std::vector<float> expected;
        for (int b = 0; b < n_batch; ++b) {
            const float* golden_start = lstm_golden_output[b].data() + i * n_output;
            const float* golden_end = golden_start + n_output;
            expected.insert(expected.end(), golden_start, golden_end);
        }
        EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
    }
}
| |
| } // namespace wrapper |
| } // namespace nn |
| } // namespace android |