Add layer normalization support to LSTM op

* Add new test to operations test
* Add new test to validation testing
* Make Prepare and CheckInputTensorDimensions non-static.
    This makes it possible to reuse at preparation time the inputs that were
    already read in constructor. This is needed to use upcasting between
    versions that is implemented in the constructor by assigning dummy
    values to the inputs that are non-existent in older versions.

Fix: 113562577
Test: NeuralNetworksTest_static
Test: VtsHalNeuralnetworksV1_2TargetTest
Change-Id: I90676abfcdb3d9a969a1418f8474ce383bf7fb07
Merged-In: I90676abfcdb3d9a969a1418f8474ce383bf7fb07
(cherry picked from commit d24a1bb943a970fbf71b25ac64216a60a16ffc37)
diff --git a/common/Android.bp b/common/Android.bp
index 668d79b..afa4289 100644
--- a/common/Android.bp
+++ b/common/Android.bp
@@ -147,7 +147,9 @@
         "[email protected]",
     ],
     static_libs: [
+        "libbase",
         "libgmock",
+        "liblog",
         "libneuralnetworks_common",
     ],
     cflags: [
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index 6bd2e6a..11613a7 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -1463,26 +1463,20 @@
                 lsh.Eval();
         } break;
         case OperationType::LSTM: {
-            RunTimeOperandInfo &scratch =
-                mOperands[outs[LSTMCell::kScratchBufferTensor]];
-            RunTimeOperandInfo &outputStateOut =
-                mOperands[outs[LSTMCell::kOutputStateOutTensor]];
-            RunTimeOperandInfo &cellStateOut =
-                mOperands[outs[LSTMCell::kCellStateOutTensor]];
-            RunTimeOperandInfo &output =
-                mOperands[outs[LSTMCell::kOutputTensor]];
+            RunTimeOperandInfo& scratch = mOperands[outs[LSTMCell::kScratchBufferTensor]];
+            RunTimeOperandInfo& outputStateOut = mOperands[outs[LSTMCell::kOutputStateOutTensor]];
+            RunTimeOperandInfo& cellStateOut = mOperands[outs[LSTMCell::kCellStateOutTensor]];
+            RunTimeOperandInfo& output = mOperands[outs[LSTMCell::kOutputTensor]];
 
             Shape scratchShape, outputStateShape, cellStateShape, outputShape;
             LSTMCell lstm_cell(operation, mOperands);
 
-            success = LSTMCell::Prepare(operation, mOperands,
-                                        &scratchShape, &outputStateShape,
+            success = lstm_cell.Prepare(operation, mOperands, &scratchShape, &outputStateShape,
                                         &cellStateShape, &outputShape) &&
-                setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
-                setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
-                setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
-                setInfoAndAllocateIfNeeded(&output, outputShape) &&
-                lstm_cell.Eval();
+                      setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
+                      setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
+                      setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
+                      setInfoAndAllocateIfNeeded(&output, outputShape) && lstm_cell.Eval();
         } break;
         case OperationType::RANDOM_MULTINOMIAL: {
             const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]];
diff --git a/common/Utils.cpp b/common/Utils.cpp
index d82bb65..d233183 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -1353,38 +1353,48 @@
                                                  outExpectedTypes);
         }
         case ANEURALNETWORKS_LSTM: {
-            if (inputCount != 23 || outputCount != 4) {
-                logInvalidInOutNumber(23, 4);
+            std::vector<OperandType> inExpectedTypes;
+            std::vector<OperandType> outExpectedTypes;
+            if (inputCount == 23 && outputCount == 4) {
+                inExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::INT32,          OperandType::FLOAT32,
+                                   OperandType::FLOAT32};
+                outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                    OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32};
+                *minSupportedHalVersion = HalVersion::V1_0;
+            } else if (inputCount == 27 && outputCount == 4) {
+                inExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::INT32,          OperandType::FLOAT32,
+                                   OperandType::FLOAT32,        OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                   OperandType::TENSOR_FLOAT32};
+                outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                                    OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32};
+                *minSupportedHalVersion = HalVersion::V1_2;
+            } else {
+                LOG(ERROR) << "Invalid number of input operands (" << inputCount
+                           << ", expected 23 or 27) or output operands (" << outputCount
+                           << ", expected 4) for operation " << kOperationNames[opType];
                 return ANEURALNETWORKS_BAD_DATA;
             }
-            std::vector<OperandType> inExpectedTypes = {OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::INT32,
-                                                        OperandType::FLOAT32,
-                                                        OperandType::FLOAT32};
-            std::vector<OperandType> outExpectedTypes = {OperandType::TENSOR_FLOAT32,
-                                                         OperandType::TENSOR_FLOAT32,
-                                                         OperandType::TENSOR_FLOAT32,
-                                                         OperandType::TENSOR_FLOAT32};
-            *minSupportedHalVersion = HalVersion::V1_0;
             return validateOperationOperandTypes(operands,
                                                  inputCount, inputIndexes,
                                                  inExpectedTypes,
diff --git a/common/operations/LSTM.cpp b/common/operations/LSTM.cpp
index 7667c66..ca9f42b 100644
--- a/common/operations/LSTM.cpp
+++ b/common/operations/LSTM.cpp
@@ -21,6 +21,8 @@
 
 #include "Tracing.h"
 
+// TODO(levp): Format the file.
+// clang-format off
 namespace android {
 namespace nn {
 
@@ -73,6 +75,24 @@
   params_.cell_clip_ = getScalarData<float>(*GetInput(operation, operands, kCellClipParam));
   params_.proj_clip_ = getScalarData<float>(*GetInput(operation, operands, kProjClipParam));
 
+  // We check the version of LSTM by checking the number of the inputs to the
+  // op. For LSTM version 1.0 there were 23 inputs and for 1.2 there are 27.
+  if (operation.inputs.size() == 27) {
+    input_layer_norm_weights_ = GetInput(operation, operands, kInputLayerNormWeightsTensor);
+    forget_layer_norm_weights_ = GetInput(operation, operands, kForgetLayerNormWeightsTensor);
+    cell_layer_norm_weights_ = GetInput(operation, operands, kCellLayerNormWeightsTensor);
+    output_layer_norm_weights_ = GetInput(operation, operands, kOutputLayerNormWeightsTensor);
+  } else {
+    // For LSTM from HAL v1.0 assign operands with no values
+    static RunTimeOperandInfo no_value;
+    no_value.lifetime = OperandLifeTime::NO_VALUE;
+
+    input_layer_norm_weights_ = &no_value;
+    forget_layer_norm_weights_ = &no_value;
+    cell_layer_norm_weights_ = &no_value;
+    output_layer_norm_weights_ = &no_value;
+  }
+
   output_state_out_ = GetOutput(operation, operands, kOutputStateOutTensor);
   cell_state_out_ = GetOutput(operation, operands, kCellStateOutTensor);
   output_ = GetOutput(operation, operands, kOutputTensor);
@@ -96,125 +116,95 @@
   NN_CHECK(params.cell_clip_ >= 0);
   NN_CHECK(params.proj_clip_ >= 0);
 
-  const RunTimeOperandInfo *input_to_input_weights =
-      GetInput(operation, operands, LSTMCell::kInputToInputWeightsTensor);
-  if (!IsNullInput(input_to_input_weights)) {
-    NN_CHECK_EQ(NumDimensions(input_to_input_weights), 2);
-    NN_CHECK_EQ(SizeOfDimension(input_to_input_weights, 0), n_cell);
-    NN_CHECK_EQ(SizeOfDimension(input_to_input_weights, 1), n_input);
+  if (!IsNullInput(input_to_input_weights_)) {
+    NN_CHECK_EQ(NumDimensions(input_to_input_weights_), 2);
+    NN_CHECK_EQ(SizeOfDimension(input_to_input_weights_, 0), n_cell);
+    NN_CHECK_EQ(SizeOfDimension(input_to_input_weights_, 1), n_input);
   }
 
-  const RunTimeOperandInfo *input_to_forget_weights =
-      GetInput(operation, operands, LSTMCell::kInputToForgetWeightsTensor);
-  NN_CHECK_EQ(NumDimensions(input_to_forget_weights), 2);
-  NN_CHECK_EQ(SizeOfDimension(input_to_forget_weights, 0), n_cell);
-  NN_CHECK_EQ(SizeOfDimension(input_to_forget_weights, 1), n_input);
+  NN_CHECK_EQ(NumDimensions(input_to_forget_weights_), 2);
+  NN_CHECK_EQ(SizeOfDimension(input_to_forget_weights_, 0), n_cell);
+  NN_CHECK_EQ(SizeOfDimension(input_to_forget_weights_, 1), n_input);
 
-  const RunTimeOperandInfo *input_to_cell_weights =
-      GetInput(operation, operands, LSTMCell::kInputToCellWeightsTensor);
-  NN_CHECK_EQ(NumDimensions(input_to_cell_weights), 2);
-  NN_CHECK_EQ(SizeOfDimension(input_to_cell_weights, 0), n_cell);
-  NN_CHECK_EQ(SizeOfDimension(input_to_cell_weights, 1), n_input);
+  NN_CHECK_EQ(NumDimensions(input_to_cell_weights_), 2);
+  NN_CHECK_EQ(SizeOfDimension(input_to_cell_weights_, 0), n_cell);
+  NN_CHECK_EQ(SizeOfDimension(input_to_cell_weights_, 1), n_input);
 
-  const RunTimeOperandInfo *recurrent_to_input_weights =
-      GetInput(operation, operands, LSTMCell::kRecurrentToInputWeightsTensor);
-  if (!IsNullInput(recurrent_to_input_weights)) {
-    NN_CHECK_EQ(NumDimensions(recurrent_to_input_weights), 2);
-    NN_CHECK_EQ(SizeOfDimension(recurrent_to_input_weights, 0), n_cell);
-    NN_CHECK_EQ(SizeOfDimension(recurrent_to_input_weights, 1), n_output);
+  if (!IsNullInput(recurrent_to_input_weights_)) {
+    NN_CHECK_EQ(NumDimensions(recurrent_to_input_weights_), 2);
+    NN_CHECK_EQ(SizeOfDimension(recurrent_to_input_weights_, 0), n_cell);
+    NN_CHECK_EQ(SizeOfDimension(recurrent_to_input_weights_, 1), n_output);
   }
 
-  const RunTimeOperandInfo *recurrent_to_forget_weights =
-      GetInput(operation, operands, LSTMCell::kRecurrentToForgetWeightsTensor);
-  NN_CHECK_EQ(NumDimensions(recurrent_to_forget_weights), 2);
-  NN_CHECK_EQ(SizeOfDimension(recurrent_to_forget_weights, 0), n_cell);
-  NN_CHECK_EQ(SizeOfDimension(recurrent_to_forget_weights, 1), n_output);
+  NN_CHECK_EQ(NumDimensions(recurrent_to_forget_weights_), 2);
+  NN_CHECK_EQ(SizeOfDimension(recurrent_to_forget_weights_, 0), n_cell);
+  NN_CHECK_EQ(SizeOfDimension(recurrent_to_forget_weights_, 1), n_output);
 
-  const RunTimeOperandInfo *recurrent_to_cell_weights =
-      GetInput(operation, operands, LSTMCell::kRecurrentToCellWeightsTensor);
-  NN_CHECK_EQ(NumDimensions(recurrent_to_cell_weights), 2);
-  NN_CHECK_EQ(SizeOfDimension(recurrent_to_cell_weights, 0), n_cell);
-  NN_CHECK_EQ(SizeOfDimension(recurrent_to_cell_weights, 1), n_output);
+  NN_CHECK_EQ(NumDimensions(recurrent_to_cell_weights_), 2);
+  NN_CHECK_EQ(SizeOfDimension(recurrent_to_cell_weights_, 0), n_cell);
+  NN_CHECK_EQ(SizeOfDimension(recurrent_to_cell_weights_, 1), n_output);
 
   // We make sure the input-gate's parameters are either both present (regular
   // LSTM) or not at all (CIFG-LSTM).
   const bool cifg_weights_all_or_none =
-      (!IsNullInput(input_to_input_weights) &&
-       !IsNullInput(recurrent_to_input_weights)) ||
-      (IsNullInput(input_to_input_weights) &&
-       IsNullInput(recurrent_to_input_weights));
+      (!IsNullInput(input_to_input_weights_) &&
+       !IsNullInput(recurrent_to_input_weights_)) ||
+      (IsNullInput(input_to_input_weights_) &&
+       IsNullInput(recurrent_to_input_weights_));
   NN_CHECK(cifg_weights_all_or_none);
 
-  const RunTimeOperandInfo *cell_to_input_weights =
-      GetInput(operation, operands, LSTMCell::kCellToInputWeightsTensor);
-  if (!IsNullInput(cell_to_input_weights)) {
-    NN_CHECK_EQ(NumDimensions(cell_to_input_weights), 1);
-    NN_CHECK_EQ(SizeOfDimension(cell_to_input_weights, 0), n_cell);
+  if (!IsNullInput(cell_to_input_weights_)) {
+    NN_CHECK_EQ(NumDimensions(cell_to_input_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(cell_to_input_weights_, 0), n_cell);
   }
 
-  const RunTimeOperandInfo *cell_to_forget_weights =
-      GetInput(operation, operands, LSTMCell::kCellToForgetWeightsTensor);
-  if (!IsNullInput(cell_to_forget_weights)) {
-    NN_CHECK_EQ(NumDimensions(cell_to_forget_weights), 1);
-    NN_CHECK_EQ(SizeOfDimension(cell_to_forget_weights, 0), n_cell);
+  if (!IsNullInput(cell_to_forget_weights_)) {
+    NN_CHECK_EQ(NumDimensions(cell_to_forget_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(cell_to_forget_weights_, 0), n_cell);
   }
 
-  const RunTimeOperandInfo *cell_to_output_weights =
-      GetInput(operation, operands, LSTMCell::kCellToOutputWeightsTensor);
-  if (!IsNullInput(cell_to_output_weights)) {
-    NN_CHECK_EQ(NumDimensions(cell_to_output_weights), 1);
-    NN_CHECK_EQ(SizeOfDimension(cell_to_output_weights, 0), n_cell);
+  if (!IsNullInput(cell_to_output_weights_)) {
+    NN_CHECK_EQ(NumDimensions(cell_to_output_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(cell_to_output_weights_, 0), n_cell);
   }
 
   // Making sure the peephole weights are there all or none.
-  const bool use_cifg = IsNullInput(input_to_input_weights);
+  const bool use_cifg = IsNullInput(input_to_input_weights_);
   const bool peephole_weights_all_or_none =
-      ((!IsNullInput(cell_to_input_weights) || use_cifg) &&
-       !IsNullInput(cell_to_forget_weights) &&
-       !IsNullInput(cell_to_output_weights)) ||
-      (IsNullInput(cell_to_input_weights) &&
-       IsNullInput(cell_to_forget_weights) &&
-       IsNullInput(cell_to_output_weights));
+      ((!IsNullInput(cell_to_input_weights_) || use_cifg) &&
+       !IsNullInput(cell_to_forget_weights_) &&
+       !IsNullInput(cell_to_output_weights_)) ||
+      (IsNullInput(cell_to_input_weights_) &&
+       IsNullInput(cell_to_forget_weights_) &&
+       IsNullInput(cell_to_output_weights_));
   NN_CHECK(peephole_weights_all_or_none);
 
   // Make sure the input gate bias is present only when not a CIFG-LSTM.
-  const RunTimeOperandInfo* input_gate_bias =
-      GetInput(operation, operands, LSTMCell::kInputGateBiasTensor);
   if (use_cifg) {
-    NN_CHECK(IsNullInput(input_gate_bias));
+    NN_CHECK(IsNullInput(input_gate_bias_));
   } else {
-    NN_CHECK_EQ(NumDimensions(input_gate_bias), 1);
-    NN_CHECK_EQ(SizeOfDimension(input_gate_bias, 0), n_cell);
+    NN_CHECK_EQ(NumDimensions(input_gate_bias_), 1);
+    NN_CHECK_EQ(SizeOfDimension(input_gate_bias_, 0), n_cell);
   }
 
-  const RunTimeOperandInfo *forget_gate_bias =
-      GetInput(operation, operands, LSTMCell::kForgetGateBiasTensor);
-  NN_CHECK_EQ(NumDimensions(forget_gate_bias), 1);
-  NN_CHECK_EQ(SizeOfDimension(forget_gate_bias, 0), n_cell);
+  NN_CHECK_EQ(NumDimensions(forget_gate_bias_), 1);
+  NN_CHECK_EQ(SizeOfDimension(forget_gate_bias_, 0), n_cell);
 
-  const RunTimeOperandInfo *cell_bias =
-      GetInput(operation, operands, LSTMCell::kCellGateBiasTensor);
-  NN_CHECK_EQ(NumDimensions(cell_bias), 1);
-  NN_CHECK_EQ(SizeOfDimension(cell_bias, 0), n_cell);
+  NN_CHECK_EQ(NumDimensions(cell_bias_), 1);
+  NN_CHECK_EQ(SizeOfDimension(cell_bias_, 0), n_cell);
 
-  const RunTimeOperandInfo *output_gate_bias =
-      GetInput(operation, operands, LSTMCell::kOutputGateBiasTensor);
-  NN_CHECK_EQ(NumDimensions(output_gate_bias), 1);
-  NN_CHECK_EQ(SizeOfDimension(output_gate_bias, 0), n_cell);
+  NN_CHECK_EQ(NumDimensions(output_gate_bias_), 1);
+  NN_CHECK_EQ(SizeOfDimension(output_gate_bias_, 0), n_cell);
 
-  const RunTimeOperandInfo *projection_weights =
-      GetInput(operation, operands, LSTMCell::kProjectionWeightsTensor);
-  if (!IsNullInput(projection_weights)) {
-    NN_CHECK_EQ(NumDimensions(projection_weights), 2);
-    NN_CHECK_EQ(SizeOfDimension(projection_weights, 0), n_output);
-    NN_CHECK_EQ(SizeOfDimension(projection_weights, 1), n_cell);
+  if (!IsNullInput(projection_weights_)) {
+    NN_CHECK_EQ(NumDimensions(projection_weights_), 2);
+    NN_CHECK_EQ(SizeOfDimension(projection_weights_, 0), n_output);
+    NN_CHECK_EQ(SizeOfDimension(projection_weights_, 1), n_cell);
   }
 
-  const RunTimeOperandInfo *projection_bias =
-      GetInput(operation, operands, LSTMCell::kProjectionBiasTensor);
-  if (!IsNullInput(projection_bias)) {
-    NN_CHECK_EQ(NumDimensions(projection_bias), 1);
-    NN_CHECK_EQ(SizeOfDimension(projection_bias, 0), n_output);
+  if (!IsNullInput(projection_bias_)) {
+    NN_CHECK_EQ(NumDimensions(projection_bias_), 1);
+    NN_CHECK_EQ(SizeOfDimension(projection_bias_, 0), n_output);
   }
 
   // Making sure the projection tensors are consistent:
@@ -223,9 +213,37 @@
   // 2) If projection weight is present, then projection bias is optional.
   // TODO: make sure this is correct.
   const bool projecton_tensors_consistent =
-      (!IsNullInput(projection_weights) || IsNullInput(projection_bias));
+      (!IsNullInput(projection_weights_) || IsNullInput(projection_bias_));
   NN_CHECK(projecton_tensors_consistent == true);
 
+  if (!IsNullInput(input_layer_norm_weights_)) {
+    NN_CHECK_EQ(NumDimensions(input_layer_norm_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(input_layer_norm_weights_, 0), n_cell);
+  }
+  if (!IsNullInput(forget_layer_norm_weights_)) {
+    NN_CHECK_EQ(NumDimensions(forget_layer_norm_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(forget_layer_norm_weights_, 0), n_cell);
+  }
+  if (!IsNullInput(cell_layer_norm_weights_)) {
+    NN_CHECK_EQ(NumDimensions(cell_layer_norm_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(cell_layer_norm_weights_, 0), n_cell);
+  }
+  if (!IsNullInput(output_layer_norm_weights_)) {
+    NN_CHECK_EQ(NumDimensions(output_layer_norm_weights_), 1);
+    NN_CHECK_EQ(SizeOfDimension(output_layer_norm_weights_, 0), n_cell);
+  }
+
+  const bool layer_norm_weights_all_or_none =
+      (IsNullInput(input_layer_norm_weights_) &&
+       IsNullInput(forget_layer_norm_weights_) &&
+       IsNullInput(cell_layer_norm_weights_) &&
+       IsNullInput(output_layer_norm_weights_)) ||
+      (!IsNullInput(input_layer_norm_weights_) &&
+       !IsNullInput(forget_layer_norm_weights_) &&
+       !IsNullInput(cell_layer_norm_weights_) &&
+       !IsNullInput(output_layer_norm_weights_));
+  NN_CHECK(layer_norm_weights_all_or_none);
+
   return true;
 }
 
@@ -237,29 +255,22 @@
                        Shape *outputShape) {
   // Check we have all the inputs and outputs we need.
   NN_CHECK(NumInputsWithValues(operation, operands) >= 15 &&
-           NumInputsWithValues(operation, operands) <= 23);
+           NumInputsWithValues(operation, operands) <= 27);
   NN_CHECK_EQ(NumOutputs(operation), 4);
 
   // Inferring batch size, number of outputs and number of cells from the
   // input tensors.
-  const RunTimeOperandInfo *input =
-      GetInput(operation, operands, LSTMCell::kInputTensor);
-  NN_CHECK(NumDimensions(input) > 1);
-  const uint32_t n_batch = SizeOfDimension(input, 0);
-  const uint32_t n_input = SizeOfDimension(input, 1);
+  NN_CHECK(NumDimensions(input_) > 1);
+  const uint32_t n_batch = SizeOfDimension(input_, 0);
+  const uint32_t n_input = SizeOfDimension(input_, 1);
 
-  const RunTimeOperandInfo *input_to_output_weights =
-      GetInput(operation, operands, LSTMCell::kInputToOutputWeightsTensor);
-  const uint32_t n_cell = SizeOfDimension(input_to_output_weights, 0);
-  NN_CHECK_EQ(NumDimensions(input_to_output_weights), 2);
-  NN_CHECK_EQ(SizeOfDimension(input_to_output_weights, 1), n_input);
+  const uint32_t n_cell = SizeOfDimension(input_to_output_weights_, 0);
+  NN_CHECK_EQ(NumDimensions(input_to_output_weights_), 2);
+  NN_CHECK_EQ(SizeOfDimension(input_to_output_weights_, 1), n_input);
 
-  const RunTimeOperandInfo *recurrent_to_output_weights =
-      GetInput(operation, operands, LSTMCell::kRecurrentToOutputWeightsTensor);
-  NN_CHECK_EQ(NumDimensions(recurrent_to_output_weights), 2);
-  NN_CHECK_EQ(SizeOfDimension(recurrent_to_output_weights, 0),
-                    n_cell);
-  const uint32_t n_output = SizeOfDimension(recurrent_to_output_weights, 1);
+  NN_CHECK_EQ(NumDimensions(recurrent_to_output_weights_), 2);
+  NN_CHECK_EQ(SizeOfDimension(recurrent_to_output_weights_, 0), n_cell);
+  const uint32_t n_output = SizeOfDimension(recurrent_to_output_weights_, 1);
 
   // Check that input tensor dimensions matches with each other.
   if (!CheckInputTensorDimensions(operation, operands, n_input, n_output, n_cell)) {
@@ -267,7 +278,7 @@
   }
 
   // Resize the output and output_state tensors.
-  const Shape &inputShape = input->shape();
+  const Shape &inputShape = input_->shape();
 
   outputShape->type = inputShape.type;
   outputShape->dimensions = { n_batch, n_output };
@@ -284,9 +295,7 @@
   cellStateShape->offset = inputShape.offset;
   cellStateShape->scale = inputShape.scale;
 
-  const RunTimeOperandInfo *input_to_input_weights =
-      GetInput(operation, operands, LSTMCell::kInputToInputWeightsTensor);
-  const bool use_cifg = IsNullInput(input_to_input_weights);
+  const bool use_cifg = IsNullInput(input_to_input_weights_);
   if (use_cifg) {
     // Reserving space for Cell, Forget, Output gates
     scratchShape->dimensions = { n_batch, n_cell * 3 };
@@ -312,8 +321,9 @@
 
   // Since we have already checked that weights are all there or none, we can
   // check the existence of only one to the get the condition.
-  const bool use_cifg = (input_to_input_weights_->lifetime == OperandLifeTime::NO_VALUE);
-  const bool use_peephole = (cell_to_output_weights_->lifetime != OperandLifeTime::NO_VALUE);
+  const bool use_cifg = IsNullInput(input_to_input_weights_);
+  const bool use_peephole = !IsNullInput(cell_to_output_weights_);
+  const bool use_layer_norm = !IsNullInput(input_layer_norm_weights_);
 
   // Index the scratch buffers pointers to the global scratch buffer.
   float* input_gate_scratch = nullptr;
@@ -331,17 +341,27 @@
     output_gate_scratch = input_gate_scratch + 3 * n_cell * n_batch;
   }
 
-  // Initialize scratch buffers with bias.
-  if (!use_cifg) {
-    tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(input_gate_bias_),
-                                                  n_cell, n_batch, input_gate_scratch);
+  if (!use_layer_norm) {
+    // Initialize scratch buffers with bias.
+    if (!use_cifg) {
+      tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(input_gate_bias_),
+                                                    n_cell, n_batch, input_gate_scratch);
+    }
+    tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(forget_gate_bias_),
+                                                  n_cell, n_batch, forget_gate_scratch);
+    tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(cell_bias_),
+                                                  n_cell, n_batch, cell_scratch);
+    tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(output_gate_bias_),
+                                                  n_cell, n_batch, output_gate_scratch);
+  } else {
+    // Initialize scratch buffers with zeroes.
+    if (!use_cifg) {
+      tflite::tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+    }
+    tflite::tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
+    tflite::tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
+    tflite::tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
   }
-  tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(forget_gate_bias_),
-                                                n_cell, n_batch, forget_gate_scratch);
-  tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(cell_bias_),
-                                                n_cell, n_batch, cell_scratch);
-  tflite::tensor_utils::VectorBatchVectorAssign(GetBuffer<float>(output_gate_bias_),
-                                                n_cell, n_batch, output_gate_scratch);
 
   // For each batch and cell: compute input_weight * input.
   if (!use_cifg) {
@@ -382,6 +402,16 @@
           GetBuffer<float>(cell_to_input_weights_), n_cell,
           GetBuffer<float>(cell_state_in_), n_batch, input_gate_scratch);
     }
+    if (use_layer_norm) {
+      tflite::tensor_utils::MeanStddevNormalization(input_gate_scratch,
+                                                    input_gate_scratch, n_cell, n_batch,
+                                                    kLayerNormEpsilon);
+      tflite::tensor_utils::VectorBatchVectorCwiseProduct(GetBuffer<float>(input_layer_norm_weights_),
+                                                          n_cell, input_gate_scratch,
+                                                          n_batch, input_gate_scratch);
+      tflite::tensor_utils::VectorBatchVectorAdd(GetBuffer<float>(input_gate_bias_), n_cell, n_batch,
+                                                 input_gate_scratch);
+    }
     tflite::tensor_utils::ApplySigmoidToVector(input_gate_scratch,
                                                n_cell * n_batch,
                                                input_gate_scratch);
@@ -393,11 +423,29 @@
         GetBuffer<float>(cell_to_forget_weights_), n_cell,
         GetBuffer<float>(cell_state_in_), n_batch, forget_gate_scratch);
   }
+  if (use_layer_norm) {
+    tflite::tensor_utils::MeanStddevNormalization(forget_gate_scratch,
+                                                  forget_gate_scratch, n_cell, n_batch,
+                                                  kLayerNormEpsilon);
+    tflite::tensor_utils::VectorBatchVectorCwiseProduct(GetBuffer<float>(forget_layer_norm_weights_),
+                                                        n_cell, forget_gate_scratch,
+                                                        n_batch, forget_gate_scratch);
+    tflite::tensor_utils::VectorBatchVectorAdd(GetBuffer<float>(forget_gate_bias_), n_cell, n_batch,
+                                               forget_gate_scratch);
+  }
   tflite::tensor_utils::ApplySigmoidToVector(forget_gate_scratch,
                                              n_cell * n_batch,
                                              forget_gate_scratch);
 
   // For each batch and cell: update the cell.
+  if (use_layer_norm) {
+    tflite::tensor_utils::MeanStddevNormalization(cell_scratch, cell_scratch, n_cell,
+                                                  n_batch, kLayerNormEpsilon);
+    tflite::tensor_utils::VectorBatchVectorCwiseProduct(
+        GetBuffer<float>(cell_layer_norm_weights_), n_cell, cell_scratch, n_batch, cell_scratch);
+    tflite::tensor_utils::VectorBatchVectorAdd(GetBuffer<float>(cell_bias_), n_cell, n_batch,
+                                               cell_scratch);
+  }
   tflite::tensor_utils::VectorVectorCwiseProduct(
       forget_gate_scratch, GetBuffer<float>(cell_state_in_), n_batch * n_cell,
       GetBuffer<float>(cell_state_out_));
@@ -426,6 +474,16 @@
         GetBuffer<float>(cell_to_output_weights_), n_cell,
         GetBuffer<float>(cell_state_out_), n_batch, output_gate_scratch);
   }
+  if (use_layer_norm) {
+    tflite::tensor_utils::MeanStddevNormalization(output_gate_scratch,
+                                                  output_gate_scratch, n_cell, n_batch,
+                                                  kLayerNormEpsilon);
+    tflite::tensor_utils::VectorBatchVectorCwiseProduct(GetBuffer<float>(output_layer_norm_weights_),
+                                                        n_cell, output_gate_scratch,
+                                                        n_batch, output_gate_scratch);
+    tflite::tensor_utils::VectorBatchVectorAdd(GetBuffer<float>(output_gate_bias_), n_cell, n_batch,
+                                               output_gate_scratch);
+  }
   tflite::tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
                                              output_gate_scratch);
   tflite::tensor_utils::ApplyActivationToVector(GetBuffer<float>(cell_state_out_),
diff --git a/common/operations/LSTM.h b/common/operations/LSTM.h
index 5305e2b..04051aa 100644
--- a/common/operations/LSTM.h
+++ b/common/operations/LSTM.h
@@ -24,6 +24,8 @@
 #include <algorithm>
 #include <cmath>
 
+// TODO(levp): Format the file.
+// clang-format off
 namespace android {
 namespace nn {
 
@@ -41,12 +43,12 @@
   LSTMCell(const Operation &operation,
            std::vector<RunTimeOperandInfo> &operands);
 
-  static bool Prepare(const Operation &operation,
-                      std::vector<RunTimeOperandInfo> &operands,
-                      Shape *scratchShape,
-                      Shape *outputStateShape,
-                      Shape *cellStateShape,
-                      Shape *outputShape);
+  bool Prepare(const Operation &operation,
+               std::vector<RunTimeOperandInfo> &operands,
+               Shape *scratchShape,
+               Shape *outputStateShape,
+               Shape *cellStateShape,
+               Shape *outputShape);
   bool Eval();
 
   // Input Tensors of size {n_batch, n_input}
@@ -87,17 +89,24 @@
   static constexpr int kCellClipParam = 21;
   static constexpr int kProjClipParam = 22;
 
+  // Layer norm weights tensors of size {n_cell}, representing a diagonal matrix.
+  static constexpr int kInputLayerNormWeightsTensor = 23;
+  static constexpr int kForgetLayerNormWeightsTensor = 24;
+  static constexpr int kCellLayerNormWeightsTensor = 25;
+  static constexpr int kOutputLayerNormWeightsTensor = 26;
+
   // Output tensors.
   static constexpr int kScratchBufferTensor = 0;
   static constexpr int kOutputStateOutTensor = 1;
   static constexpr int kCellStateOutTensor = 2;
   static constexpr int kOutputTensor = 3;
 
+  static constexpr float kLayerNormEpsilon = 1e-8;
+
  private:
-  static bool CheckInputTensorDimensions(
-      const Operation &operation,
-      std::vector<RunTimeOperandInfo> &operands, uint32_t n_input,
-      uint32_t n_output, uint32_t n_cell);
+  bool CheckInputTensorDimensions(const Operation& operation,
+                                  std::vector<RunTimeOperandInfo>& operands,
+                                  uint32_t n_input, uint32_t n_output, uint32_t n_cell);
   LSTMParams params_;
 
   const RunTimeOperandInfo *input_;
@@ -127,6 +136,11 @@
   const RunTimeOperandInfo *output_state_in_;
   const RunTimeOperandInfo *cell_state_in_;
 
+  const RunTimeOperandInfo *input_layer_norm_weights_;
+  const RunTimeOperandInfo *forget_layer_norm_weights_;
+  const RunTimeOperandInfo *cell_layer_norm_weights_;
+  const RunTimeOperandInfo *output_layer_norm_weights_;
+
   RunTimeOperandInfo *output_state_out_;
   RunTimeOperandInfo *cell_state_out_;
   RunTimeOperandInfo *output_;
diff --git a/common/operations/LayerNormLSTMTest.cpp b/common/operations/LayerNormLSTMTest.cpp
new file mode 100644
index 0000000..faf7fef
--- /dev/null
+++ b/common/operations/LayerNormLSTMTest.cpp
@@ -0,0 +1,428 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "LSTM.h"
+
+#include <android-base/logging.h>
+
+#include "NeuralNetworksWrapper.h"
+#include "gmock/gmock-matchers.h"
+#include "gtest/gtest.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace android {
+namespace nn {
+namespace wrapper {
+
+using ::testing::Each;
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+namespace {
+
+std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float>& values,
+                                           float max_abs_error = 1.e-6) {
+    std::vector<Matcher<float>> matchers;
+    matchers.reserve(values.size());
+    for (const float& v : values) {
+        matchers.emplace_back(FloatNear(v, max_abs_error));
+    }
+    return matchers;
+}
+
+}  // anonymous namespace
+
+#define FOR_ALL_INPUT_AND_WEIGHT_TENSORS(ACTION) \
+    ACTION(Input)                                \
+    ACTION(InputToInputWeights)                  \
+    ACTION(InputToCellWeights)                   \
+    ACTION(InputToForgetWeights)                 \
+    ACTION(InputToOutputWeights)                 \
+    ACTION(RecurrentToInputWeights)              \
+    ACTION(RecurrentToCellWeights)               \
+    ACTION(RecurrentToForgetWeights)             \
+    ACTION(RecurrentToOutputWeights)             \
+    ACTION(CellToInputWeights)                   \
+    ACTION(CellToForgetWeights)                  \
+    ACTION(CellToOutputWeights)                  \
+    ACTION(InputGateBias)                        \
+    ACTION(CellGateBias)                         \
+    ACTION(ForgetGateBias)                       \
+    ACTION(OutputGateBias)                       \
+    ACTION(ProjectionWeights)                    \
+    ACTION(ProjectionBias)                       \
+    ACTION(OutputStateIn)                        \
+    ACTION(CellStateIn)
+
+#define FOR_ALL_LAYER_NORM_WEIGHTS(ACTION) \
+    ACTION(InputLayerNormWeights)          \
+    ACTION(ForgetLayerNormWeights)         \
+    ACTION(CellLayerNormWeights)           \
+    ACTION(OutputLayerNormWeights)
+
+// For all output and intermediate states
+#define FOR_ALL_OUTPUT_TENSORS(ACTION) \
+    ACTION(ScratchBuffer)              \
+    ACTION(OutputStateOut)             \
+    ACTION(CellStateOut)               \
+    ACTION(Output)
+
+class LayerNormLSTMOpModel {
+   public:
+    LayerNormLSTMOpModel(uint32_t n_batch, uint32_t n_input, uint32_t n_cell, uint32_t n_output,
+                         bool use_cifg, bool use_peephole, bool use_projection_weights,
+                         bool use_projection_bias, float cell_clip, float proj_clip,
+                         const std::vector<std::vector<uint32_t>>& input_shapes0)
+        : n_input_(n_input),
+          n_output_(n_output),
+          use_cifg_(use_cifg),
+          use_peephole_(use_peephole),
+          use_projection_weights_(use_projection_weights),
+          use_projection_bias_(use_projection_bias),
+          activation_(ActivationFn::kActivationTanh),
+          cell_clip_(cell_clip),
+          proj_clip_(proj_clip) {
+        std::vector<uint32_t> inputs;
+        std::vector<std::vector<uint32_t>> input_shapes(input_shapes0);
+
+        auto it = input_shapes.begin();
+
+        // Input and weights
+#define AddInput(X)                                     \
+    CHECK(it != input_shapes.end());                    \
+    OperandType X##OpndTy(Type::TENSOR_FLOAT32, *it++); \
+    inputs.push_back(model_.addOperand(&X##OpndTy));
+
+        FOR_ALL_INPUT_AND_WEIGHT_TENSORS(AddInput);
+
+        // Parameters
+        OperandType ActivationOpndTy(Type::INT32, {});
+        inputs.push_back(model_.addOperand(&ActivationOpndTy));
+        OperandType CellClipOpndTy(Type::FLOAT32, {});
+        inputs.push_back(model_.addOperand(&CellClipOpndTy));
+        OperandType ProjClipOpndTy(Type::FLOAT32, {});
+        inputs.push_back(model_.addOperand(&ProjClipOpndTy));
+
+        FOR_ALL_LAYER_NORM_WEIGHTS(AddInput);
+
+#undef AddOperand
+
+        // Output and other intermediate state
+        std::vector<std::vector<uint32_t>> output_shapes{
+                {n_batch, n_cell * (use_cifg ? 3 : 4)},
+                {n_batch, n_output},
+                {n_batch, n_cell},
+                {n_batch, n_output},
+        };
+        std::vector<uint32_t> outputs;
+
+        auto it2 = output_shapes.begin();
+
+#define AddOutput(X)                                     \
+    CHECK(it2 != output_shapes.end());                   \
+    OperandType X##OpndTy(Type::TENSOR_FLOAT32, *it2++); \
+    outputs.push_back(model_.addOperand(&X##OpndTy));
+
+        FOR_ALL_OUTPUT_TENSORS(AddOutput);
+
+#undef AddOutput
+
+        model_.addOperation(ANEURALNETWORKS_LSTM, inputs, outputs);
+        model_.identifyInputsAndOutputs(inputs, outputs);
+
+        Input_.insert(Input_.end(), n_batch * n_input, 0.f);
+        OutputStateIn_.insert(OutputStateIn_.end(), n_batch * n_output, 0.f);
+        CellStateIn_.insert(CellStateIn_.end(), n_batch * n_cell, 0.f);
+
+        auto multiAll = [](const std::vector<uint32_t>& dims) -> uint32_t {
+            uint32_t sz = 1;
+            for (uint32_t d : dims) {
+                sz *= d;
+            }
+            return sz;
+        };
+
+        it2 = output_shapes.begin();
+
+#define ReserveOutput(X) X##_.insert(X##_.end(), multiAll(*it2++), 0.f);
+
+        FOR_ALL_OUTPUT_TENSORS(ReserveOutput);
+
+#undef ReserveOutput
+
+        model_.finish();
+    }
+
+#define DefineSetter(X) \
+    void Set##X(const std::vector<float>& f) { X##_.insert(X##_.end(), f.begin(), f.end()); }
+
+    FOR_ALL_INPUT_AND_WEIGHT_TENSORS(DefineSetter);
+    FOR_ALL_LAYER_NORM_WEIGHTS(DefineSetter);
+
+#undef DefineSetter
+
+    void ResetOutputState() {
+        std::fill(OutputStateIn_.begin(), OutputStateIn_.end(), 0.f);
+        std::fill(OutputStateOut_.begin(), OutputStateOut_.end(), 0.f);
+    }
+
+    void ResetCellState() {
+        std::fill(CellStateIn_.begin(), CellStateIn_.end(), 0.f);
+        std::fill(CellStateOut_.begin(), CellStateOut_.end(), 0.f);
+    }
+
+    void SetInput(int offset, const float* begin, const float* end) {
+        for (; begin != end; begin++, offset++) {
+            Input_[offset] = *begin;
+        }
+    }
+
+    uint32_t num_inputs() const { return n_input_; }
+    uint32_t num_outputs() const { return n_output_; }
+
+    const std::vector<float>& GetOutput() const { return Output_; }
+
+    void Invoke() {
+        ASSERT_TRUE(model_.isValid());
+
+        OutputStateIn_.swap(OutputStateOut_);
+        CellStateIn_.swap(CellStateOut_);
+
+        Compilation compilation(&model_);
+        compilation.finish();
+        Execution execution(&compilation);
+#define SetInputOrWeight(X)                                                                       \
+    ASSERT_EQ(                                                                                    \
+            execution.setInput(LSTMCell::k##X##Tensor, X##_.data(), sizeof(float) * X##_.size()), \
+            Result::NO_ERROR);
+
+        FOR_ALL_INPUT_AND_WEIGHT_TENSORS(SetInputOrWeight);
+        FOR_ALL_LAYER_NORM_WEIGHTS(SetInputOrWeight);
+
+#undef SetInputOrWeight
+
+#define SetOutput(X)                                                                               \
+    ASSERT_EQ(                                                                                     \
+            execution.setOutput(LSTMCell::k##X##Tensor, X##_.data(), sizeof(float) * X##_.size()), \
+            Result::NO_ERROR);
+
+        FOR_ALL_OUTPUT_TENSORS(SetOutput);
+
+#undef SetOutput
+
+        if (use_cifg_) {
+            execution.setInput(LSTMCell::kInputToInputWeightsTensor, nullptr, 0);
+            execution.setInput(LSTMCell::kRecurrentToInputWeightsTensor, nullptr, 0);
+        }
+
+        if (use_peephole_) {
+            if (use_cifg_) {
+                execution.setInput(LSTMCell::kCellToInputWeightsTensor, nullptr, 0);
+            }
+        } else {
+            execution.setInput(LSTMCell::kCellToInputWeightsTensor, nullptr, 0);
+            execution.setInput(LSTMCell::kCellToForgetWeightsTensor, nullptr, 0);
+            execution.setInput(LSTMCell::kCellToOutputWeightsTensor, nullptr, 0);
+        }
+
+        if (use_projection_weights_) {
+            if (!use_projection_bias_) {
+                execution.setInput(LSTMCell::kProjectionBiasTensor, nullptr, 0);
+            }
+        } else {
+            execution.setInput(LSTMCell::kProjectionWeightsTensor, nullptr, 0);
+            execution.setInput(LSTMCell::kProjectionBiasTensor, nullptr, 0);
+        }
+
+        ASSERT_EQ(execution.setInput(LSTMCell::kActivationParam, &activation_, sizeof(activation_)),
+                  Result::NO_ERROR);
+        ASSERT_EQ(execution.setInput(LSTMCell::kCellClipParam, &cell_clip_, sizeof(cell_clip_)),
+                  Result::NO_ERROR);
+        ASSERT_EQ(execution.setInput(LSTMCell::kProjClipParam, &proj_clip_, sizeof(proj_clip_)),
+                  Result::NO_ERROR);
+
+        ASSERT_EQ(execution.compute(), Result::NO_ERROR);
+    }
+
+   private:
+    Model model_;
+    // Execution execution_;
+    const uint32_t n_input_;
+    const uint32_t n_output_;
+
+    const bool use_cifg_;
+    const bool use_peephole_;
+    const bool use_projection_weights_;
+    const bool use_projection_bias_;
+
+    const int activation_;
+    const float cell_clip_;
+    const float proj_clip_;
+
+#define DefineTensor(X) std::vector<float> X##_;
+
+    FOR_ALL_INPUT_AND_WEIGHT_TENSORS(DefineTensor);
+    FOR_ALL_LAYER_NORM_WEIGHTS(DefineTensor);
+    FOR_ALL_OUTPUT_TENSORS(DefineTensor);
+
+#undef DefineTensor
+};
+
+TEST(LSTMOpTest, LayerNormNoCifgPeepholeProjectionNoClipping) {
+    const int n_batch = 2;
+    const int n_input = 5;
+    // n_cell and n_output differ here because a recurrent projection layer is used.
+    const int n_cell = 4;
+    const int n_output = 3;
+
+    LayerNormLSTMOpModel lstm(n_batch, n_input, n_cell, n_output,
+                              /*use_cifg=*/false, /*use_peephole=*/true,
+                              /*use_projection_weights=*/true,
+                              /*use_projection_bias=*/false,
+                              /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+                              {
+                                      {n_batch, n_input},  // input tensor
+
+                                      {n_cell, n_input},  // input_to_input_weight tensor
+                                      {n_cell, n_input},  // input_to_forget_weight tensor
+                                      {n_cell, n_input},  // input_to_cell_weight tensor
+                                      {n_cell, n_input},  // input_to_output_weight tensor
+
+                                      {n_cell, n_output},  // recurrent_to_input_weight tensor
+                                      {n_cell, n_output},  // recurrent_to_forget_weight tensor
+                                      {n_cell, n_output},  // recurrent_to_cell_weight tensor
+                                      {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+                                      {n_cell},  // cell_to_input_weight tensor
+                                      {n_cell},  // cell_to_forget_weight tensor
+                                      {n_cell},  // cell_to_output_weight tensor
+
+                                      {n_cell},  // input_gate_bias tensor
+                                      {n_cell},  // forget_gate_bias tensor
+                                      {n_cell},  // cell_bias tensor
+                                      {n_cell},  // output_gate_bias tensor
+
+                                      {n_output, n_cell},  // projection_weight tensor
+                                      {0},                 // projection_bias tensor
+
+                                      {n_batch, n_output},  // output_state_in tensor
+                                      {n_batch, n_cell},    // cell_state_in tensor
+
+                                      {n_cell},  // input_layer_norm_weights tensor
+                                      {n_cell},  // forget_layer_norm_weights tensor
+                                      {n_cell},  // cell_layer_norm_weights tensor
+                                      {n_cell},  // output_layer_norm_weights tensor
+                              });
+
+    lstm.SetInputToInputWeights({0.5,  0.6, 0.7,  -0.8, -0.9, 0.1,  0.2,  0.3,  -0.4, 0.5,
+                                 -0.8, 0.7, -0.6, 0.5,  -0.4, -0.5, -0.4, -0.3, -0.2, -0.1});
+
+    lstm.SetInputToForgetWeights({-0.6, -0.1, 0.3,  0.2,  0.9,  -0.5, -0.2, -0.4, 0.3,  -0.8,
+                                  -0.4, 0.3,  -0.5, -0.4, -0.6, 0.3,  -0.4, -0.6, -0.5, -0.5});
+
+    lstm.SetInputToCellWeights({-0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, -0.3, -0.2, -0.6,
+                                0.6,  -0.1, -0.4, -0.3, -0.7, 0.7, -0.9, -0.5, 0.8,  0.6});
+
+    lstm.SetInputToOutputWeights({-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, -0.3, -0.8, -0.2,
+                                  0.6,  -0.2, 0.4,  -0.7, -0.3, -0.5, 0.1, 0.5,  -0.6, -0.4});
+
+    lstm.SetInputGateBias({0.03, 0.15, 0.22, 0.38});
+
+    lstm.SetForgetGateBias({0.1, -0.3, -0.2, 0.1});
+
+    lstm.SetCellGateBias({-0.05, 0.72, 0.25, 0.08});
+
+    lstm.SetOutputGateBias({0.05, -0.01, 0.2, 0.1});
+
+    lstm.SetRecurrentToInputWeights(
+            {-0.2, -0.3, 0.4, 0.1, -0.5, 0.9, -0.2, -0.3, -0.7, 0.05, -0.2, -0.6});
+
+    lstm.SetRecurrentToCellWeights(
+            {-0.3, 0.2, 0.1, -0.3, 0.8, -0.08, -0.2, 0.3, 0.8, -0.6, -0.1, 0.2});
+
+    lstm.SetRecurrentToForgetWeights(
+            {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4, 0.9, 0.3, -0.1, 0.2, 0.5, 0.2});
+
+    lstm.SetRecurrentToOutputWeights(
+            {0.3, -0.1, 0.1, -0.2, -0.5, -0.7, -0.2, -0.6, -0.1, -0.4, -0.7, -0.2});
+
+    lstm.SetCellToInputWeights({0.05, 0.1, 0.25, 0.15});
+    lstm.SetCellToForgetWeights({-0.02, -0.15, -0.25, -0.03});
+    lstm.SetCellToOutputWeights({0.1, -0.1, -0.5, 0.05});
+
+    lstm.SetProjectionWeights({-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2});
+
+    lstm.SetInputLayerNormWeights({0.1, 0.2, 0.3, 0.5});
+    lstm.SetForgetLayerNormWeights({0.2, 0.2, 0.4, 0.3});
+    lstm.SetCellLayerNormWeights({0.7, 0.2, 0.3, 0.8});
+    lstm.SetOutputLayerNormWeights({0.6, 0.2, 0.2, 0.5});
+
+    const std::vector<std::vector<float>> lstm_input = {
+            {                           // Batch0: 3 (input_sequence_size) * 5 (n_input)
+             0.7, 0.8, 0.1, 0.2, 0.3,   // seq 0
+             0.8, 0.1, 0.2, 0.4, 0.5,   // seq 1
+             0.2, 0.7, 0.7, 0.1, 0.7},  // seq 2
+
+            {                           // Batch1: 3 (input_sequence_size) * 5 (n_input)
+             0.3, 0.2, 0.9, 0.8, 0.1,   // seq 0
+             0.1, 0.5, 0.2, 0.4, 0.2,   // seq 1
+             0.6, 0.9, 0.2, 0.5, 0.7},  // seq 2
+    };
+
+    const std::vector<std::vector<float>> lstm_golden_output = {
+            {
+                    // Batch0: 3 (input_sequence_size) * 3 (n_output)
+                    0.0244077, 0.128027, -0.00170918,  // seq 0
+                    0.0137642, 0.140751, 0.0395835,    // seq 1
+                    -0.00459231, 0.155278, 0.0837377,  // seq 2
+            },
+            {
+                    // Batch1: 3 (input_sequence_size) * 3 (n_output)
+                    -0.00692428, 0.0848741, 0.063445,  // seq 0
+                    -0.00403912, 0.139963, 0.072681,   // seq 1
+                    0.00752706, 0.161903, 0.0561371,   // seq 2
+            }};
+
+    // Resetting cell_state and output_state
+    lstm.ResetCellState();
+    lstm.ResetOutputState();
+
+    const int input_sequence_size = lstm_input[0].size() / n_input;
+    for (int i = 0; i < input_sequence_size; i++) {
+        for (int b = 0; b < n_batch; ++b) {
+            const float* batch_start = lstm_input[b].data() + i * n_input;
+            const float* batch_end = batch_start + n_input;
+
+            lstm.SetInput(b * n_input, batch_start, batch_end);
+        }
+
+        lstm.Invoke();
+
+        std::vector<float> expected;
+        for (int b = 0; b < n_batch; ++b) {
+            const float* golden_start = lstm_golden_output[b].data() + i * n_output;
+            const float* golden_end = golden_start + n_output;
+            expected.insert(expected.end(), golden_start, golden_end);
+        }
+        EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
+    }
+}
+
+}  // namespace wrapper
+}  // namespace nn
+}  // namespace android
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 47c011c..50fcdd0 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -983,6 +983,12 @@
      *   matrix, each element of which is the product of the corresponding
      *   elements of the input matrices.
      *
+     * Since API level 29, LSTM supports layer normalization.
+     * In case layer normalization is used, the inputs to internal activation
+     * functions (sigmoid and \f$g\f$) are normalized, rescaled and recentered
+     * following an approach from section 3.1 from
+     * https://arxiv.org/pdf/1607.06450.pdf
+     *
      * The operation has the following independently optional inputs:
      * * The input-to-input weights (\f$W_{xi}\f$), recurrent-to-input weights
      *   (\f$W_{hi}\f$), cell-to-input (\f$W_{ci}\f$) weights, and input gate
@@ -1003,6 +1009,9 @@
      * * The projection bias (\f$b_{proj}\f$) may (but not required to) have a
      *   value if the recurrent projection layer exists, and should otherwise
      *   have no value.
+     * * (API level >= 29) The four layer normalization weights either all have
+     *   values or none of them have values. Layer normalization is used when
+     *   values are present.
      *
      * References:
      *
@@ -1023,6 +1032,10 @@
      * http://arxiv.org/pdf/1503.04069.pdf
      * Greff et al. "LSTM: A Search Space Odyssey"
      *
+     * The layer normalization is based on:
+     * https://arxiv.org/pdf/1607.06450.pdf
+     * Jimmy Ba et al. "Layer Normalization"
+     *
      * Supported tensor {@link OperandCode}:
      * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
      *
@@ -1106,6 +1119,23 @@
      * * 22:The clipping threshold (\f$t_{proj}\f$) for the output from the
      *      projection layer, such that values are bound within
      *      [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+     * Since API level 29 there are additional inputs to this op:
+     * * 23:The input layer normalization weights.
+     *      A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
+     *      [num_units]. Used to rescale normalized inputs to activation at
+     *      input gate.
+     * * 24:The forget layer normalization weights.
+     *      A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
+     *      [num_units]. Used to rescale normalized inputs to activation at
+     *      forget gate.
+     * * 25:The cell layer normalization weights.
+     *      A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
+     *      [num_units]. Used to rescale normalized inputs to activation at
+     *      cell gate.
+     * * 26:The output layer normalization weights.
+     *      A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
+     *      [num_units]. Used to rescale normalized inputs to activation at
+     *      output gate.
      *
      * Outputs:
      * * 0: The scratch buffer.
diff --git a/runtime/test/TestValidateOperations.cpp b/runtime/test/TestValidateOperations.cpp
index 8145740..5d4ac6e 100644
--- a/runtime/test/TestValidateOperations.cpp
+++ b/runtime/test/TestValidateOperations.cpp
@@ -132,9 +132,9 @@
         return true;
     }
 
-    bool testMutatingInputOperandCounts() {
+    bool testMutatingInputOperandCounts(uint32_t numToAdd = 5) {
         std::vector<ANeuralNetworksOperandType> inputs = mValidInputs;
-        for (uint32_t i = 0; i < 5; i++) {
+        for (uint32_t i = 0; i < numToAdd; i++) {
             inputs.push_back(inputs[0]);
             if (ANEURALNETWORKS_NO_ERROR == addOperation(inputs, mValidOutputs)) {
                 return false;
@@ -1112,6 +1112,99 @@
         {scratch, outputStateOut, cellStateOut, output});
 
     EXPECT_TRUE(lstmTest.testMutatingInputOperandCode());
+    EXPECT_TRUE(lstmTest.testMutatingInputOperandCounts(3));
+    EXPECT_TRUE(lstmTest.testMutatingOutputOperandCode());
+    EXPECT_TRUE(lstmTest.testMutatingOutputOperandCounts());
+}
+
+TEST(OperationValidationTest, LSTM_V1_2_float32) {
+    uint32_t oneDimensional[1] = {5};
+    uint32_t twoDimensional[2] = {5, 5};
+    ANeuralNetworksOperandType floatTensor1D = {.type = ANEURALNETWORKS_TENSOR_FLOAT32,
+                                                .dimensionCount = 1,
+                                                .dimensions = oneDimensional,
+                                                .scale = 0.0f,
+                                                .zeroPoint = 0};
+    ANeuralNetworksOperandType floatTensor2D = {.type = ANEURALNETWORKS_TENSOR_FLOAT32,
+                                                .dimensionCount = 2,
+                                                .dimensions = twoDimensional,
+                                                .scale = 0.0f,
+                                                .zeroPoint = 0};
+    ANeuralNetworksOperandType intScalar = {.type = ANEURALNETWORKS_INT32,
+                                            .dimensionCount = 0,
+                                            .dimensions = nullptr,
+                                            .scale = 0.0f,
+                                            .zeroPoint = 0};
+    ANeuralNetworksOperandType floatScalar = {.type = ANEURALNETWORKS_FLOAT32,
+                                              .dimensionCount = 0,
+                                              .dimensions = nullptr,
+                                              .scale = 0.0f,
+                                              .zeroPoint = 0};
+
+    ANeuralNetworksOperandType input = floatTensor2D;
+    ANeuralNetworksOperandType inputToInput = floatTensor2D;
+    ANeuralNetworksOperandType inputToForget = floatTensor2D;
+    ANeuralNetworksOperandType inputToCell = floatTensor2D;
+    ANeuralNetworksOperandType inputToOutput = floatTensor2D;
+    ANeuralNetworksOperandType recurrentToInput = floatTensor2D;
+    ANeuralNetworksOperandType recurrentToForget = floatTensor2D;
+    ANeuralNetworksOperandType recurrentToCell = floatTensor2D;
+    ANeuralNetworksOperandType recurrentToOutput = floatTensor2D;
+    ANeuralNetworksOperandType cellToInput = floatTensor1D;
+    ANeuralNetworksOperandType cellToForget = floatTensor1D;
+    ANeuralNetworksOperandType cellToOutput = floatTensor1D;
+    ANeuralNetworksOperandType inputGateBias = floatTensor1D;
+    ANeuralNetworksOperandType forgetGateBias = floatTensor1D;
+    ANeuralNetworksOperandType cellBias = floatTensor1D;
+    ANeuralNetworksOperandType outputGateBias = floatTensor1D;
+    ANeuralNetworksOperandType projWeights = floatTensor2D;
+    ANeuralNetworksOperandType projBias = floatTensor1D;
+    ANeuralNetworksOperandType outputStateIn = floatTensor2D;
+    ANeuralNetworksOperandType cellStateIn = floatTensor2D;
+    ANeuralNetworksOperandType activation = intScalar;
+    ANeuralNetworksOperandType clipCellState = floatScalar;
+    ANeuralNetworksOperandType clipProjLayer = floatScalar;
+    ANeuralNetworksOperandType inputLayerNormWeights = floatTensor1D;
+    ANeuralNetworksOperandType forgetLayerNormWeights = floatTensor1D;
+    ANeuralNetworksOperandType cellLayerNormWeights = floatTensor1D;
+    ANeuralNetworksOperandType outputLayerNormWeights = floatTensor1D;
+
+    ANeuralNetworksOperandType scratch = floatTensor2D;
+    ANeuralNetworksOperandType outputStateOut = floatTensor2D;
+    ANeuralNetworksOperandType cellStateOut = floatTensor2D;
+    ANeuralNetworksOperandType output = floatTensor2D;
+
+    OperationTestBase lstmTest(ANEURALNETWORKS_LSTM,
+                               {input,
+                                inputToInput,
+                                inputToForget,
+                                inputToCell,
+                                inputToOutput,
+                                recurrentToInput,
+                                recurrentToForget,
+                                recurrentToCell,
+                                recurrentToOutput,
+                                cellToInput,
+                                cellToForget,
+                                cellToOutput,
+                                inputGateBias,
+                                forgetGateBias,
+                                cellBias,
+                                outputGateBias,
+                                projWeights,
+                                projBias,
+                                outputStateIn,
+                                cellStateIn,
+                                activation,
+                                clipCellState,
+                                clipProjLayer,
+                                inputLayerNormWeights,
+                                forgetLayerNormWeights,
+                                cellLayerNormWeights,
+                                outputLayerNormWeights},
+                               {scratch, outputStateOut, cellStateOut, output});
+
+    EXPECT_TRUE(lstmTest.testMutatingInputOperandCode());
     EXPECT_TRUE(lstmTest.testMutatingInputOperandCounts());
     EXPECT_TRUE(lstmTest.testMutatingOutputOperandCode());
     EXPECT_TRUE(lstmTest.testMutatingOutputOperandCounts());
diff --git a/runtime/test/for-cts/TestGeneratedOneFile.cpp b/runtime/test/for-cts/TestGeneratedOneFile.cpp
index 997b086..fbc35fc 100644
--- a/runtime/test/for-cts/TestGeneratedOneFile.cpp
+++ b/runtime/test/for-cts/TestGeneratedOneFile.cpp
@@ -358,6 +358,7 @@
 #include "../generated/tests/heatmap_max_keypoint.mod.py.cpp"
 #include "../generated/tests/l2_normalization_v1_2.mod.py.cpp"
 #include "../generated/tests/l2_pool_v1_2.mod.py.cpp"
+#include "../generated/tests/layer_norm_lstm.mod.py.cpp"
 #include "../generated/tests/local_response_normalization_v1_2.mod.py.cpp"
 #include "../generated/tests/lsh_projection_3_relaxed.mod.py.cpp"
 #include "../generated/tests/lsh_projection_4_relaxed.mod.py.cpp"
diff --git a/runtime/test/generated/all_generated_V1_2_vts_tests.cpp b/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
index 792b1fc..af9bf52 100644
--- a/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
+++ b/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
@@ -7420,6 +7420,29 @@
 }
 
 
+// Generated from: layer_norm_lstm.mod.py.
+namespace layer_norm_lstm {
+// Generated layer_norm_lstm test
+#include "examples/layer_norm_lstm.example.cpp"
+// Generated model constructor
+#include "vts_models/layer_norm_lstm.model.cpp"
+} // namespace layer_norm_lstm
+
+TEST_F(NeuralnetworksHidlTest, layer_norm_lstm) {
+  generated_tests::Execute(device,
+                           layer_norm_lstm::createTestModel,
+                           layer_norm_lstm::is_ignored,
+                           layer_norm_lstm::examples);
+}
+
+TEST_F(ValidationTest, layer_norm_lstm) {
+  const Model model = layer_norm_lstm::createTestModel();
+  const std::vector<Request> requests = createRequests(layer_norm_lstm::examples);
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
 // Generated from: local_response_normalization_v1_2.mod.py.
 namespace local_response_normalization_v1_2 {
 // Generated local_response_normalization_v1_2 test
diff --git a/runtime/test/generated/examples/layer_norm_lstm.example.cpp b/runtime/test/generated/examples/layer_norm_lstm.example.cpp
new file mode 100644
index 0000000..58e42b7
--- /dev/null
+++ b/runtime/test/generated/examples/layer_norm_lstm.example.cpp
@@ -0,0 +1,98 @@
+// clang-format off
+// Generated file (from: layer_norm_lstm.mod.py). Do not edit
+std::vector<MixedTypedExample> examples = {
+// Begin of an example
+{
+.operands = {
+//Input(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {{0, {0.7f, 0.8f, 0.1f, 0.2f, 0.3f, 0.3f, 0.2f, 0.9f, 0.8f, 0.1f}}, {1, {0.5f, 0.6f, 0.7f, -0.8f, -0.9f, 0.1f, 0.2f, 0.3f, -0.4f, 0.5f, -0.8f, 0.7f, -0.6f, 0.5f, -0.4f, -0.5f, -0.4f, -0.3f, -0.2f, -0.1f}}, {2, {-0.6f, -0.1f, 0.3f, 0.2f, 0.9f, -0.5f, -0.2f, -0.4f, 0.3f, -0.8f, -0.4f, 0.3f, -0.5f, -0.4f, -0.6f, 0.3f, -0.4f, -0.6f, -0.5f, -0.5f}}, {3, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f, 0.5f, -0.2f, -0.3f, -0.2f, -0.6f, 0.6f, -0.1f, -0.4f, -0.3f, -0.7f, 0.7f, -0.9f, -0.5f, 0.8f, 0.6f}}, {4, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f, -0.7f, 0.3f, -0.3f, -0.8f, -0.2f, 0.6f, -0.2f, 0.4f, -0.7f, -0.3f, -0.5f, 0.1f, 0.5f, -0.6f, -0.4f}}, {5, {-0.2f, -0.3f, 0.4f, 0.1f, -0.5f, 0.9f, -0.2f, -0.3f, -0.7f, 0.05f, -0.2f, -0.6f}}, {6, {-0.5f, -0.3f, -0.5f, -0.2f, 0.6f, 0.4f, 0.9f, 0.3f, -0.1f, 0.2f, 0.5f, 0.2f}}, {7, {-0.3f, 0.2f, 0.1f, -0.3f, 0.8f, -0.08f, -0.2f, 0.3f, 0.8f, -0.6f, -0.1f, 0.2f}}, {8, {0.3f, -0.1f, 0.1f, -0.2f, -0.5f, -0.7f, -0.2f, -0.6f, -0.1f, -0.4f, -0.7f, -0.2f}}, {9, {0.05f, 0.1f, 0.25f, 0.15f}}, {10, {-0.02f, -0.15f, -0.25f, -0.03f}}, {11, {0.1f, -0.1f, -0.5f, 0.05f}}, {12, {0.03f, 0.15f, 0.22f, 0.38f}}, {13, {0.1f, -0.3f, -0.2f, 0.1f}}, {14, {-0.05f, 0.72f, 0.25f, 0.08f}}, {15, {0.05f, -0.01f, 0.2f, 0.1f}}, {16, {-0.1f, 0.2f, 0.01f, -0.2f, 0.1f, 0.5f, 0.3f, 0.08f, 0.07f, 0.2f, -0.4f, 0.2f}}, {17, {}}, {18, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}, {19, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}, {20, {0.1f, 0.2f, 0.3f, 0.5f}}, {21, {0.2f, 0.2f, 0.4f, 0.3f}}, {22, {0.7f, 0.2f, 0.3f, 0.8f}}, {23, {0.6f, 0.2f, 0.2f, 0.5f}}},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_ASYMM map
+  {},
+  // int -> FLOAT16 map
+  {},
+},
+//Output(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {{0, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}, {1, {0.024407668039203f, 0.128027379512787f, -0.001709178090096f, -0.006924282759428f, 0.08487406373024f, 0.06344497948885f}}, {2, {-0.451771229505539f, 0.376915663480759f, 0.225425109267235f, 0.232406347990036f, -0.252585828304291f, 0.330421179533005f, 0.017305245622993f, 0.366601228713989f}}, {3, {0.024407668039203f, 0.128027379512787f, -0.001709178090096f, -0.006924282759428f, 0.08487406373024f, 0.06344497948885f}}},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_ASYMM map
+  {},
+  // int -> FLOAT16 map
+  {},
+}
+},
+}, // End of an example
+// Begin of an example
+{
+.operands = {
+//Input(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {{0, {0.8f, 0.1f, 0.2f, 0.4f, 0.5f, 0.1f, 0.5f, 0.2f, 0.4f, 0.2f}}, {1, {0.5f, 0.6f, 0.7f, -0.8f, -0.9f, 0.1f, 0.2f, 0.3f, -0.4f, 0.5f, -0.8f, 0.7f, -0.6f, 0.5f, -0.4f, -0.5f, -0.4f, -0.3f, -0.2f, -0.1f}}, {2, {-0.6f, -0.1f, 0.3f, 0.2f, 0.9f, -0.5f, -0.2f, -0.4f, 0.3f, -0.8f, -0.4f, 0.3f, -0.5f, -0.4f, -0.6f, 0.3f, -0.4f, -0.6f, -0.5f, -0.5f}}, {3, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f, 0.5f, -0.2f, -0.3f, -0.2f, -0.6f, 0.6f, -0.1f, -0.4f, -0.3f, -0.7f, 0.7f, -0.9f, -0.5f, 0.8f, 0.6f}}, {4, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f, -0.7f, 0.3f, -0.3f, -0.8f, -0.2f, 0.6f, -0.2f, 0.4f, -0.7f, -0.3f, -0.5f, 0.1f, 0.5f, -0.6f, -0.4f}}, {5, {-0.2f, -0.3f, 0.4f, 0.1f, -0.5f, 0.9f, -0.2f, -0.3f, -0.7f, 0.05f, -0.2f, -0.6f}}, {6, {-0.5f, -0.3f, -0.5f, -0.2f, 0.6f, 0.4f, 0.9f, 0.3f, -0.1f, 0.2f, 0.5f, 0.2f}}, {7, {-0.3f, 0.2f, 0.1f, -0.3f, 0.8f, -0.08f, -0.2f, 0.3f, 0.8f, -0.6f, -0.1f, 0.2f}}, {8, {0.3f, -0.1f, 0.1f, -0.2f, -0.5f, -0.7f, -0.2f, -0.6f, -0.1f, -0.4f, -0.7f, -0.2f}}, {9, {0.05f, 0.1f, 0.25f, 0.15f}}, {10, {-0.02f, -0.15f, -0.25f, -0.03f}}, {11, {0.1f, -0.1f, -0.5f, 0.05f}}, {12, {0.03f, 0.15f, 0.22f, 0.38f}}, {13, {0.1f, -0.3f, -0.2f, 0.1f}}, {14, {-0.05f, 0.72f, 0.25f, 0.08f}}, {15, {0.05f, -0.01f, 0.2f, 0.1f}}, {16, {-0.1f, 0.2f, 0.01f, -0.2f, 0.1f, 0.5f, 0.3f, 0.08f, 0.07f, 0.2f, -0.4f, 0.2f}}, {17, {}}, {18, {0.024407668039203f, 0.128027379512787f, -0.001709178090096f, -0.006924282759428f, 0.08487406373024f, 0.06344497948885f}}, {19, {-0.451771229505539f, 0.376915663480759f, 0.225425109267235f, 0.232406347990036f, -0.252585828304291f, 0.330421179533005f, 0.017305245622993f, 0.366601228713989f}}, {20, {0.1f, 0.2f, 0.3f, 0.5f}}, {21, {0.2f, 0.2f, 0.4f, 0.3f}}, {22, {0.7f, 0.2f, 0.3f, 0.8f}}, {23, {0.6f, 0.2f, 0.2f, 0.5f}}},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_ASYMM map
+  {},
+  // int -> FLOAT16 map
+  {},
+},
+//Output(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {{0, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}, {1, {0.013764165341854f, 0.140751048922539f, 0.039583537727594f, -0.004039138555527f, 0.139963015913963f, 0.072681039571762f}}, {2, {-0.645632147789001f, 0.518238246440887f, 0.168679088354111f, 0.555787742137909f, -0.49367481470108f, 0.475847363471985f, 0.106874041259289f, 0.50430965423584f}}, {3, {0.013764165341854f, 0.140751048922539f, 0.039583537727594f, -0.004039138555527f, 0.139963015913963f, 0.072681039571762f}}},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_ASYMM map
+  {},
+  // int -> FLOAT16 map
+  {},
+}
+},
+}, // End of an example
+// Begin of an example
+{
+.operands = {
+//Input(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {{0, {0.2f, 0.7f, 0.7f, 0.1f, 0.7f, 0.6f, 0.9f, 0.2f, 0.5f, 0.7f}}, {1, {0.5f, 0.6f, 0.7f, -0.8f, -0.9f, 0.1f, 0.2f, 0.3f, -0.4f, 0.5f, -0.8f, 0.7f, -0.6f, 0.5f, -0.4f, -0.5f, -0.4f, -0.3f, -0.2f, -0.1f}}, {2, {-0.6f, -0.1f, 0.3f, 0.2f, 0.9f, -0.5f, -0.2f, -0.4f, 0.3f, -0.8f, -0.4f, 0.3f, -0.5f, -0.4f, -0.6f, 0.3f, -0.4f, -0.6f, -0.5f, -0.5f}}, {3, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f, 0.5f, -0.2f, -0.3f, -0.2f, -0.6f, 0.6f, -0.1f, -0.4f, -0.3f, -0.7f, 0.7f, -0.9f, -0.5f, 0.8f, 0.6f}}, {4, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f, -0.7f, 0.3f, -0.3f, -0.8f, -0.2f, 0.6f, -0.2f, 0.4f, -0.7f, -0.3f, -0.5f, 0.1f, 0.5f, -0.6f, -0.4f}}, {5, {-0.2f, -0.3f, 0.4f, 0.1f, -0.5f, 0.9f, -0.2f, -0.3f, -0.7f, 0.05f, -0.2f, -0.6f}}, {6, {-0.5f, -0.3f, -0.5f, -0.2f, 0.6f, 0.4f, 0.9f, 0.3f, -0.1f, 0.2f, 0.5f, 0.2f}}, {7, {-0.3f, 0.2f, 0.1f, -0.3f, 0.8f, -0.08f, -0.2f, 0.3f, 0.8f, -0.6f, -0.1f, 0.2f}}, {8, {0.3f, -0.1f, 0.1f, -0.2f, -0.5f, -0.7f, -0.2f, -0.6f, -0.1f, -0.4f, -0.7f, -0.2f}}, {9, {0.05f, 0.1f, 0.25f, 0.15f}}, {10, {-0.02f, -0.15f, -0.25f, -0.03f}}, {11, {0.1f, -0.1f, -0.5f, 0.05f}}, {12, {0.03f, 0.15f, 0.22f, 0.38f}}, {13, {0.1f, -0.3f, -0.2f, 0.1f}}, {14, {-0.05f, 0.72f, 0.25f, 0.08f}}, {15, {0.05f, -0.01f, 0.2f, 0.1f}}, {16, {-0.1f, 0.2f, 0.01f, -0.2f, 0.1f, 0.5f, 0.3f, 0.08f, 0.07f, 0.2f, -0.4f, 0.2f}}, {17, {}}, {18, {0.013764165341854f, 0.140751048922539f, 0.039583537727594f, -0.004039138555527f, 0.139963015913963f, 0.072681039571762f}}, {19, {-0.645632147789001f, 0.518238246440887f, 0.168679088354111f, 0.555787742137909f, -0.49367481470108f, 0.475847363471985f, 0.106874041259289f, 0.50430965423584f}}, {20, {0.1f, 0.2f, 0.3f, 0.5f}}, {21, {0.2f, 0.2f, 0.4f, 0.3f}}, {22, {0.7f, 0.2f, 0.3f, 0.8f}}, {23, {0.6f, 0.2f, 0.2f, 0.5f}}},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_ASYMM map
+  {},
+  // int -> FLOAT16 map
+  {},
+},
+//Output(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {{0, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}, {1, {-0.004592306911945f, 0.155278354883194f, 0.083737745881081f, 0.007527053356171f, 0.161902531981468f, 0.056137066334486f}}, {2, {-0.742560744285583f, 0.579139292240143f, 0.114988230168819f, 0.649957716464996f, -0.686565399169922f, 0.548869132995605f, 0.17313876748085f, 0.587379336357117f}}, {3, {-0.004592306911945f, 0.155278354883194f, 0.083737745881081f, 0.007527053356171f, 0.161902531981468f, 0.056137066334486f}}},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_ASYMM map
+  {},
+  // int -> FLOAT16 map
+  {},
+}
+},
+}, // End of an example
+};
+
diff --git a/runtime/test/generated/models/layer_norm_lstm.model.cpp b/runtime/test/generated/models/layer_norm_lstm.model.cpp
new file mode 100644
index 0000000..b7c8c17
--- /dev/null
+++ b/runtime/test/generated/models/layer_norm_lstm.model.cpp
@@ -0,0 +1,66 @@
+// clang-format off
+// Generated file (from: layer_norm_lstm.mod.py). Do not edit
+void CreateModel(Model *model) {
+  OperandType type0(Type::TENSOR_FLOAT32, {2, 5});
+  OperandType type1(Type::TENSOR_FLOAT32, {4, 5});
+  OperandType type10(Type::TENSOR_FLOAT32, {2, 16});
+  OperandType type2(Type::TENSOR_FLOAT32, {4, 3});
+  OperandType type3(Type::TENSOR_FLOAT32, {4});
+  OperandType type4(Type::TENSOR_FLOAT32, {3, 4});
+  OperandType type5(Type::TENSOR_FLOAT32, {0});
+  OperandType type6(Type::TENSOR_FLOAT32, {2, 3});
+  OperandType type7(Type::TENSOR_FLOAT32, {2, 4});
+  OperandType type8(Type::INT32, {});
+  OperandType type9(Type::FLOAT32, {});
+  // Phase 1, operands
+  auto input = model->addOperand(&type0);
+  auto input_to_input_weights = model->addOperand(&type1);
+  auto input_to_forget_weights = model->addOperand(&type1);
+  auto input_to_cell_weights = model->addOperand(&type1);
+  auto input_to_output_weights = model->addOperand(&type1);
+  auto recurrent_to_input_weights = model->addOperand(&type2);
+  auto recurrent_to_forget_weights = model->addOperand(&type2);
+  auto recurrent_to_cell_weights = model->addOperand(&type2);
+  auto recurrent_to_output_weights = model->addOperand(&type2);
+  auto cell_to_input_weights = model->addOperand(&type3);
+  auto cell_to_forget_weights = model->addOperand(&type3);
+  auto cell_to_output_weights = model->addOperand(&type3);
+  auto input_gate_bias = model->addOperand(&type3);
+  auto forget_gate_bias = model->addOperand(&type3);
+  auto cell_gate_bias = model->addOperand(&type3);
+  auto output_gate_bias = model->addOperand(&type3);
+  auto projection_weights = model->addOperand(&type4);
+  auto projection_bias = model->addOperand(&type5);
+  auto output_state_in = model->addOperand(&type6);
+  auto cell_state_in = model->addOperand(&type7);
+  auto activation_param = model->addOperand(&type8);
+  auto cell_clip_param = model->addOperand(&type9);
+  auto proj_clip_param = model->addOperand(&type9);
+  auto input_layer_norm_weights = model->addOperand(&type3);
+  auto forget_layer_norm_weights = model->addOperand(&type3);
+  auto cell_layer_norm_weights = model->addOperand(&type3);
+  auto output_layer_norm_weights = model->addOperand(&type3);
+  auto scratch_buffer = model->addOperand(&type10);
+  auto output_state_out = model->addOperand(&type6);
+  auto cell_state_out = model->addOperand(&type7);
+  auto output = model->addOperand(&type6);
+  // Phase 2, operations
+  static int32_t activation_param_init[] = {4};
+  model->setOperandValue(activation_param, activation_param_init, sizeof(int32_t) * 1);
+  static float cell_clip_param_init[] = {0.0f};
+  model->setOperandValue(cell_clip_param, cell_clip_param_init, sizeof(float) * 1);
+  static float proj_clip_param_init[] = {0.0f};
+  model->setOperandValue(proj_clip_param, proj_clip_param_init, sizeof(float) * 1);
+  model->addOperation(ANEURALNETWORKS_LSTM, {input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias, cell_gate_bias, output_gate_bias, projection_weights, projection_bias, output_state_in, cell_state_in, activation_param, cell_clip_param, proj_clip_param, input_layer_norm_weights, forget_layer_norm_weights, cell_layer_norm_weights, output_layer_norm_weights}, {scratch_buffer, output_state_out, cell_state_out, output});
+  // Phase 3, inputs and outputs
+  model->identifyInputsAndOutputs(
+    {input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias, cell_gate_bias, output_gate_bias, projection_weights, projection_bias, output_state_in, cell_state_in, input_layer_norm_weights, forget_layer_norm_weights, cell_layer_norm_weights, output_layer_norm_weights},
+    {scratch_buffer, output_state_out, cell_state_out, output});
+  assert(model->isValid());
+}
+
+inline bool is_ignored(int i) {
+  static std::set<int> ignore = {0};
+  return ignore.find(i) != ignore.end();
+}
+
diff --git a/runtime/test/generated/tests/layer_norm_lstm.mod.py.cpp b/runtime/test/generated/tests/layer_norm_lstm.mod.py.cpp
new file mode 100644
index 0000000..5cc89db
--- /dev/null
+++ b/runtime/test/generated/tests/layer_norm_lstm.mod.py.cpp
@@ -0,0 +1,17 @@
+// clang-format off
+// Generated file (from: layer_norm_lstm.mod.py). Do not edit
+#include "../../TestGenerated.h"
+
+namespace layer_norm_lstm {
+// Generated layer_norm_lstm test
+#include "generated/examples/layer_norm_lstm.example.cpp"
+// Generated model constructor
+#include "generated/models/layer_norm_lstm.model.cpp"
+} // namespace layer_norm_lstm
+
+TEST_F(GeneratedTests, layer_norm_lstm) {
+    execute(layer_norm_lstm::CreateModel,
+            layer_norm_lstm::is_ignored,
+            layer_norm_lstm::examples);
+}
+
diff --git a/runtime/test/generated/vts_models/layer_norm_lstm.model.cpp b/runtime/test/generated/vts_models/layer_norm_lstm.model.cpp
new file mode 100644
index 0000000..e37c922
--- /dev/null
+++ b/runtime/test/generated/vts_models/layer_norm_lstm.model.cpp
@@ -0,0 +1,316 @@
+// clang-format off
+// Generated file (from: layer_norm_lstm.mod.py). Do not edit
+// Create the model
+Model createTestModel() {
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 5},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 5},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 5},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 5},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 5},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {0},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::FLOAT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 4, .length = 4},
+        },
+        {
+            .type = OperandType::FLOAT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 4},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 16},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 3},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 4},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {2, 3},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::LSTM,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26},
+            .outputs = {27, 28, 29, 30},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 23, 24, 25, 26};
+    const std::vector<uint32_t> outputIndexes = {27, 28, 29, 30};
+    std::vector<uint8_t> operandValues = {
+      4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored(int i) {
+  static std::set<int> ignore = {0};
+  return ignore.find(i) != ignore.end();
+}
+
diff --git a/runtime/test/specs/V1_2/layer_norm_lstm.mod.py b/runtime/test/specs/V1_2/layer_norm_lstm.mod.py
new file mode 100644
index 0000000..2b426cd
--- /dev/null
+++ b/runtime/test/specs/V1_2/layer_norm_lstm.mod.py
@@ -0,0 +1,189 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# LSTM Test: Layer Normalization, No Cifg, Peephole, Projection, and No Clipping.
+import copy  # each time step deep-copies the shared input map below
+
+model = Model()
+
+n_batch = 2   # batches processed per inference
+n_input = 5   # input feature size per batch
+# n_cell and n_output have the same size when there is no projection.
+n_cell = 4    # number of cell units (projection present, so n_output differs)
+n_output = 3  # output size after the projection layer
+
+input = Input("input", "TENSOR_FLOAT32", "{%d, %d}" % (n_batch, n_input))
+
+input_to_input_weights = Input("input_to_input_weights", "TENSOR_FLOAT32",
+                               "{%d, %d}" % (n_cell, n_input))
+input_to_forget_weights = Input("input_to_forget_weights", "TENSOR_FLOAT32",
+                                "{%d, %d}" % (n_cell, n_input))
+input_to_cell_weights = Input("input_to_cell_weights", "TENSOR_FLOAT32",
+                              "{%d, %d}" % (n_cell, n_input))
+input_to_output_weights = Input("input_to_output_weights", "TENSOR_FLOAT32",
+                                "{%d, %d}" % (n_cell, n_input))
+
+recurrent_to_input_weights = Input("recurrent_to_intput_weights",  # NOTE(review): "intput" typo; label only
+                                   "TENSOR_FLOAT32",
+                                   "{%d, %d}" % (n_cell, n_output))
+recurrent_to_forget_weights = Input("recurrent_to_forget_weights",
+                                    "TENSOR_FLOAT32",
+                                    "{%d, %d}" % (n_cell, n_output))
+recurrent_to_cell_weights = Input("recurrent_to_cell_weights", "TENSOR_FLOAT32",
+                                  "{%d, %d}" % (n_cell, n_output))
+recurrent_to_output_weights = Input("recurrent_to_output_weights",
+                                    "TENSOR_FLOAT32",
+                                    "{%d, %d}" % (n_cell, n_output))
+
+cell_to_input_weights = Input("cell_to_input_weights", "TENSOR_FLOAT32",  # peephole weights
+                              "{%d}" % (n_cell))
+cell_to_forget_weights = Input("cell_to_forget_weights", "TENSOR_FLOAT32",
+                               "{%d}" % (n_cell))
+cell_to_output_weights = Input("cell_to_output_weights", "TENSOR_FLOAT32",
+                               "{%d}" % (n_cell))
+
+input_gate_bias = Input("input_gate_bias", "TENSOR_FLOAT32", "{%d}" % (n_cell))
+forget_gate_bias = Input("forget_gate_bias", "TENSOR_FLOAT32",
+                         "{%d}" % (n_cell))
+cell_gate_bias = Input("cell_gate_bias", "TENSOR_FLOAT32", "{%d}" % (n_cell))
+output_gate_bias = Input("output_gate_bias", "TENSOR_FLOAT32",
+                         "{%d}" % (n_cell))
+
+projection_weights = Input("projection_weights", "TENSOR_FLOAT32",
+                           "{%d,%d}" % (n_output, n_cell))
+projection_bias = Input("projection_bias", "TENSOR_FLOAT32", "{0}")  # zero-sized: bias omitted
+
+output_state_in = Input("output_state_in", "TENSOR_FLOAT32",
+                        "{%d, %d}" % (n_batch, n_output))
+cell_state_in = Input("cell_state_in", "TENSOR_FLOAT32",
+                      "{%d, %d}" % (n_batch, n_cell))
+
+activation_param = Int32Scalar("activation_param", 4)  # Tanh
+cell_clip_param = Float32Scalar("cell_clip_param", 0.)  # 0 -> no cell clipping
+proj_clip_param = Float32Scalar("proj_clip_param", 0.)  # 0 -> no projection clipping
+
+input_layer_norm_weights = Input("input_layer_norm_weights", "TENSOR_FLOAT32",  # new layer-norm inputs
+                                 "{%d}" % n_cell)
+forget_layer_norm_weights = Input("forget_layer_norm_weights", "TENSOR_FLOAT32",
+                                  "{%d}" % n_cell)
+cell_layer_norm_weights = Input("cell_layer_norm_weights", "TENSOR_FLOAT32",
+                                "{%d}" % n_cell)
+output_layer_norm_weights = Input("output_layer_norm_weights", "TENSOR_FLOAT32",
+                                  "{%d}" % n_cell)
+
+scratch_buffer = IgnoredOutput("scratch_buffer", "TENSOR_FLOAT32",  # contents are never checked
+                               "{%d, %d}" % (n_batch, (n_cell * 4)))
+output_state_out = Output("output_state_out", "TENSOR_FLOAT32",
+                          "{%d, %d}" % (n_batch, n_output))
+cell_state_out = Output("cell_state_out", "TENSOR_FLOAT32",
+                        "{%d, %d}" % (n_batch, n_cell))
+output = Output("output", "TENSOR_FLOAT32", "{%d, %d}" % (n_batch, n_output))
+
+model = model.Operation(  # 23 standard LSTM inputs followed by the 4 layer-norm weight tensors
+    "LSTM", input, input_to_input_weights, input_to_forget_weights,
+    input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+    recurrent_to_forget_weights, recurrent_to_cell_weights,
+    recurrent_to_output_weights, cell_to_input_weights, cell_to_forget_weights,
+    cell_to_output_weights, input_gate_bias, forget_gate_bias, cell_gate_bias,
+    output_gate_bias, projection_weights, projection_bias, output_state_in,
+    cell_state_in, activation_param, cell_clip_param, proj_clip_param,
+    input_layer_norm_weights, forget_layer_norm_weights,
+    cell_layer_norm_weights, output_layer_norm_weights).To(
+        [scratch_buffer, output_state_out, cell_state_out, output])
+
+# Weights and biases shared by every time step's example below.
+input0 = {
+    input_to_input_weights: [
+        0.5, 0.6, 0.7, -0.8, -0.9, 0.1, 0.2, 0.3, -0.4, 0.5, -0.8, 0.7, -0.6,
+        0.5, -0.4, -0.5, -0.4, -0.3, -0.2, -0.1
+    ],
+    input_to_forget_weights: [
+        -0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, -0.4, 0.3, -0.8, -0.4, 0.3, -0.5,
+        -0.4, -0.6, 0.3, -0.4, -0.6, -0.5, -0.5
+    ],
+    input_to_cell_weights: [
+        -0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, -0.3, -0.2, -0.6, 0.6, -0.1,
+        -0.4, -0.3, -0.7, 0.7, -0.9, -0.5, 0.8, 0.6
+    ],
+    input_to_output_weights: [
+        -0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, -0.3, -0.8, -0.2, 0.6, -0.2,
+        0.4, -0.7, -0.3, -0.5, 0.1, 0.5, -0.6, -0.4
+    ],
+    input_gate_bias: [0.03, 0.15, 0.22, 0.38],
+    forget_gate_bias: [0.1, -0.3, -0.2, 0.1],
+    cell_gate_bias: [-0.05, 0.72, 0.25, 0.08],
+    output_gate_bias: [0.05, -0.01, 0.2, 0.1],
+    recurrent_to_input_weights: [
+        -0.2, -0.3, 0.4, 0.1, -0.5, 0.9, -0.2, -0.3, -0.7, 0.05, -0.2, -0.6
+    ],
+    recurrent_to_cell_weights: [
+        -0.3, 0.2, 0.1, -0.3, 0.8, -0.08, -0.2, 0.3, 0.8, -0.6, -0.1, 0.2
+    ],
+    recurrent_to_forget_weights: [
+        -0.5, -0.3, -0.5, -0.2, 0.6, 0.4, 0.9, 0.3, -0.1, 0.2, 0.5, 0.2
+    ],
+    recurrent_to_output_weights: [
+        0.3, -0.1, 0.1, -0.2, -0.5, -0.7, -0.2, -0.6, -0.1, -0.4, -0.7, -0.2
+    ],
+    cell_to_input_weights: [0.05, 0.1, 0.25, 0.15],
+    cell_to_forget_weights: [-0.02, -0.15, -0.25, -0.03],
+    cell_to_output_weights: [0.1, -0.1, -0.5, 0.05],
+    projection_weights: [
+        -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2
+    ],
+    projection_bias: [],  # empty: matches the zero-sized "{0}" declaration above
+    input_layer_norm_weights: [0.1, 0.2, 0.3, 0.5],
+    forget_layer_norm_weights: [0.2, 0.2, 0.4, 0.3],
+    cell_layer_norm_weights: [0.7, 0.2, 0.3, 0.8],
+    output_layer_norm_weights: [0.6, 0.2, 0.2, 0.5]
+}
+
+test_inputs = [[0.7, 0.8, 0.1, 0.2, 0.3, 0.3, 0.2, 0.9, 0.8, 0.1],  # one list per time step, {n_batch, n_input} flattened
+               [0.8, 0.1, 0.2, 0.4, 0.5, 0.1, 0.5, 0.2, 0.4, 0.2],
+               [0.2, 0.7, 0.7, 0.1, 0.7, 0.6, 0.9, 0.2, 0.5, 0.7]]
+golden_cell_states = [  # expected cell_state_out per step, {n_batch, n_cell} flattened
+    [
+        -0.451771229505539, 0.376915663480759, 0.225425109267235, 0.232406347990036, -0.252585828304291, 0.330421179533005, 0.017305245622993, 0.366601228713989
+    ],
+    [
+        -0.645632147789001, 0.518238246440887, 0.168679088354111, 0.555787742137909, -0.493674814701080, 0.475847363471985, 0.106874041259289, 0.504309654235840
+    ],
+    [-0.742560744285583, 0.579139292240143, 0.114988230168819, 0.649957716464996, -0.686565399169922, 0.548869132995605, 0.173138767480850, 0.587379336357117],
+]
+cell_states = [[0, 0, 0, 0, 0, 0, 0, 0]] + golden_cell_states[:2]  # step t reads step t-1's golden state
+
+golden_outputs = [  # expected output (and output_state_out) per step, {n_batch, n_output} flattened
+    [0.024407668039203, 0.128027379512787, -0.001709178090096, -0.006924282759428, 0.084874063730240, 0.063444979488850],
+    [0.013764165341854, 0.140751048922539, 0.039583537727594, -0.004039138555527, 0.139963015913963, 0.072681039571762],
+    [-0.004592306911945, 0.155278354883194, 0.083737745881081, 0.007527053356171, 0.161902531981468, 0.056137066334486],
+]
+output_states = [[0, 0, 0, 0, 0, 0]] + golden_outputs[:2]  # step t reads step t-1's golden output
+
+tests = zip(
+    test_inputs, output_states, cell_states, golden_cell_states, golden_outputs)
+
+for test_input, output_state, cell_state, golden_state, golden_output in tests:  # one Example per time step
+  cur_input = copy.deepcopy(input0)
+  cur_input[input] = test_input
+  cur_input[output_state_in] = output_state
+  cur_input[cell_state_in] = cell_state
+  cur_output = {
+      scratch_buffer: [0] * (n_batch * n_cell * 4),  # placeholder; IgnoredOutput is never compared
+      cell_state_out: golden_state,
+      output_state_out: golden_output,
+      output: golden_output
+  }
+  Example((cur_input, cur_output))