Port CPU implementations from TFLite for the following ops:

 - Div
 - Sub
 - Pad
 - Mean
 - Squeeze
 - Transpose
 - BatchToSpace
 - SpaceToBatch
 - StridedSlice

Test: mm
Bug: 63911257
Change-Id: Ia7d8f175598a5587118d506178392c1e8de6795a
diff --git a/common/OperationsUtils.cpp b/common/OperationsUtils.cpp
index 0975c56..7ced5b2 100644
--- a/common/OperationsUtils.cpp
+++ b/common/OperationsUtils.cpp
@@ -376,7 +376,7 @@
     NN_OPS_CHECK(axis >= 0);
     NN_OPS_CHECK(axis < (int32_t)num_dimensions);
 
-    int sum_axis = getSizeOfDimension(inputShapes[0], axis);
+    int sumAxis = getSizeOfDimension(inputShapes[0], axis);
     for (int i = 1; i < num_inputs; ++i) {
         NN_OPS_CHECK(getNumberOfDimensions(inputShapes[i]) == num_dimensions);
         NN_OPS_CHECK(inputShapes[i].type == inputShapes[0].type);
@@ -386,7 +386,7 @@
         }
         for (int d = 0; d < (int32_t)num_dimensions; ++d) {
             if (d == axis) {
-                sum_axis += getSizeOfDimension(inputShapes[i], axis);
+                sumAxis += getSizeOfDimension(inputShapes[i], axis);
             } else {
                 NN_OPS_CHECK(getSizeOfDimension(inputShapes[0], d) ==
                            getSizeOfDimension(inputShapes[i], d));
@@ -396,7 +396,7 @@
 
     output->type = input_type;
     output->dimensions = inputShapes[0].dimensions;
-    output->dimensions[axis] = sum_axis;
+    output->dimensions[axis] = sumAxis;
 
     if (input_type == OperandType::TENSOR_QUANT8_ASYMM) {
         NN_OPS_CHECK(inputShapes[0].offset == output->offset);
@@ -563,5 +563,310 @@
     return true;
 }
 
+bool padPrepare(const Shape& input,
+                const int32_t* paddingsData,
+                const Shape& paddingsShape,
+                Shape* output) {
+    // Currently only 4D tensors are supported.
+    uint32_t numInputDims = getNumberOfDimensions(input);
+    NN_OPS_CHECK(numInputDims == 4);
+
+    // paddings must be provided as a 2-D int32 tensor.
+    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
+    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
+    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);
+
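+    // Example: input of shape [1, 2, 2, 1] with paddings
+    // [[0, 0], [1, 1], [1, 1], [0, 0]] yields an output of shape [1, 4, 4, 1].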
+    std::vector<uint32_t> outDims(numInputDims);
+    for (uint32_t i = 0; i < numInputDims; ++i) {
+        int32_t beforePadding = *paddingsData++;
+        int32_t afterPadding = *paddingsData++;
+        // Padding values must be greater than or equal to 0.
+        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
+        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
+    }
+    output->type = input.type;
+    output->dimensions = outDims;
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
+
+bool batchToSpacePrepare(const Shape& input,
+                         const int32_t* blockSizeData,
+                         const Shape& blockSizeShape,
+                         Shape* output) {
+    // Only 4D NHWC tensors are supported.
+    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
+
+    // blockSize must be provided as a 1-D int32 tensor.
+    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
+    // Only applies to spatial dimensions.
+    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);
+
+    uint32_t batches  = getSizeOfDimension(input, 0);
+    uint32_t height   = getSizeOfDimension(input, 1);
+    uint32_t width    = getSizeOfDimension(input, 2);
+    uint32_t channels = getSizeOfDimension(input, 3);
+
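+    // Example: input of shape [4, 2, 2, 1] with blockSize [2, 2] moves the
+    // batch dimension back into space, giving an output of shape [1, 4, 4, 1].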
+    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
+    output->type = input.type;
+    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
+                          height * blockSizeData[0],
+                          width * blockSizeData[1],
+                          channels};
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
+
+bool spaceToBatchPrepare(const Shape& input,
+                         const int32_t* blockSizeData,
+                         const Shape& blockSizeShape,
+                         const int32_t* paddingsData,
+                         const Shape& paddingsShape,
+                         Shape* output) {
+    // Only 4D NHWC tensors are supported.
+    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
+
+    // blockSize must be provided as a 1-D int32 tensor.
+    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
+    // Only applies to spatial dimensions.
+    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);
+
+    // paddings must be provided as a 2-D int32 tensor.
+    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
+    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
+    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);
+
+    uint32_t batches  = getSizeOfDimension(input, 0);
+    uint32_t height   = getSizeOfDimension(input, 1);
+    uint32_t width    = getSizeOfDimension(input, 2);
+    uint32_t channels = getSizeOfDimension(input, 3);
+
+    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
+    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];
+
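+    // Example: input of shape [1, 4, 4, 1] with blockSize [2, 2] and zero
+    // paddings moves spatial blocks into the batch dimension, giving an
+    // output of shape [4, 2, 2, 1].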
+    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
+    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);
+
+    output->type = input.type;
+    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
+                          paddedHeight / blockSizeData[0],
+                          paddedWidth / blockSizeData[1],
+                          channels};
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
+
+bool squeezePrepare(const Shape& input,
+                    const int32_t* squeezeDims,
+                    const Shape& squeezeDimsShape,
+                    Shape* output) {
+    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
+
+    // squeezeDims must be provided as a 1-D int32 tensor.
+    NN_OPS_CHECK(squeezeDimsShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(squeezeDimsShape) == 1);
+
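+    // Example: input of shape [1, 3, 1, 5] with squeezeDims [0, 2] (or with
+    // an empty squeezeDims) yields an output of shape [3, 5]; a negative
+    // index such as -2 resolves to dimension 2.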
+    int32_t squeezeDimsSize = static_cast<int32_t>(getSizeOfDimension(squeezeDimsShape, 0));
+    std::vector<bool> shouldSqueeze(numInputDims, false);
+    int32_t numDimsSqueezed = 0;
+
+    if (squeezeDimsSize == 0) {
+        // If squeezeDimsSize is 0, all dimensions of size 1 are squeezed.
+        for (int32_t idx = 0; idx < numInputDims; ++idx) {
+            if (getSizeOfDimension(input, idx) == 1) {
+                shouldSqueeze[idx] = true;
+                ++numDimsSqueezed;
+            }
+        }
+    } else {
+        for (int32_t idx = 0; idx < squeezeDimsSize; ++idx) {
+            int32_t current = squeezeDims[idx];
+            if (current < 0) {
+                current += numInputDims;
+            }
+            NN_OPS_CHECK(current >= 0 && current < numInputDims &&
+                         getSizeOfDimension(input, current) == 1);
+            if (!shouldSqueeze[current]) ++numDimsSqueezed;
+            shouldSqueeze[current] = true;
+        }
+    }
+
+    // Sets output dimensions.
+    std::vector<uint32_t> outDims(numInputDims - numDimsSqueezed);
+    for (int32_t inIdx = 0, outIdx = 0; inIdx < numInputDims; ++inIdx) {
+        if (!shouldSqueeze[inIdx]) {
+            outDims[outIdx++] = getSizeOfDimension(input, inIdx);
+        }
+    }
+
+    output->type = input.type;
+    output->dimensions = outDims;
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
+
+bool transposePrepare(const Shape& input,
+                      const int32_t* permData,
+                      const Shape& permShape,
+                      Shape* output) {
+    uint32_t numInputDims = getNumberOfDimensions(input);
+    // Transpose op only supports 1D-4D input arrays.
+    NN_OPS_CHECK(numInputDims <= 4);
+
+    // perm must be provided as a 1-D int32 tensor.
+    NN_OPS_CHECK(permShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(permShape) == 1);
+    NN_OPS_CHECK(numInputDims == getSizeOfDimension(permShape, 0));
+
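+    // Example: input of shape [2, 3, 4, 5] with perm [0, 3, 1, 2] yields an
+    // output of shape [2, 5, 3, 4].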
+    std::vector<uint32_t> outDims(numInputDims);
+    for (int32_t idx = 0; idx < static_cast<int32_t>(numInputDims); ++idx) {
+        NN_OPS_CHECK(permData[idx] >= 0 && permData[idx] < static_cast<int32_t>(numInputDims));
+        outDims[idx] = getSizeOfDimension(input, permData[idx]);
+    }
+
+    output->type = input.type;
+    output->dimensions = outDims;
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
+
+bool meanPrepare(const Shape& input,
+                 const int32_t* axisData,
+                 const Shape& axisShape,
+                 bool keepDims,
+                 Shape* output) {
+
+    // axis must be provided as a 1-D int32 tensor.
+    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);
+
+    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
+    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));
+
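+    // Example: input of shape [2, 3, 4] with axis [1] yields an output of
+    // shape [2, 1, 4] when keepDims is true, and [2, 4] when it is false.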
+    // Determines size of output tensor.
+    if (keepDims) {
+        std::vector<uint32_t> outDims(numInputDims);
+        for (int32_t idx = 0; idx < numInputDims; ++idx) {
+            bool isAxis = false;
+            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
+                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
+                    isAxis = true;
+                    break;
+                }
+            }
+            if (isAxis) {
+                outDims[idx] = 1;
+            } else {
+                outDims[idx] = getSizeOfDimension(input, idx);
+            }
+        }
+        output->dimensions = outDims;
+    } else {
+        // Counts the number of distinct axes to be reduced.
+        int32_t numReduceAxis = axisSize;
+        for (int32_t i = 0; i < axisSize; ++i) {
+            int32_t current = axisData[i];
+            if (current < 0) {
+                current += numInputDims;
+            }
+            NN_OPS_CHECK(current >= 0 && current < numInputDims);
+            for (int32_t j = 0; j < i; ++j) {
+                int32_t previous = axisData[j];
+                if (previous < 0) {
+                    previous += numInputDims;
+                }
+                if (current == previous) {
+                    --numReduceAxis;
+                    break;
+                }
+            }
+        }
+        // Determines output dimensions.
+        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
+        int32_t numSkipAxis = 0;
+        for (int32_t idx = 0; idx < numInputDims; ++idx) {
+            bool isAxis = false;
+            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
+                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
+                    ++numSkipAxis;
+                    isAxis = true;
+                    break;
+                }
+            }
+            if (!isAxis) {
+                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
+            }
+        }
+        output->dimensions = outDims;
+    }
+
+    output->type = input.type;
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
+
+bool stridedSlicePrepare(const Shape& input,
+                         const int32_t* beginData, const Shape& beginShape, int32_t beginMask,
+                         const int32_t* endData, const Shape& endShape, int32_t endMask,
+                         const int32_t* stridesData, const Shape& stridesShape,
+                         Shape* output) {
+    uint32_t numInputDims = getNumberOfDimensions(input);
+    // StridedSlice op only supports 1D-4D input arrays.
+    NN_OPS_CHECK(numInputDims <= 4);
+
+    NN_OPS_CHECK(getNumberOfDimensions(beginShape) == 1);
+    NN_OPS_CHECK(getNumberOfDimensions(endShape) == 1);
+    NN_OPS_CHECK(getNumberOfDimensions(stridesShape) == 1);
+
+    NN_OPS_CHECK(getSizeOfDimension(beginShape, 0) == numInputDims);
+    NN_OPS_CHECK(getSizeOfDimension(endShape, 0) == numInputDims);
+    NN_OPS_CHECK(getSizeOfDimension(stridesShape, 0) == numInputDims);
+
+    NN_OPS_CHECK(beginShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(endShape.type == OperandType::TENSOR_INT32);
+    NN_OPS_CHECK(stridesShape.type == OperandType::TENSOR_INT32);
+
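+    // Example: a 1-D input of shape [4] with begin [1], end [3], strides [1]
+    // and no masks yields an output of shape [2]; with strides [2] the output
+    // shape is [1], since ceil((3 - 1) / 2.0) == 1.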
+    // Determines the size of the output tensor and maps indices.
+    std::vector<uint32_t> outDims(numInputDims);
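+    // ClampedIndex is assumed to resolve negative indices against dim and to
+    // clamp the result to the valid range for the given stride direction.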
+    for (int32_t idx = static_cast<int32_t>(numInputDims) - 1; idx >= 0; --idx) {
+        int32_t dim = static_cast<int32_t>(getSizeOfDimension(input, idx));
+        int32_t stride = stridesData[idx];
+        // Stride value must be non-zero.
+        NN_OPS_CHECK(stride != 0);
+        bool positiveStride = stride > 0;
+
+        int32_t begin = beginMask & (1 << idx)
+                ? positiveStride ? 0 : dim - 1
+                : ClampedIndex(beginData[idx], dim, positiveStride);
+        int32_t end = endMask & (1 << idx)
+                ? positiveStride ? dim : -1
+                : ClampedIndex(endData[idx], dim, positiveStride);
+
+        // This is valid for both positive and negative strides.
+        int32_t outDim = ceil((end - begin) / static_cast<float>(stride));
+        outDims[idx] = outDim < 0 ? 0 : static_cast<uint32_t>(outDim);
+    }
+
+    output->type = input.type;
+    output->dimensions = outDims;
+    output->offset = input.offset;
+    output->scale = input.scale;
+
+    return true;
+}
 } // namespace nn
 } // namespace android