Implement dynamic output shape in CpuExecutor and Sample Driver.

Create an interface for CpuExecutor to report output shapes after
execution.

Add a new ResultCode, ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, for
dynamic output shape support.

Let the Sample Driver report output shapes after computation.
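
A driver backed by CpuExecutor can consume the new interface roughly as
in the sketch below. This is an illustration only: the run() parameter
list, the helper name convertResultCodeToErrorStatus, and the local
variable names are assumptions based on the surrounding code, and the
HIDL callback step is only hinted at in a comment.

    // Illustrative sketch, not part of this change.
    CpuExecutor executor;
    int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);

    // Valid only once run() has returned; empty unless the result code is
    // ANEURALNETWORKS_NO_ERROR or ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE.
    const std::vector<OutputShape>& outputShapes = executor.getOutputShapes();

    // Helper name assumed; the mapping added to Utils.cpp turns
    // ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE into
    // ErrorStatus::OUTPUT_INSUFFICIENT_SIZE so the client can resize its
    // output buffers from the reported dimensions and execute again.
    ErrorStatus status = convertResultCodeToErrorStatus(n);
    // The Sample Driver then passes (status, outputShapes) back through its
    // execution callback.

The shapes are collected in CpuExecutor::finish() so that, on an
insufficient-size failure, they still describe the dimensions computed
during execution, which is what lets the client recover.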

Bug: 73506513
Test: NeuralNetworksTest_static
Test: VtsHalNeuralnetworksV1_xTargetTest with 1.2 sample driver
Change-Id: I1ee906b7af101e447b479bea96050d8bde7fa6f4
Merged-In: I1ee906b7af101e447b479bea96050d8bde7fa6f4
(cherry picked from commit 8918e6df3f0be871e7db71cb5f331bed3ca15df4)
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index 41d8bb5..9828da8 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -111,27 +111,53 @@
     return operation->outputs.size();
 }
 
+// TODO(xusongw): Return the correct error code.
 // Updates the RunTimeOperandInfo with the newly calculated shape.
 // Allocate the buffer if we need to.
 bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
     // For user-provided model output operands, the parameters must match the Shape
     // calculated from the preparation step.
     if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
-        NN_RET_CHECK(info->type == shape.type) << "Invalid type for model output";
-        NN_RET_CHECK(info->dimensions == shape.dimensions) << "Invalid dimensions for model output";
+        if (info->type != shape.type) {
+            LOG(ERROR) << "Invalid type for model output";
+            return false;
+        }
         if (info->type == OperandType::TENSOR_QUANT8_ASYMM) {
-            NN_RET_CHECK_EQ(info->scale, shape.scale) << "Invalid scale for model output";
-            NN_RET_CHECK_EQ(info->zeroPoint, shape.offset) << "Invalid zeroPoint for model output";
+            if (info->scale != shape.scale) {
+                LOG(ERROR) << "Invalid scale for model output";
+                return false;
+            }
+            if (info->zeroPoint != shape.offset) {
+                LOG(ERROR) << "Invalid zeroPoint for model output";
+                return false;
+            }
         }
     }
+
+    std::vector<uint32_t> combined;
+    if (!combineDimensions(shape.dimensions, info->dimensions, &combined)) {
+        LOG(ERROR) << "Invalid dimensions for model operand";
+        return false;
+    }
+    info->dimensions = combined;
     info->type = shape.type;
-    info->dimensions = shape.dimensions;
     info->scale = shape.scale;
     info->zeroPoint = shape.offset;
-    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
-        uint32_t length = sizeOfData(info->type, info->dimensions);
+
+    // Allocate the buffer only if the combined dimensions are fully specified.
+    uint32_t length = sizeOfData(info->type, info->dimensions);
+    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && length > 0 &&
+        info->buffer == nullptr) {
         info->buffer = new uint8_t[length];
-        NN_RET_CHECK(info->buffer != nullptr);
+        if (info->buffer == nullptr) {
+            return false;
+        }
+        info->length = length;
+    }
+    if (!info->isSufficient()) {
+        LOG(ERROR) << "Insufficient size for model operand: required = " << length
+                   << ", provided = " << info->length;
+        return false;
     }
     return true;
 }
@@ -266,6 +292,7 @@
     }
     return true;
 }
+
 template <typename T>
 inline bool convertToNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) {
     uint32_t spatialSize = fromDim[2] * fromDim[3];
@@ -368,6 +395,7 @@
     } else {
         Shape outShape = from.shape();
         to.buffer = from.buffer;
+        to.length = from.length;
         if (!setInfoAndAllocateIfNeeded(&to, outShape)) {
             return false;
         }
@@ -396,6 +424,7 @@
     for (const auto& operation : model.operations) {
         int n = executeOperation(operation);
         if (n != ANEURALNETWORKS_NO_ERROR) {
+            finish(n);
             return n;
         }
     }
@@ -405,8 +434,7 @@
     for (auto& runtimeInfo : requestPoolInfos) {
         runtimeInfo.update();
     }
-    mModel = nullptr;
-    mRequest = nullptr;
+    finish(ANEURALNETWORKS_NO_ERROR);
     VLOG(CPUEXE) << "Completed run normally";
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -477,11 +505,13 @@
             if (from.hasNoValue) {
                 to.lifetime = OperandLifeTime::NO_VALUE;
                 nnAssert(to.buffer == nullptr);
+                to.length = 0;
             } else {
                 auto poolIndex = from.location.poolIndex;
                 nnAssert(poolIndex < requestPoolInfos.size());
                 auto& r = requestPoolInfos[poolIndex];
                 to.buffer = r.getBuffer() + from.location.offset;
+                to.length = from.location.length;
             }
         }
     };
@@ -733,6 +763,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (useImplicitPadding) {
                 Shape inputShape = input_tmp.shape();
@@ -865,6 +896,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (useImplicitPadding) {
                 Shape inputShape = input_tmp.shape();
@@ -984,6 +1016,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (inCount <= 8) {
                 Shape inputShape = input_tmp.shape();
@@ -1084,6 +1117,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (inCount <= 8) {
                 Shape inputShape = input_tmp.shape();
@@ -1178,6 +1212,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (inCount <= 8) {
                 Shape inputShape = input_tmp.shape();
@@ -1567,6 +1602,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (!resizeBilinearPrepare(input_tmp.shape(), width, height, &outShape) ||
                 !setInfoAndAllocateIfNeeded(&output_tmp, outShape)) {
@@ -1610,6 +1646,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
             if (!depthToSpacePrepare(input_tmp.shape(), blockSize, &outShape) ||
                 !setInfoAndAllocateIfNeeded(&output_tmp, outShape)) {
                 break;
@@ -1666,6 +1703,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (!spaceToDepthPrepare(input_tmp.shape(), blockSize, &outShape) ||
                 !setInfoAndAllocateIfNeeded(&output_tmp, outShape)) {
@@ -1843,6 +1881,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (!batchToSpacePrepare(input_tmp.shape(),
                                      reinterpret_cast<const int32_t*>(blockSize.buffer),
@@ -1906,6 +1945,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (!spaceToBatchPrepare(
                         input_tmp.shape(), reinterpret_cast<const int32_t*>(blockSize.buffer),
@@ -2345,6 +2385,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (inCount == 9) {
                 Shape inputShape = input_tmp.shape();
@@ -2445,6 +2486,7 @@
             }
             output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
             output_tmp.buffer = data_layout ? nullptr : output.buffer;
+            output_tmp.length = data_layout ? 0 : output.length;
 
             if (inCount == 9) {
                 const RunTimeOperandInfo& outShape = mOperands[ins[3]];
@@ -2614,6 +2656,35 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
+void CpuExecutor::finish(int result) {
+    // Free allocated temporary operands.
+    for (auto& info : mOperands) {
+        if (info.lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info.buffer != nullptr) {
+            delete[] info.buffer;
+            info.buffer = nullptr;
+        }
+    }
+
+    // Only report the output shapes when the result code is NO_ERROR or
+    // OUTPUT_INSUFFICIENT_SIZE.
+    if (result == ANEURALNETWORKS_NO_ERROR || result == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
+        const auto& outputs = mModel->outputIndexes;
+        mOutputShapes.resize(outputs.size());
+        for (uint32_t i = 0; i < outputs.size(); i++) {
+            const uint32_t operandIndex = outputs[i];
+            RunTimeOperandInfo& from = mOperands[operandIndex];
+            mOutputShapes[i].dimensions = from.dimensions;
+            mOutputShapes[i].isSufficient = from.isSufficient();
+        }
+    } else {
+        mOutputShapes.clear();
+    }
+
+    mModel = nullptr;
+    mRequest = nullptr;
+    mFinished = true;
+}
+
 // b/109953668, disable OpenMP
 #ifdef NNAPI_OPENMP
 ScopedOpenmpSettings::ScopedOpenmpSettings() {
diff --git a/common/OperationsUtils.cpp b/common/OperationsUtils.cpp
index 6f14fda..d2f2da2 100644
--- a/common/OperationsUtils.cpp
+++ b/common/OperationsUtils.cpp
@@ -78,13 +78,44 @@
 }
 
 bool SetShape(const Shape& in, Shape* out) {
-    if (in.type != out->type || in.dimensions.size() != out->dimensions.size()) {
+    if (in.type != out->type) {
         return false;
     }
     out->dimensions = in.dimensions;
     return true;
 }
 
+bool combineDimensions(const std::vector<uint32_t>& lhs, const std::vector<uint32_t>& rhs,
+                       std::vector<uint32_t>* combined) {
+    if (rhs.empty()) {
+        *combined = lhs;
+        return true;
+    }
+    if (lhs.empty()) {
+        *combined = rhs;
+        return true;
+    }
+    if (lhs.size() != rhs.size()) {
+        return false;
+    }
+    combined->resize(lhs.size());
+    for (uint32_t i = 0; i < lhs.size(); i++) {
+        if (lhs[i] == 0) {
+            (*combined)[i] = rhs[i];
+            continue;
+        }
+        if (rhs[i] == 0) {
+            (*combined)[i] = lhs[i];
+            continue;
+        }
+        if (lhs[i] != rhs[i]) {
+            return false;
+        }
+        (*combined)[i] = lhs[i];
+    }
+    return true;
+}
+
 uint32_t getNumberOfElements(const Shape& shape) {
     uint32_t count = 1;
     for (size_t i = 0; i < shape.dimensions.size(); i++) {
diff --git a/common/Utils.cpp b/common/Utils.cpp
index f766f88..0e13354 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -249,6 +249,10 @@
         return size;
     }
 
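+    // A tensor with unspecified rank has no computable size.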
+    if (dimensions.empty()) {
+        return 0;
+    }
+
     for (auto d : dimensions) {
         size *= d;
     }
@@ -2419,6 +2423,9 @@
         case ANEURALNETWORKS_UNEXPECTED_NULL:
             return ErrorStatus::INVALID_ARGUMENT;
 
+        case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
+            return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
+
         default:
             LOG(ERROR) << "Unknown result code " << resultCode
                        << " mapped to ErrorStatus::GENERAL_FAILURE";
@@ -2440,13 +2447,15 @@
         case ErrorStatus::INVALID_ARGUMENT:
             return ANEURALNETWORKS_BAD_DATA;
 
+        case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
+            return ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
+
         default:
             LOG(ERROR) << "Unknown ErrorStatus " << toString(status)
                        << " mapped to ANEURALNETWORKS_OP_FAILED";
             return ANEURALNETWORKS_OP_FAILED;
         case ErrorStatus::DEVICE_UNAVAILABLE:
         case ErrorStatus::GENERAL_FAILURE:
-        case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
             return ANEURALNETWORKS_OP_FAILED;
     }
 }
diff --git a/common/ValidateHal.cpp b/common/ValidateHal.cpp
index 9e23955..a2512ef 100644
--- a/common/ValidateHal.cpp
+++ b/common/ValidateHal.cpp
@@ -112,7 +112,7 @@
 template <typename VersionedOperand>
 static bool validateOperands(const hidl_vec<VersionedOperand>& operands,
                              const hidl_vec<uint8_t>& operandValues,
-                             const hidl_vec<hidl_memory>& pools) {
+                             const hidl_vec<hidl_memory>& pools, bool allowUnspecifiedRank) {
     uint32_t index = 0;
     MemoryAccessVerifier poolVerifier(pools);
     for (auto& versionedOperand : operands) {
@@ -148,7 +148,9 @@
             case OperandType::TENSOR_BOOL8:
             case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
             case OperandType::TENSOR_OEM_BYTE: {
-                if (operand.dimensions.size() == 0) {
+                if ((!allowUnspecifiedRank || operand.lifetime == OperandLifeTime::CONSTANT_COPY ||
+                     operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE) &&
+                    operand.dimensions.size() == 0) {
                     LOG(ERROR) << "Operand " << index << ": Tensor has dimensions of rank 0";
                     return false;
                 }
@@ -424,8 +426,8 @@
     return true;
 }
 
-template<typename VersionedModel>
-static bool validateModelVersioned(const VersionedModel& model) {
+template <typename VersionedModel>
+static bool validateModelVersioned(const VersionedModel& model, bool allowUnspecifiedRank) {
     NNTRACE_FULL(NNTRACE_LAYER_UTILITY, NNTRACE_PHASE_UNSPECIFIED,
                  "validateModelVersioned");
     if (model.operations.size() == 0 || model.operands.size() == 0) {
@@ -435,7 +437,8 @@
     // We only need versioned operands for their validation. For all the other
     // validations we can use operands upcasted to the latest version.
     const hidl_vec<Operand> latestVersionOperands = convertToV1_2(model.operands);
-    return (validateOperands(model.operands, model.operandValues, model.pools) &&
+    return (validateOperands(model.operands, model.operandValues, model.pools,
+                             allowUnspecifiedRank) &&
             validateOperations(model.operations, latestVersionOperands) &&
             validateModelInputOutputs(model.inputIndexes, latestVersionOperands,
                                       OperandLifeTime::MODEL_INPUT) &&
@@ -445,15 +448,15 @@
 }
 
 bool validateModel(const V1_0::Model& model) {
-    return validateModelVersioned(model);
+    return validateModelVersioned(model, /*allowUnspecifiedRank=*/false);
 }
 
 bool validateModel(const V1_1::Model& model) {
-    return validateModelVersioned(model);
+    return validateModelVersioned(model, /*allowUnspecifiedRank=*/false);
 }
 
 bool validateModel(const V1_2::Model& model) {
-    return validateModelVersioned(model);
+    return validateModelVersioned(model, /*allowUnspecifiedRank=*/true);
 }
 
 // Validates the arguments of a request. type is either "input" or "output" and is used
@@ -462,7 +465,8 @@
 static bool validateRequestArguments(const hidl_vec<RequestArgument>& requestArguments,
                                      const hidl_vec<uint32_t>& operandIndexes,
                                      const hidl_vec<Operand>& operands,
-                                     const hidl_vec<hidl_memory>& pools, const char* type) {
+                                     const hidl_vec<hidl_memory>& pools, bool allowUnspecified,
+                                     const char* type) {
     MemoryAccessVerifier poolVerifier(pools);
     // The request should specify as many arguments as were described in the model.
     const size_t requestArgumentCount = requestArguments.size();
@@ -495,12 +499,14 @@
             // If the argument specified a dimension, validate it.
             uint32_t rank = requestArgument.dimensions.size();
             if (rank == 0) {
-                // Validate that all the dimensions are specified in the model.
-                for (size_t i = 0; i < operand.dimensions.size(); i++) {
-                    if (operand.dimensions[i] == 0) {
-                        LOG(ERROR) << "Model has dimension " << i
-                                   << " set to 0 but the request does specify the dimension.";
-                        return false;
+                if (!allowUnspecified) {
+                    // Validate that all the dimensions are specified in the model.
+                    for (size_t i = 0; i < operand.dimensions.size(); i++) {
+                        if (operand.dimensions[i] == 0) {
+                            LOG(ERROR) << "Model has dimension " << i
+                                       << " set to 0 but the request does not specify the dimension.";
+                            return false;
+                        }
                     }
                 }
             } else {
@@ -520,7 +526,7 @@
                                    << " different than the model's " << operand.dimensions[i];
                         return false;
                     }
-                    if (requestArgument.dimensions[i] == 0) {
+                    if (requestArgument.dimensions[i] == 0 && !allowUnspecified) {
                         LOG(ERROR) << "Request " << type << " " << requestArgumentIndex
                                    << " has dimension " << i << " of zero";
                         return false;
@@ -532,25 +538,28 @@
     return true;
 }
 
-template<typename VersionedModel>
-static bool validateRequestVersioned(const Request& request, const VersionedModel& model) {
+template <typename VersionedModel>
+static bool validateRequestVersioned(const Request& request, const VersionedModel& model,
+                                     bool allowDynamicOutputShape) {
     return (validateRequestArguments(request.inputs, model.inputIndexes,
-                                     convertToV1_2(model.operands), request.pools, "input") &&
+                                     convertToV1_2(model.operands), request.pools,
+                                     /*allowUnspecified=*/false, "input") &&
             validateRequestArguments(request.outputs, model.outputIndexes,
-                                     convertToV1_2(model.operands), request.pools, "output") &&
+                                     convertToV1_2(model.operands), request.pools,
+                                     /*allowUnspecified=*/allowDynamicOutputShape, "output") &&
             validatePools(request.pools));
 }
 
 bool validateRequest(const Request& request, const V1_0::Model& model) {
-    return validateRequestVersioned(request, model);
+    return validateRequestVersioned(request, model, /*allowDynamicOutputShape=*/false);
 }
 
 bool validateRequest(const Request& request, const V1_1::Model& model) {
-    return validateRequestVersioned(request, model);
+    return validateRequestVersioned(request, model, /*allowDynamicOutputShape=*/false);
 }
 
 bool validateRequest(const Request& request, const V1_2::Model& model) {
-    return validateRequestVersioned(request, model);
+    return validateRequestVersioned(request, model, /*allowDynamicOutputShape=*/true);
 }
 
 bool validateExecutionPreference(ExecutionPreference preference) {
diff --git a/common/include/CpuExecutor.h b/common/include/CpuExecutor.h
index c9b10e6..3e00a60 100644
--- a/common/include/CpuExecutor.h
+++ b/common/include/CpuExecutor.h
@@ -60,6 +60,8 @@
     Shape shape() const {
         return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
     }
+
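+    // Whether the pre-allocated buffer can hold the data described by
+    // (type, dimensions). sizeOfData() returns 0 when the rank or any
+    // dimension is unspecified, so such operands always count as sufficient.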
+    bool isSufficient() const { return length >= sizeOfData(type, dimensions); }
 };
 
 // Used to keep a pointer to each of the memory pools.
@@ -113,6 +115,11 @@
             const std::vector<RunTimePoolInfo>& modelPoolInfos,
             const std::vector<RunTimePoolInfo>& requestPoolInfos);
 
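+    // Returns the shapes of the model outputs recorded by finish(). Valid
+    // only after execution has finished; empty unless the result code was
+    // ANEURALNETWORKS_NO_ERROR or ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE.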
+    const std::vector<OutputShape>& getOutputShapes() const {
+        CHECK(mFinished) << "getOutputShapes() called on an unfinished CpuExecutor.";
+        return mOutputShapes;
+    }
+
 private:
     bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                const std::vector<RunTimePoolInfo>& requestPoolInfos);
@@ -122,6 +129,10 @@
     // allocated for any temporary variable with a count of zero.
     void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);
 
+    // Frees the memory allocated for any temporary variable, and records the
+    // output operand shapes to be returned to the runtime.
+    void finish(int result);
+
     // The model and the request that we'll execute. Only valid while run()
     // is being executed.
     const Model* mModel = nullptr;
@@ -134,6 +145,12 @@
     //    std::vector<uint32_t> mDimensions;
     // Runtime information about all the operands.
     std::vector<RunTimeOperandInfo> mOperands;
+
+    // The output operand shapes to be returned to the runtime.
+    std::vector<OutputShape> mOutputShapes;
+
+    // Whether execution is finished and mOutputShapes is ready.
+    bool mFinished = false;
 };
 
 // Class for setting reasonable OpenMP threading settings. (OpenMP is used by
diff --git a/common/include/OperationsUtils.h b/common/include/OperationsUtils.h
index 7c5e5eb..16423ed 100644
--- a/common/include/OperationsUtils.h
+++ b/common/include/OperationsUtils.h
@@ -199,6 +199,10 @@
 // Sets out to the same shape as in.
 bool SetShape(const Shape& in, Shape* out);
 
+// Combines two tensor dimension vectors; either side may have unspecified dimensions.
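+// A zero (or missing) entry means "unspecified" and is filled in from the
+// other operand; e.g. {2, 0, 4} and {0, 3, 4} combine to {2, 3, 4}, while
+// {2, 3} and {2, 4} fail because the specified dimensions disagree.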
+bool combineDimensions(const std::vector<uint32_t>& lhs, const std::vector<uint32_t>& rhs,
+                       std::vector<uint32_t>* combined);
+
 // Return the total number of elements, i.e. all the dimensions multiplied
 // together. For a scalar, returns one.
 uint32_t getNumberOfElements(const Shape& shape);
diff --git a/common/include/Utils.h b/common/include/Utils.h
index be0fb5c..713e931 100644
--- a/common/include/Utils.h
+++ b/common/include/Utils.h
@@ -88,7 +88,8 @@
     } while (0)
 
 // Returns the amount of space needed to store a value of the specified
-// dimensions and type.
+// dimensions and type. For a tensor with at least one unspecified
+// dimension, returns zero.
 uint32_t sizeOfData(OperandType type, const std::vector<uint32_t>& dimensions);
 
 // Returns the amount of space needed to store a value of the dimensions and