Implement NDK interfaces for dynamic output shape.

Implement the following NDK interfaces:
- ANeuralNetworksExecution_getOutputOperandDimensions
- ANeuralNetworksExecution_getOutputOperandRank
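
A minimal usage sketch (illustrative only, not part of this patch), assuming
"execution" is an ANeuralNetworksExecution that has already finished and whose
output operand 0 is a tensor: the application first queries the rank, then the
actual dimensions, which also reveals whether the supplied output buffer was
too small.

    uint32_t rank = 0;
    int status = ANeuralNetworksExecution_getOutputOperandRank(execution, 0, &rank);
    if (status == ANEURALNETWORKS_NO_ERROR ||
        status == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
        std::vector<uint32_t> dims(rank);
        if (rank > 0) {
            status = ANeuralNetworksExecution_getOutputOperandDimensions(
                    execution, 0, dims.data());
        }
        // ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE means the output buffer
        // provided at execution time was too small; dims now holds the shape
        // the output actually required, so the caller can reallocate and retry.
    }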

Bug: 73506513
Test: NeuralNetworksTest_static
Change-Id: I3e0238ec701a0bffbdb5682ee1787df198fe9816
Merged-In: I3e0238ec701a0bffbdb5682ee1787df198fe9816
(cherry picked from commit d2442daafb99f04a1320d0b4bd45ecde9d983b05)
diff --git a/runtime/Callbacks.cpp b/runtime/Callbacks.cpp
index 93be77b..47ddeb4 100644
--- a/runtime/Callbacks.cpp
+++ b/runtime/Callbacks.cpp
@@ -129,7 +129,18 @@
     return mPreparedModel;
 }
 
-ExecutionCallback::ExecutionCallback() : mErrorStatus(ErrorStatus::GENERAL_FAILURE) {}
+ExecutionCallback::ExecutionCallback()
+    : mErrorStatus(ErrorStatus::GENERAL_FAILURE), mOnFinish(nullptr) {
+    on_finish([this]() {
+        if (mOnFinish != nullptr) {
+            ErrorStatus status = mOnFinish(mErrorStatus);
+            if (status != ErrorStatus::NONE) {
+                mErrorStatus = status;
+            }
+        }
+        return true;
+    });
+}
 
 ExecutionCallback::~ExecutionCallback() {}
 
diff --git a/runtime/Callbacks.h b/runtime/Callbacks.h
index 4cd9aa2..d87b135 100644
--- a/runtime/Callbacks.h
+++ b/runtime/Callbacks.h
@@ -268,7 +268,9 @@
  * IPreparedModel::execute.
  */
 class ExecutionCallback : public CallbackBase,  public IExecutionCallback {
- public:
+    using ExecutionFinish = std::function<ErrorStatus(ErrorStatus)>;
+
+   public:
     ExecutionCallback();
     ~ExecutionCallback() override;
 
@@ -362,9 +364,13 @@
      */
     const std::vector<OutputShape>& getOutputShapes();
 
+    // The callback will invoke finish(mErrorStatus) when the execution completes.
+    void setOnFinish(const ExecutionFinish& finish) { mOnFinish = finish; }
+
    private:
     ErrorStatus mErrorStatus;
     std::vector<OutputShape> mOutputShapes;
+    ExecutionFinish mOnFinish;
 };
 
 
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index 5ad8b8f..c163f5b 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -216,6 +216,46 @@
                                          length);
 }
 
+int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) {
+    if (!mFinished) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the "
+                      "execution has finished.";
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    uint32_t count = static_cast<uint32_t>(mOutputs.size());
+    if (index >= count) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions bad index " << index
+                   << " " << count;
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    const auto& dims = mOutputs[index].dimensions;
+    if (dims.empty()) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions can not query "
+                      "dimensions of a scalar";
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    std::copy(dims.begin(), dims.end(), dimensions);
+    return mOutputs[index].isSufficient ? ANEURALNETWORKS_NO_ERROR
+                                        : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
+}
+
+int ExecutionBuilder::getOutputOperandRank(uint32_t index, uint32_t* rank) {
+    if (!mFinished) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called before the "
+                      "execution has finished.";
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    uint32_t count = static_cast<uint32_t>(mOutputs.size());
+    if (index >= count) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank bad index " << index << " "
+                   << count;
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    *rank = static_cast<uint32_t>(mOutputs[index].dimensions.size());
+    return mOutputs[index].isSufficient ? ANEURALNETWORKS_NO_ERROR
+                                        : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
+}
+
 // Attempt synchronous execution of full model on CPU.
 // Ensure that executionCallback->notify() is called.
 static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
@@ -350,6 +390,8 @@
         }
     }
 
+    auto wrappedFinish = [this](ErrorStatus error) { return finish(error); };
+
     // TODO: For asynchronous execution, entire plan-based-path should run in an
     // asynchronous thread -- take the asynchronous thread logic out of
     // startComputeOnCpu() and use it to wrap the plan-based-path.
@@ -358,6 +400,7 @@
     if (synchronous) {
         VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API)";
         sp<ExecutionCallback> localSynchronizationCallback = new ExecutionCallback();
+        localSynchronizationCallback->setOnFinish(wrappedFinish);
         asyncStartComputePartitioned(this, mPlan, controller, allowFallback,
                                      localSynchronizationCallback);
         localSynchronizationCallback->wait();
@@ -371,6 +414,7 @@
         // nullptr is returned.  The executionCallback is
         // abstracted in the NN API as an "event".
         sp<ExecutionCallback> executionCallback = new ExecutionCallback();
+        executionCallback->setOnFinish(wrappedFinish);
         if (DeviceManager::get()->syncExecRuntime()) {
             VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API, non-threaded)";
             asyncStartComputePartitioned(this, mPlan, controller, allowFallback, executionCallback);
@@ -385,6 +429,12 @@
     }
 }
 
+ErrorStatus ExecutionBuilder::finish(ErrorStatus) {
+    CHECK(!mFinished) << "ExecutionBuilder::finish is calling twice";
+    mFinished = true;
+    return ErrorStatus::NONE;
+}
+
 // Figures out how to place each of the input or outputs in a buffer. This just does the layout,
 // it does not copy data.  Aligns each input a bit.
 int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 9c1d44f..1999ae5 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -24,6 +24,7 @@
 #include "NeuralNetworks.h"
 #include "VersionedInterfaces.h"
 
+#include <atomic>
 #include <unordered_map>
 #include <vector>
 
@@ -55,6 +56,7 @@
     DataLocation locationAndLength;
     std::vector<uint32_t> dimensions;
     void* buffer;
+    bool isSufficient = true;
 
     int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
                        uint32_t length);
@@ -84,8 +86,13 @@
     }
     int computeSynchronously() { return compute(nullptr); }
 
+    int getOutputOperandDimensions(uint32_t index, uint32_t* dimensions);
+    int getOutputOperandRank(uint32_t index, uint32_t* rank);
+
     const ModelBuilder* getModel() const { return mModel; }
 
+    ErrorStatus finish(ErrorStatus error);
+
    private:
     // If a callback is provided, then this is asynchronous. If a callback is
     // not provided (i.e., is nullptr), then this is synchronous.
@@ -111,6 +118,9 @@
     std::vector<ModelArgumentInfo> mInputs;
     std::vector<ModelArgumentInfo> mOutputs;
     MemoryTracker mMemories;
+
+    // Output shapes can only be queried after the execution is finished.
+    std::atomic_bool mFinished = false;
 };
 
 // class StepExecutor is used to execute a single "step" in a
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 091d942..d103f9f 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -701,6 +701,28 @@
     delete r;
 }
 
+int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution* execution,
+                                                  int32_t index, uint32_t* rank) {
+    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_getOutputOperandRank");
+    if (!execution || !rank) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank passed a nullptr";
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
+    return r->getOutputOperandRank(index, rank);
+}
+
+int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution* execution,
+                                                        int32_t index, uint32_t* dimensions) {
+    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_getOutputOperandDimensions");
+    if (!execution || !dimensions) {
+        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions passed a nullptr";
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
+    return r->getOutputOperandDimensions(index, dimensions);
+}
+
 int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32_t index,
                                       const ANeuralNetworksOperandType* type, const void* buffer,
                                       size_t length) {
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 03cb14b..0cffd11 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -4534,6 +4534,54 @@
  */
 int ANeuralNetworksExecution_compute(ANeuralNetworksExecution* execution);
 
+/**
+ * Get the dimensional information of the specified output operand of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * On asynchronous execution initiated by {@link ANeuralNetworksExecution_startCompute},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function to recuperate
+ * the resources used by the execution.
+ *
+ * @param execution The execution to be queried.
+ * @param index The index of the output argument we are querying. It is
+ *              an index into the lists passed to
+ *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ *              the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param rank The rank of the output operand.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE
+ *         if the target output is provided an insufficient buffer at execution time,
+ *         ANEURALNETWORKS_BAD_DATA if the index is invalid.
+ *
+ * Available since API level 29.
+ */
+int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution* execution,
+                                                  int32_t index, uint32_t* rank);
+
+/**
+ * Get the dimensional information of the specified output operand of the model of the
+ * {@link ANeuralNetworksExecution}. The target output operand cannot be a scalar.
+ *
+ * On asynchronous execution initiated by {@link ANeuralNetworksExecution_startCompute},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function to recuperate
+ * the resources used by the execution.
+ *
+ * @param execution The execution to be queried.
+ * @param index The index of the output argument we are querying. It is an index into the lists
+ *              passed to {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ *              the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param dimensions The dimension array to be filled. The size of the array must be exactly as
+ *                   large as the rank of the output operand to be queried in the model.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE
+ *         if the target output is provided an insufficient buffer at execution time,
+ *         ANEURALNETWORKS_BAD_DATA if the index is invalid or if the target is a scalar.
+ *
+ * Available since API level 29.
+ */
+int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution* execution,
+                                                        int32_t index, uint32_t* dimensions);
+
 #endif  // __ANDROID_API__ >= __ANDROID_API_Q__
 
 #if __ANDROID_API__ >= 27
diff --git a/runtime/include/NeuralNetworksWrapper.h b/runtime/include/NeuralNetworksWrapper.h
index 9a3dd77..17e246f 100644
--- a/runtime/include/NeuralNetworksWrapper.h
+++ b/runtime/include/NeuralNetworksWrapper.h
@@ -396,6 +396,20 @@
 
     Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); }
 
+    Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
+        uint32_t rank = 0;
+        Result result = static_cast<Result>(
+                ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
+        dimensions->resize(rank);
+        if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) ||
+            rank == 0) {
+            return result;
+        }
+        result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
+                mExecution, index, dimensions->data()));
+        return result;
+    }
+
    private:
     ANeuralNetworksExecution* mExecution = nullptr;
 };
diff --git a/runtime/libneuralnetworks.map.txt b/runtime/libneuralnetworks.map.txt
index e6af99a..8bf0f0a 100644
--- a/runtime/libneuralnetworks.map.txt
+++ b/runtime/libneuralnetworks.map.txt
@@ -52,6 +52,8 @@
     ANeuralNetworksExecution_setOutput;
     ANeuralNetworksExecution_setOutputFromMemory;
     ANeuralNetworksExecution_startCompute;
+    ANeuralNetworksExecution_getOutputOperandDimensions; # introduced=Q
+    ANeuralNetworksExecution_getOutputOperandRank; # introduced=Q
     ANeuralNetworksEvent_wait;
     ANeuralNetworksEvent_free;
   local:
diff --git a/runtime/test/TestExecution.cpp b/runtime/test/TestExecution.cpp
index 422af91..f7889b8 100644
--- a/runtime/test/TestExecution.cpp
+++ b/runtime/test/TestExecution.cpp
@@ -345,7 +345,7 @@
     float mOutputBuffer;
     const float kOutputBufferExpected = 3;
 
-private:
+   private:
     static WrapperModel makeModel() {
         static const WrapperOperandType tensorType(WrapperType::TENSOR_FLOAT32, { 1 });
 
@@ -374,6 +374,7 @@
         if (kExpectResult == Result::NO_ERROR) {
             ASSERT_EQ(mOutputBuffer, kOutputBufferExpected);
         }
+        std::vector<uint32_t> dimensions;
     }
     {
         SCOPED_TRACE("compute");
diff --git a/runtime/test/TestNeuralNetworksWrapper.h b/runtime/test/TestNeuralNetworksWrapper.h
index 7ae3370..6a0551b 100644
--- a/runtime/test/TestNeuralNetworksWrapper.h
+++ b/runtime/test/TestNeuralNetworksWrapper.h
@@ -422,6 +422,20 @@
     // computation to complete.
     static void setComputeUsesSynchronousAPI(bool val) { mComputeUsesSychronousAPI = val; }
 
+    Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
+        uint32_t rank = 0;
+        Result result = static_cast<Result>(
+                ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
+        dimensions->resize(rank);
+        if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) ||
+            rank == 0) {
+            return result;
+        }
+        result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
+                mExecution, index, dimensions->data()));
+        return result;
+    }
+
    private:
     ANeuralNetworksExecution* mExecution = nullptr;
 
diff --git a/runtime/test/TestValidation.cpp b/runtime/test/TestValidation.cpp
index e87f067..4d155aa 100644
--- a/runtime/test/TestValidation.cpp
+++ b/runtime/test/TestValidation.cpp
@@ -689,6 +689,60 @@
     EXPECT_EQ(ANeuralNetworksEvent_wait(nullptr), ANEURALNETWORKS_UNEXPECTED_NULL);
 }
 
+TEST_F(ValidationTestExecution, GetOutputOperandRankAndDimensions) {
+    ANeuralNetworksExecution* execution;
+    EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution), ANEURALNETWORKS_NO_ERROR);
+
+    float input0 = 1.0f, input1 = 2.0f, output0;
+    int32_t input2 = 0;
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, 0, nullptr, &input0, sizeof(float)),
+              ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, 1, nullptr, &input1, sizeof(float)),
+              ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, 2, nullptr, &input2, sizeof(int32_t)),
+              ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution, 0, nullptr, &output0, sizeof(float)),
+              ANEURALNETWORKS_NO_ERROR);
+
+    uint32_t rank, dims[4], expectedRank = 1, expectedDims = 1;
+    // This should fail, since the execution has not yet started to compute.
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandRank(execution, 0, &rank),
+              ANEURALNETWORKS_BAD_STATE);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandDimensions(execution, 0, dims),
+              ANEURALNETWORKS_BAD_STATE);
+
+    ANeuralNetworksEvent* event;
+    EXPECT_EQ(ANeuralNetworksExecution_startCompute(execution, &event), ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(ANeuralNetworksEvent_wait(event), ANEURALNETWORKS_NO_ERROR);
+
+    // These should fail, since a nullptr argument is not allowed.
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandRank(nullptr, 0, &rank),
+              ANEURALNETWORKS_UNEXPECTED_NULL);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandDimensions(nullptr, 0, dims),
+              ANEURALNETWORKS_UNEXPECTED_NULL);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandRank(execution, 0, nullptr),
+              ANEURALNETWORKS_UNEXPECTED_NULL);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandDimensions(execution, 0, nullptr),
+              ANEURALNETWORKS_UNEXPECTED_NULL);
+
+    // This should fail, since the operand does not exist.
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandRank(execution, -1, &rank),
+              ANEURALNETWORKS_BAD_DATA);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandRank(execution, 999, &rank),
+              ANEURALNETWORKS_BAD_DATA);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandDimensions(execution, -1, dims),
+              ANEURALNETWORKS_BAD_DATA);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandDimensions(execution, 999, dims),
+              ANEURALNETWORKS_BAD_DATA);
+
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandRank(execution, 0, &rank),
+              ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(ANeuralNetworksExecution_getOutputOperandDimensions(execution, 0, dims),
+              ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(rank, expectedRank);
+    EXPECT_EQ(dims[0], expectedDims);
+}
+
 TEST(ValidationTestIntrospection, GetNumDevices) {
     uint32_t numDevices = 0;
     EXPECT_EQ(ANeuralNetworks_getDeviceCount(&numDevices), ANEURALNETWORKS_NO_ERROR);