Manage inter-partition temporaries.

This means allocating a Memory object for each TEMPORARY in the original
model that is live across partition boundaries, and telling each
StepExecutor about the mapping from each submodel input or output to the
appropriate Memory object.

With these changes, we can now fully execute multiple-partition plans.

Limitations:
- Still under control of debug.nn.partition.test property.
- Cannot handle operands of unknown size, except for model (not
  submodel) inputs and outputs.
- Execution is synchronous.

Also: Teach sizeOfData() to work on scalars.

Bug: 63905942
Test: mma (user)
      mma (userdebug)
      ml/nn/runtime/tests (userdebug)
      (with debug.nn.partition.test 0, 1, and 2;
       no new failures, logcat looks plausible,
       confirmed that GeneratedTests.mobilenet
       runs on multiple partitions)

Change-Id: I58b763bc68bf8fe2e0306610c775b854e9292f76
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index d2e64e6..7b4aacc 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -56,6 +56,15 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
+int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex) {
+    const auto dataSize = sizeOfData(operand);  // location covers the operand from offset 0
+    state = ModelArgumentInfo::MEMORY;
+    buffer = nullptr;  // no direct buffer for this argument
+    locationAndDimension.dimensions = operand.dimensions;
+    locationAndDimension.location = {.poolIndex = poolIndex, .offset = 0, .length = dataSize};
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
 int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
                                            const ANeuralNetworksOperandType* newType) {
     if (newType == nullptr) {
@@ -107,6 +116,8 @@
 
 int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                          const Memory* memory, size_t offset, size_t length) {
+    // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
+
     uint32_t count = static_cast<uint32_t>(mInputs.size());
     if (index >= count) {
         LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
@@ -145,6 +156,8 @@
 
 int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                           const Memory* memory, size_t offset, size_t length) {
+    // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
+
     uint32_t count = static_cast<uint32_t>(mOutputs.size());
     if (index >= count) {
         LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
@@ -327,6 +340,17 @@
     }
 }
 
+int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
+                                                      const Memory* memory,
+                                                      ModelArgumentInfo* inputOrOutputInfo) {
+    // Temporary-memory counterpart of ExecutionBuilder::setInputFromMemory()
+    // and ExecutionBuilder::setOutputFromMemory(); should stay similar to
+    // both.  Registers `memory` with mMemories and binds the argument to the
+    // pool index that add() returns.
+    const uint32_t memoryPool = mMemories.add(memory);
+    return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, memoryPool);
+}
+
 int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
     if (mDriver == nullptr) {
         return startComputeOnCpu(synchronizationCallback);