Support zero-sized tensor and check for omitted input in CpuExecutor.

Zero-sized tensors are only supported internally to the driver, or
reported in output shapes. It is illegal to pre-specify a zero-sized
tensor as model input or output.

To summarize the meanings of dimension = 0:
- Dimension = 0 for model input: dynamic input and must be fully-specified
at execution time
- Dimension = 0 for internal operand / model output: unknown, to be
deduced from execution
- Dimension = 0 from getOutputOperandDimensions:
  * If NO_ERROR, it is a zero-sized output
  * If OUTPUT_INSUFFICIENT_SIZE, it is unknown

Add two additional fields in OperationRegistration:
- allowOmittedOperand: if false, CpuExecutor will enforce that no input or
  output is null
- allowZeroSizedInput: if false, CpuExecutor will enforce that no dimension
  of any input is 0

The current implementation assumes that none of the operations using the
old switch-statement path support zero-sized input. Only the operations
registered with OperationResolver can support zero-sized input tensors.

All operations registered with OperationResolver currently report false for
allowZeroSizedInput. This will be enabled for a small subset of operations
in separate CLs.

Bug: 126737477
Test: NeuralNetworksTest_static
Change-Id: Ia94d67e4c8c6a49b543d29ebb3b31d509ece0970
Merged-In: Ia94d67e4c8c6a49b543d29ebb3b31d509ece0970
(cherry picked from commit d305bbd09f750145e5d56b14ae268c2919a7cd3c)
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index 341ccb3..4075557 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -59,7 +59,13 @@
     bool setOutputShape(uint32_t index, const Shape& shape) override;
     int getResultCode() const;
 
-    bool isNullInput(uint32_t index) const override;
+    bool isOmittedInput(uint32_t index) const override;
+    bool isOmittedOutput(uint32_t index) const override;
+
+    // Return false if any of inputs or outputs is omitted, i.e. has lifetime of NO_VALUE.
+    bool checkNoOmittedOperand() const;
+    // Return false if any of inputs has dimension 0.
+    bool checkNoZeroSizedInput() const;
 
    private:
     const RunTimeOperandInfo* getInputInfo(uint32_t index) const;
@@ -198,10 +204,39 @@
     return setInfoAndAllocateIfNeeded(getOutputInfo(index), shape, &result);
 }
 
-bool OperationExecutionContext::isNullInput(uint32_t index) const {
+bool OperationExecutionContext::isOmittedInput(uint32_t index) const {
     return getInputInfo(index)->lifetime == OperandLifeTime::NO_VALUE;
 }
 
+bool OperationExecutionContext::isOmittedOutput(uint32_t index) const {
+    return getOutputInfo(index)->lifetime == OperandLifeTime::NO_VALUE;
+}
+
+bool OperationExecutionContext::checkNoOmittedOperand() const {
+    for (uint32_t i = 0; i < operation->inputs.size(); i++) {
+        NN_RET_CHECK(!isOmittedInput(i)) << getOperationName(operation->type) << " input operand "
+                                         << i << " is required but missing.";
+    }
+    for (uint32_t i = 0; i < operation->outputs.size(); i++) {
+        NN_RET_CHECK(!isOmittedOutput(i)) << getOperationName(operation->type) << " output operand "
+                                          << i << " is required but missing.";
+    }
+    return true;
+}
+
+bool OperationExecutionContext::checkNoZeroSizedInput() const {
+    for (uint32_t i = 0; i < operation->inputs.size(); i++) {
+        if (isOmittedInput(i)) continue;
+        for (uint32_t j = 0; j < getInputInfo(i)->dimensions.size(); j++) {
+            NN_RET_CHECK_NE(getInputInfo(i)->dimensions[j], 0)
+                    << getOperationName(operation->type)
+                    << " does not support zero-sized tensor, but input " << i << " dimension " << j
+                    << " is 0.";
+        }
+    }
+    return true;
+}
+
 }  // namespace
 
 // TODO: short term, make share memory mapping and updating a utility function.
@@ -586,8 +621,7 @@
             continue;
         }
         info.numberOfUsesLeft--;
-        if (info.numberOfUsesLeft == 0) {
-            nnAssert(info.buffer != nullptr);
+        if (info.numberOfUsesLeft == 0 && info.buffer != nullptr) {
             delete[] info.buffer;
             info.buffer = nullptr;
         }
@@ -625,7 +659,23 @@
             }
             return true;
         };
-        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
+
+        auto verifyNoZeroSizedInputs = [&operation, this](const hidl_vec<uint32_t>& indexes) {
+            for (size_t i = 0; i < indexes.size(); i++) {
+                for (size_t j = 0; j < mOperands[indexes[i]].dimensions.size(); j++) {
+                    if (mOperands[indexes[i]].dimensions[j] == 0) {
+                        LOG(ERROR) << getOperationName(operation.type)
+                                   << " does not support zero-sized tensor, but input " << i
+                                   << " dimension " << j << " is zero.";
+                        return false;
+                    }
+                }
+            }
+            return true;
+        };
+
+        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out") &&
+               verifyNoZeroSizedInputs(ins);
     };
 
     switch (operation.type) {
@@ -2302,7 +2352,9 @@
                               splitQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                 } break;
-                default: { return ANEURALNETWORKS_BAD_DATA; }
+                default: {
+                    return ANEURALNETWORKS_BAD_DATA;
+                }
             }
         } break;
         case OperationType::MAXIMUM:
@@ -2652,7 +2704,11 @@
                            << getOperationName(operation.type);
             } else {
                 OperationExecutionContext context(&operation, mOperands.data());
-                success = operationRegistration->prepare(&context) &&
+                success = operationRegistration->flags.allowOmittedOperand ||
+                          context.checkNoOmittedOperand();
+                success = success && (operationRegistration->flags.allowZeroSizedInput ||
+                                      context.checkNoZeroSizedInput());
+                success = success && operationRegistration->prepare(&context) &&
                           operationRegistration->execute(&context);
                 result = context.getResultCode();
             }
diff --git a/common/include/CpuExecutor.h b/common/include/CpuExecutor.h
index a3710e7..7a36ebd 100644
--- a/common/include/CpuExecutor.h
+++ b/common/include/CpuExecutor.h
@@ -39,6 +39,17 @@
     // change at runtime.  We include the type because it's useful
     // to pass together with the dimension to the functions implementing
     // the operators.
+    //
+    // A dimension being zero has different meanings for different operands at different stages:
+    // - Model inputs:
+    //   * Specified in model: implies "dynamic", and must be fully-specified in request.
+    //   * Specified in request: illegal.
+    // - Constant operands: illegal.
+    // - Model outputs and internal operands:
+    //   * Before evaluation: implies unknown and to be deduced from execution.
+    //   * After evaluation:
+    //     - If isSufficient reports true: the tensor is zero-sized.
+    //     - Otherwise: implies unknown.
     std::vector<uint32_t> dimensions;
 
     float scale;
diff --git a/common/include/OperationResolver.h b/common/include/OperationResolver.h
index 24b051c..9bac989 100644
--- a/common/include/OperationResolver.h
+++ b/common/include/OperationResolver.h
@@ -39,6 +39,24 @@
     // Executes the operation, reading from context->getInputBuffer(...)
     // and writing to context->getOutputBuffer(...).
     std::function<bool(IOperationExecutionContext*)> execute;
+
+    struct Flag {
+        // Whether the operation allows at least one operand to be omitted.
+        bool allowOmittedOperand = false;
+        // Whether the operation allows at least one input operand to be a zero-sized tensor.
+        bool allowZeroSizedInput = false;
+    } flags;
+
+    OperationRegistration(OperationType type, const char* name,
+                          std::function<bool(const IOperationValidationContext*)> validate,
+                          std::function<bool(IOperationExecutionContext*)> prepare,
+                          std::function<bool(IOperationExecutionContext*)> execute, Flag flags)
+        : type(type),
+          name(name),
+          validate(validate),
+          prepare(prepare),
+          execute(execute),
+          flags(flags) {}
 };
 
 // A registry of operation implementations.
@@ -82,32 +100,37 @@
 // OperationResolver.
 //
 // Usage:
-//   namespace android {
-//   namespace nn {
-//   namespace gather {
-//   ...
-//   }  // namespace gather
+// (check OperationRegistration::Flag for available fields and default values.)
 //
-//   NN_REGISTER_OPERATION(GATHER, gather::kOperationName, gather::validate,
-//                         gather::prepare, gather::execute);
-//   }  // namespace nn
-//   }  // namespace android
+// - With default flags.
+//   NN_REGISTER_OPERATION(FOO_OP, foo_op::kOperationName, foo_op::validate,
+//                         foo_op::prepare, foo_op::execute);
+//
+// - With a customized flag.
+//   NN_REGISTER_OPERATION(FOO_OP, foo_op::kOperationName, foo_op::validate,
+//                         foo_op::prepare, foo_op::execute, .allowZeroSizedInput = true);
+//
+// - With multiple customized flags.
+//   NN_REGISTER_OPERATION(FOO_OP, foo_op::kOperationName, foo_op::validate,
+//                         foo_op::prepare, foo_op::execute, .allowOmittedOperand = true,
+//                         .allowZeroSizedInput = true);
 //
 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
-#define NN_REGISTER_OPERATION(identifier, operationName, validate, prepare, execute)           \
-    const OperationRegistration* register_##identifier() {                                     \
-        static OperationRegistration registration = {OperationType::identifier, operationName, \
-                                                     validate, prepare, execute};              \
-        return &registration;                                                                  \
+#define NN_REGISTER_OPERATION(identifier, operationName, validate, prepare, execute, ...)     \
+    const OperationRegistration* register_##identifier() {                                    \
+        static OperationRegistration registration(OperationType::identifier, operationName,   \
+                                                  validate, prepare, execute, {__VA_ARGS__}); \
+        return &registration;                                                                 \
     }
 #else
 // This version ignores CPU execution logic (prepare and execute).
 // The compiler is supposed to omit that code so that only validation logic
 // makes it into libneuralnetworks_utils.
-#define NN_REGISTER_OPERATION(identifier, operationName, validate, unused_prepare, unused_execute) \
+#define NN_REGISTER_OPERATION(identifier, operationName, validate, unused_prepare, unused_execute, \
+                              ...)                                                                 \
     const OperationRegistration* register_##identifier() {                                         \
-        static OperationRegistration registration = {OperationType::identifier, operationName,     \
-                                                     validate, nullptr, nullptr};                  \
+        static OperationRegistration registration(OperationType::identifier, operationName,        \
+                                                  validate, nullptr, nullptr, {__VA_ARGS__});      \
         return &registration;                                                                      \
     }
 #endif
diff --git a/common/include/OperationsUtils.h b/common/include/OperationsUtils.h
index ebd48cf..46967a0 100644
--- a/common/include/OperationsUtils.h
+++ b/common/include/OperationsUtils.h
@@ -98,7 +98,8 @@
     // Updates the output shape, allocating the buffer if necessary.
     virtual bool setOutputShape(uint32_t index, const Shape& shape) = 0;
 
-    virtual bool isNullInput(uint32_t index) const = 0;
+    virtual bool isOmittedInput(uint32_t index) const = 0;
+    virtual bool isOmittedOutput(uint32_t index) const = 0;
 
     template <typename T>
     const T* getInputBuffer(uint32_t index) const {
diff --git a/common/operations/BidirectionalSequenceRNN.cpp b/common/operations/BidirectionalSequenceRNN.cpp
index 8b23b94..b1bbe25 100644
--- a/common/operations/BidirectionalSequenceRNN.cpp
+++ b/common/operations/BidirectionalSequenceRNN.cpp
@@ -96,7 +96,7 @@
     const T* auxInput = nullptr;
     const T* fwAuxWeights = nullptr;
     const T* bwAuxWeights = nullptr;
-    const bool hasAuxInputs = !context->isNullInput(kAuxInputTensor);
+    const bool hasAuxInputs = !context->isOmittedInput(kAuxInputTensor);
     if (hasAuxInputs) {
         auxInput = context->getInputBuffer<T>(kAuxInputTensor);
         fwAuxWeights = context->getInputBuffer<T>(kFwAuxWeightsTensor);
@@ -285,13 +285,14 @@
     Shape fwAuxWeights = context->getInputShape(kFwAuxWeightsTensor);
     Shape bwAuxWeights = context->getInputShape(kBwAuxWeightsTensor);
 
-    const bool auxInputsAllOrNone =
-            (context->isNullInput(kAuxInputTensor) && context->isNullInput(kFwAuxWeightsTensor) &&
-             context->isNullInput(kBwAuxWeightsTensor)) ||
-            (!context->isNullInput(kAuxInputTensor) && !context->isNullInput(kFwAuxWeightsTensor) &&
-             !context->isNullInput(kBwAuxWeightsTensor));
+    const bool auxInputsAllOrNone = (context->isOmittedInput(kAuxInputTensor) &&
+                                     context->isOmittedInput(kFwAuxWeightsTensor) &&
+                                     context->isOmittedInput(kBwAuxWeightsTensor)) ||
+                                    (!context->isOmittedInput(kAuxInputTensor) &&
+                                     !context->isOmittedInput(kFwAuxWeightsTensor) &&
+                                     !context->isOmittedInput(kBwAuxWeightsTensor));
     NN_RET_CHECK(auxInputsAllOrNone);
-    const bool hasAuxInputs = !context->isNullInput(kAuxInputTensor);
+    const bool hasAuxInputs = !context->isOmittedInput(kAuxInputTensor);
 
     int32_t timeMajor = context->getInputValue<bool>(kTimeMajorParam);
     const uint32_t batchSize =
@@ -370,7 +371,7 @@
 
 NN_REGISTER_OPERATION(BIDIRECTIONAL_SEQUENCE_RNN, "BIDIRECTIONAL_SEQUENCE_RNN",
                       bidirectional_sequence_rnn::validate, bidirectional_sequence_rnn::prepare,
-                      bidirectional_sequence_rnn::execute);
+                      bidirectional_sequence_rnn::execute, .allowOmittedOperand = true);
 
 }  // namespace nn
 }  // namespace android
diff --git a/common/operations/UnidirectionalSequenceLSTM.cpp b/common/operations/UnidirectionalSequenceLSTM.cpp
index c8732f1..cebc5a9 100644
--- a/common/operations/UnidirectionalSequenceLSTM.cpp
+++ b/common/operations/UnidirectionalSequenceLSTM.cpp
@@ -396,7 +396,7 @@
 
 NN_REGISTER_OPERATION(UNIDIRECTIONAL_SEQUENCE_LSTM, "UNIDIRECTIONAL_SEQUENCE_LSTM",
                       unidirectional_sequence_lstm::validate, unidirectional_sequence_lstm::prepare,
-                      unidirectional_sequence_lstm::execute);
+                      unidirectional_sequence_lstm::execute, .allowOmittedOperand = true);
 
 }  // namespace nn
 }  // namespace android