Add CPU fallback and remove non-plan driver path.

When we do ExecutionPlan-controlled execution and
there is a failure at some stage of the execution,
fall back to CPU execution for that one stage (if
possible) or for the entire model (otherwise).

When we don't do ExecutionPlan-controlled execution,
we no longer attempt to find a suitable driver for
full-model execution -- instead, we execute the full
model on the CPU.  (Rationale: This avoids having
to implement CPU fallback outside ExecutionPlan-
controlled execution.)

Bug: 63905942
Test: mma (userdebug)
      ml/nn/runtime/tests (userdebug)
      (with debug.nn.partition.test 1,
       no new failures; hand-insert
       errors in the runtime in a few
       places to exercise some of the
       fallback paths, and verified that
       behavior is as expected)

Change-Id: I9ec8fba632f94e04840a786575d338d06fd2dcd3
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index ad9f96c..d43f6a3 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -182,31 +182,110 @@
                                          length);
 }
 
-static void asyncStartComputePartitioned(const ExecutionPlan* plan,
+// Attempt synchronous execution of full model on CPU.
+// Ensure that executionCallback->notify() is called.
+static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
+                            const sp<ExecutionCallback>& executionCallback) {
+    LOG(DEBUG) << "cpuFallbackFull";
+    StepExecutor executor(executionBuilder, executionBuilder->getModel(),
+                          nullptr /* no IDevice, so CPU */,
+                          nullptr /* no IPreparedModel */);
+    executor.mapInputsAndOutputsTrivially();
+    sp<ExecutionCallback> fallbackCallback;
+    if (executor.startCompute(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
+        executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
+        return;
+    }
+    fallbackCallback->wait();
+    executionCallback->notify(fallbackCallback->getStatus());
+}
+
+// Attempt synchronous execution on CPU.
+// (1) First, attempt to execute this step on CPU.  If successful,
+//     return true.  (Do not call executionCallback->notify().)
+// (2) If unsuccessful, attempt to execute the full model on CPU,
+//     ensure that executionCallback->notify() is called, and return
+//     false.
+static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder,
+                               const ExecutionPlan* plan,
+                               std::shared_ptr<ExecutionPlan::Controller> controller,
+                               const sp<ExecutionCallback>& executionCallback) {
+    LOG(DEBUG) << "cpuFallbackPartial";
+    std::shared_ptr<StepExecutor> executor;
+    int n = plan->fallback(controller, &executor);
+    if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
+        cpuFallbackFull(executionBuilder, executionCallback);
+        return false;
+    }
+    sp<ExecutionCallback> fallbackCallback;
+    if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
+        cpuFallbackFull(executionBuilder, executionCallback);
+        return false;
+    }
+    fallbackCallback->wait();
+    if (fallbackCallback->getStatus() != ErrorStatus::NONE) {
+        cpuFallbackFull(executionBuilder, executionCallback);
+        return false;
+    }
+    return true;
+}
+
+static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder,
+                                         const ExecutionPlan* plan,
                                          std::shared_ptr<ExecutionPlan::Controller> controller,
-                                         const sp<IExecutionCallback>& executionCallback) {
+                                         bool allowFallback,
+                                         const sp<ExecutionCallback>& executionCallback) {
     LOG(DEBUG) << "ExecutionBuilder::startCompute (from plan, iteratively)";
     while (true) {
         std::shared_ptr<StepExecutor> executor;
         LOG(DEBUG) << "looking for next StepExecutor";
         int n = plan->next(controller, &executor);
-        if (n != ANEURALNETWORKS_NO_ERROR || executor == nullptr) {
-            executionCallback->notify(
-                n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE);
+        if (n != ANEURALNETWORKS_NO_ERROR) {
+            if (allowFallback) {
+                cpuFallbackFull(executionBuilder, executionCallback);
+            } else {
+                executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
+            }
+            return;
+        }
+        if (executor == nullptr) {
+            executionCallback->notify(ErrorStatus::NONE);
             return;
         }
 
         sp<ExecutionCallback> stepCallback;
         n = executor->startCompute(&stepCallback);
         if (n != ANEURALNETWORKS_NO_ERROR) {
-            executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
-            return;
+            if (allowFallback) {
+                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
+                    // Successfully executed one step on CPU.
+                    continue;
+                } else {
+                    // Either successfully executed entire plan on
+                    // CPU, or tried and failed to do so.
+                    return;
+                }
+            } else {
+                executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
+                return;
+            }
         }
         stepCallback->wait();
         ErrorStatus status = stepCallback->getStatus();
         if (status != ErrorStatus::NONE) {
-            executionCallback->notify(status);
-            return;
+            if (allowFallback) {
+                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
+                    // Successfully executed one step on CPU.
+                    continue;
+                } else {
+                    // Either successfully executed entire plan on
+                    // CPU, or tried and failed to do so.
+                    return;
+                }
+            } else {
+                executionCallback->notify(status);
+                return;
+            }
         }
     }
 }
@@ -230,53 +309,69 @@
         }
     }
 
-    // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan
-    // with the compilation and execution phases of the NN API?  Or retain that path
-    // as a fallback in the case of partitioning failure?
-    //
-    // TODO: Entire plan-based-path should run in an asynchronous thread --
-    // take the asynchronous thread logic out of startComputeOnCpu() and use
-    // it to wrap the plan-based-path.
-    const uint32_t partitioning = DeviceManager::get()->getPartitioning();
-    if (partitioning > 0) {
-        std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this);
-        if (controller == nullptr) {
-            if (!DeviceManager::partitioningAllowsFallback(partitioning)) {
-                return ANEURALNETWORKS_OP_FAILED;
+#ifndef DISABLE_PARTITIONED_EXECUTION
+    {
+        // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan
+        // with the compilation and execution phases of the NN API?  Or retain that path
+        // as a fallback in the case of partitioning failure?
+        //
+        // TODO: Entire plan-based-path should run in an asynchronous thread --
+        // take the asynchronous thread logic out of startComputeOnCpu() and use
+        // it to wrap the plan-based-path.
+        const uint32_t partitioning = DeviceManager::get()->getPartitioning();
+        if (partitioning > 0) {
+            const bool allowFallback = DeviceManager::partitioningAllowsFallback(partitioning);
+            std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this);
+            if (controller == nullptr) {
+                if (!allowFallback) {
+                    return ANEURALNETWORKS_OP_FAILED;
+                }
+            } else {
+                // TODO: use a thread pool
+
+                // Prepare the callback for asynchronous execution.
+                // sp<ExecutionCallback> object is returned when the
+                // execution has been successfully launched, otherwise a
+                // nullptr is returned.  The executionCallback is
+                // abstracted in the NN API as an "event".
+                sp<ExecutionCallback> executionCallback = new ExecutionCallback();
+                std::thread thread(asyncStartComputePartitioned, this, mPlan, controller,
+                                   allowFallback,
+                                   executionCallback);
+                executionCallback->bind_thread(std::move(thread));
+                *synchronizationCallback = executionCallback;
+                return ANEURALNETWORKS_NO_ERROR;
             }
-        } else {
-            // TODO: use a thread pool
-
-            // Prepare the callback for asynchronous execution.
-            // sp<ExecutionCallback> object is returned when the
-            // execution has been successfully launched, otherwise a
-            // nullptr is returned.  The executionCallback is
-            // abstracted in the NN API as an "event".
-            sp<ExecutionCallback> executionCallback = new ExecutionCallback();
-            std::thread thread(asyncStartComputePartitioned, mPlan, controller, executionCallback);
-            executionCallback->bind_thread(std::move(thread));
-            *synchronizationCallback = executionCallback;
-            return ANEURALNETWORKS_NO_ERROR;
         }
     }
-
-    // Find a driver that can handle all the operations.
-    Model hidlModel;
-    mModel->setHidlModel(&hidlModel);
-    const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers();
-    for (const auto& device : devices) {
-        hidl_vec<bool> supports;
-        LOG(DEBUG) << "Checking " << device->getName();
-        device->getSupportedOperations(hidlModel, &supports);
-        if (std::find(supports.begin(), supports.end(), false) == supports.end()) {
-            LOG(DEBUG) << "ExecutionBuilder::startCompute (without plan) on " << device->getName();
-            StepExecutor executor(this, mModel, device->getInterface(),
-                                  nullptr /* no IPreparedModel, so compile */);
-            executor.mapInputsAndOutputsTrivially();
-            return executor.startCompute(synchronizationCallback);
+#else
+    {
+        // Find a driver that can handle all the operations.
+        // TODO: Does not handle CPU fallback (which is tricky because
+        //       StepExecutor::startCompute() is designed as
+        //       asynchronous).
+        // TODO: Does not actually behave asynchronously (because
+        //       StepExecutor::startCompute() isn't actually asynchronous
+        //       on a device as opposed to a CPU).
+        Model hidlModel;
+        mModel->setHidlModel(&hidlModel);
+        const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers();
+        for (const auto& device : devices) {
+            hidl_vec<bool> supports;
+            LOG(DEBUG) << "Checking " << device->getName();
+            device->getSupportedOperations(hidlModel, &supports);
+            if (std::find(supports.begin(), supports.end(), false) == supports.end()) {
+                LOG(DEBUG) << "ExecutionBuilder::startCompute (without plan) on " << device->getName();
+                StepExecutor executor(this, mModel, device->getInterface(),
+                                      nullptr /* no IPreparedModel, so compile */);
+                executor.mapInputsAndOutputsTrivially();
+                return executor.startCompute(synchronizationCallback);
+            }
         }
     }
-    // If none can, run on the CPU.
+#endif  // DISABLE_PARTITIONED_EXECUTION
+
+    // Run on the CPU.
     LOG(DEBUG) << "ExecutionBuilder::startCompute (without plan) on CPU";
     StepExecutor executor(this, mModel,
                           nullptr /* no IDevice, so CPU */,
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 88e3759..8fd1f01 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -147,12 +147,18 @@
                                                    &mOutputs.at(outputIndex));
     }
 
+    // Executes using the (driver, preparedModel) specified at construction time.
     int startCompute(sp<ExecutionCallback>* synchronizationCallback);
 
+    // Executes using the CPU, regardless of the (driver,
+    // preparedModel) specified at construction time.
+    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);
+
+    bool isCpu() const { return mDriver == nullptr; }
+
 private:
     int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
     int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);
-    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);
 
     void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                           ModelArgumentInfo* executorInputOrOutput);
diff --git a/runtime/ExecutionPlan.cpp b/runtime/ExecutionPlan.cpp
index c969fe9..8369f8f 100644
--- a/runtime/ExecutionPlan.cpp
+++ b/runtime/ExecutionPlan.cpp
@@ -468,6 +468,29 @@
                                                       totalSizeOfTemporaries));
 }
 
+
+// TODO: Find a better way to provide this functionality.
+int ExecutionPlan::fallback(std::shared_ptr<Controller> controller,
+                            std::shared_ptr<StepExecutor>* executor) const {
+    *executor = nullptr;
+
+    LOG(DEBUG) << "ExecutionPlan::fallback(" << controller << ", " << executor
+               << "): mNextStepIndex = " << controller->mNextStepIndex;
+
+    if (controller->mNextStepIndex == 0) {
+        // We haven't called next().
+        return ANEURALNETWORKS_OP_FAILED;
+    }
+
+    if (controller->mNextStepIndex == Controller::kBadStepIndex) {
+        // The last call to next() did not produce an executor.
+        return ANEURALNETWORKS_OP_FAILED;
+    }
+
+    --controller->mNextStepIndex;
+    return next(controller, executor);
+}
+
 int ExecutionPlan::next(std::shared_ptr<Controller> controller,
                         std::shared_ptr<StepExecutor>* executor) const {
     *executor = nullptr;
diff --git a/runtime/ExecutionPlan.h b/runtime/ExecutionPlan.h
index 980445d..0cc6051 100644
--- a/runtime/ExecutionPlan.h
+++ b/runtime/ExecutionPlan.h
@@ -172,6 +172,9 @@
 
     int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const;
 
+    // Create the same executor as the last one created by next().
+    int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const;
+
     std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device);
 
     void becomeSingleStep(const std::shared_ptr<Device> device,