Implement QoS in NNAPI

Bug: 136739795
Bug: 142902514
Bug: 145300530
Test: mma
Test: CtsNNAPITestCases
Test: NeuralNetworksTest_static
Change-Id: I9b4ed67102b6b1fae2b2ef50ddf746ed912163cc
Merged-In: I9b4ed67102b6b1fae2b2ef50ddf746ed912163cc
(cherry picked from commit 699ffdacfca7a42c059dc6f581eec913d74be9b3)
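The new QoS surface is driven from the NDK roughly as follows. A minimal sketch, assuming a model has already been built and a single target device selected; durations are in nanoseconds, matching the makeTimePoint conversion below, and error handling is elided:

    #include <android/NeuralNetworks.h>

    // Sketch: request high priority and bound compilation/execution time when
    // the chosen device advertises deadline support.
    int compileAndRunWithQos(ANeuralNetworksModel* model, const ANeuralNetworksDevice* device) {
        ANeuralNetworksCompilation* compilation = nullptr;
        ANeuralNetworksCompilation_createForDevices(model, &device, /*numDevices=*/1, &compilation);
        ANeuralNetworksCompilation_setPriority(compilation, ANEURALNETWORKS_PRIORITY_HIGH);
        if (ANeuralNetworksDevice_supportsCompilationTimeout(device)) {
            ANeuralNetworksCompilation_setTimeout(compilation, 1'000'000'000u);  // 1 second
        }
        ANeuralNetworksCompilation_finish(compilation);

        ANeuralNetworksExecution* execution = nullptr;
        ANeuralNetworksExecution_create(compilation, &execution);
        if (ANeuralNetworksDevice_supportsExecutionTimeout(device)) {
            ANeuralNetworksExecution_setTimeout(execution, 500'000'000u);  // 500 ms
        }
        // ... set inputs/outputs and compute as usual ...
        return ANEURALNETWORKS_NO_ERROR;
    }

Per the builder changes below, the setTimeout calls return ANEURALNETWORKS_BAD_DATA when the compilation was not created for exactly one device or that device lacks deadline support, and ANEURALNETWORKS_BAD_STATE once the compilation or execution has started; setPriority rejects values other than LOW, MEDIUM, and HIGH.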
diff --git a/common/Utils.cpp b/common/Utils.cpp
index f753a16..368ef47 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -90,6 +90,29 @@
     }
 }
 
+static std::pair<int, OptionalTimePoint> makeTimePoint(uint64_t duration) {
+    const auto currentTime = std::chrono::steady_clock::now();
+    const auto currentTimeInNanoseconds =
+            std::chrono::time_point_cast<std::chrono::nanoseconds>(currentTime);
+    const uint64_t nanosecondsSinceEpoch = currentTimeInNanoseconds.time_since_epoch().count();
+
+    // check for overflow
+    if (std::numeric_limits<uint64_t>::max() - nanosecondsSinceEpoch < duration) {
+        LOG(ERROR) << "Launching execution failed due to time point overflow";
+        return {ANEURALNETWORKS_BAD_DATA, {}};
+    }
+    const uint64_t nanosecondsAtTimeout = nanosecondsSinceEpoch + duration;
+
+    OptionalTimePoint otp;
+    otp.nanoseconds(nanosecondsAtTimeout);
+    return {ANEURALNETWORKS_NO_ERROR, otp};
+}
+
+std::pair<int, OptionalTimePoint> makeTimePoint(std::optional<uint64_t> duration) {
+    const std::pair<int, OptionalTimePoint> empty = {ANEURALNETWORKS_NO_ERROR, {}};
+    return duration.has_value() ? makeTimePoint(*duration) : empty;
+}
+
 static bool isExtensionOperandType(int32_t type) {
     return static_cast<uint32_t>(type) > static_cast<uint32_t>(OperandTypeRange::BASE_MAX);
 }
diff --git a/common/ValidateHal.cpp b/common/ValidateHal.cpp
index b99518b..74e2d7b 100644
--- a/common/ValidateHal.cpp
+++ b/common/ValidateHal.cpp
@@ -691,6 +691,10 @@
            preference == ExecutionPreference::SUSTAINED_SPEED;
 }
 
+bool validatePriority(Priority priority) {
+    return priority == Priority::LOW || priority == Priority::MEDIUM || priority == Priority::HIGH;
+}
+
 bool validOperandType(V1_0::OperandType operandType) {
     switch (operandType) {
         case V1_0::OperandType::FLOAT32:
diff --git a/common/include/Utils.h b/common/include/Utils.h
index 2d341ef..0bb4d12 100644
--- a/common/include/Utils.h
+++ b/common/include/Utils.h
@@ -131,6 +131,12 @@
 #define NN_RET_CHECK_GE(x, y) NN_RET_CHECK_OP(x, y, >=)
 #define NN_RET_CHECK_GT(x, y) NN_RET_CHECK_OP(x, y, >)
 
+// Make an optional time point from an optional duration. If the operation
+// succeeds, a pair of {ANEURALNETWORKS_NO_ERROR, timepoint} is returned. If an
+// overflow occurs in this function, {ANEURALNETWORKS_BAD_DATA, empty} is
+// returned.
+std::pair<int, hal::OptionalTimePoint> makeTimePoint(std::optional<uint64_t> duration);
+
 // Ensure that every user of FalseyErrorStream is linked to the
 // correct instance, using the correct LOG_TAG
 namespace {
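How the runtime consumes this helper — a short sketch assuming the surrounding runtime types; the same pattern appears in the CompilationBuilder and ExecutionBuilder changes below:

    // mTimeoutDuration is a std::optional<uint64_t> relative timeout in
    // nanoseconds; makeTimePoint converts it to an absolute OptionalTimePoint.
    const auto [n, deadline] = makeTimePoint(mTimeoutDuration);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;  // overflow while computing the absolute time point
    }
    // 'deadline' is then forwarded to Device::prepareModel or
    // PreparedModel::execute as the deadline argument.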
diff --git a/common/include/ValidateHal.h b/common/include/ValidateHal.h
index 733c8b9..98d0653 100644
--- a/common/include/ValidateHal.h
+++ b/common/include/ValidateHal.h
@@ -53,6 +53,9 @@
 // Verfies that the execution preference is valid.
 bool validateExecutionPreference(hal::ExecutionPreference preference);
 
+// Verifies that the priority is valid.
+bool validatePriority(hal::Priority priority);
+
 bool validOperationType(hal::V1_0::OperationType operation);
 bool validOperationType(hal::V1_1::OperationType operation);
 bool validOperationType(hal::V1_2::OperationType operation);
diff --git a/driver/sample/SampleDriver.cpp b/driver/sample/SampleDriver.cpp
index 132b457..f2a94e7 100644
--- a/driver/sample/SampleDriver.cpp
+++ b/driver/sample/SampleDriver.cpp
@@ -293,7 +293,7 @@
 template <typename T_IExecutionCallback>
 ErrorStatus executeBase(const Request& request, MeasureTiming measure, const Model& model,
                         const SampleDriver& driver, const std::vector<RunTimePoolInfo>& poolInfos,
-                        const OptionalTimePoint& /*deadline*/,
+                        const OptionalTimePoint& deadline,
                         const sp<T_IExecutionCallback>& callback) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION, "SampleDriver::executeBase");
     VLOG(DRIVER) << "executeBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
@@ -309,6 +309,10 @@
         notify(callback, ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
         return ErrorStatus::INVALID_ARGUMENT;
     }
+    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+        notify(callback, ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
+        return ErrorStatus::INVALID_ARGUMENT;
+    }
 
     // This thread is intentionally detached because the sample driver service
     // is expected to live forever.
@@ -343,7 +347,7 @@
 static std::tuple<ErrorStatus, hidl_vec<OutputShape>, Timing> executeSynchronouslyBase(
         const Request& request, MeasureTiming measure, const Model& model,
         const SampleDriver& driver, const std::vector<RunTimePoolInfo>& poolInfos,
-        const OptionalTimePoint& /*deadline*/) {
+        const OptionalTimePoint& deadline) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
                  "SampleDriver::executeSynchronouslyBase");
     VLOG(DRIVER) << "executeSynchronouslyBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
@@ -354,6 +358,9 @@
     if (!validateRequest(request, model)) {
         return {ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
     }
+    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+        return {ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
+    }
 
     NNTRACE_FULL_SWITCH(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_INPUTS_AND_OUTPUTS,
                         "SampleDriver::executeSynchronouslyBase");
@@ -509,7 +516,7 @@
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
                  "SampleDriver::configureExecutionBurst");
 
-    const bool preferPowerOverLatency = (kPreference == hal::ExecutionPreference::LOW_POWER);
+    const bool preferPowerOverLatency = (kPreference == ExecutionPreference::LOW_POWER);
     const auto pollingTimeWindow =
             (preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow());
 
diff --git a/driver/sample/SampleDriver.h b/driver/sample/SampleDriver.h
index 4163113..b463345 100644
--- a/driver/sample/SampleDriver.h
+++ b/driver/sample/SampleDriver.h
@@ -17,6 +17,8 @@
 #ifndef ANDROID_FRAMEWORKS_ML_NN_DRIVER_SAMPLE_SAMPLE_DRIVER_H
 #define ANDROID_FRAMEWORKS_ML_NN_DRIVER_SAMPLE_SAMPLE_DRIVER_H
 
+#include <hwbinder/IPCThreadState.h>
+
 #include <string>
 #include <vector>
 
@@ -42,7 +44,6 @@
         : mName(name), mOperationResolver(operationResolver) {
         android::nn::initVLogMask();
     }
-    ~SampleDriver() override {}
     hal::Return<void> getCapabilities(getCapabilities_cb cb) override;
     hal::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb cb) override;
     hal::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb cb) override;
@@ -104,9 +105,15 @@
 class SamplePreparedModel : public hal::IPreparedModel {
    public:
     SamplePreparedModel(const hal::Model& model, const SampleDriver* driver,
-                        hal::ExecutionPreference preference)
-        : mModel(model), mDriver(driver), kPreference(preference) {}
-    ~SamplePreparedModel() override {}
+                        hal::ExecutionPreference preference, uid_t userId, hal::Priority priority)
+        : mModel(model),
+          mDriver(driver),
+          kPreference(preference),
+          kUserId(userId),
+          kPriority(priority) {
+        (void)kUserId;
+        (void)kPriority;
+    }
     bool initialize();
     hal::Return<hal::V1_0::ErrorStatus> execute(
             const hal::V1_0::Request& request,
@@ -136,6 +143,8 @@
     const SampleDriver* mDriver;
     std::vector<RunTimePoolInfo> mPoolInfos;
     const hal::ExecutionPreference kPreference;
+    const uid_t kUserId;
+    const hal::Priority kPriority;
 };
 
 }  // namespace sample_driver
diff --git a/driver/sample/SampleDriverUtils.h b/driver/sample/SampleDriverUtils.h
index b40b040..77db00b 100644
--- a/driver/sample/SampleDriverUtils.h
+++ b/driver/sample/SampleDriverUtils.h
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <hwbinder/IPCThreadState.h>
+
 #include <thread>
 
 #include "HalInterfaces.h"
@@ -43,10 +45,11 @@
 
 template <typename T_Model, typename T_IPreparedModelCallback>
 hal::ErrorStatus prepareModelBase(const T_Model& model, const SampleDriver* driver,
-                                  hal::ExecutionPreference preference, hal::Priority /*priority*/,
-                                  const hal::OptionalTimePoint& /*deadline*/,
+                                  hal::ExecutionPreference preference, hal::Priority priority,
+                                  const hal::OptionalTimePoint& deadline,
                                   const sp<T_IPreparedModelCallback>& callback,
                                   bool isFullModelSupported = true) {
+    const uid_t userId = hardware::IPCThreadState::self()->getCallingUid();
     if (callback.get() == nullptr) {
         LOG(ERROR) << "invalid callback passed to prepareModelBase";
         return hal::ErrorStatus::INVALID_ARGUMENT;
@@ -55,7 +58,8 @@
         VLOG(DRIVER) << "prepareModelBase";
         logModelToInfo(model);
     }
-    if (!validateModel(model) || !validateExecutionPreference(preference)) {
+    if (!validateModel(model) || !validateExecutionPreference(preference) ||
+        !validatePriority(priority)) {
         notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
         return hal::ErrorStatus::INVALID_ARGUMENT;
     }
@@ -63,10 +67,14 @@
         notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
         return hal::ErrorStatus::NONE;
     }
+    if (deadline.getDiscriminator() != hal::OptionalTimePoint::hidl_discriminator::none) {
+        notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
+        return hal::ErrorStatus::INVALID_ARGUMENT;
+    }
     // asynchronously prepare the model from a new, detached thread
-    std::thread([model, driver, preference, callback] {
+    std::thread([model, driver, preference, userId, priority, callback] {
         sp<SamplePreparedModel> preparedModel =
-                new SamplePreparedModel(convertToV1_3(model), driver, preference);
+                new SamplePreparedModel(convertToV1_3(model), driver, preference, userId, priority);
         if (!preparedModel->initialize()) {
             notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
             return;
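The sample driver does not advertise deadline support, so each entry point above rejects a populated deadline. A hypothetical helper capturing that check (the name is illustrative; the types are the HAL types already used above):

    // Returns true if the caller supplied a deadline; a driver without
    // deadline support answers such a request with INVALID_ARGUMENT.
    bool hasDeadline(const hal::OptionalTimePoint& deadline) {
        return deadline.getDiscriminator() !=
               hal::OptionalTimePoint::hidl_discriminator::none;
    }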
diff --git a/runtime/Callbacks.cpp b/runtime/Callbacks.cpp
index 6655a1a..6a81b9c 100644
--- a/runtime/Callbacks.cpp
+++ b/runtime/Callbacks.cpp
@@ -32,7 +32,7 @@
 
 // PreparedModelCallback methods begin here
 
-Return<void> PreparedModelCallback::notifyInternal(ErrorStatus errorStatus,
+Return<void> PreparedModelCallback::notifyInternal(bool deadObject, ErrorStatus errorStatus,
                                                    const sp<V1_0::IPreparedModel>& preparedModel) {
     {
         std::lock_guard<std::mutex> hold(mMutex);
@@ -43,6 +43,7 @@
         }
 
         // store results and mark as notified
+        mDeadObject = deadObject;
         mErrorStatus = errorStatus;
         mPreparedModel = preparedModel;
         mNotified = true;
@@ -54,17 +55,21 @@
 
 Return<void> PreparedModelCallback::notify(V1_0::ErrorStatus errorStatus,
                                            const sp<V1_0::IPreparedModel>& preparedModel) {
-    return notifyInternal(static_cast<ErrorStatus>(errorStatus), preparedModel);
+    return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), preparedModel);
 }
 
 Return<void> PreparedModelCallback::notify_1_2(V1_0::ErrorStatus errorStatus,
                                                const sp<V1_2::IPreparedModel>& preparedModel) {
-    return notifyInternal(static_cast<ErrorStatus>(errorStatus), preparedModel);
+    return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), preparedModel);
 }
 
 Return<void> PreparedModelCallback::notify_1_3(ErrorStatus errorStatus,
                                                const sp<V1_3::IPreparedModel>& preparedModel) {
-    return notifyInternal(errorStatus, preparedModel);
+    return notifyInternal(false, errorStatus, preparedModel);
+}
+
+void PreparedModelCallback::notifyAsDeadObject() {
+    notifyInternal(true, ErrorStatus::GENERAL_FAILURE, nullptr);
 }
 
 void PreparedModelCallback::wait() const {
@@ -82,22 +87,31 @@
     return mPreparedModel;
 }
 
+bool PreparedModelCallback::isDeadObject() const {
+    wait();
+    return mDeadObject;
+}
+
 // ExecutionCallback methods begin here
 
 Return<void> ExecutionCallback::notify(V1_0::ErrorStatus errorStatus) {
-    return notifyInternal(static_cast<ErrorStatus>(errorStatus), {}, kNoTiming);
+    return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), {}, kNoTiming);
 }
 
 Return<void> ExecutionCallback::notify_1_2(V1_0::ErrorStatus errorStatus,
                                            const hidl_vec<OutputShape>& outputShapes,
                                            const Timing& timing) {
-    return notifyInternal(static_cast<ErrorStatus>(errorStatus), outputShapes, timing);
+    return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), outputShapes, timing);
 }
 
 Return<void> ExecutionCallback::notify_1_3(V1_3::ErrorStatus errorStatus,
                                            const hidl_vec<OutputShape>& outputShapes,
                                            const Timing& timing) {
-    return notifyInternal(errorStatus, outputShapes, timing);
+    return notifyInternal(false, errorStatus, outputShapes, timing);
+}
+
+void ExecutionCallback::notifyAsDeadObject() {
+    notifyInternal(true, ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
 }
 
 void ExecutionCallback::wait() const {
@@ -135,6 +149,11 @@
     return mTiming;
 }
 
+bool ExecutionCallback::isDeadObject() const {
+    wait();
+    return mDeadObject;
+}
+
 bool ExecutionCallback::bindThread(std::thread asyncThread) {
     std::lock_guard<std::mutex> lock(mMutex);
 
@@ -180,25 +199,30 @@
     mOnFinish = finish;
 }
 
-Return<void> ExecutionCallback::notifyInternal(ErrorStatus errorStatus,
-                                               hidl_vec<OutputShape> outputShapes, Timing timing) {
+Return<void> ExecutionCallback::notifyInternal(bool deadObject, ErrorStatus errorStatus,
+                                               std::vector<OutputShape> outputShapes,
+                                               Timing timing) {
     // check results
-    if (errorStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
-        // outputShapes must not be empty if OUTPUT_INSUFFICIENT_SIZE.
-        if (outputShapes.size() == 0) {
-            LOG(ERROR) << "Notified with empty output shape vector when OUTPUT_INSUFFICIENT_SIZE";
-            errorStatus = ErrorStatus::GENERAL_FAILURE;
-            outputShapes = {};
-            timing = kNoTiming;
-        }
-    } else if (errorStatus != ErrorStatus::NONE) {
-        // outputShapes must be empty if errorStatus is neither NONE nor OUTPUT_INSUFFICIENT_SIZE.
-        if (outputShapes.size() != 0) {
-            LOG(ERROR) << "Notified with non-empty output shape vector when error status is "
-                          "neither NONE nor OUTPUT_INSUFFICIENT_SIZE";
-            errorStatus = ErrorStatus::GENERAL_FAILURE;
-            outputShapes = {};
-            timing = kNoTiming;
+    if (!deadObject) {
+        if (errorStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
+            // outputShapes must not be empty if OUTPUT_INSUFFICIENT_SIZE.
+            if (outputShapes.size() == 0) {
+                LOG(ERROR)
+                        << "Notified with empty output shape vector when OUTPUT_INSUFFICIENT_SIZE";
+                errorStatus = ErrorStatus::GENERAL_FAILURE;
+                outputShapes = {};
+                timing = kNoTiming;
+            }
+        } else if (errorStatus != ErrorStatus::NONE) {
+            // outputShapes must be empty if errorStatus is neither NONE nor
+            // OUTPUT_INSUFFICIENT_SIZE.
+            if (outputShapes.size() != 0) {
+                LOG(ERROR) << "Notified with non-empty output shape vector when error status is "
+                              "neither NONE nor OUTPUT_INSUFFICIENT_SIZE";
+                errorStatus = ErrorStatus::GENERAL_FAILURE;
+                outputShapes = {};
+                timing = kNoTiming;
+            }
         }
     }
 
@@ -211,8 +235,9 @@
             return Void();
         }
 
+        mDeadObject = deadObject;
         mErrorStatus = errorStatus;
-        mOutputShapes = outputShapes;
+        mOutputShapes = std::move(outputShapes);
         mTiming = timing;
         mNotified = true;
 
diff --git a/runtime/Callbacks.h b/runtime/Callbacks.h
index 1c484e4..7537025 100644
--- a/runtime/Callbacks.h
+++ b/runtime/Callbacks.h
@@ -143,6 +143,11 @@
                                  const sp<hal::V1_3::IPreparedModel>& preparedModel) override;
 
     /**
+     * Mark the callback object as a dead object. This acts as a call to notify.
+     */
+    void notifyAsDeadObject();
+
+    /**
      * PreparedModelCallback::wait blocks until notify* has been called on the
      * callback object.
      */
@@ -178,13 +183,21 @@
      */
     sp<hal::V1_0::IPreparedModel> getPreparedModel() const;
 
+    /**
+     * Queries whether the object is dead.
+     *
+     * @return 'true' if dead, 'false' otherwise.
+     */
+    bool isDeadObject() const;
+
    private:
-    hal::Return<void> notifyInternal(hal::ErrorStatus errorStatus,
+    hal::Return<void> notifyInternal(bool deadObject, hal::ErrorStatus errorStatus,
                                      const sp<hal::V1_0::IPreparedModel>& preparedModel);
 
     mutable std::mutex mMutex;
     mutable std::condition_variable mCondition;
     bool mNotified GUARDED_BY(mMutex) = false;
+    bool mDeadObject = false;
     hal::ErrorStatus mErrorStatus = hal::ErrorStatus::GENERAL_FAILURE;
     sp<hal::V1_0::IPreparedModel> mPreparedModel;
 };
@@ -317,6 +330,11 @@
     }
 
     /**
+     * Mark the callback object as a dead object. This acts as a call to notify.
+     */
+    void notifyAsDeadObject();
+
+    /**
      * ExecutionCallback::wait blocks until notify* has been called on the
      * callback object.
      */
@@ -428,6 +446,13 @@
      */
     void setOnFinish(const ExecutionFinish& finish);
 
+    /**
+     * Queries whether the object is dead.
+     *
+     * @return 'true' if dead, 'false' otherwise.
+     */
+    bool isDeadObject() const;
+
    private:
     /*
      * ExecutionCallback::notifyInternal stores the results of the execution
@@ -436,8 +461,8 @@
      * before any call to wait or get* return. It then enables all prior and
      * future wait calls on the ExecutionCallback object to proceed.
      */
-    hal::Return<void> notifyInternal(hal::ErrorStatus errorStatus,
-                                     hal::hidl_vec<hal::OutputShape> outputShapes,
+    hal::Return<void> notifyInternal(bool deadObject, hal::ErrorStatus errorStatus,
+                                     std::vector<hal::OutputShape> outputShapes,
                                      hal::Timing timing);
 
     // members
@@ -446,6 +471,7 @@
     mutable std::thread mThread GUARDED_BY(mMutex);
     ExecutionFinish mOnFinish GUARDED_BY(mMutex);
     bool mNotified GUARDED_BY(mMutex) = false;
+    bool mDeadObject = false;
     hal::ErrorStatus mErrorStatus = hal::ErrorStatus::GENERAL_FAILURE;
     std::vector<hal::OutputShape> mOutputShapes;
     hal::Timing mTiming = {};
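Together, the callback changes let a dead binder be distinguished from a driver-reported failure. A sketch of the consuming side, assuming the runtime's callback and error-code types (the same pattern appears in VersionedInterfaces.cpp below):

    sp<ExecutionCallback> callback = new ExecutionCallback();
    // ... hand 'callback' to the driver; if the service dies first, the
    // DeathHandler calls callback->notifyAsDeadObject() on its behalf ...
    callback->wait();
    if (callback->isDeadObject()) {
        // Report ANEURALNETWORKS_DEAD_OBJECT rather than a generic failure.
        return {ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
    }
    return getExecutionResult(callback->getStatus(), callback->getOutputShapes(),
                              callback->getTiming());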
diff --git a/runtime/CompilationBuilder.cpp b/runtime/CompilationBuilder.cpp
index 87923ff..cecad58 100644
--- a/runtime/CompilationBuilder.cpp
+++ b/runtime/CompilationBuilder.cpp
@@ -19,10 +19,12 @@
 #include "CompilationBuilder.h"
 
 #include <algorithm>
+#include <limits>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
+
 #include "BurstBuilder.h"
 #include "ExecutionBuilder.h"
 #include "ExecutionBurstController.h"
@@ -34,6 +36,8 @@
 namespace android {
 namespace nn {
 
+using namespace hal;
+
 CompilationBuilder::CompilationBuilder(const ModelBuilder* model,
                                        const std::vector<std::shared_ptr<Device>>& devices,
                                        bool explicitDeviceList)
@@ -52,12 +56,15 @@
     }
     // TODO validate the rest
 
+    const auto [n, timeout] = makeTimePoint(mTimeoutDuration);
+    NN_RETURN_IF_ERROR(n);
+
     mFinished = true;
     if (mIsCacheInfoProvided) {
         mPlan.setCaching(&mCacheDir, mToken);
     }
     if (mPartitioning) {
-        int n = mModel->partitionTheWork(mDevices, mPreference, &mPlan);
+        int n = mModel->partitionTheWork(mDevices, mPreference, mPriority, timeout, &mPlan);
         switch (n) {
             case ANEURALNETWORKS_NO_ERROR:
                 return n;
@@ -90,7 +97,7 @@
     VLOG(COMPILATION) << "CompilationBuilder::finish with CPU fallback";
     mPlan.reset();
     mPlan.becomeSingleStep(DeviceManager::getCpuDevice(), mModel);
-    return mPlan.finish(mModel, mPreference);
+    return mPlan.finish(mModel, mPreference, mPriority, timeout);
 }
 
 int CompilationBuilder::setPreference(int32_t preference) {
@@ -124,6 +131,46 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
+int CompilationBuilder::setPriority(int32_t priority) {
+    if (mFinished) {
+        LOG(ERROR) << "ANeuralNetworksCompilation_setPriority can't modify after compilation "
+                      "finished";
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    if (priority != ANEURALNETWORKS_PRIORITY_LOW && priority != ANEURALNETWORKS_PRIORITY_MEDIUM &&
+        priority != ANEURALNETWORKS_PRIORITY_HIGH) {
+        LOG(ERROR) << "ANeuralNetworksCompilation_setPriority invalid priority " << priority;
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    mPriority = priority;
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int CompilationBuilder::setTimeoutDuration(uint64_t duration) {
+    if (mFinished) {
+        LOG(ERROR) << "ANeuralNetworksCompilation_setTimeout can't modify after compilation "
+                      "finished";
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    if (!mExplicitDeviceList || (mDevices.size() != 1)) {
+        LOG(ERROR) << "ANeuralNetworksCompilation_setTimeout called on an "
+                      "ANeuralNetworksCompilation that was not created by "
+                      "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    const auto& device = mDevices.front();
+    const bool supportsCompilationDeadline = device->supportsDeadlines().first;
+    if (!supportsCompilationDeadline) {
+        LOG(ERROR)
+                << "ANeuralNetworksCompilation_setTimeout called on a device that does not "
+                   "support compilation timeouts.";
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    mTimeoutDuration = duration;
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
 int CompilationBuilder::setPartitioning(uint32_t partitioning) {
     if (mFinished) {
         LOG(ERROR) << "ANeuralNetworksCompilation_setPartitioning can't modify after compilation "
diff --git a/runtime/CompilationBuilder.h b/runtime/CompilationBuilder.h
index e7fc077..66ef5b9 100644
--- a/runtime/CompilationBuilder.h
+++ b/runtime/CompilationBuilder.h
@@ -17,7 +17,9 @@
 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_COMPILATION_BUILDER_H
 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_COMPILATION_BUILDER_H
 
+#include <chrono>
 #include <memory>
+#include <optional>
 #include <string>
 #include <vector>
 
@@ -49,6 +51,10 @@
 
     int setCaching(const std::string& cacheDir, const uint8_t* token);
 
+    int setPriority(int32_t priority);
+
+    int setTimeoutDuration(uint64_t duration);
+
     int finish();
 
     int createExecution(ExecutionBuilder** execution);
@@ -92,6 +98,12 @@
     std::string mCacheDir;
     uint8_t mToken[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN];
     bool mIsCacheInfoProvided = false;
+
+    // Compilation priority information.
+    int32_t mPriority = ANEURALNETWORKS_PRIORITY_DEFAULT;
+
+    // Amount of time to complete or abort the compilation.
+    std::optional<uint64_t> mTimeoutDuration;
 };
 
 }  // namespace nn
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index 174faa0..2f52b1e 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -19,6 +19,7 @@
 #include "ExecutionBuilder.h"
 
 #include <algorithm>
+#include <limits>
 #include <memory>
 #include <mutex>
 #include <optional>
@@ -267,6 +268,32 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
+int ExecutionBuilder::setTimeoutDuration(uint64_t duration) {
+    if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
+        LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called on an ANeuralNetworksExecution "
+                      "created from an ANeuralNetworksCompilation that was not created by "
+                      "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    const auto& device = mCompilation->mDevices.front();
+    const bool supportsExecutionDeadline = device->supportsDeadlines().second;
+    if (!supportsExecutionDeadline) {
+        LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called on device that does not support "
+                      "execution timeouts.";
+        return ANEURALNETWORKS_BAD_DATA;
+    }
+    if (mStarted) {
+        LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called after the execution has started.";
+        return ANEURALNETWORKS_BAD_STATE;
+    }
+    mTimeoutDuration = duration;
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+std::optional<uint64_t> ExecutionBuilder::getTimeoutDuration() const {
+    return mTimeoutDuration;
+}
+
 int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) {
     if (!mFinished) {
         LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the "
@@ -740,8 +767,12 @@
     }
 
     const MeasureTiming measure = measureTiming(mExecutionBuilder);
-    const auto [n, outputShapes, timing] =
-            mPreparedModel->execute(mInputs, mOutputs, mMemories, burstController, measure);
+    const auto [timePointN, deadline] = makeTimePoint(mExecutionBuilder->getTimeoutDuration());
+    if (timePointN != ANEURALNETWORKS_NO_ERROR) {
+        return {timePointN, {}, kNoTiming};
+    }
+    const auto [n, outputShapes, timing] = mPreparedModel->execute(
+            mInputs, mOutputs, mMemories, burstController, measure, deadline);
     mExecutionBuilder->reportTiming(timing);
 
     return {n, std::move(outputShapes), timing};
@@ -754,11 +785,14 @@
     mDevice = DeviceManager::getCpuDevice();
     mPreparedModel = nullptr;
     const ModelFactory makeModel = [this] { return mModel->makeHidlModel(); };
-    // TODO: Propagate user preference to this point instead of using default value of
-    // ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.
+    // TODO: Propagate user preference and compilation priority to this point instead of using
+    // default values of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER and
+    // ANEURALNETWORKS_PRIORITY_MEDIUM
     const ExecutionPreference preference =
             static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
-    const auto [n, preparedModel] = mDevice->prepareModel(makeModel, preference, {}, {});
+    const Priority priority = convertToHalPriority(ANEURALNETWORKS_PRIORITY_DEFAULT);
+    const auto [n, preparedModel] =
+            mDevice->prepareModel(makeModel, preference, priority, {}, {}, {});
     mPreparedModel = preparedModel;
     if (n != ANEURALNETWORKS_NO_ERROR) {
         return {n, {}, kNoTiming};
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 65f08d2..9b30808 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -62,6 +62,10 @@
 
     int getDuration(int32_t durationCode, uint64_t* duration) const;
 
+    int setTimeoutDuration(uint64_t duration);
+
+    std::optional<uint64_t> getTimeoutDuration() const;
+
     int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
         CHECK(synchronizationCallback != nullptr);
         return compute(synchronizationCallback);
@@ -132,6 +136,9 @@
     // Timing reported from the driver
     hal::Timing mTiming = {};
 
+    // Amount of time to complete or abort the execution.
+    std::optional<uint64_t> mTimeoutDuration;
+
     // Properties cannot be set once the execution has started.
     std::atomic_bool mStarted = false;
 
diff --git a/runtime/ExecutionPlan.cpp b/runtime/ExecutionPlan.cpp
index e753793..395357d 100644
--- a/runtime/ExecutionPlan.cpp
+++ b/runtime/ExecutionPlan.cpp
@@ -65,8 +65,8 @@
 // operation indices to be executed (COMPOUND body). The token will be re-hashed further by the
 // device name, device version string, and the execution preference in this function.
 int compile(const Device& device, const ModelBuilder& model, int executionPreference,
-            const std::string& cacheDir, TokenHasher* token,
-            std::shared_ptr<PreparedModel>* preparedModel) {
+            int compilationPriority, const OptionalTimePoint& deadline, const std::string& cacheDir,
+            TokenHasher* token, std::shared_ptr<PreparedModel>* preparedModel) {
     CHECK(token != nullptr);
     CHECK(preparedModel != nullptr);
     *preparedModel = nullptr;
@@ -81,8 +81,9 @@
 
     const ModelFactory makeModel = [&model] { return model.makeHidlModel(); };
     const ExecutionPreference preference = static_cast<ExecutionPreference>(executionPreference);
+    const Priority priority = convertToHalPriority(compilationPriority);
     const auto [n, returnedPreparedModel] =
-            device.prepareModel(makeModel, preference, cacheDir, cacheToken);
+            device.prepareModel(makeModel, preference, priority, deadline, cacheDir, cacheToken);
     *preparedModel = returnedPreparedModel;
     return n;
 }
@@ -423,7 +424,7 @@
 }
 
 int ExecutionStep::finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize,
-                                   int32_t executionPreference) {
+                                   int32_t executionPreference, int32_t priority) {
     CHECK(mDevice != nullptr);
 
     for (const auto& stepModelOutput : mTempsAsStepModelOutputs) {
@@ -511,8 +512,8 @@
 
     // TODO: Move compilation elsewhere?
     VLOG(COMPILATION) << "ExecutionStep::finishStepModel, compilation on " << mDevice->getName();
-    return compile(*mDevice, mStepModel, executionPreference, *mPlan->getCacheDir(), &mToken,
-                   &mPreparedStepModel);
+    return compile(*mDevice, mStepModel, executionPreference, priority, {}, *mPlan->getCacheDir(),
+                   &mToken, &mPreparedStepModel);
 }
 
 void ExecutionStep::dump() const {
@@ -522,12 +523,13 @@
     }
 }
 
-int ExecutionPlan::CompoundBody::finish(const ModelBuilder* mainModel,
-                                        int32_t executionPreference) {
+int ExecutionPlan::CompoundBody::finish(const ModelBuilder* mainModel, int32_t executionPreference,
+                                        int32_t priority, const OptionalTimePoint& deadline) {
+    CHECK(deadline.getDiscriminator() == OptionalTimePoint::hidl_discriminator::none);
     findTempsAsStepModelOutputs();
     for (const auto& step : mSteps) {
         int n = step->finishStepModel(mainModel, &mHasStepModelOutputOfUnknownSize,
-                                      executionPreference);
+                                      executionPreference, priority);
         if (n != ANEURALNETWORKS_NO_ERROR) {
             VLOG(COMPILATION) << "ExecutionPlan::CompoundBody::finish -- finishStepModel failed";
             return n;
@@ -550,18 +552,20 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int ExecutionPlan::SimpleBody::finish(const ModelBuilder*, int32_t executionPreference) {
+int ExecutionPlan::SimpleBody::finish(const ModelBuilder*, int32_t executionPreference,
+                                      int32_t priority, const OptionalTimePoint& deadline) {
     CHECK(mDevice != nullptr);
     VLOG(COMPILATION) << "ExecutionPlan::SimpleBody::finish, compilation";
-    const int n =
-            compile(*mDevice, *mModel, executionPreference, *mCacheDir, &mToken, &mPreparedModel);
+    const int n = compile(*mDevice, *mModel, executionPreference, priority, deadline, *mCacheDir,
+                          &mToken, &mPreparedModel);
     mSuccessfulFinish = (n == ANEURALNETWORKS_NO_ERROR);
     return n;
 }
 
-int ExecutionPlan::finish(const ModelBuilder* mainModel, int32_t executionPreference) {
+int ExecutionPlan::finish(const ModelBuilder* mainModel, int32_t executionPreference,
+                          int32_t priority, const OptionalTimePoint& deadline) {
     CHECK(mBody != nullptr);
-    return mBody->finish(mainModel, executionPreference);
+    return mBody->finish(mainModel, executionPreference, priority, deadline);
 }
 
 ExecutionPlan::Controller::Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
@@ -908,7 +912,8 @@
 }
 
 int ModelBuilder::partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
-                                   uint32_t preference, ExecutionPlan* plan) const {
+                                   uint32_t preference, uint32_t priority,
+                                   const OptionalTimePoint& deadline, ExecutionPlan* plan) const {
     // This function uses a heuristic approach to partitioning the graph.
     // It should be good enough for the first release.
 
@@ -931,7 +936,7 @@
         VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: only one best device: "
                           << bestDeviceIndex << " = " << devices[bestDeviceIndex]->getName();
         plan->becomeSingleStep(devices[bestDeviceIndex], this);
-        return plan->finish(this, preference);
+        return plan->finish(this, preference, priority, deadline);
     }
 
     // No easy solution, we need to split the work.
@@ -986,7 +991,7 @@
         }
     }
 
-    int n = plan->finish(this, preference);
+    int n = plan->finish(this, preference, priority, deadline);
     if (VLOG_IS_ON(COMPILATION)) {
         VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: source model: ";
         logModelToInfo(makeHidlModel());
diff --git a/runtime/ExecutionPlan.h b/runtime/ExecutionPlan.h
index bb87266..f2bb454 100644
--- a/runtime/ExecutionPlan.h
+++ b/runtime/ExecutionPlan.h
@@ -105,7 +105,7 @@
     // *hasOutputOfUnknownSize to true; otherwise, leaves it
     // unchanged.
     int finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize,
-                        int32_t executionPreference);
+                        int32_t executionPreference, int32_t priority);
 
     const ModelBuilder* getStepModel() const { return &mStepModel; }
     std::shared_ptr<Device> getDevice() const { return mDevice; }
@@ -264,7 +264,8 @@
 
     void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);
 
-    int finish(const ModelBuilder* mainModel, int32_t executionPreference);
+    int finish(const ModelBuilder* mainModel, int32_t executionPreference, int32_t priority,
+               const hal::OptionalTimePoint& deadline);
 
     void recordTemporaryDef(uint32_t sourceOperandIndex, uint32_t stepIndex);
 
@@ -313,7 +314,8 @@
     struct Body {
         virtual ~Body() {}
         virtual void dump() const = 0;
-        virtual int finish(const ModelBuilder* mainModel, int32_t executionPreference) = 0;
+        virtual int finish(const ModelBuilder* mainModel, int32_t executionPreference,
+                           int32_t priority, const hal::OptionalTimePoint& deadline) = 0;
         virtual bool hasStepModelOutputsOfUnknownSize() const = 0;
         virtual void forEachStepRoleOfInput(uint32_t index,
                                             const StepRoleCallback& callback) const = 0;
@@ -328,7 +330,8 @@
             : mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {}
 
         void dump() const override;
-        int finish(const ModelBuilder* mainModel, int32_t executionPreference) override;
+        int finish(const ModelBuilder* mainModel, int32_t executionPreference, int32_t priority,
+                   const hal::OptionalTimePoint& deadline) override;
         bool hasStepModelOutputsOfUnknownSize() const override { return false; }
         void forEachStepRoleOfInput(uint32_t index,
                                     const StepRoleCallback& callback) const override;
@@ -345,7 +348,8 @@
 
     struct CompoundBody : Body {
         void dump() const override;
-        int finish(const ModelBuilder* mainModel, int32_t executionPreference) override;
+        int finish(const ModelBuilder* mainModel, int32_t executionPreference, int32_t priority,
+                   const hal::OptionalTimePoint& deadline) override;
         bool hasStepModelOutputsOfUnknownSize() const override {
             return mHasStepModelOutputOfUnknownSize;
         }
diff --git a/runtime/Manager.cpp b/runtime/Manager.cpp
index 11765ff..ff405ec 100644
--- a/runtime/Manager.cpp
+++ b/runtime/Manager.cpp
@@ -85,10 +85,14 @@
                 kInterface->getNumberOfCacheFilesNeeded();
         return numModelCacheFiles > 0 || numDataCacheFiles > 0;
     }
+    std::pair<bool, bool> supportsDeadlines() const override {
+        return kInterface->supportsDeadlines();
+    }
+    int wait() const override { return kInterface->wait(); }
 
     std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
-            const ModelFactory& makeModel, ExecutionPreference preference,
-            const std::string& cacheDir,
+            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+            const OptionalTimePoint& deadline, const std::string& cacheDir,
             const std::optional<CacheToken>& maybeToken) const override;
 
     std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor& desc) const override;
@@ -121,8 +125,8 @@
     std::tuple<int, std::vector<OutputShape>, Timing> execute(
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
-            const std::shared_ptr<ExecutionBurstController>& burstController,
-            MeasureTiming measure) const override;
+            const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
+            const OptionalTimePoint& deadline) const override;
 
     std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
             bool preferPowerOverLatency) const override {
@@ -217,10 +221,11 @@
 }
 
 std::pair<int, std::shared_ptr<PreparedModel>> DriverDevice::prepareModel(
-        const ModelFactory& makeModel, ExecutionPreference preference, const std::string& cacheDir,
+        const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+        const OptionalTimePoint& deadline, const std::string& cacheDir,
         const std::optional<CacheToken>& maybeToken) const {
-    const auto [n, preparedModel] =
-            kInterface->prepareModel(makeModel, preference, cacheDir, maybeToken);
+    const auto [n, preparedModel] = kInterface->prepareModel(makeModel, preference, priority,
+                                                             deadline, cacheDir, maybeToken);
     if (n != ANEURALNETWORKS_NO_ERROR) {
         return {n, nullptr};
     }
@@ -297,8 +302,8 @@
 std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
         const MemoryTracker& memories,
-        const std::shared_ptr<ExecutionBurstController>& burstController,
-        MeasureTiming measure) const {
+        const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
+        const OptionalTimePoint& deadline) const {
     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
 
     // Make a copy of the memory tracker as we will append memory pools for pointer arguments.
@@ -375,7 +380,7 @@
     if (!burstCompute || burstFallback) {
         const bool preferSynchronous = DeviceManager::get()->syncExecHal();
         std::tie(n, outputShapes, timing) =
-                mPreparedModel->execute(request, measure, preferSynchronous);
+                mPreparedModel->execute(request, measure, deadline, preferSynchronous);
     }
 
     if (n != ANEURALNETWORKS_NO_ERROR) {
@@ -426,10 +431,14 @@
         return kPerformance;
     }
     bool isCachingSupported() const override { return false; }
+    std::pair<bool, bool> supportsDeadlines() const override {
+        return {/*prepareModelDeadline=*/false, /*executionDeadline=*/false};
+    }
+    int wait() const override { return ANEURALNETWORKS_NO_ERROR; }
 
     std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
-            const ModelFactory& makeModel, ExecutionPreference preference,
-            const std::string& cacheDir,
+            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+            const OptionalTimePoint& deadline, const std::string& cacheDir,
             const std::optional<CacheToken>& maybeToken) const override;
 
     std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor&) const override {
@@ -463,8 +472,8 @@
     std::tuple<int, std::vector<OutputShape>, Timing> execute(
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
-            const std::shared_ptr<ExecutionBurstController>& burstController,
-            MeasureTiming measure) const override;
+            const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
+            const OptionalTimePoint& deadline) const override;
 
     std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
             bool /*preferPowerOverLatency*/) const override {
@@ -496,15 +505,20 @@
 }
 
 std::pair<int, std::shared_ptr<PreparedModel>> CpuDevice::prepareModel(
-        const ModelFactory& makeModel, ExecutionPreference preference,
-        const std::string& /*cacheDir*/, const std::optional<CacheToken>& maybeToken) const {
+        const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+        const OptionalTimePoint& deadline, const std::string& /*cacheDir*/,
+        const std::optional<CacheToken>& maybeToken) const {
     CHECK(!maybeToken.has_value())
             << "Should never call prepareModel with cache information on CpuDevice";
 
     const Model model = makeModel();
-    if (!validateModel(model) || !validateExecutionPreference(preference)) {
+    if (!validateModel(model) || !validateExecutionPreference(preference) ||
+        !validatePriority(priority)) {
         return {ANEURALNETWORKS_OP_FAILED, nullptr};
     }
+    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+        return {ANEURALNETWORKS_BAD_DATA, nullptr};
+    }
 
     return CpuPreparedModel::create(model);
 }
@@ -542,7 +556,11 @@
         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
         const MemoryTracker& memories,
         const std::shared_ptr<ExecutionBurstController>& /*burstController*/,
-        MeasureTiming /*measure*/) const {
+        MeasureTiming /*measure*/, const OptionalTimePoint& deadline) const {
+    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+        return {ANEURALNETWORKS_BAD_DATA, {}, kNoTiming};
+    }
+
     std::vector<RunTimePoolInfo> requestPoolInfos;
     requestPoolInfos.reserve(memories.size());
     for (const Memory* mem : memories) {
diff --git a/runtime/Manager.h b/runtime/Manager.h
index 3cd085c..68ca105 100644
--- a/runtime/Manager.h
+++ b/runtime/Manager.h
@@ -58,7 +58,7 @@
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::shared_ptr<ExecutionBurstController>& burstController,
-            hal::MeasureTiming measure) const = 0;
+            hal::MeasureTiming measure, const hal::OptionalTimePoint& deadline) const = 0;
 
     virtual std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
             bool preferPowerOverLatency) const = 0;
@@ -86,9 +86,12 @@
     virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
     virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
     virtual bool isCachingSupported() const = 0;
+    virtual std::pair<bool, bool> supportsDeadlines() const = 0;
+    virtual int wait() const = 0;
 
     virtual std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
             const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
+            hal::Priority priority, const hal::OptionalTimePoint& deadline,
             const std::string& cacheDir,
             const std::optional<hal::CacheToken>& maybeToken) const = 0;
 
diff --git a/runtime/ModelBuilder.h b/runtime/ModelBuilder.h
index 7cfb685..c3ac96d 100644
--- a/runtime/ModelBuilder.h
+++ b/runtime/ModelBuilder.h
@@ -105,6 +105,7 @@
     }
 
     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference,
+                         uint32_t priority, const hal::OptionalTimePoint& deadline,
                          ExecutionPlan* plan) const;
 
    private:
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 8913c0e..52f50a7 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -655,18 +655,30 @@
 }
 
 bool ANeuralNetworksDevice_supportsCompilationTimeout(const ANeuralNetworksDevice* device) {
-    (void)device;
-    return false;
+    if (device == nullptr) {
+        LOG(ERROR) << "ANeuralNetworksDevice_supportsCompilationTimeout passed a nullptr";
+        return false;
+    }
+    const Device* d = reinterpret_cast<const Device*>(device);
+    return d->supportsDeadlines().first;
 }
 
 bool ANeuralNetworksDevice_supportsExecutionTimeout(const ANeuralNetworksDevice* device) {
-    (void)device;
-    return false;
+    if (device == nullptr) {
+        LOG(ERROR) << "ANeuralNetworksDevice_supportsExecutionTimeout passed a nullptr";
+        return false;
+    }
+    const Device* d = reinterpret_cast<const Device*>(device);
+    return d->supportsDeadlines().second;
 }
 
 int ANeuralNetworksDevice_wait(const ANeuralNetworksDevice* device) {
-    (void)device;
-    return ANEURALNETWORKS_OP_FAILED;
+    if (device == nullptr) {
+        LOG(ERROR) << "ANeuralNetworksDevice_wait passed a nullptr";
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    const Device* d = reinterpret_cast<const Device*>(device);
+    return d->wait();
 }
 
 int ANeuralNetworksModel_getSupportedOperationsForDevices(
@@ -1175,17 +1187,23 @@
 
 int ANeuralNetworksCompilation_setPriority(ANeuralNetworksCompilation* compilation, int priority) {
     NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_setPriority");
-    (void)compilation;
-    (void)priority;
-    return ANEURALNETWORKS_OP_FAILED;
+    if (!compilation) {
+        LOG(ERROR) << "ANeuralNetworksCompilation_setPriority passed a nullptr";
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(compilation);
+    return c->setPriority(priority);
 }
 
 int ANeuralNetworksCompilation_setTimeout(ANeuralNetworksCompilation* compilation,
                                           uint64_t duration) {
     NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_setTimeout");
-    (void)compilation;
-    (void)duration;
-    return ANEURALNETWORKS_OP_FAILED;
+    if (!compilation) {
+        LOG(ERROR) << "ANeuralNetworksCompilation_setTimeout passed a nullptr";
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+    CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(compilation);
+    return c->setTimeoutDuration(duration);
 }
 
 int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
@@ -1321,9 +1339,13 @@
 
 int ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution* execution, uint64_t duration) {
     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_setTimeout");
-    (void)execution;
-    (void)duration;
-    return ANEURALNETWORKS_OP_FAILED;
+    if (!execution) {
+        LOG(ERROR) << "ANeuralNetworksExecution_setTimeout passed a nullptr";
+        return ANEURALNETWORKS_UNEXPECTED_NULL;
+    }
+
+    ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
+    return r->setTimeoutDuration(duration);
 }
 
 int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
diff --git a/runtime/VersionedInterfaces.cpp b/runtime/VersionedInterfaces.cpp
index c4c6e85..565ad6e 100644
--- a/runtime/VersionedInterfaces.cpp
+++ b/runtime/VersionedInterfaces.cpp
@@ -105,56 +105,49 @@
 
 const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
 
-void sendFailureMessage(const sp<IPreparedModelCallback>& cb) {
+void sendFailureMessage(IPreparedModelCallback* cb) {
+    CHECK(cb != nullptr);
     cb->notify_1_3(ErrorStatus::GENERAL_FAILURE, nullptr);
 }
 
-void sendFailureMessage(const sp<PreparedModelCallback>& cb) {
-    sendFailureMessage(static_cast<sp<IPreparedModelCallback>>(cb));
-}
-
-void sendFailureMessage(const sp<IExecutionCallback>& cb) {
-    cb->notify_1_3(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
-}
-
 // This class is thread safe
-template <typename ICallback>
+template <typename Callback>
 class DeathHandler : public hidl_death_recipient {
    public:
     void serviceDied(uint64_t /*cookie*/, const wp<hidl::base::V1_0::IBase>& /*who*/) override {
         LOG(ERROR) << "DeathHandler::serviceDied -- service unexpectedly died!";
         std::lock_guard<std::mutex> hold(mMutex);
         std::for_each(mCallbacks.begin(), mCallbacks.end(),
-                      [](const auto& cb) { sendFailureMessage(cb); });
+                      [](const auto& cb) { cb->notifyAsDeadObject(); });
     }
 
     [[nodiscard]] base::ScopeGuard<std::function<void()>> protectCallback(
-            const sp<ICallback>& callback) {
+            const sp<Callback>& callback) {
         registerCallback(callback);
         return ::android::base::make_scope_guard(
                 [this, callback] { unregisterCallback(callback); });
     }
 
    private:
-    void registerCallback(const sp<ICallback>& callback) {
+    void registerCallback(const sp<Callback>& callback) {
         std::lock_guard<std::mutex> hold(mMutex);
         mCallbacks.push_back(callback);
     }
 
-    void unregisterCallback(const sp<ICallback>& callback) {
+    void unregisterCallback(const sp<Callback>& callback) {
         std::lock_guard<std::mutex> hold(mMutex);
         mCallbacks.erase(std::remove(mCallbacks.begin(), mCallbacks.end(), callback),
                          mCallbacks.end());
     }
 
     std::mutex mMutex;
-    std::vector<sp<ICallback>> mCallbacks GUARDED_BY(mMutex);
+    std::vector<sp<Callback>> mCallbacks GUARDED_BY(mMutex);
 };
 
 }  // anonymous namespace
 
-class IDeviceDeathHandler : public DeathHandler<IPreparedModelCallback> {};
-class IPreparedModelDeathHandler : public DeathHandler<IExecutionCallback> {};
+class IDeviceDeathHandler : public DeathHandler<PreparedModelCallback> {};
+class IPreparedModelDeathHandler : public DeathHandler<ExecutionCallback> {};
 
 static std::pair<int, std::shared_ptr<VersionedIPreparedModel>> makeVersionedIPreparedModel(
         sp<V1_0::IPreparedModel> preparedModel) {
@@ -169,7 +162,18 @@
     // asynchronous calls are susceptible to hangs if the service crashes before
     // providing the response.
     const Return<bool> ret = preparedModel->linkToDeath(deathHandler, 0);
-    if (!ret.isOk() || ret != true) {
+    if (ret.isDeadObject()) {
+        LOG(ERROR) << "makeVersionedIPreparedModel failed to register a death recipient for the "
+                      "IPreparedModel object because the IPreparedModel object is dead.";
+        return {ANEURALNETWORKS_DEAD_OBJECT, nullptr};
+    }
+    if (!ret.isOk()) {
+        LOG(ERROR) << "makeVersionedIPreparedModel failed to register a death recipient for the "
+                      "IPreparedModel object because of failure: "
+                   << ret.description();
+        return {ANEURALNETWORKS_OP_FAILED, nullptr};
+    }
+    if (ret != true) {
         LOG(ERROR) << "makeVersionedIPreparedModel failed to register a death recipient for the "
                       "IPreparedModel object.";
         return {ANEURALNETWORKS_OP_FAILED, nullptr};
@@ -196,11 +200,17 @@
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::executeAsynchronously(
-        const Request& request, MeasureTiming measure) const {
+        const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline) const {
+    const auto failDeadObject = []() -> std::tuple<int, std::vector<OutputShape>, Timing> {
+        return {ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
+    };
     const auto failWithStatus = [](ErrorStatus status) {
         return getExecutionResult(status, {}, kNoTiming);
     };
-    const auto getResults = [](const ExecutionCallback& cb) {
+    const auto getResults = [failDeadObject](const ExecutionCallback& cb) {
+        if (cb.isDeadObject()) {
+            return failDeadObject();
+        }
         return getExecutionResult(cb.getStatus(), cb.getOutputShapes(), cb.getTiming());
     };
 
@@ -209,7 +219,12 @@
 
     // version 1.3+ HAL
     if (mPreparedModelV1_3 != nullptr) {
-        Return<ErrorStatus> ret = mPreparedModelV1_3->execute_1_3(request, measure, {}, callback);
+        Return<ErrorStatus> ret =
+                mPreparedModelV1_3->execute_1_3(request, measure, deadline, callback);
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "execute_1_3 failure: " << ret.description();
+            return failDeadObject();
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "execute_1_3 failure: " << ret.description();
             return failWithStatus(ErrorStatus::GENERAL_FAILURE);
@@ -233,6 +248,10 @@
     if (mPreparedModelV1_2 != nullptr) {
         Return<V1_0::ErrorStatus> ret =
                 mPreparedModelV1_2->execute_1_2(request10, measure, callback);
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "execute_1_2 failure: " << ret.description();
+            return failDeadObject();
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "execute_1_2 failure: " << ret.description();
             return failWithStatus(ErrorStatus::GENERAL_FAILURE);
@@ -249,6 +268,10 @@
     // version 1.0 HAL
     if (mPreparedModelV1_0 != nullptr) {
         Return<V1_0::ErrorStatus> ret = mPreparedModelV1_0->execute(request10, callback);
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "execute failure: " << ret.description();
+            return failDeadObject();
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "execute failure: " << ret.description();
             return failWithStatus(ErrorStatus::GENERAL_FAILURE);
@@ -268,18 +291,24 @@
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::executeSynchronously(
-        const Request& request, MeasureTiming measure) const {
+        const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline) const {
+    const std::tuple<int, std::vector<OutputShape>, Timing> kDeadObject = {
+            ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
     const auto kFailure = getExecutionResult(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
 
     // version 1.3+ HAL
     if (mPreparedModelV1_3 != nullptr) {
         std::tuple<int, std::vector<OutputShape>, Timing> result;
         Return<void> ret = mPreparedModelV1_3->executeSynchronously_1_3(
-                request, measure, {},
+                request, measure, deadline,
                 [&result](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes,
                           const Timing& timing) {
                     result = getExecutionResult(error, outputShapes, timing);
                 });
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "executeSynchronously_1_3 failure: " << ret.description();
+            return kDeadObject;
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "executeSynchronously_1_3 failure: " << ret.description();
             return kFailure;
@@ -303,6 +332,10 @@
                           const Timing& timing) {
                     result = getExecutionResult(convertToV1_3(error), outputShapes, timing);
                 });
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "executeSynchronously failure: " << ret.description();
+            return kDeadObject;
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "executeSynchronously failure: " << ret.description();
             return kFailure;
@@ -311,18 +344,19 @@
     }
 
     // Fallback to asynchronous execution.
-    return executeAsynchronously(request, measure);
+    return executeAsynchronously(request, measure, deadline);
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execute(
-        const Request& request, MeasureTiming measure, bool preferSynchronous) const {
+        const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline,
+        bool preferSynchronous) const {
     if (preferSynchronous) {
         VLOG(EXECUTION) << "Before executeSynchronously() " << SHOW_IF_DEBUG(toString(request));
-        return executeSynchronously(request, measure);
+        return executeSynchronously(request, measure, deadline);
     }
 
     VLOG(EXECUTION) << "Before executeAsynchronously() " << SHOW_IF_DEBUG(toString(request));
-    return executeAsynchronously(request, measure);
+    return executeAsynchronously(request, measure, deadline);
 }
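
For illustration, a caller that wants an execution abandoned after a fixed time budget could build the absolute deadline itself and pass it through execute(). Only the deadline construction is shown; 'preparedModel' and 'request' are assumed to exist, and the overflow check is omitted:

// Sketch (not part of the patch): turn a relative 50 ms budget into an
// absolute steady-clock deadline in nanoseconds and pass it to execute().
using namespace std::chrono;
const uint64_t nowNs =
        time_point_cast<nanoseconds>(steady_clock::now()).time_since_epoch().count();
const uint64_t budgetNs = duration_cast<nanoseconds>(milliseconds(50)).count();

OptionalTimePoint deadline;
deadline.nanoseconds(nowNs + budgetNs);  // overflow check omitted here

const auto [n, outputShapes, timing] = preparedModel->execute(
        request, MeasureTiming::NO, deadline, /*preferSynchronous=*/true);
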
 
 // This is the amount of time the ExecutionBurstController should spend polling
@@ -505,12 +539,34 @@
     return {ErrorStatus::NONE, 0, 0};
 }
 
+static std::tuple<ErrorStatus, bool, bool> supportsDeadlinesFunction(V1_3::IDevice* device) {
+    CHECK(device != nullptr);
+    constexpr std::tuple<ErrorStatus, bool, bool> kFailure = {ErrorStatus::GENERAL_FAILURE, false,
+                                                              false};
+    std::tuple<ErrorStatus, bool, bool> result = kFailure;
+    const Return<void> ret =
+            device->supportsDeadlines([&result](bool prepareModelDeadline, bool executionDeadline) {
+                result = {ErrorStatus::NONE, prepareModelDeadline, executionDeadline};
+            });
+    if (!ret.isOk()) {
+        LOG(ERROR) << "supportsDeadlines failure: " << ret.description();
+        return kFailure;
+    }
+    return result;
+}
+
+static std::tuple<ErrorStatus, bool, bool> supportsDeadlinesFunction(V1_0::IDevice* device) {
+    CHECK(device != nullptr);
+    return {ErrorStatus::NONE, /*prepareModelDeadline=*/false, /*executionDeadline=*/false};
+}
+
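Because pre-1.3 drivers can never honor deadlines, the V1_0 overload above lets overload resolution report {false, false} without an IPC round trip. A consumer of the cached capability would typically gate deadline use on it; a hypothetical helper, assuming a policy of silently dropping unsupported deadlines:

// Sketch: forward an execution deadline only when the driver advertises
// support for it. Dropping it silently otherwise is an illustrative policy,
// not one mandated by this change.
OptionalTimePoint gateExecutionDeadline(const VersionedIDevice& device,
                                        const OptionalTimePoint& requested) {
    const auto [prepareModelDeadlineSupported, executionDeadlineSupported] =
            device.supportsDeadlines();
    (void)prepareModelDeadlineSupported;
    return executionDeadlineSupported ? requested : OptionalTimePoint{};
}
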
 struct InitialData {
     hal::Capabilities capabilities;
     hal::hidl_vec<hal::Extension> supportedExtensions;
     int32_t type;
     hal::hidl_string versionString;
     std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded;
+    std::pair<bool, bool> supportsDeadlines;
 };
 
 template <typename Device>
@@ -565,12 +621,21 @@
         return std::nullopt;
     }
 
+    const auto [supportsDeadlinesStatus, prepareModelDeadline, executionDeadline] =
+            supportsDeadlinesFunction(device);
+    if (supportsDeadlinesStatus != ErrorStatus::NONE) {
+        LOG(ERROR) << "IDevice::supportsDeadlines returned the error "
+                   << toString(supportsDeadlinesStatus);
+        return std::nullopt;
+    }
+
     return InitialData{
             /*.capabilities=*/std::move(capabilities),
             /*.supportedExtensions=*/std::move(supportedExtensions),
             /*.type=*/type,
             /*.versionString=*/std::move(versionString),
             /*.numberOfCacheFilesNeeded=*/{numModelCacheFiles, numDataCacheFiles},
+            /*.supportsDeadlines=*/{prepareModelDeadline, executionDeadline},
     };
 }
 
@@ -617,23 +682,26 @@
         return nullptr;
     }
 
-    auto [capabilities, supportedExtensions, type, versionString, numberOfCacheFilesNeeded] =
-            std::move(*initialData);
+    auto [capabilities, supportedExtensions, type, versionString, numberOfCacheFilesNeeded,
+          supportsDeadlines] = std::move(*initialData);
     return std::make_shared<VersionedIDevice>(
             std::move(capabilities), std::move(supportedExtensions), type, std::move(versionString),
-            numberOfCacheFilesNeeded, std::move(serviceName), std::move(core.value()));
+            numberOfCacheFilesNeeded, supportsDeadlines, std::move(serviceName),
+            std::move(core.value()));
 }
 
 VersionedIDevice::VersionedIDevice(hal::Capabilities capabilities,
                                    std::vector<hal::Extension> supportedExtensions, int32_t type,
                                    std::string versionString,
                                    std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded,
-                                   std::string serviceName, Core core)
+                                   std::pair<bool, bool> supportsDeadlines, std::string serviceName,
+                                   Core core)
     : kCapabilities(std::move(capabilities)),
       kSupportedExtensions(std::move(supportedExtensions)),
       kType(type),
       kVersionString(std::move(versionString)),
       kNumberOfCacheFilesNeeded(numberOfCacheFilesNeeded),
+      kSupportsDeadlines(supportsDeadlines),
       kServiceName(std::move(serviceName)),
       mCore(std::move(core)) {}
 
@@ -648,7 +716,13 @@
     // asynchronous calls are susceptible to hangs if the service crashes before
     // providing the response.
     const Return<bool> ret = device->linkToDeath(deathHandler, 0);
-    if (!ret.isOk() || ret != true) {
+    if (!ret.isOk()) {
+        LOG(ERROR) << "VersionedIDevice::Core::create failed to register a death recipient for the "
+                      "IDevice object because of failure: "
+                   << ret.description();
+        return {};
+    }
+    if (ret != true) {
         LOG(ERROR) << "VersionedIDevice::Core::create failed to register a death recipient for the "
                       "IDevice object.";
         return {};
@@ -733,7 +807,7 @@
         } else {
             LOG(ERROR) << context << " failure: " << ret.description();
         }
-        sendFailureMessage(callback);
+        sendFailureMessage(callback.get());
     }
     callback->wait();
     return ret;
@@ -797,6 +871,35 @@
     return ret;
 }
 
+int VersionedIDevice::wait() const {
+    std::unique_lock lock(mMutex);
+    // It's possible that another thread has already recovered the device.
+    // If so, repeating the recovery here is harmless but wasteful.
+    auto pingReturn = mCore.getDevice<V1_0::IDevice>()->ping();
+    if (pingReturn.isDeadObject()) {
+        VLOG(DRIVER) << "VersionedIDevice::wait -- Recovering " << kServiceName;
+        sp<V1_0::IDevice> recoveredDevice = V1_0::IDevice::getService(kServiceName);
+        if (recoveredDevice == nullptr) {
+            LOG(ERROR) << "VersionedIDevice::wait got a null IDevice for " << kServiceName;
+            return ANEURALNETWORKS_OP_FAILED;
+        }
+
+        auto core = Core::create(std::move(recoveredDevice));
+        if (!core.has_value()) {
+            LOG(ERROR) << "VersionedIDevice::wait failed to create Core.";
+            return ANEURALNETWORKS_OP_FAILED;
+        }
+
+        mCore = std::move(core.value());
+    } else if (!pingReturn.isOk()) {
+        LOG(ERROR) << "VersionedIDevice::wait failed -- IDevice::ping returned "
+                   << pingReturn.description();
+        return ANEURALNETWORKS_OP_FAILED;
+    }
+
+    return ANEURALNETWORKS_NO_ERROR;
+}
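
A caller that gets ANEURALNETWORKS_DEAD_OBJECT back from a prepare or execute call can invoke wait() to re-fetch the service handle before retrying. A hypothetical single-retry wrapper; all arguments are assumed to be in scope:

// Sketch (hypothetical caller): retry a compilation once after the service
// has been recovered.
auto [n, preparedModel] = device->prepareModel(makeModel, preference, priority,
                                               deadline, cacheDir, maybeToken);
if (n == ANEURALNETWORKS_DEAD_OBJECT && device->wait() == ANEURALNETWORKS_NO_ERROR) {
    std::tie(n, preparedModel) = device->prepareModel(
            makeModel, preference, priority, deadline, cacheDir, maybeToken);
}
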
+
 const Capabilities& VersionedIDevice::getCapabilities() const {
     return kCapabilities;
 }
@@ -1028,6 +1131,11 @@
         const PreparedModelCallback& callback, const char* prepareName,
         const std::string& serviceName) {
     callback.wait();
+    if (callback.isDeadObject()) {
+        LOG(ERROR) << prepareName << " on " << serviceName
+                   << " failed because the PreparedModel object is dead";
+        return {ANEURALNETWORKS_DEAD_OBJECT, nullptr};
+    }
     const ErrorStatus status = callback.getStatus();
     const sp<V1_0::IPreparedModel> preparedModel = callback.getPreparedModel();
 
@@ -1045,10 +1153,13 @@
 }
 
 std::pair<int, std::shared_ptr<VersionedIPreparedModel>> VersionedIDevice::prepareModelInternal(
-        const Model& model, ExecutionPreference preference, const std::string& cacheDir,
+        const Model& model, ExecutionPreference preference, Priority priority,
+        const OptionalTimePoint& deadline, const std::string& cacheDir,
         const std::optional<CacheToken>& maybeToken) const {
     // Note that some work within VersionedIDevice will be subtracted from the IPC layer
     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
+    const std::pair<int, std::shared_ptr<VersionedIPreparedModel>> kDeadObject = {
+            ANEURALNETWORKS_DEAD_OBJECT, nullptr};
 
     // Get cache files if they exist, otherwise create them.
     hidl_vec<hidl_handle> modelCache, dataCache;
@@ -1069,12 +1180,16 @@
     if (getDevice<V1_3::IDevice>() != nullptr) {
         const Return<ErrorStatus> ret = recoverable<ErrorStatus, V1_3::IDevice>(
                 __FUNCTION__,
-                [&model, &preference, &modelCache, &dataCache, &token,
+                [&model, preference, priority, &deadline, &modelCache, &dataCache, &token,
                  &callback](const sp<V1_3::IDevice>& device) {
-                    return device->prepareModel_1_3(model, preference, kDefaultPriority, {},
+                    return device->prepareModel_1_3(model, preference, priority, deadline,
                                                     modelCache, dataCache, token, callback);
                 },
                 callback);
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "prepareModel_1_3 failure: " << ret.description();
+            return kDeadObject;
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "prepareModel_1_3 failure: " << ret.description();
             return prepareModelFailure();
@@ -1110,6 +1225,10 @@
                                                         token, callback);
                     },
                     callback);
+            if (ret.isDeadObject()) {
+                LOG(ERROR) << "prepareModel_1_2 failure: " << ret.description();
+                return kDeadObject;
+            }
             if (!ret.isOk()) {
                 LOG(ERROR) << "prepareModel_1_2 failure: " << ret.description();
                 return prepareModelFailure();
@@ -1148,6 +1267,10 @@
                         return device->prepareModel_1_1(model11, preference, callback);
                     },
                     callback);
+            if (ret.isDeadObject()) {
+                LOG(ERROR) << "prepareModel_1_1 failure: " << ret.description();
+                return kDeadObject;
+            }
             if (!ret.isOk()) {
                 LOG(ERROR) << "prepareModel_1_1 failure: " << ret.description();
                 return prepareModelFailure();
@@ -1186,6 +1309,10 @@
                         return device->prepareModel(model10, callback);
                     },
                     callback);
+            if (ret.isDeadObject()) {
+                LOG(ERROR) << "prepareModel failure: " << ret.description();
+                return kDeadObject;
+            }
             if (!ret.isOk()) {
                 LOG(ERROR) << "prepareModel failure: " << ret.description();
                 return prepareModelFailure();
@@ -1208,11 +1335,15 @@
 }
 
 std::pair<int, std::shared_ptr<VersionedIPreparedModel>>
-VersionedIDevice::prepareModelFromCacheInternal(const std::string& cacheDir,
+VersionedIDevice::prepareModelFromCacheInternal(Priority priority,
+                                                const OptionalTimePoint& deadline,
+                                                const std::string& cacheDir,
                                                 const CacheToken& token) const {
     // Note that some work within VersionedIDevice will be subtracted from the IPC layer
     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModelFromCache");
     VLOG(COMPILATION) << "prepareModelFromCache";
+    const std::pair<int, std::shared_ptr<VersionedIPreparedModel>> kDeadObject = {
+            ANEURALNETWORKS_DEAD_OBJECT, nullptr};
 
     // Get cache files if they exist, otherwise return from the function early.
     hidl_vec<hidl_handle> modelCache, dataCache;
@@ -1226,11 +1357,16 @@
         const sp<PreparedModelCallback> callback = new PreparedModelCallback();
         const Return<ErrorStatus> ret = recoverable<ErrorStatus, V1_3::IDevice>(
                 __FUNCTION__,
-                [&modelCache, &dataCache, &token, &callback](const sp<V1_3::IDevice>& device) {
-                    return device->prepareModelFromCache_1_3(kDefaultPriority, {}, modelCache,
+                [priority, &deadline, &modelCache, &dataCache, &token,
+                 &callback](const sp<V1_3::IDevice>& device) {
+                    return device->prepareModelFromCache_1_3(priority, deadline, modelCache,
                                                              dataCache, token, callback);
                 },
                 callback);
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "prepareModelFromCache_1_3 failure: " << ret.description();
+            return kDeadObject;
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "prepareModelFromCache_1_3 failure: " << ret.description();
             return prepareModelFailure();
@@ -1252,6 +1388,10 @@
                     return device->prepareModelFromCache(modelCache, dataCache, token, callback);
                 },
                 callback);
+        if (ret.isDeadObject()) {
+            LOG(ERROR) << "prepareModelFromCache failure: " << ret.description();
+            return kDeadObject;
+        }
         if (!ret.isOk()) {
             LOG(ERROR) << "prepareModelFromCache failure: " << ret.description();
             return prepareModelFailure();
@@ -1276,11 +1416,13 @@
 }
 
 std::pair<int, std::shared_ptr<VersionedIPreparedModel>> VersionedIDevice::prepareModel(
-        const ModelFactory& makeModel, ExecutionPreference preference, const std::string& cacheDir,
+        const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+        const OptionalTimePoint& deadline, const std::string& cacheDir,
         const std::optional<CacheToken>& maybeToken) const {
     // Attempt to compile from cache if token is present.
     if (maybeToken.has_value()) {
-        const auto [n, preparedModel] = prepareModelFromCacheInternal(cacheDir, *maybeToken);
+        const auto [n, preparedModel] =
+                prepareModelFromCacheInternal(priority, deadline, cacheDir, *maybeToken);
         if (n == ANEURALNETWORKS_NO_ERROR) {
             return {n, preparedModel};
         }
@@ -1289,7 +1431,7 @@
     // Fallback to full compilation (possibly with token) if
     // prepareModelFromCache could not be used or failed.
     const Model model = makeModel();
-    return prepareModelInternal(model, preference, cacheDir, maybeToken);
+    return prepareModelInternal(model, preference, priority, deadline, cacheDir, maybeToken);
 }
 
 DeviceStatus VersionedIDevice::getStatus() const {
@@ -1339,6 +1481,10 @@
     return kNumberOfCacheFilesNeeded;
 }
 
+std::pair<bool, bool> VersionedIDevice::supportsDeadlines() const {
+    return kSupportsDeadlines;
+}
+
 const std::string& VersionedIDevice::getName() const {
     return kServiceName;
 }
diff --git a/runtime/VersionedInterfaces.h b/runtime/VersionedInterfaces.h
index b903fd8..5b72722 100644
--- a/runtime/VersionedInterfaces.h
+++ b/runtime/VersionedInterfaces.h
@@ -99,7 +99,7 @@
                      std::vector<hal::Extension> supportedExtensions, int32_t type,
                      std::string versionString,
                      std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded,
-                     std::string serviceName, Core core);
+                     std::pair<bool, bool> supportsDeadlines, std::string serviceName, Core core);
 
     /**
      * Gets the capabilities of a driver.
@@ -191,6 +191,10 @@
      *     execution.
      * @param preference Indicates the intended execution behavior of a prepared
      *     model.
+     * @param priority Priority of the prepared model relative to other prepared
+     *     models owned by an application.
+     * @param deadline Optional time point. If provided, prepareModel must
+     *     complete or be aborted by this time point.
      * @param cacheDir String specifying the cache directory.
      * @param maybeToken An optional caching token of length
      *     Constant::BYTE_SIZE_OF_CACHE_TOKEN identifying the prepared model.
@@ -212,8 +216,9 @@
      *         that has been prepared for execution, else nullptr.
      */
     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModel(
-            const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
-            const std::string& cacheDir, const std::optional<hal::CacheToken>& maybeToken) const;
+            const hal::ModelFactory& makeModel, hal::ExecutionPreference preference, hal::Priority,
+            const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<hal::CacheToken>& maybeToken) const;
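
The declaration leaves the hal::Priority parameter unnamed; at the call site it sits between the preference and the deadline. For illustration, a call with no deadline requirement, where the surrounding names are assumed:

// Sketch: a call site for the new overload, with no deadline (an empty
// OptionalTimePoint means "none"). Priority::MEDIUM stands in for whatever
// default the runtime chooses.
const auto [status, prepared] = device->prepareModel(
        makeModel, hal::ExecutionPreference::FAST_SINGLE_ANSWER,
        hal::Priority::MEDIUM, /*deadline=*/{}, cacheDir,
        /*maybeToken=*/std::nullopt);
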
 
     /**
      * Returns the current status of a driver.
@@ -310,6 +315,15 @@
     std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const;
 
     /**
+     * Returns which task deadlines are supported.
+     *
+     * @return A pair of:
+     *     - whether a prepareModel deadline is supported
+     *     - whether an execution deadline is supported
+     */
+    std::pair<bool, bool> supportsDeadlines() const;
+
+    /**
      * Returns the name of the service.
      *
      * @return Name of the service.
@@ -374,6 +388,14 @@
             const hal::hidl_vec<hal::BufferRole>& inputRoles,
             const hal::hidl_vec<hal::BufferRole>& outputRoles) const;
 
+    /**
+     * Blocks until the device is not in a bad state.
+     *
+     * @return Error code after waiting. ANEURALNETWORKS_NO_ERROR if the
+     *     device is not in a bad state.
+     */
+    int wait() const;
+
    private:
     // Cached initialization results.
     const hal::Capabilities kCapabilities;
@@ -381,12 +403,15 @@
     const int32_t kType;
     const std::string kVersionString;
     const std::pair<uint32_t, uint32_t> kNumberOfCacheFilesNeeded;
+    const std::pair<bool, bool> kSupportsDeadlines;
 
     // internal methods to prepare a model
     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelInternal(
-            const hal::Model& model, hal::ExecutionPreference preference,
-            const std::string& cacheDir, const std::optional<hal::CacheToken>& maybeToken) const;
+            const hal::Model& model, hal::ExecutionPreference preference, hal::Priority priority,
+            const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<hal::CacheToken>& maybeToken) const;
     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelFromCacheInternal(
+            hal::Priority priority, const hal::OptionalTimePoint& deadline,
             const std::string& cacheDir, const hal::CacheToken& token) const;
 
     /**
@@ -618,6 +643,8 @@
      *     model is to be executed.
      * @param measure Specifies whether or not to measure duration of the
      *     execution.
+     * @param deadline Optional time point. If provided, the execution must
+     *     complete or be aborted by this time point.
      * @param preferSynchronous 'true' to perform synchronous HAL execution when
      *     possible, 'false' to force asynchronous HAL execution.
      * @return A tuple consisting of:
@@ -649,7 +676,8 @@
      *         indicating that measurement is not available.
      */
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
-            const hal::Request& request, hal::MeasureTiming measure, bool preferSynchronous) const;
+            const hal::Request& request, hal::MeasureTiming measure,
+            const hal::OptionalTimePoint& deadline, bool preferSynchronous) const;
 
     /**
      * Creates a burst controller on a prepared model.
@@ -669,9 +697,11 @@
     friend class VersionedIDevice;
 
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
-            const hal::Request& request, hal::MeasureTiming timing) const;
+            const hal::Request& request, hal::MeasureTiming timing,
+            const hal::OptionalTimePoint& deadline) const;
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeSynchronously(
-            const hal::Request& request, hal::MeasureTiming measure) const;
+            const hal::Request& request, hal::MeasureTiming measure,
+            const hal::OptionalTimePoint& deadline) const;
 
     /**
      * Returns sp<V1_3::IPreparedModel> that is a downcast of the sp<V1_0::IPreparedModel>
diff --git a/runtime/test/TestIntrospectionControl.cpp b/runtime/test/TestIntrospectionControl.cpp
index b567f02..00537c3 100644
--- a/runtime/test/TestIntrospectionControl.cpp
+++ b/runtime/test/TestIntrospectionControl.cpp
@@ -312,7 +312,8 @@
 class TestPreparedModelLatest : public SamplePreparedModel {
    public:
     TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
-        : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER),
+        : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
+                              kDefaultPriority),
           mSuccess(success) {}
 
     Return<V1_0::ErrorStatus> execute(const V1_0::Request&,
diff --git a/runtime/test/TestPartitioning.cpp b/runtime/test/TestPartitioning.cpp
index 92ebb55..bc52103 100644
--- a/runtime/test/TestPartitioning.cpp
+++ b/runtime/test/TestPartitioning.cpp
@@ -156,7 +156,8 @@
 template <typename T>
 using MQDescriptorSync = ::android::hardware::MQDescriptorSync<T>;
 
-const Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
+constexpr Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
+constexpr int32_t kDefaultRuntimePriority = ANEURALNETWORKS_PRIORITY_DEFAULT;
 
 Capabilities makeCapabilities(float perf) {
     PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
@@ -652,9 +653,11 @@
 
     // Run the partitioning algorithm to create an ExecutionPlan.
     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
-                         ExecutePreference preference, ExecutionPlan* plan) {
+                         ExecutePreference preference, int32_t priority,
+                         const OptionalTimePoint& deadline, ExecutionPlan* plan) {
         return reinterpret_cast<ModelBuilder*>(getHandle())
-                ->partitionTheWork(devices, static_cast<uint32_t>(preference), plan);
+                ->partitionTheWork(devices, static_cast<uint32_t>(preference), priority, deadline,
+                                   plan);
     }
 
 #ifdef VERBOSE
@@ -1264,7 +1267,8 @@
     // didn't actually do any partitioning.
     const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
     ExecutionPlan planA;
-    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, &planA),
+    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &planA),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
     ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
@@ -1275,7 +1279,8 @@
     // didn't actually do any partitioning.
     const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
     ExecutionPlan planC;
-    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER, &planC),
+    ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &planC),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
     ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());
@@ -1286,7 +1291,8 @@
     // correct (model and step model)x(inputs and outputs).
     const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
     ExecutionPlan planB;
-    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, &planB),
+    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &planB),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
     const auto& stepsB = planB.forTest_compoundGetSteps();
@@ -1352,7 +1358,8 @@
                                        {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                        {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}});
     ExecutionPlan planA;
-    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, &planA),
+    ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &planA),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
     ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
@@ -1364,7 +1371,8 @@
                                        {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                        {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
     ExecutionPlan planB;
-    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, &planB),
+    ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &planB),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
     const auto& stepsB = planB.forTest_compoundGetSteps();
@@ -1452,7 +1460,8 @@
                                       {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
                                       {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
     ExecutionPlan plan;
-    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &plan),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
     ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
@@ -1490,7 +1499,8 @@
     ASSERT_TRUE(model.isValid());
 
     ExecutionPlan plan;
-    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &plan),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
     const auto& steps = plan.forTest_compoundGetSteps();
@@ -1635,7 +1645,8 @@
     // correct (model and step model)x(inputs and outputs).
     const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
     ExecutionPlan plan;
-    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+    ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+                                     kDefaultRuntimePriority, {}, &plan),
               ANEURALNETWORKS_NO_ERROR);
     ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
     const auto& steps = plan.forTest_compoundGetSteps();
@@ -1735,7 +1746,8 @@
         // No need to compare the original model to the model from the plan -- we
         // didn't actually do any partitioning.
         ExecutionPlan plan;
-        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+        ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+                                         kDefaultRuntimePriority, {}, &plan),
                   ANEURALNETWORKS_NO_ERROR);
         ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
         ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectDevice);
@@ -1787,7 +1799,8 @@
             // No need to compare the original model to the model from the plan -- we
             // didn't actually do any partitioning.
             ExecutionPlan plan;
-            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+                                             kDefaultRuntimePriority, {}, &plan),
                       ANEURALNETWORKS_NO_ERROR);
             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good");
@@ -1805,7 +1818,8 @@
             // No need to compare the original model to the model from the plan -- we
             // didn't actually do any partitioning.
             ExecutionPlan plan;
-            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+            ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+                                             kDefaultRuntimePriority, {}, &plan),
                       ANEURALNETWORKS_NO_ERROR);
             ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
             ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base");