Implement QoS in NNAPI
Bug: 136739795
Bug: 142902514
Bug: 145300530
Test: mma
Test: CtsNNAPITestCases
Test: NeuralNetworksTest_static
Change-Id: I9b4ed67102b6b1fae2b2ef50ddf746ed912163cc
Merged-In: I9b4ed67102b6b1fae2b2ef50ddf746ed912163cc
(cherry picked from commit 699ffdacfca7a42c059dc6f581eec913d74be9b3)
diff --git a/common/Utils.cpp b/common/Utils.cpp
index f753a16..368ef47 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -90,6 +90,29 @@
}
}
+static std::pair<int, OptionalTimePoint> makeTimePoint(uint64_t duration) {
+ const auto currentTime = std::chrono::steady_clock::now();
+ const auto currentTimeInNanoseconds =
+ std::chrono::time_point_cast<std::chrono::nanoseconds>(currentTime);
+ const uint64_t nanosecondsSinceEpoch = currentTimeInNanoseconds.time_since_epoch().count();
+
+ // check for overflow
+ if (std::numeric_limits<uint64_t>::max() - nanosecondsSinceEpoch < duration) {
+ LOG(ERROR) << "Launching execution failed due to time point overflow";
+ return {ANEURALNETWORKS_BAD_DATA, {}};
+ }
+ const uint64_t nanosecondsAtTimeout = nanosecondsSinceEpoch + duration;
+
+ OptionalTimePoint otp;
+ otp.nanoseconds(nanosecondsAtTimeout);
+ return {ANEURALNETWORKS_NO_ERROR, otp};
+}
+
+std::pair<int, OptionalTimePoint> makeTimePoint(std::optional<uint64_t> duration) {
+ const std::pair<int, OptionalTimePoint> empty = {ANEURALNETWORKS_NO_ERROR, {}};
+ return duration.has_value() ? makeTimePoint(*duration) : empty;
+}
+
static bool isExtensionOperandType(int32_t type) {
return static_cast<uint32_t>(type) > static_cast<uint32_t>(OperandTypeRange::BASE_MAX);
}
diff --git a/common/ValidateHal.cpp b/common/ValidateHal.cpp
index b99518b..74e2d7b 100644
--- a/common/ValidateHal.cpp
+++ b/common/ValidateHal.cpp
@@ -691,6 +691,10 @@
preference == ExecutionPreference::SUSTAINED_SPEED;
}
+bool validatePriority(Priority priority) {
+ return priority == Priority::LOW || priority == Priority::MEDIUM || priority == Priority::HIGH;
+}
+
bool validOperandType(V1_0::OperandType operandType) {
switch (operandType) {
case V1_0::OperandType::FLOAT32:
diff --git a/common/include/Utils.h b/common/include/Utils.h
index 2d341ef..0bb4d12 100644
--- a/common/include/Utils.h
+++ b/common/include/Utils.h
@@ -131,6 +131,12 @@
#define NN_RET_CHECK_GE(x, y) NN_RET_CHECK_OP(x, y, >=)
#define NN_RET_CHECK_GT(x, y) NN_RET_CHECK_OP(x, y, >)
+// Make an optional time point from an optional duration. If the operation
+// succeeds, a pair of {ANEURALNETWORKS_NO_ERROR, timepoint} is returned. If an
+// overflow occurs in this function, {ANEURALNETWORKS_BAD_DATA, empty} is
+// returned.
+std::pair<int, hal::OptionalTimePoint> makeTimePoint(std::optional<uint64_t> duration);
+
// Ensure that every user of FalseyErrorStream is linked to the
// correct instance, using the correct LOG_TAG
namespace {
diff --git a/common/include/ValidateHal.h b/common/include/ValidateHal.h
index 733c8b9..98d0653 100644
--- a/common/include/ValidateHal.h
+++ b/common/include/ValidateHal.h
@@ -53,6 +53,9 @@
// Verfies that the execution preference is valid.
bool validateExecutionPreference(hal::ExecutionPreference preference);
+// Verifies that the priority is valid.
+bool validatePriority(hal::Priority priority);
+
bool validOperationType(hal::V1_0::OperationType operation);
bool validOperationType(hal::V1_1::OperationType operation);
bool validOperationType(hal::V1_2::OperationType operation);
diff --git a/driver/sample/SampleDriver.cpp b/driver/sample/SampleDriver.cpp
index 132b457..f2a94e7 100644
--- a/driver/sample/SampleDriver.cpp
+++ b/driver/sample/SampleDriver.cpp
@@ -293,7 +293,7 @@
template <typename T_IExecutionCallback>
ErrorStatus executeBase(const Request& request, MeasureTiming measure, const Model& model,
const SampleDriver& driver, const std::vector<RunTimePoolInfo>& poolInfos,
- const OptionalTimePoint& /*deadline*/,
+ const OptionalTimePoint& deadline,
const sp<T_IExecutionCallback>& callback) {
NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION, "SampleDriver::executeBase");
VLOG(DRIVER) << "executeBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
@@ -309,6 +309,10 @@
notify(callback, ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
return ErrorStatus::INVALID_ARGUMENT;
}
+ if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+ notify(callback, ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
// This thread is intentionally detached because the sample driver service
// is expected to live forever.
@@ -343,7 +347,7 @@
static std::tuple<ErrorStatus, hidl_vec<OutputShape>, Timing> executeSynchronouslyBase(
const Request& request, MeasureTiming measure, const Model& model,
const SampleDriver& driver, const std::vector<RunTimePoolInfo>& poolInfos,
- const OptionalTimePoint& /*deadline*/) {
+ const OptionalTimePoint& deadline) {
NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
"SampleDriver::executeSynchronouslyBase");
VLOG(DRIVER) << "executeSynchronouslyBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
@@ -354,6 +358,9 @@
if (!validateRequest(request, model)) {
return {ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
}
+ if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+ return {ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
+ }
NNTRACE_FULL_SWITCH(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_INPUTS_AND_OUTPUTS,
"SampleDriver::executeSynchronouslyBase");
@@ -509,7 +516,7 @@
NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
"SampleDriver::configureExecutionBurst");
- const bool preferPowerOverLatency = (kPreference == hal::ExecutionPreference::LOW_POWER);
+ const bool preferPowerOverLatency = (kPreference == ExecutionPreference::LOW_POWER);
const auto pollingTimeWindow =
(preferPowerOverLatency ? std::chrono::microseconds{0} : getPollingTimeWindow());
diff --git a/driver/sample/SampleDriver.h b/driver/sample/SampleDriver.h
index 4163113..b463345 100644
--- a/driver/sample/SampleDriver.h
+++ b/driver/sample/SampleDriver.h
@@ -17,6 +17,8 @@
#ifndef ANDROID_FRAMEWORKS_ML_NN_DRIVER_SAMPLE_SAMPLE_DRIVER_H
#define ANDROID_FRAMEWORKS_ML_NN_DRIVER_SAMPLE_SAMPLE_DRIVER_H
+#include <hwbinder/IPCThreadState.h>
+
#include <string>
#include <vector>
@@ -42,7 +44,6 @@
: mName(name), mOperationResolver(operationResolver) {
android::nn::initVLogMask();
}
- ~SampleDriver() override {}
hal::Return<void> getCapabilities(getCapabilities_cb cb) override;
hal::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb cb) override;
hal::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb cb) override;
@@ -104,9 +105,15 @@
class SamplePreparedModel : public hal::IPreparedModel {
public:
SamplePreparedModel(const hal::Model& model, const SampleDriver* driver,
- hal::ExecutionPreference preference)
- : mModel(model), mDriver(driver), kPreference(preference) {}
- ~SamplePreparedModel() override {}
+ hal::ExecutionPreference preference, uid_t userId, hal::Priority priority)
+ : mModel(model),
+ mDriver(driver),
+ kPreference(preference),
+ kUserId(userId),
+ kPriority(priority) {
+ (void)kUserId;
+ (void)kPriority;
+ }
bool initialize();
hal::Return<hal::V1_0::ErrorStatus> execute(
const hal::V1_0::Request& request,
@@ -136,6 +143,8 @@
const SampleDriver* mDriver;
std::vector<RunTimePoolInfo> mPoolInfos;
const hal::ExecutionPreference kPreference;
+ const uid_t kUserId;
+ const hal::Priority kPriority;
};
} // namespace sample_driver
diff --git a/driver/sample/SampleDriverUtils.h b/driver/sample/SampleDriverUtils.h
index b40b040..77db00b 100644
--- a/driver/sample/SampleDriverUtils.h
+++ b/driver/sample/SampleDriverUtils.h
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <hwbinder/IPCThreadState.h>
+
#include <thread>
#include "HalInterfaces.h"
@@ -43,10 +45,11 @@
template <typename T_Model, typename T_IPreparedModelCallback>
hal::ErrorStatus prepareModelBase(const T_Model& model, const SampleDriver* driver,
- hal::ExecutionPreference preference, hal::Priority /*priority*/,
- const hal::OptionalTimePoint& /*deadline*/,
+ hal::ExecutionPreference preference, hal::Priority priority,
+ const hal::OptionalTimePoint& deadline,
const sp<T_IPreparedModelCallback>& callback,
bool isFullModelSupported = true) {
+ const uid_t userId = hardware::IPCThreadState::self()->getCallingUid();
if (callback.get() == nullptr) {
LOG(ERROR) << "invalid callback passed to prepareModelBase";
return hal::ErrorStatus::INVALID_ARGUMENT;
@@ -55,7 +58,8 @@
VLOG(DRIVER) << "prepareModelBase";
logModelToInfo(model);
}
- if (!validateModel(model) || !validateExecutionPreference(preference)) {
+ if (!validateModel(model) || !validateExecutionPreference(preference) ||
+ !validatePriority(priority)) {
notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
return hal::ErrorStatus::INVALID_ARGUMENT;
}
@@ -63,10 +67,14 @@
notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
return hal::ErrorStatus::NONE;
}
+ if (deadline.getDiscriminator() != hal::OptionalTimePoint::hidl_discriminator::none) {
+ notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
+ return hal::ErrorStatus::INVALID_ARGUMENT;
+ }
// asynchronously prepare the model from a new, detached thread
- std::thread([model, driver, preference, callback] {
+ std::thread([model, driver, preference, userId, priority, callback] {
sp<SamplePreparedModel> preparedModel =
- new SamplePreparedModel(convertToV1_3(model), driver, preference);
+ new SamplePreparedModel(convertToV1_3(model), driver, preference, userId, priority);
if (!preparedModel->initialize()) {
notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
return;
diff --git a/runtime/Callbacks.cpp b/runtime/Callbacks.cpp
index 6655a1a..6a81b9c 100644
--- a/runtime/Callbacks.cpp
+++ b/runtime/Callbacks.cpp
@@ -32,7 +32,7 @@
// PreparedModelCallback methods begin here
-Return<void> PreparedModelCallback::notifyInternal(ErrorStatus errorStatus,
+Return<void> PreparedModelCallback::notifyInternal(bool deadObject, ErrorStatus errorStatus,
const sp<V1_0::IPreparedModel>& preparedModel) {
{
std::lock_guard<std::mutex> hold(mMutex);
@@ -43,6 +43,7 @@
}
// store results and mark as notified
+ mDeadObject = deadObject;
mErrorStatus = errorStatus;
mPreparedModel = preparedModel;
mNotified = true;
@@ -54,17 +55,21 @@
Return<void> PreparedModelCallback::notify(V1_0::ErrorStatus errorStatus,
const sp<V1_0::IPreparedModel>& preparedModel) {
- return notifyInternal(static_cast<ErrorStatus>(errorStatus), preparedModel);
+ return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), preparedModel);
}
Return<void> PreparedModelCallback::notify_1_2(V1_0::ErrorStatus errorStatus,
const sp<V1_2::IPreparedModel>& preparedModel) {
- return notifyInternal(static_cast<ErrorStatus>(errorStatus), preparedModel);
+ return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), preparedModel);
}
Return<void> PreparedModelCallback::notify_1_3(ErrorStatus errorStatus,
const sp<V1_3::IPreparedModel>& preparedModel) {
- return notifyInternal(errorStatus, preparedModel);
+ return notifyInternal(false, errorStatus, preparedModel);
+}
+
+void PreparedModelCallback::notifyAsDeadObject() {
+ notifyInternal(true, ErrorStatus::GENERAL_FAILURE, nullptr);
}
void PreparedModelCallback::wait() const {
@@ -82,22 +87,31 @@
return mPreparedModel;
}
+bool PreparedModelCallback::isDeadObject() const {
+ wait();
+ return mDeadObject;
+}
+
// ExecutionCallback methods begin here
Return<void> ExecutionCallback::notify(V1_0::ErrorStatus errorStatus) {
- return notifyInternal(static_cast<ErrorStatus>(errorStatus), {}, kNoTiming);
+ return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), {}, kNoTiming);
}
Return<void> ExecutionCallback::notify_1_2(V1_0::ErrorStatus errorStatus,
const hidl_vec<OutputShape>& outputShapes,
const Timing& timing) {
- return notifyInternal(static_cast<ErrorStatus>(errorStatus), outputShapes, timing);
+ return notifyInternal(false, static_cast<ErrorStatus>(errorStatus), outputShapes, timing);
}
Return<void> ExecutionCallback::notify_1_3(V1_3::ErrorStatus errorStatus,
const hidl_vec<OutputShape>& outputShapes,
const Timing& timing) {
- return notifyInternal(errorStatus, outputShapes, timing);
+ return notifyInternal(false, errorStatus, outputShapes, timing);
+}
+
+void ExecutionCallback::notifyAsDeadObject() {
+ notifyInternal(true, ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
}
void ExecutionCallback::wait() const {
@@ -135,6 +149,11 @@
return mTiming;
}
+bool ExecutionCallback::isDeadObject() const {
+ wait();
+ return mDeadObject;
+}
+
bool ExecutionCallback::bindThread(std::thread asyncThread) {
std::lock_guard<std::mutex> lock(mMutex);
@@ -180,25 +199,30 @@
mOnFinish = finish;
}
-Return<void> ExecutionCallback::notifyInternal(ErrorStatus errorStatus,
- hidl_vec<OutputShape> outputShapes, Timing timing) {
+Return<void> ExecutionCallback::notifyInternal(bool deadObject, ErrorStatus errorStatus,
+ std::vector<OutputShape> outputShapes,
+ Timing timing) {
// check results
- if (errorStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
- // outputShapes must not be empty if OUTPUT_INSUFFICIENT_SIZE.
- if (outputShapes.size() == 0) {
- LOG(ERROR) << "Notified with empty output shape vector when OUTPUT_INSUFFICIENT_SIZE";
- errorStatus = ErrorStatus::GENERAL_FAILURE;
- outputShapes = {};
- timing = kNoTiming;
- }
- } else if (errorStatus != ErrorStatus::NONE) {
- // outputShapes must be empty if errorStatus is neither NONE nor OUTPUT_INSUFFICIENT_SIZE.
- if (outputShapes.size() != 0) {
- LOG(ERROR) << "Notified with non-empty output shape vector when error status is "
- "neither NONE nor OUTPUT_INSUFFICIENT_SIZE";
- errorStatus = ErrorStatus::GENERAL_FAILURE;
- outputShapes = {};
- timing = kNoTiming;
+ if (!deadObject) {
+ if (errorStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
+ // outputShapes must not be empty if OUTPUT_INSUFFICIENT_SIZE.
+ if (outputShapes.size() == 0) {
+ LOG(ERROR)
+ << "Notified with empty output shape vector when OUTPUT_INSUFFICIENT_SIZE";
+ errorStatus = ErrorStatus::GENERAL_FAILURE;
+ outputShapes = {};
+ timing = kNoTiming;
+ }
+ } else if (errorStatus != ErrorStatus::NONE) {
+ // outputShapes must be empty if errorStatus is neither NONE nor
+ // OUTPUT_INSUFFICIENT_SIZE.
+ if (outputShapes.size() != 0) {
+ LOG(ERROR) << "Notified with non-empty output shape vector when error status is "
+ "neither NONE nor OUTPUT_INSUFFICIENT_SIZE";
+ errorStatus = ErrorStatus::GENERAL_FAILURE;
+ outputShapes = {};
+ timing = kNoTiming;
+ }
}
}
@@ -211,8 +235,9 @@
return Void();
}
+ mDeadObject = deadObject;
mErrorStatus = errorStatus;
- mOutputShapes = outputShapes;
+ mOutputShapes = std::move(outputShapes);
mTiming = timing;
mNotified = true;
diff --git a/runtime/Callbacks.h b/runtime/Callbacks.h
index 1c484e4..7537025 100644
--- a/runtime/Callbacks.h
+++ b/runtime/Callbacks.h
@@ -143,6 +143,11 @@
const sp<hal::V1_3::IPreparedModel>& preparedModel) override;
/**
+ * Mark the callback object as a dead object. This acts as a call to notify.
+ */
+ void notifyAsDeadObject();
+
+ /**
* PreparedModelCallback::wait blocks until notify* has been called on the
* callback object.
*/
@@ -178,13 +183,21 @@
*/
sp<hal::V1_0::IPreparedModel> getPreparedModel() const;
+ /**
+ * Queries whether the object is dead.
+ *
+ * @return 'true' if dead, 'false' otherwise.
+ */
+ bool isDeadObject() const;
+
private:
- hal::Return<void> notifyInternal(hal::ErrorStatus errorStatus,
+ hal::Return<void> notifyInternal(bool deadObject, hal::ErrorStatus errorStatus,
const sp<hal::V1_0::IPreparedModel>& preparedModel);
mutable std::mutex mMutex;
mutable std::condition_variable mCondition;
bool mNotified GUARDED_BY(mMutex) = false;
+ bool mDeadObject = false;
hal::ErrorStatus mErrorStatus = hal::ErrorStatus::GENERAL_FAILURE;
sp<hal::V1_0::IPreparedModel> mPreparedModel;
};
@@ -317,6 +330,11 @@
}
/**
+ * Mark the callback object as a dead object. This acts as a call to notify.
+ */
+ void notifyAsDeadObject();
+
+ /**
* ExecutionCallback::wait blocks until notify* has been called on the
* callback object.
*/
@@ -428,6 +446,13 @@
*/
void setOnFinish(const ExecutionFinish& finish);
+ /**
+ * Queries whether the object is dead.
+ *
+ * @return 'true' if dead, 'false' otherwise.
+ */
+ bool isDeadObject() const;
+
private:
/*
* ExecutionCallback::notifyInternal stores the results of the execution
@@ -436,8 +461,8 @@
* before any call to wait or get* return. It then enables all prior and
* future wait calls on the ExecutionCallback object to proceed.
*/
- hal::Return<void> notifyInternal(hal::ErrorStatus errorStatus,
- hal::hidl_vec<hal::OutputShape> outputShapes,
+ hal::Return<void> notifyInternal(bool deadObject, hal::ErrorStatus errorStatus,
+ std::vector<hal::OutputShape> outputShapes,
hal::Timing timing);
// members
@@ -446,6 +471,7 @@
mutable std::thread mThread GUARDED_BY(mMutex);
ExecutionFinish mOnFinish GUARDED_BY(mMutex);
bool mNotified GUARDED_BY(mMutex) = false;
+ bool mDeadObject = false;
hal::ErrorStatus mErrorStatus = hal::ErrorStatus::GENERAL_FAILURE;
std::vector<hal::OutputShape> mOutputShapes;
hal::Timing mTiming = {};
diff --git a/runtime/CompilationBuilder.cpp b/runtime/CompilationBuilder.cpp
index 87923ff..cecad58 100644
--- a/runtime/CompilationBuilder.cpp
+++ b/runtime/CompilationBuilder.cpp
@@ -19,10 +19,12 @@
#include "CompilationBuilder.h"
#include <algorithm>
+#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
+
#include "BurstBuilder.h"
#include "ExecutionBuilder.h"
#include "ExecutionBurstController.h"
@@ -34,6 +36,8 @@
namespace android {
namespace nn {
+using namespace hal;
+
CompilationBuilder::CompilationBuilder(const ModelBuilder* model,
const std::vector<std::shared_ptr<Device>>& devices,
bool explicitDeviceList)
@@ -52,12 +56,15 @@
}
// TODO validate the rest
+ const auto [n, timeout] = makeTimePoint(mTimeoutDuration);
+ NN_RETURN_IF_ERROR(n);
+
mFinished = true;
if (mIsCacheInfoProvided) {
mPlan.setCaching(&mCacheDir, mToken);
}
if (mPartitioning) {
- int n = mModel->partitionTheWork(mDevices, mPreference, &mPlan);
+ int n = mModel->partitionTheWork(mDevices, mPreference, mPriority, timeout, &mPlan);
switch (n) {
case ANEURALNETWORKS_NO_ERROR:
return n;
@@ -90,7 +97,7 @@
VLOG(COMPILATION) << "CompilationBuilder::finish with CPU fallback";
mPlan.reset();
mPlan.becomeSingleStep(DeviceManager::getCpuDevice(), mModel);
- return mPlan.finish(mModel, mPreference);
+ return mPlan.finish(mModel, mPreference, mPriority, timeout);
}
int CompilationBuilder::setPreference(int32_t preference) {
@@ -124,6 +131,46 @@
return ANEURALNETWORKS_NO_ERROR;
}
+int CompilationBuilder::setPriority(int32_t priority) {
+ if (mFinished) {
+ LOG(ERROR) << "ANeuralNetworksCompilation_setPriority can't modify after compilation "
+ "finished";
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+ if (priority != ANEURALNETWORKS_PRIORITY_LOW && priority != ANEURALNETWORKS_PRIORITY_MEDIUM &&
+ priority != ANEURALNETWORKS_PRIORITY_HIGH) {
+ LOG(ERROR) << "ANeuralNetworksCompilation_setPriority invalid priority " << priority;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ mPriority = priority;
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int CompilationBuilder::setTimeoutDuration(uint64_t duration) {
+ if (mFinished) {
+ LOG(ERROR) << "ANeuralNetworksCompilation_setTimeout can't modify after compilation "
+ "finished";
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+ if (!mExplicitDeviceList || (mDevices.size() != 1)) {
+ LOG(ERROR) << "ANeuralNetworksCompilation_setTimeout called on an "
+ "ANeuralNetworksCompilation that was not created by "
+ "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const auto& device = mDevices.front();
+ const bool supportsCompilationDeadline = device->supportsDeadlines().first;
+ if (!supportsCompilationDeadline) {
+ LOG(ERROR)
+ << "ANeuralNetworksCompilation_setTimeout called on device that does not support "
+ "compilation timeouts.";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ mTimeoutDuration = duration;
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
int CompilationBuilder::setPartitioning(uint32_t partitioning) {
if (mFinished) {
LOG(ERROR) << "ANeuralNetworksCompilation_setPartitioning can't modify after compilation "
diff --git a/runtime/CompilationBuilder.h b/runtime/CompilationBuilder.h
index e7fc077..66ef5b9 100644
--- a/runtime/CompilationBuilder.h
+++ b/runtime/CompilationBuilder.h
@@ -17,7 +17,9 @@
#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_COMPILATION_BUILDER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_COMPILATION_BUILDER_H
+#include <chrono>
#include <memory>
+#include <optional>
#include <string>
#include <vector>
@@ -49,6 +51,10 @@
int setCaching(const std::string& cacheDir, const uint8_t* token);
+ int setPriority(int32_t priority);
+
+ int setTimeoutDuration(uint64_t duration);
+
int finish();
int createExecution(ExecutionBuilder** execution);
@@ -92,6 +98,12 @@
std::string mCacheDir;
uint8_t mToken[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN];
bool mIsCacheInfoProvided = false;
+
+ // Compilation priority information.
+ int32_t mPriority = ANEURALNETWORKS_PRIORITY_DEFAULT;
+
+ // Amount of time to complete or abort the execution.
+ std::optional<uint64_t> mTimeoutDuration;
};
} // namespace nn
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index 174faa0..2f52b1e 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -19,6 +19,7 @@
#include "ExecutionBuilder.h"
#include <algorithm>
+#include <limits>
#include <memory>
#include <mutex>
#include <optional>
@@ -267,6 +268,32 @@
return ANEURALNETWORKS_NO_ERROR;
}
+int ExecutionBuilder::setTimeoutDuration(uint64_t duration) {
+ if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
+ LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called on an ANeuralNetworksExecution "
+ "created from an ANeuralNetworksCompilation that was not created by "
+ "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const auto& device = mCompilation->mDevices.front();
+ const bool supportsExecutionDeadline = device->supportsDeadlines().second;
+ if (!supportsExecutionDeadline) {
+ LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called on device that does not support "
+ "execution timeouts.";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ if (mStarted) {
+ LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called after the execution has started.";
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+ mTimeoutDuration = duration;
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+std::optional<uint64_t> ExecutionBuilder::getTimeoutDuration() const {
+ return mTimeoutDuration;
+}
+
int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) {
if (!mFinished) {
LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the "
@@ -740,8 +767,12 @@
}
const MeasureTiming measure = measureTiming(mExecutionBuilder);
- const auto [n, outputShapes, timing] =
- mPreparedModel->execute(mInputs, mOutputs, mMemories, burstController, measure);
+ const auto [timePointN, deadline] = makeTimePoint(mExecutionBuilder->getTimeoutDuration());
+ if (timePointN != ANEURALNETWORKS_NO_ERROR) {
+ return {timePointN, {}, kNoTiming};
+ }
+ const auto [n, outputShapes, timing] = mPreparedModel->execute(
+ mInputs, mOutputs, mMemories, burstController, measure, deadline);
mExecutionBuilder->reportTiming(timing);
return {n, std::move(outputShapes), timing};
@@ -754,11 +785,14 @@
mDevice = DeviceManager::getCpuDevice();
mPreparedModel = nullptr;
const ModelFactory makeModel = [this] { return mModel->makeHidlModel(); };
- // TODO: Propagate user preference to this point instead of using default value of
- // ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.
+ // TODO: Propagate user preference and compilation priority to this point instead of using
+ // default values of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER and
+ // ANEURALNETWORKS_PRIORITY_MEDIUM
const ExecutionPreference preference =
static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
- const auto [n, preparedModel] = mDevice->prepareModel(makeModel, preference, {}, {});
+ const Priority priority = convertToHalPriority(ANEURALNETWORKS_PRIORITY_DEFAULT);
+ const auto [n, preparedModel] =
+ mDevice->prepareModel(makeModel, preference, priority, {}, {}, {});
mPreparedModel = preparedModel;
if (n != ANEURALNETWORKS_NO_ERROR) {
return {n, {}, kNoTiming};
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 65f08d2..9b30808 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -62,6 +62,10 @@
int getDuration(int32_t durationCode, uint64_t* duration) const;
+ int setTimeoutDuration(uint64_t duration);
+
+ std::optional<uint64_t> getTimeoutDuration() const;
+
int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
CHECK(synchronizationCallback != nullptr);
return compute(synchronizationCallback);
@@ -132,6 +136,9 @@
// Timing reported from the driver
hal::Timing mTiming = {};
+ // Amount of time to complete or abort the execution.
+ std::optional<uint64_t> mTimeoutDuration;
+
// Properties cannot be set once the execution has started.
std::atomic_bool mStarted = false;
diff --git a/runtime/ExecutionPlan.cpp b/runtime/ExecutionPlan.cpp
index e753793..395357d 100644
--- a/runtime/ExecutionPlan.cpp
+++ b/runtime/ExecutionPlan.cpp
@@ -65,8 +65,8 @@
// operation indices to be executed (COMPOUND body). The token will be re-hashed further by the
// device name, device version string, and the execution preference in this function.
int compile(const Device& device, const ModelBuilder& model, int executionPreference,
- const std::string& cacheDir, TokenHasher* token,
- std::shared_ptr<PreparedModel>* preparedModel) {
+ int compilationPriority, const OptionalTimePoint& deadline, const std::string& cacheDir,
+ TokenHasher* token, std::shared_ptr<PreparedModel>* preparedModel) {
CHECK(token != nullptr);
CHECK(preparedModel != nullptr);
*preparedModel = nullptr;
@@ -81,8 +81,9 @@
const ModelFactory makeModel = [&model] { return model.makeHidlModel(); };
const ExecutionPreference preference = static_cast<ExecutionPreference>(executionPreference);
+ const Priority priority = convertToHalPriority(compilationPriority);
const auto [n, returnedPreparedModel] =
- device.prepareModel(makeModel, preference, cacheDir, cacheToken);
+ device.prepareModel(makeModel, preference, priority, deadline, cacheDir, cacheToken);
*preparedModel = returnedPreparedModel;
return n;
}
@@ -423,7 +424,7 @@
}
int ExecutionStep::finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize,
- int32_t executionPreference) {
+ int32_t executionPreference, int32_t priority) {
CHECK(mDevice != nullptr);
for (const auto& stepModelOutput : mTempsAsStepModelOutputs) {
@@ -511,8 +512,8 @@
// TODO: Move compilation elsewhere?
VLOG(COMPILATION) << "ExecutionStep::finishStepModel, compilation on " << mDevice->getName();
- return compile(*mDevice, mStepModel, executionPreference, *mPlan->getCacheDir(), &mToken,
- &mPreparedStepModel);
+ return compile(*mDevice, mStepModel, executionPreference, priority, {}, *mPlan->getCacheDir(),
+ &mToken, &mPreparedStepModel);
}
void ExecutionStep::dump() const {
@@ -522,12 +523,13 @@
}
}
-int ExecutionPlan::CompoundBody::finish(const ModelBuilder* mainModel,
- int32_t executionPreference) {
+int ExecutionPlan::CompoundBody::finish(const ModelBuilder* mainModel, int32_t executionPreference,
+ int32_t priority, const OptionalTimePoint& deadline) {
+ CHECK(deadline.getDiscriminator() == OptionalTimePoint::hidl_discriminator::none);
findTempsAsStepModelOutputs();
for (const auto& step : mSteps) {
int n = step->finishStepModel(mainModel, &mHasStepModelOutputOfUnknownSize,
- executionPreference);
+ executionPreference, priority);
if (n != ANEURALNETWORKS_NO_ERROR) {
VLOG(COMPILATION) << "ExecutionPlan::CompoundBody::finish -- finishStepModel failed";
return n;
@@ -550,18 +552,20 @@
return ANEURALNETWORKS_NO_ERROR;
}
-int ExecutionPlan::SimpleBody::finish(const ModelBuilder*, int32_t executionPreference) {
+int ExecutionPlan::SimpleBody::finish(const ModelBuilder*, int32_t executionPreference,
+ int32_t priority, const OptionalTimePoint& deadline) {
CHECK(mDevice != nullptr);
VLOG(COMPILATION) << "ExecutionPlan::SimpleBody::finish, compilation";
- const int n =
- compile(*mDevice, *mModel, executionPreference, *mCacheDir, &mToken, &mPreparedModel);
+ const int n = compile(*mDevice, *mModel, executionPreference, priority, deadline, *mCacheDir,
+ &mToken, &mPreparedModel);
mSuccessfulFinish = (n == ANEURALNETWORKS_NO_ERROR);
return n;
}
-int ExecutionPlan::finish(const ModelBuilder* mainModel, int32_t executionPreference) {
+int ExecutionPlan::finish(const ModelBuilder* mainModel, int32_t executionPreference,
+ int32_t priority, const OptionalTimePoint& deadline) {
CHECK(mBody != nullptr);
- return mBody->finish(mainModel, executionPreference);
+ return mBody->finish(mainModel, executionPreference, priority, deadline);
}
ExecutionPlan::Controller::Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
@@ -908,7 +912,8 @@
}
int ModelBuilder::partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
- uint32_t preference, ExecutionPlan* plan) const {
+ uint32_t preference, uint32_t priority,
+ const OptionalTimePoint& deadline, ExecutionPlan* plan) const {
// This function uses a heuristic approach to partitioning the graph.
// It should be good enough for the first release.
@@ -931,7 +936,7 @@
VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: only one best device: "
<< bestDeviceIndex << " = " << devices[bestDeviceIndex]->getName();
plan->becomeSingleStep(devices[bestDeviceIndex], this);
- return plan->finish(this, preference);
+ return plan->finish(this, preference, priority, deadline);
}
// No easy solution, we need to split the work.
@@ -986,7 +991,7 @@
}
}
- int n = plan->finish(this, preference);
+ int n = plan->finish(this, preference, priority, deadline);
if (VLOG_IS_ON(COMPILATION)) {
VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: source model: ";
logModelToInfo(makeHidlModel());
diff --git a/runtime/ExecutionPlan.h b/runtime/ExecutionPlan.h
index bb87266..f2bb454 100644
--- a/runtime/ExecutionPlan.h
+++ b/runtime/ExecutionPlan.h
@@ -105,7 +105,7 @@
// *hasOutputOfUnknownSize to true; otherwise, leaves it
// unchanged.
int finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize,
- int32_t executionPreference);
+ int32_t executionPreference, int32_t priority);
const ModelBuilder* getStepModel() const { return &mStepModel; }
std::shared_ptr<Device> getDevice() const { return mDevice; }
@@ -264,7 +264,8 @@
void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);
- int finish(const ModelBuilder* mainModel, int32_t executionPreference);
+ int finish(const ModelBuilder* mainModel, int32_t executionPreference, int32_t priority,
+ const hal::OptionalTimePoint& deadline);
void recordTemporaryDef(uint32_t sourceOperandIndex, uint32_t stepIndex);
@@ -313,7 +314,8 @@
struct Body {
virtual ~Body() {}
virtual void dump() const = 0;
- virtual int finish(const ModelBuilder* mainModel, int32_t executionPreference) = 0;
+ virtual int finish(const ModelBuilder* mainModel, int32_t executionPreference,
+ int32_t priority, const hal::OptionalTimePoint& deadline) = 0;
virtual bool hasStepModelOutputsOfUnknownSize() const = 0;
virtual void forEachStepRoleOfInput(uint32_t index,
const StepRoleCallback& callback) const = 0;
@@ -328,7 +330,8 @@
: mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {}
void dump() const override;
- int finish(const ModelBuilder* mainModel, int32_t executionPreference) override;
+ int finish(const ModelBuilder* mainModel, int32_t executionPreference, int32_t priority,
+ const hal::OptionalTimePoint& deadline) override;
bool hasStepModelOutputsOfUnknownSize() const override { return false; }
void forEachStepRoleOfInput(uint32_t index,
const StepRoleCallback& callback) const override;
@@ -345,7 +348,8 @@
struct CompoundBody : Body {
void dump() const override;
- int finish(const ModelBuilder* mainModel, int32_t executionPreference) override;
+ int finish(const ModelBuilder* mainModel, int32_t executionPreference, int32_t priority,
+ const hal::OptionalTimePoint& deadline) override;
bool hasStepModelOutputsOfUnknownSize() const override {
return mHasStepModelOutputOfUnknownSize;
}
diff --git a/runtime/Manager.cpp b/runtime/Manager.cpp
index 11765ff..ff405ec 100644
--- a/runtime/Manager.cpp
+++ b/runtime/Manager.cpp
@@ -85,10 +85,14 @@
kInterface->getNumberOfCacheFilesNeeded();
return numModelCacheFiles > 0 || numDataCacheFiles > 0;
}
+ std::pair<bool, bool> supportsDeadlines() const override {
+ return kInterface->supportsDeadlines();
+ }
+ int wait() const override { return kInterface->wait(); }
std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
- const ModelFactory& makeModel, ExecutionPreference preference,
- const std::string& cacheDir,
+ const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+ const OptionalTimePoint& deadline, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const override;
std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor& desc) const override;
@@ -121,8 +125,8 @@
std::tuple<int, std::vector<OutputShape>, Timing> execute(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
- const std::shared_ptr<ExecutionBurstController>& burstController,
- MeasureTiming measure) const override;
+ const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
+ const OptionalTimePoint& deadline) const override;
std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
bool preferPowerOverLatency) const override {
@@ -217,10 +221,11 @@
}
std::pair<int, std::shared_ptr<PreparedModel>> DriverDevice::prepareModel(
- const ModelFactory& makeModel, ExecutionPreference preference, const std::string& cacheDir,
+ const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+ const OptionalTimePoint& deadline, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const {
- const auto [n, preparedModel] =
- kInterface->prepareModel(makeModel, preference, cacheDir, maybeToken);
+ const auto [n, preparedModel] = kInterface->prepareModel(makeModel, preference, priority,
+ deadline, cacheDir, maybeToken);
if (n != ANEURALNETWORKS_NO_ERROR) {
return {n, nullptr};
}
@@ -297,8 +302,8 @@
std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
const MemoryTracker& memories,
- const std::shared_ptr<ExecutionBurstController>& burstController,
- MeasureTiming measure) const {
+ const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
+ const OptionalTimePoint& deadline) const {
NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
// Make a copy of the memory tracker as we will append memory pools for pointer arguments.
@@ -375,7 +380,7 @@
if (!burstCompute || burstFallback) {
const bool preferSynchronous = DeviceManager::get()->syncExecHal();
std::tie(n, outputShapes, timing) =
- mPreparedModel->execute(request, measure, preferSynchronous);
+ mPreparedModel->execute(request, measure, deadline, preferSynchronous);
}
if (n != ANEURALNETWORKS_NO_ERROR) {
@@ -426,10 +431,14 @@
return kPerformance;
}
bool isCachingSupported() const override { return false; }
+ std::pair<bool, bool> supportsDeadlines() const override {
+ return {/*prepareModelDeadline=*/false, /*executionDeadline=*/false};
+ }
+ int wait() const override { return ANEURALNETWORKS_NO_ERROR; }
std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
- const ModelFactory& makeModel, ExecutionPreference preference,
- const std::string& cacheDir,
+ const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+ const OptionalTimePoint& deadline, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const override;
std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor&) const override {
@@ -463,8 +472,8 @@
std::tuple<int, std::vector<OutputShape>, Timing> execute(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
- const std::shared_ptr<ExecutionBurstController>& burstController,
- MeasureTiming measure) const override;
+ const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
+ const OptionalTimePoint& deadline) const override;
std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
bool /*preferPowerOverLatency*/) const override {
@@ -496,15 +505,20 @@
}
std::pair<int, std::shared_ptr<PreparedModel>> CpuDevice::prepareModel(
- const ModelFactory& makeModel, ExecutionPreference preference,
- const std::string& /*cacheDir*/, const std::optional<CacheToken>& maybeToken) const {
+ const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+ const OptionalTimePoint& deadline, const std::string& /*cacheDir*/,
+ const std::optional<CacheToken>& maybeToken) const {
CHECK(!maybeToken.has_value())
<< "Should never call prepareModel with cache information on CpuDevice";
const Model model = makeModel();
- if (!validateModel(model) || !validateExecutionPreference(preference)) {
+ if (!validateModel(model) || !validateExecutionPreference(preference) ||
+ !validatePriority(priority)) {
return {ANEURALNETWORKS_OP_FAILED, nullptr};
}
+ if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+ return {ANEURALNETWORKS_BAD_DATA, nullptr};
+ }
return CpuPreparedModel::create(model);
}
@@ -542,7 +556,11 @@
const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
const MemoryTracker& memories,
const std::shared_ptr<ExecutionBurstController>& /*burstController*/,
- MeasureTiming /*measure*/) const {
+ MeasureTiming /*measure*/, const OptionalTimePoint& deadline) const {
+ if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
+ return {ANEURALNETWORKS_BAD_DATA, {}, kNoTiming};
+ }
+
std::vector<RunTimePoolInfo> requestPoolInfos;
requestPoolInfos.reserve(memories.size());
for (const Memory* mem : memories) {
diff --git a/runtime/Manager.h b/runtime/Manager.h
index 3cd085c..68ca105 100644
--- a/runtime/Manager.h
+++ b/runtime/Manager.h
@@ -58,7 +58,7 @@
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
const std::shared_ptr<ExecutionBurstController>& burstController,
- hal::MeasureTiming measure) const = 0;
+ hal::MeasureTiming measure, const hal::OptionalTimePoint& deadline) const = 0;
virtual std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
bool preferPowerOverLatency) const = 0;
@@ -86,9 +86,12 @@
virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
virtual hal::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
virtual bool isCachingSupported() const = 0;
+ virtual std::pair<bool, bool> supportsDeadlines() const = 0;
+ virtual int wait() const = 0;
virtual std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
+ hal::Priority priority, const hal::OptionalTimePoint& deadline,
const std::string& cacheDir,
const std::optional<hal::CacheToken>& maybeToken) const = 0;
diff --git a/runtime/ModelBuilder.h b/runtime/ModelBuilder.h
index 7cfb685..c3ac96d 100644
--- a/runtime/ModelBuilder.h
+++ b/runtime/ModelBuilder.h
@@ -105,6 +105,7 @@
}
int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference,
+ uint32_t priority, const hal::OptionalTimePoint& deadline,
ExecutionPlan* plan) const;
private:
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 8913c0e..52f50a7 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -655,18 +655,30 @@
}
bool ANeuralNetworksDevice_supportsCompilationTimeout(const ANeuralNetworksDevice* device) {
- (void)device;
- return false;
+ if (device == nullptr) {
+ LOG(ERROR) << "ANeuralNetworksDevice_supportsCompilationTimeout passed a nullptr";
+ return false;
+ }
+ const Device* d = reinterpret_cast<const Device*>(device);
+ return d->supportsDeadlines().first;
}
bool ANeuralNetworksDevice_supportsExecutionTimeout(const ANeuralNetworksDevice* device) {
- (void)device;
- return false;
+ if (device == nullptr) {
+ LOG(ERROR) << "ANeuralNetworksDevice_supportsExecutionTimeout passed a nullptr";
+ return false;
+ }
+ const Device* d = reinterpret_cast<const Device*>(device);
+ return d->supportsDeadlines().second;
}
int ANeuralNetworksDevice_wait(const ANeuralNetworksDevice* device) {
- (void)device;
- return ANEURALNETWORKS_OP_FAILED;
+ if (device == nullptr) {
+ LOG(ERROR) << "ANeuralNetworksDevice_wait passed a nullptr";
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+ const Device* d = reinterpret_cast<const Device*>(device);
+ return d->wait();
}
int ANeuralNetworksModel_getSupportedOperationsForDevices(
@@ -1175,17 +1187,23 @@
int ANeuralNetworksCompilation_setPriority(ANeuralNetworksCompilation* compilation, int priority) {
NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_setPriority");
- (void)compilation;
- (void)priority;
- return ANEURALNETWORKS_OP_FAILED;
+ if (!compilation) {
+ LOG(ERROR) << "ANeuralNetworksCompilation_setPriority passed a nullptr";
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+ CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(compilation);
+ return c->setPriority(priority);
}
int ANeuralNetworksCompilation_setTimeout(ANeuralNetworksCompilation* compilation,
uint64_t duration) {
NNTRACE_RT(NNTRACE_PHASE_COMPILATION, "ANeuralNetworksCompilation_setTimeout");
- (void)compilation;
- (void)duration;
- return ANEURALNETWORKS_OP_FAILED;
+ if (!compilation) {
+ LOG(ERROR) << "ANeuralNetworksCompilation_setTimeout passed a nullptr";
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+ CompilationBuilder* c = reinterpret_cast<CompilationBuilder*>(compilation);
+ return c->setTimeoutDuration(duration);
}
int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
@@ -1321,9 +1339,13 @@
int ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution* execution, uint64_t duration) {
NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_setTimeout");
- (void)execution;
- (void)duration;
- return ANEURALNETWORKS_OP_FAILED;
+ if (!execution) {
+ LOG(ERROR) << "ANeuralNetworksExecution_setTimeout passed a nullptr";
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
+ return r->setTimeoutDuration(duration);
}
int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
diff --git a/runtime/VersionedInterfaces.cpp b/runtime/VersionedInterfaces.cpp
index c4c6e85..565ad6e 100644
--- a/runtime/VersionedInterfaces.cpp
+++ b/runtime/VersionedInterfaces.cpp
@@ -105,56 +105,49 @@
const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
-void sendFailureMessage(const sp<IPreparedModelCallback>& cb) {
+void sendFailureMessage(IPreparedModelCallback* cb) {
+ CHECK(cb != nullptr);
cb->notify_1_3(ErrorStatus::GENERAL_FAILURE, nullptr);
}
-void sendFailureMessage(const sp<PreparedModelCallback>& cb) {
- sendFailureMessage(static_cast<sp<IPreparedModelCallback>>(cb));
-}
-
-void sendFailureMessage(const sp<IExecutionCallback>& cb) {
- cb->notify_1_3(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
-}
-
// This class is thread safe
-template <typename ICallback>
+template <typename Callback>
class DeathHandler : public hidl_death_recipient {
public:
void serviceDied(uint64_t /*cookie*/, const wp<hidl::base::V1_0::IBase>& /*who*/) override {
LOG(ERROR) << "DeathHandler::serviceDied -- service unexpectedly died!";
std::lock_guard<std::mutex> hold(mMutex);
std::for_each(mCallbacks.begin(), mCallbacks.end(),
- [](const auto& cb) { sendFailureMessage(cb); });
+ [](const auto& cb) { cb->notifyAsDeadObject(); });
}
[[nodiscard]] base::ScopeGuard<std::function<void()>> protectCallback(
- const sp<ICallback>& callback) {
+ const sp<Callback>& callback) {
registerCallback(callback);
return ::android::base::make_scope_guard(
[this, callback] { unregisterCallback(callback); });
}
private:
- void registerCallback(const sp<ICallback>& callback) {
+ void registerCallback(const sp<Callback>& callback) {
std::lock_guard<std::mutex> hold(mMutex);
mCallbacks.push_back(callback);
}
- void unregisterCallback(const sp<ICallback>& callback) {
+ void unregisterCallback(const sp<Callback>& callback) {
std::lock_guard<std::mutex> hold(mMutex);
mCallbacks.erase(std::remove(mCallbacks.begin(), mCallbacks.end(), callback),
mCallbacks.end());
}
std::mutex mMutex;
- std::vector<sp<ICallback>> mCallbacks GUARDED_BY(mMutex);
+ std::vector<sp<Callback>> mCallbacks GUARDED_BY(mMutex);
};
} // anonymous namespace
-class IDeviceDeathHandler : public DeathHandler<IPreparedModelCallback> {};
-class IPreparedModelDeathHandler : public DeathHandler<IExecutionCallback> {};
+class IDeviceDeathHandler : public DeathHandler<PreparedModelCallback> {};
+class IPreparedModelDeathHandler : public DeathHandler<ExecutionCallback> {};
static std::pair<int, std::shared_ptr<VersionedIPreparedModel>> makeVersionedIPreparedModel(
sp<V1_0::IPreparedModel> preparedModel) {
@@ -169,7 +162,18 @@
// asynchronous calls are susceptible to hangs if the service crashes before
// providing the response.
const Return<bool> ret = preparedModel->linkToDeath(deathHandler, 0);
- if (!ret.isOk() || ret != true) {
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "makeVersionedIPreparedModel failed to register a death recipient for the "
+ "IPreparedModel object because the IPreparedModel object is dead.";
+ return {ANEURALNETWORKS_DEAD_OBJECT, nullptr};
+ }
+ if (!ret.isOk()) {
+ LOG(ERROR) << "makeVersionedIPreparedModel failed to register a death recipient for the "
+ "IPreparedModel object because of failure: "
+ << ret.description();
+ return {ANEURALNETWORKS_OP_FAILED, nullptr};
+ }
+ if (ret != true) {
LOG(ERROR) << "makeVersionedIPreparedModel failed to register a death recipient for the "
"IPreparedModel object.";
return {ANEURALNETWORKS_OP_FAILED, nullptr};
@@ -196,11 +200,17 @@
}
std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::executeAsynchronously(
- const Request& request, MeasureTiming measure) const {
+ const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline) const {
+ const auto failDeadObject = []() -> std::tuple<int, std::vector<OutputShape>, Timing> {
+ return {ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
+ };
const auto failWithStatus = [](ErrorStatus status) {
return getExecutionResult(status, {}, kNoTiming);
};
- const auto getResults = [](const ExecutionCallback& cb) {
+ const auto getResults = [failDeadObject](const ExecutionCallback& cb) {
+ if (cb.isDeadObject()) {
+ return failDeadObject();
+ }
return getExecutionResult(cb.getStatus(), cb.getOutputShapes(), cb.getTiming());
};
@@ -209,7 +219,12 @@
// version 1.3+ HAL
if (mPreparedModelV1_3 != nullptr) {
- Return<ErrorStatus> ret = mPreparedModelV1_3->execute_1_3(request, measure, {}, callback);
+ Return<ErrorStatus> ret =
+ mPreparedModelV1_3->execute_1_3(request, measure, deadline, callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "execute_1_3 failure: " << ret.description();
+ return failDeadObject();
+ }
if (!ret.isOk()) {
LOG(ERROR) << "execute_1_3 failure: " << ret.description();
return failWithStatus(ErrorStatus::GENERAL_FAILURE);
@@ -233,6 +248,10 @@
if (mPreparedModelV1_2 != nullptr) {
Return<V1_0::ErrorStatus> ret =
mPreparedModelV1_2->execute_1_2(request10, measure, callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "execute_1_2 failure: " << ret.description();
+ return failDeadObject();
+ }
if (!ret.isOk()) {
LOG(ERROR) << "execute_1_2 failure: " << ret.description();
return failWithStatus(ErrorStatus::GENERAL_FAILURE);
@@ -249,6 +268,10 @@
// version 1.0 HAL
if (mPreparedModelV1_0 != nullptr) {
Return<V1_0::ErrorStatus> ret = mPreparedModelV1_0->execute(request10, callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "execute failure: " << ret.description();
+ return failDeadObject();
+ }
if (!ret.isOk()) {
LOG(ERROR) << "execute failure: " << ret.description();
return failWithStatus(ErrorStatus::GENERAL_FAILURE);
@@ -268,18 +291,24 @@
}
std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::executeSynchronously(
- const Request& request, MeasureTiming measure) const {
+ const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline) const {
+ const std::tuple<int, std::vector<OutputShape>, Timing> kDeadObject = {
+ ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
const auto kFailure = getExecutionResult(ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
// version 1.3+ HAL
if (mPreparedModelV1_3 != nullptr) {
std::tuple<int, std::vector<OutputShape>, Timing> result;
Return<void> ret = mPreparedModelV1_3->executeSynchronously_1_3(
- request, measure, {},
+ request, measure, deadline,
[&result](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes,
const Timing& timing) {
result = getExecutionResult(error, outputShapes, timing);
});
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "executeSynchronously_1_3 failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "executeSynchronously_1_3 failure: " << ret.description();
return kFailure;
@@ -303,6 +332,10 @@
const Timing& timing) {
result = getExecutionResult(convertToV1_3(error), outputShapes, timing);
});
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "executeSynchronously failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "executeSynchronously failure: " << ret.description();
return kFailure;
@@ -311,18 +344,19 @@
}
// Fallback to asynchronous execution.
- return executeAsynchronously(request, measure);
+ return executeAsynchronously(request, measure, deadline);
}
std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execute(
- const Request& request, MeasureTiming measure, bool preferSynchronous) const {
+ const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline,
+ bool preferSynchronous) const {
if (preferSynchronous) {
VLOG(EXECUTION) << "Before executeSynchronously() " << SHOW_IF_DEBUG(toString(request));
- return executeSynchronously(request, measure);
+ return executeSynchronously(request, measure, deadline);
}
VLOG(EXECUTION) << "Before executeAsynchronously() " << SHOW_IF_DEBUG(toString(request));
- return executeAsynchronously(request, measure);
+ return executeAsynchronously(request, measure, deadline);
}
// This is the amount of time the ExecutionBurstController should spend polling
@@ -505,12 +539,34 @@
return {ErrorStatus::NONE, 0, 0};
}
+static std::tuple<ErrorStatus, bool, bool> supportsDeadlinesFunction(V1_3::IDevice* device) {
+ CHECK(device != nullptr);
+ constexpr std::tuple<ErrorStatus, bool, bool> kFailure = {ErrorStatus::GENERAL_FAILURE, false,
+ false};
+ std::tuple<ErrorStatus, bool, bool> result = kFailure;
+ const Return<void> ret =
+ device->supportsDeadlines([&result](bool prepareModelDeadline, bool executionDeadline) {
+ result = {ErrorStatus::NONE, prepareModelDeadline, executionDeadline};
+ });
+ if (!ret.isOk()) {
+ LOG(ERROR) << "supportsDeadlines failure: " << ret.description();
+ return kFailure;
+ }
+ return result;
+}
+
+static std::tuple<ErrorStatus, bool, bool> supportsDeadlinesFunction(V1_0::IDevice* device) {
+ CHECK(device != nullptr);
+ return {ErrorStatus::NONE, /*prepareModelDeadline=*/false, /*executionDeadline=*/false};
+}
+
struct InitialData {
hal::Capabilities capabilities;
hal::hidl_vec<hal::Extension> supportedExtensions;
int32_t type;
hal::hidl_string versionString;
std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded;
+ std::pair<bool, bool> supportsDeadlines;
};
template <typename Device>
@@ -565,12 +621,21 @@
return std::nullopt;
}
+ const auto [supportsDeadlinesStatus, prepareModelDeadline, executionDeadline] =
+ supportsDeadlinesFunction(device);
+ if (supportsDeadlinesStatus != ErrorStatus::NONE) {
+ LOG(ERROR) << "IDevice::supportsDeadlines returned the error "
+ << toString(supportsDeadlinesStatus);
+ return std::nullopt;
+ }
+
return InitialData{
/*.capabilities=*/std::move(capabilities),
/*.supportedExtensions=*/std::move(supportedExtensions),
/*.type=*/type,
/*.versionString=*/std::move(versionString),
/*.numberOfCacheFilesNeeded=*/{numModelCacheFiles, numDataCacheFiles},
+ /*.supportsDeadlines=*/{prepareModelDeadline, executionDeadline},
};
}
@@ -617,23 +682,26 @@
return nullptr;
}
- auto [capabilities, supportedExtensions, type, versionString, numberOfCacheFilesNeeded] =
- std::move(*initialData);
+ auto [capabilities, supportedExtensions, type, versionString, numberOfCacheFilesNeeded,
+ supportsDeadlines] = std::move(*initialData);
return std::make_shared<VersionedIDevice>(
std::move(capabilities), std::move(supportedExtensions), type, std::move(versionString),
- numberOfCacheFilesNeeded, std::move(serviceName), std::move(core.value()));
+ numberOfCacheFilesNeeded, supportsDeadlines, std::move(serviceName),
+ std::move(core.value()));
}
VersionedIDevice::VersionedIDevice(hal::Capabilities capabilities,
std::vector<hal::Extension> supportedExtensions, int32_t type,
std::string versionString,
std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded,
- std::string serviceName, Core core)
+ std::pair<bool, bool> supportsDeadlines, std::string serviceName,
+ Core core)
: kCapabilities(std::move(capabilities)),
kSupportedExtensions(std::move(supportedExtensions)),
kType(type),
kVersionString(std::move(versionString)),
kNumberOfCacheFilesNeeded(numberOfCacheFilesNeeded),
+ kSupportsDeadlines(supportsDeadlines),
kServiceName(std::move(serviceName)),
mCore(std::move(core)) {}
@@ -648,7 +716,13 @@
// asynchronous calls are susceptible to hangs if the service crashes before
// providing the response.
const Return<bool> ret = device->linkToDeath(deathHandler, 0);
- if (!ret.isOk() || ret != true) {
+ if (!ret.isOk()) {
+ LOG(ERROR) << "VersionedIDevice::Core::create failed to register a death recipient for the "
+ "IDevice object because of failure: "
+ << ret.description();
+ return {};
+ }
+ if (ret != true) {
LOG(ERROR) << "VersionedIDevice::Core::create failed to register a death recipient for the "
"IDevice object.";
return {};
@@ -733,7 +807,7 @@
} else {
LOG(ERROR) << context << " failure: " << ret.description();
}
- sendFailureMessage(callback);
+ sendFailureMessage(callback.get());
}
callback->wait();
return ret;
@@ -797,6 +871,35 @@
return ret;
}
+int VersionedIDevice::wait() const {
+ std::unique_lock lock(mMutex);
+ // It's possible that another device has already done the recovery.
+ // It's harmless but wasteful for us to do so in this case.
+ auto pingReturn = mCore.getDevice<V1_0::IDevice>()->ping();
+ if (pingReturn.isDeadObject()) {
+ VLOG(DRIVER) << "VersionedIDevice::wait -- Recovering " << kServiceName;
+ sp<V1_0::IDevice> recoveredDevice = V1_0::IDevice::getService(kServiceName);
+ if (recoveredDevice == nullptr) {
+ LOG(ERROR) << "VersionedIDevice::wait got a null IDevice for " << kServiceName;
+ return ANEURALNETWORKS_OP_FAILED;
+ }
+
+ auto core = Core::create(std::move(recoveredDevice));
+ if (!core.has_value()) {
+ LOG(ERROR) << "VersionedIDevice::wait failed to create Core.";
+ return ANEURALNETWORKS_OP_FAILED;
+ }
+
+ mCore = std::move(core.value());
+ } else if (!pingReturn.isOk()) {
+ LOG(ERROR) << "VersionedIDevice::wait failed -- IDevice::ping returned "
+ << pingReturn.description();
+ return ANEURALNETWORKS_OP_FAILED;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
const Capabilities& VersionedIDevice::getCapabilities() const {
return kCapabilities;
}
@@ -1028,6 +1131,11 @@
const PreparedModelCallback& callback, const char* prepareName,
const std::string& serviceName) {
callback.wait();
+ if (callback.isDeadObject()) {
+ LOG(ERROR) << prepareName << " on " << serviceName
+ << " failed because the PreparedModel object is dead";
+ return {ANEURALNETWORKS_DEAD_OBJECT, nullptr};
+ }
const ErrorStatus status = callback.getStatus();
const sp<V1_0::IPreparedModel> preparedModel = callback.getPreparedModel();
@@ -1045,10 +1153,13 @@
}
std::pair<int, std::shared_ptr<VersionedIPreparedModel>> VersionedIDevice::prepareModelInternal(
- const Model& model, ExecutionPreference preference, const std::string& cacheDir,
+ const Model& model, ExecutionPreference preference, Priority priority,
+ const OptionalTimePoint& deadline, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const {
// Note that some work within VersionedIDevice will be subtracted from the IPC layer
NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
+ const std::pair<int, std::shared_ptr<VersionedIPreparedModel>> kDeadObject = {
+ ANEURALNETWORKS_DEAD_OBJECT, nullptr};
// Get cache files if they exist, otherwise create them.
hidl_vec<hidl_handle> modelCache, dataCache;
@@ -1069,12 +1180,16 @@
if (getDevice<V1_3::IDevice>() != nullptr) {
const Return<ErrorStatus> ret = recoverable<ErrorStatus, V1_3::IDevice>(
__FUNCTION__,
- [&model, &preference, &modelCache, &dataCache, &token,
+ [&model, preference, priority, &deadline, &modelCache, &dataCache, &token,
&callback](const sp<V1_3::IDevice>& device) {
- return device->prepareModel_1_3(model, preference, kDefaultPriority, {},
+ return device->prepareModel_1_3(model, preference, priority, deadline,
modelCache, dataCache, token, callback);
},
callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "prepareModel_1_3 failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "prepareModel_1_3 failure: " << ret.description();
return prepareModelFailure();
@@ -1110,6 +1225,10 @@
token, callback);
},
callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "prepareModel_1_2 failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "prepareModel_1_2 failure: " << ret.description();
return prepareModelFailure();
@@ -1148,6 +1267,10 @@
return device->prepareModel_1_1(model11, preference, callback);
},
callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "prepareModel_1_1 failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "prepareModel_1_1 failure: " << ret.description();
return prepareModelFailure();
@@ -1186,6 +1309,10 @@
return device->prepareModel(model10, callback);
},
callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "prepareModel failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "prepareModel failure: " << ret.description();
return prepareModelFailure();
@@ -1208,11 +1335,15 @@
}
std::pair<int, std::shared_ptr<VersionedIPreparedModel>>
-VersionedIDevice::prepareModelFromCacheInternal(const std::string& cacheDir,
+VersionedIDevice::prepareModelFromCacheInternal(Priority priority,
+ const OptionalTimePoint& deadline,
+ const std::string& cacheDir,
const CacheToken& token) const {
// Note that some work within VersionedIDevice will be subtracted from the IPC layer
NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModelFromCache");
VLOG(COMPILATION) << "prepareModelFromCache";
+ const std::pair<int, std::shared_ptr<VersionedIPreparedModel>> kDeadObject = {
+ ANEURALNETWORKS_DEAD_OBJECT, nullptr};
// Get cache files if they exist, otherwise return from the function early.
hidl_vec<hidl_handle> modelCache, dataCache;
@@ -1226,11 +1357,16 @@
const sp<PreparedModelCallback> callback = new PreparedModelCallback();
const Return<ErrorStatus> ret = recoverable<ErrorStatus, V1_3::IDevice>(
__FUNCTION__,
- [&modelCache, &dataCache, &token, &callback](const sp<V1_3::IDevice>& device) {
- return device->prepareModelFromCache_1_3(kDefaultPriority, {}, modelCache,
+ [priority, &deadline, &modelCache, &dataCache, &token,
+ &callback](const sp<V1_3::IDevice>& device) {
+ return device->prepareModelFromCache_1_3(priority, deadline, modelCache,
dataCache, token, callback);
},
callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "prepareModelFromCache_1_3 failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "prepareModelFromCache_1_3 failure: " << ret.description();
return prepareModelFailure();
@@ -1252,6 +1388,10 @@
return device->prepareModelFromCache(modelCache, dataCache, token, callback);
},
callback);
+ if (ret.isDeadObject()) {
+ LOG(ERROR) << "prepareModelFromCache failure: " << ret.description();
+ return kDeadObject;
+ }
if (!ret.isOk()) {
LOG(ERROR) << "prepareModelFromCache failure: " << ret.description();
return prepareModelFailure();
@@ -1276,11 +1416,13 @@
}
std::pair<int, std::shared_ptr<VersionedIPreparedModel>> VersionedIDevice::prepareModel(
- const ModelFactory& makeModel, ExecutionPreference preference, const std::string& cacheDir,
+ const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
+ const OptionalTimePoint& deadline, const std::string& cacheDir,
const std::optional<CacheToken>& maybeToken) const {
// Attempt to compile from cache if token is present.
if (maybeToken.has_value()) {
- const auto [n, preparedModel] = prepareModelFromCacheInternal(cacheDir, *maybeToken);
+ const auto [n, preparedModel] =
+ prepareModelFromCacheInternal(priority, deadline, cacheDir, *maybeToken);
if (n == ANEURALNETWORKS_NO_ERROR) {
return {n, preparedModel};
}
@@ -1289,7 +1431,7 @@
// Fallback to full compilation (possibly with token) if
// prepareModelFromCache could not be used or failed.
const Model model = makeModel();
- return prepareModelInternal(model, preference, cacheDir, maybeToken);
+ return prepareModelInternal(model, preference, priority, deadline, cacheDir, maybeToken);
}
DeviceStatus VersionedIDevice::getStatus() const {
@@ -1339,6 +1481,10 @@
return kNumberOfCacheFilesNeeded;
}
+std::pair<bool, bool> VersionedIDevice::supportsDeadlines() const {
+ return kSupportsDeadlines;
+}
+
const std::string& VersionedIDevice::getName() const {
return kServiceName;
}
diff --git a/runtime/VersionedInterfaces.h b/runtime/VersionedInterfaces.h
index b903fd8..5b72722 100644
--- a/runtime/VersionedInterfaces.h
+++ b/runtime/VersionedInterfaces.h
@@ -99,7 +99,7 @@
std::vector<hal::Extension> supportedExtensions, int32_t type,
std::string versionString,
std::pair<uint32_t, uint32_t> numberOfCacheFilesNeeded,
- std::string serviceName, Core core);
+ std::pair<bool, bool> supportsDeadlines, std::string serviceName, Core core);
/**
* Gets the capabilities of a driver.
@@ -191,6 +191,10 @@
* execution.
* @param preference Indicates the intended execution behavior of a prepared
* model.
+ * @param priority Priority of the prepared model relative to other prepared
+ * models owned by an application.
+ * @param deadline Optional time point. If provided, prepareModel must
+ * complete or be aborted by this time point.
* @param cacheDir String specifying the cache directory.
* @param maybeToken An optional caching token of length
* Constant::BYTE_SIZE_OF_CACHE_TOKEN identifying the prepared model.
@@ -212,8 +216,9 @@
* that has been prepared for execution, else nullptr.
*/
std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModel(
- const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
- const std::string& cacheDir, const std::optional<hal::CacheToken>& maybeToken) const;
+ const hal::ModelFactory& makeModel, hal::ExecutionPreference preference, hal::Priority,
+ const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+ const std::optional<hal::CacheToken>& maybeToken) const;
/**
* Returns the current status of a driver.
@@ -310,6 +315,15 @@
std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const;
/**
+ * Returns which task deadlines are supported.
+ *
+ * @return A pair of:
+ * - prepareModelDeadline is supported
+ * - executionDeadline is supported
+ */
+ std::pair<bool, bool> supportsDeadlines() const;
+
+ /**
* Returns the name of the service.
*
* @return Name of the service.
@@ -374,6 +388,14 @@
const hal::hidl_vec<hal::BufferRole>& inputRoles,
const hal::hidl_vec<hal::BufferRole>& outputRoles) const;
+ /**
+ * Blocks until the device is not in a bad state.
+ *
+ * @return Error code after waiting. ANEURALNETWORKS_NO_ERROR if device is
+ * not in a bad state.
+ */
+ int wait() const;
+
private:
// Cached initialization results.
const hal::Capabilities kCapabilities;
@@ -381,12 +403,15 @@
const int32_t kType;
const std::string kVersionString;
const std::pair<uint32_t, uint32_t> kNumberOfCacheFilesNeeded;
+ const std::pair<bool, bool> kSupportsDeadlines;
// internal methods to prepare a model
std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelInternal(
- const hal::Model& model, hal::ExecutionPreference preference,
- const std::string& cacheDir, const std::optional<hal::CacheToken>& maybeToken) const;
+ const hal::Model& model, hal::ExecutionPreference preference, hal::Priority priority,
+ const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+ const std::optional<hal::CacheToken>& maybeToken) const;
std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelFromCacheInternal(
+ hal::Priority priority, const hal::OptionalTimePoint& deadline,
const std::string& cacheDir, const hal::CacheToken& token) const;
/**
@@ -618,6 +643,8 @@
* model is to be executed.
* @param measure Specifies whether or not to measure duration of the
* execution.
+ * @param deadline Optional time point. If provided, the execution must
+ * complete or be aborted by this time point.
* @param preferSynchronous 'true' to perform synchronous HAL execution when
* possible, 'false' to force asynchronous HAL execution.
* @return A tuple consisting of:
@@ -649,7 +676,8 @@
* indicating that measurement is not available.
*/
std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
- const hal::Request& request, hal::MeasureTiming measure, bool preferSynchronous) const;
+ const hal::Request& request, hal::MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline, bool preferSynchronous) const;
/**
* Creates a burst controller on a prepared model.
@@ -669,9 +697,11 @@
friend class VersionedIDevice;
std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
- const hal::Request& request, hal::MeasureTiming timing) const;
+ const hal::Request& request, hal::MeasureTiming timing,
+ const hal::OptionalTimePoint& deadline) const;
std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeSynchronously(
- const hal::Request& request, hal::MeasureTiming measure) const;
+ const hal::Request& request, hal::MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline) const;
/**
* Returns sp<V1_3::IPreparedModel> that is a downcast of the sp<V1_0::IPreparedModel>
diff --git a/runtime/test/TestIntrospectionControl.cpp b/runtime/test/TestIntrospectionControl.cpp
index b567f02..00537c3 100644
--- a/runtime/test/TestIntrospectionControl.cpp
+++ b/runtime/test/TestIntrospectionControl.cpp
@@ -312,7 +312,8 @@
class TestPreparedModelLatest : public SamplePreparedModel {
public:
TestPreparedModelLatest(const HidlModel& model, const SampleDriver* driver, Success success)
- : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER),
+ : SamplePreparedModel(model, driver, ExecutionPreference::FAST_SINGLE_ANSWER, uid_t{},
+ kDefaultPriority),
mSuccess(success) {}
Return<V1_0::ErrorStatus> execute(const V1_0::Request&,
diff --git a/runtime/test/TestPartitioning.cpp b/runtime/test/TestPartitioning.cpp
index 92ebb55..bc52103 100644
--- a/runtime/test/TestPartitioning.cpp
+++ b/runtime/test/TestPartitioning.cpp
@@ -156,7 +156,8 @@
template <typename T>
using MQDescriptorSync = ::android::hardware::MQDescriptorSync<T>;
-const Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
+constexpr Timing kBadTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
+constexpr int32_t kDefaultRuntimePriority = ANEURALNETWORKS_PRIORITY_DEFAULT;
Capabilities makeCapabilities(float perf) {
PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf};
@@ -652,9 +653,11 @@
// Run the partitioning algorithm to create an ExecutionPlan.
int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
- ExecutePreference preference, ExecutionPlan* plan) {
+ ExecutePreference preference, int32_t priority,
+ const OptionalTimePoint& deadline, ExecutionPlan* plan) {
return reinterpret_cast<ModelBuilder*>(getHandle())
- ->partitionTheWork(devices, static_cast<uint32_t>(preference), plan);
+ ->partitionTheWork(devices, static_cast<uint32_t>(preference), priority, deadline,
+ plan);
}
#ifdef VERBOSE
@@ -1264,7 +1267,8 @@
// didn't actually do any partitioning.
const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}});
ExecutionPlan planA;
- ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, &planA),
+ ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &planA),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
@@ -1275,7 +1279,8 @@
// didn't actually do any partitioning.
const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}});
ExecutionPlan planC;
- ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER, &planC),
+ ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &planC),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice());
@@ -1286,7 +1291,8 @@
// correct (model and step model)x(inputs and outputs).
const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}});
ExecutionPlan planB;
- ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, &planB),
+ ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &planB),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
const auto& stepsB = planB.forTest_compoundGetSteps();
@@ -1352,7 +1358,8 @@
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
{"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}});
ExecutionPlan planA;
- ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, &planA),
+ ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &planA),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr);
@@ -1364,7 +1371,8 @@
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
{"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
ExecutionPlan planB;
- ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, &planB),
+ ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &planB),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
const auto& stepsB = planB.forTest_compoundGetSteps();
@@ -1452,7 +1460,8 @@
{"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U},
{"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}});
ExecutionPlan plan;
- ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+ ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr);
@@ -1490,7 +1499,8 @@
ASSERT_TRUE(model.isValid());
ExecutionPlan plan;
- ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+ ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
const auto& steps = plan.forTest_compoundGetSteps();
@@ -1635,7 +1645,8 @@
// correct (model and step model)x(inputs and outputs).
const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}});
ExecutionPlan plan;
- ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+ ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
const auto& steps = plan.forTest_compoundGetSteps();
@@ -1735,7 +1746,8 @@
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
ExecutionPlan plan;
- ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+ ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectDevice);
@@ -1787,7 +1799,8 @@
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
ExecutionPlan plan;
- ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+ ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good");
@@ -1805,7 +1818,8 @@
// No need to compare the original model to the model from the plan -- we
// didn't actually do any partitioning.
ExecutionPlan plan;
- ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, &plan),
+ ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER,
+ kDefaultRuntimePriority, {}, &plan),
ANEURALNETWORKS_NO_ERROR);
ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base");