| /* |
| * Copyright (C) 2019 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define LOG_TAG "ExecutionBurstController" |
| |
| #include "ExecutionBurstController.h" |
| #include "ExecutionBurstUtils.h" |
| |
| #include <android-base/logging.h> |
| #include <android-base/thread_annotations.h> |
| #include <nnapi/IBurst.h> |
| #include <nnapi/IPreparedModel.h> |
| #include <nnapi/Result.h> |
| #include <nnapi/TypeUtils.h> |
| #include <nnapi/Types.h> |
| #include <nnapi/Validation.h> |
| #include <nnapi/hal/1.0/Conversions.h> |
| #include <nnapi/hal/1.0/HandleError.h> |
| #include <nnapi/hal/1.0/ProtectCallback.h> |
| #include <nnapi/hal/CommonUtils.h> |
| #include <nnapi/hal/TransferValue.h> |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <limits> |
| #include <memory> |
| #include <string> |
| #include <thread> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| #include "Callbacks.h" |
| #include "Conversions.h" |
| #include "Tracing.h" |
| #include "Utils.h" |
| |
| namespace android::hardware::neuralnetworks::V1_2::utils { |
| namespace { |
| |
| class BurstExecution final : public nn::IExecution, |
| public std::enable_shared_from_this<BurstExecution> { |
| struct PrivateConstructorTag {}; |
| |
| public: |
| static nn::GeneralResult<std::shared_ptr<const BurstExecution>> create( |
| std::shared_ptr<const ExecutionBurstController> controller, |
| std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation, |
| std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds); |
| |
| BurstExecution(PrivateConstructorTag tag, |
| std::shared_ptr<const ExecutionBurstController> controller, |
| std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation, |
| std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds); |
| |
| nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> compute( |
| const nn::OptionalTimePoint& deadline) const override; |
| |
| nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> computeFenced( |
| const std::vector<nn::SyncFence>& waitFor, const nn::OptionalTimePoint& deadline, |
| const nn::OptionalDuration& timeoutDurationAfterFence) const override; |
| |
| private: |
| const std::shared_ptr<const ExecutionBurstController> kController; |
| const std::vector<FmqRequestDatum> kRequest; |
| const hal::utils::RequestRelocation kRelocation; |
| const std::vector<ExecutionBurstController::OptionalCacheHold> kCacheHolds; |
| }; |
| |
| nn::GeneralResult<sp<IBurstContext>> executionBurstResultCallback( |
| V1_0::ErrorStatus status, const sp<IBurstContext>& burstContext) { |
| HANDLE_STATUS_HIDL(status) << "IPreparedModel::configureExecutionBurst failed with status " |
| << toString(status); |
| if (burstContext == nullptr) { |
| return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE) |
| << "IPreparedModel::configureExecutionBurst returned nullptr for burst"; |
| } |
| return burstContext; |
| } |
| |
| nn::GeneralResult<hidl_vec<hidl_memory>> getMemoriesHelper( |
| const hidl_vec<int32_t>& slots, |
| const std::shared_ptr<ExecutionBurstController::MemoryCache>& memoryCache) { |
| hidl_vec<hidl_memory> memories(slots.size()); |
| for (size_t i = 0; i < slots.size(); ++i) { |
| const int32_t slot = slots[i]; |
| const auto memory = NN_TRY(memoryCache->getMemory(slot)); |
| memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory)); |
| if (!memories[i].valid()) { |
| return NN_ERROR() << "memory at slot " << slot << " is invalid"; |
| } |
| } |
| return memories; |
| } |
| |
| } // namespace |
| |
| // MemoryCache methods |
| |
| ExecutionBurstController::MemoryCache::MemoryCache() { |
| constexpr size_t kPreallocatedCount = 1024; |
| std::vector<int32_t> freeSlotsSpace; |
| freeSlotsSpace.reserve(kPreallocatedCount); |
| mFreeSlots = std::stack<int32_t, std::vector<int32_t>>(std::move(freeSlotsSpace)); |
| mMemoryCache.reserve(kPreallocatedCount); |
| mCacheCleaner.reserve(kPreallocatedCount); |
| } |
| |
| void ExecutionBurstController::MemoryCache::setBurstContext(sp<IBurstContext> burstContext) { |
| std::lock_guard guard(mMutex); |
| mBurstContext = std::move(burstContext); |
| } |
| |
| std::pair<int32_t, ExecutionBurstController::MemoryCache::SharedCleanup> |
| ExecutionBurstController::MemoryCache::cacheMemory(const nn::SharedMemory& memory) { |
| std::unique_lock lock(mMutex); |
| base::ScopedLockAssertion lockAssert(mMutex); |
| |
| // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is |
| // not currently being freed. |
| auto iter = mMemoryIdToSlot.find(memory); |
| while (iter != mMemoryIdToSlot.end()) { |
| const int32_t slot = iter->second; |
| if (auto cleaner = mCacheCleaner.at(slot).lock()) { |
| return std::make_pair(slot, std::move(cleaner)); |
| } |
| |
| // If the code reaches this point, the Memory object was in the cache, but is currently |
| // being destroyed. This code waits until the cache entry has been freed, then loops to |
| // ensure the cache entry has been freed or has been made present by another thread. |
| mCond.wait(lock); |
| iter = mMemoryIdToSlot.find(memory); |
| } |
| |
| // Allocate a new cache entry. |
| const int32_t slot = allocateSlotLocked(); |
| mMemoryIdToSlot[memory] = slot; |
| mMemoryCache[slot] = memory; |
| |
| // Create reference-counted self-cleaning cache object. |
| auto self = weak_from_this(); |
| Task cleanup = [memory, memoryCache = std::move(self)] { |
| if (const auto lock = memoryCache.lock()) { |
| lock->freeMemory(memory); |
| } |
| }; |
| auto cleaner = std::make_shared<const Cleanup>(std::move(cleanup)); |
| mCacheCleaner[slot] = cleaner; |
| |
| return std::make_pair(slot, std::move(cleaner)); |
| } |
| |
| nn::GeneralResult<nn::SharedMemory> ExecutionBurstController::MemoryCache::getMemory(int32_t slot) { |
| std::lock_guard guard(mMutex); |
| if (slot < 0 || static_cast<size_t>(slot) >= mMemoryCache.size()) { |
| return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size(); |
| } |
| return mMemoryCache[slot]; |
| } |
| |
| void ExecutionBurstController::MemoryCache::freeMemory(const nn::SharedMemory& memory) { |
| { |
| std::lock_guard guard(mMutex); |
| const int32_t slot = mMemoryIdToSlot.at(memory); |
| if (mBurstContext) { |
| const auto ret = mBurstContext->freeMemory(slot); |
| if (!ret.isOk()) { |
| LOG(ERROR) << "IBustContext::freeMemory failed: " << ret.description(); |
| } |
| } |
| mMemoryIdToSlot.erase(memory); |
| mMemoryCache[slot] = {}; |
| mCacheCleaner[slot].reset(); |
| mFreeSlots.push(slot); |
| } |
| mCond.notify_all(); |
| } |
| |
| int32_t ExecutionBurstController::MemoryCache::allocateSlotLocked() { |
| constexpr size_t kMaxNumberOfSlots = std::numeric_limits<int32_t>::max(); |
| |
| // If there is a free slot, use it. |
| if (!mFreeSlots.empty()) { |
| const int32_t slot = mFreeSlots.top(); |
| mFreeSlots.pop(); |
| return slot; |
| } |
| |
| // Use a slot for the first time. |
| CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!"; |
| const int32_t slot = static_cast<int32_t>(mMemoryCache.size()); |
| mMemoryCache.emplace_back(); |
| mCacheCleaner.emplace_back(); |
| |
| return slot; |
| } |
| |
| // ExecutionBurstCallback methods |
| |
| ExecutionBurstController::ExecutionBurstCallback::ExecutionBurstCallback( |
| const std::shared_ptr<MemoryCache>& memoryCache) |
| : kMemoryCache(memoryCache) { |
| CHECK(memoryCache != nullptr); |
| } |
| |
| Return<void> ExecutionBurstController::ExecutionBurstCallback::getMemories( |
| const hidl_vec<int32_t>& slots, getMemories_cb cb) { |
| const auto memoryCache = kMemoryCache.lock(); |
| if (memoryCache == nullptr) { |
| LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories called after " |
| "the MemoryCache has been freed"; |
| cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}); |
| return Void(); |
| } |
| |
| const auto maybeMemories = getMemoriesHelper(slots, memoryCache); |
| if (!maybeMemories.has_value()) { |
| const auto& [message, code] = maybeMemories.error(); |
| LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories failed with " |
| << code << ": " << message; |
| cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}); |
| return Void(); |
| } |
| |
| cb(V1_0::ErrorStatus::NONE, maybeMemories.value()); |
| return Void(); |
| } |
| |
| // ExecutionBurstController methods |
| |
| nn::GeneralResult<std::shared_ptr<const ExecutionBurstController>> ExecutionBurstController::create( |
| nn::SharedPreparedModel preparedModel, const sp<V1_2::IPreparedModel>& hidlPreparedModel, |
| std::chrono::microseconds pollingTimeWindow) { |
| // check inputs |
| if (preparedModel == nullptr || hidlPreparedModel == nullptr) { |
| return NN_ERROR() << "ExecutionBurstController::create passed a nullptr"; |
| } |
| |
| // create FMQ objects |
| auto [requestChannelSender, requestChannelDescriptor] = |
| NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength)); |
| auto [resultChannelReceiver, resultChannelDescriptor] = |
| NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow)); |
| |
| // check FMQ objects |
| CHECK(requestChannelSender != nullptr); |
| CHECK(requestChannelDescriptor != nullptr); |
| CHECK(resultChannelReceiver != nullptr); |
| CHECK(resultChannelDescriptor != nullptr); |
| |
| // create memory cache |
| auto memoryCache = std::make_shared<MemoryCache>(); |
| |
| // create callback object |
| auto burstCallback = sp<ExecutionBurstCallback>::make(memoryCache); |
| auto cb = hal::utils::CallbackValue(executionBurstResultCallback); |
| |
| // configure burst |
| const Return<void> ret = hidlPreparedModel->configureExecutionBurst( |
| burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb); |
| HANDLE_TRANSPORT_FAILURE(ret); |
| |
| auto burstContext = NN_TRY(cb.take()); |
| memoryCache->setBurstContext(burstContext); |
| |
| // create death handler object |
| auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext)); |
| deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get()); |
| deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get()); |
| |
| // make and return controller |
| return std::make_shared<const ExecutionBurstController>( |
| PrivateConstructorTag{}, std::move(preparedModel), std::move(requestChannelSender), |
| std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext), |
| std::move(memoryCache), std::move(deathHandler)); |
| } |
| |
| ExecutionBurstController::ExecutionBurstController( |
| PrivateConstructorTag /*tag*/, nn::SharedPreparedModel preparedModel, |
| std::unique_ptr<RequestChannelSender> requestChannelSender, |
| std::unique_ptr<ResultChannelReceiver> resultChannelReceiver, |
| sp<ExecutionBurstCallback> callback, sp<IBurstContext> burstContext, |
| std::shared_ptr<MemoryCache> memoryCache, neuralnetworks::utils::DeathHandler deathHandler) |
| : kPreparedModel(std::move(preparedModel)), |
| mRequestChannelSender(std::move(requestChannelSender)), |
| mResultChannelReceiver(std::move(resultChannelReceiver)), |
| mBurstCallback(std::move(callback)), |
| mBurstContext(std::move(burstContext)), |
| mMemoryCache(std::move(memoryCache)), |
| kDeathHandler(std::move(deathHandler)) {} |
| |
| ExecutionBurstController::OptionalCacheHold ExecutionBurstController::cacheMemory( |
| const nn::SharedMemory& memory) const { |
| auto [slot, hold] = mMemoryCache->cacheMemory(memory); |
| return hold; |
| } |
| |
| nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> |
| ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming measure, |
| const nn::OptionalTimePoint& deadline, |
| const nn::OptionalDuration& loopTimeoutDuration) const { |
| // This is the first point when we know an execution is occurring, so begin to collect |
| // systraces. Note that the first point we can begin collecting systraces in |
| // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so |
| // ExecutionBurstServer collects systraces at different points in the code. |
| NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute"); |
| |
| // if the request is valid but of a higher version than what's supported in burst execution, |
| // fall back to another execution path |
| if (const auto version = NN_TRY(nn::validate(request)); version > nn::Version::ANDROID_Q) { |
| // fallback to another execution path if the packet could not be sent |
| return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration); |
| } |
| |
| // ensure that request is ready for IPC |
| std::optional<nn::Request> maybeRequestInShared; |
| hal::utils::RequestRelocation relocation; |
| const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared( |
| &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding, |
| &maybeRequestInShared, &relocation)); |
| |
| // clear pools field of request, as they will be provided via slots |
| const auto requestWithoutPools = nn::Request{ |
| .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}}; |
| auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools)); |
| const auto hidlMeasure = NN_TRY(convert(measure)); |
| |
| std::vector<int32_t> slots; |
| std::vector<OptionalCacheHold> holds; |
| slots.reserve(requestInShared.pools.size()); |
| holds.reserve(requestInShared.pools.size()); |
| for (const auto& memoryPool : requestInShared.pools) { |
| auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool)); |
| slots.push_back(slot); |
| holds.push_back(std::move(hold)); |
| } |
| |
| // send request packet |
| const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots); |
| const auto fallback = [this, &request, measure, &deadline, &loopTimeoutDuration] { |
| return kPreparedModel->execute(request, measure, deadline, loopTimeoutDuration); |
| }; |
| return executeInternal(requestPacket, relocation, fallback); |
| } |
| |
| // See IBurst::createReusableExecution for information on this method. |
| nn::GeneralResult<nn::SharedExecution> ExecutionBurstController::createReusableExecution( |
| const nn::Request& request, nn::MeasureTiming measure, |
| const nn::OptionalDuration& loopTimeoutDuration) const { |
| NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::createReusableExecution"); |
| |
| // if the request is valid but of a higher version than what's supported in burst execution, |
| // fall back to another execution path |
| if (const auto version = NN_TRY(nn::validate(request)); version > nn::Version::ANDROID_Q) { |
| // fallback to another execution path if the packet could not be sent |
| return kPreparedModel->createReusableExecution(request, measure, loopTimeoutDuration); |
| } |
| |
| // ensure that request is ready for IPC |
| std::optional<nn::Request> maybeRequestInShared; |
| hal::utils::RequestRelocation relocation; |
| const nn::Request& requestInShared = NN_TRY(hal::utils::convertRequestFromPointerToShared( |
| &request, nn::kDefaultRequestMemoryAlignment, nn::kMinMemoryPadding, |
| &maybeRequestInShared, &relocation)); |
| |
| // clear pools field of request, as they will be provided via slots |
| const auto requestWithoutPools = nn::Request{ |
| .inputs = requestInShared.inputs, .outputs = requestInShared.outputs, .pools = {}}; |
| auto hidlRequest = NN_TRY(V1_0::utils::unvalidatedConvert(requestWithoutPools)); |
| const auto hidlMeasure = NN_TRY(convert(measure)); |
| |
| std::vector<int32_t> slots; |
| std::vector<OptionalCacheHold> holds; |
| slots.reserve(requestInShared.pools.size()); |
| holds.reserve(requestInShared.pools.size()); |
| for (const auto& memoryPool : requestInShared.pools) { |
| auto [slot, hold] = mMemoryCache->cacheMemory(std::get<nn::SharedMemory>(memoryPool)); |
| slots.push_back(slot); |
| holds.push_back(std::move(hold)); |
| } |
| |
| const auto requestPacket = serialize(hidlRequest, hidlMeasure, slots); |
| return BurstExecution::create(shared_from_this(), std::move(requestPacket), |
| std::move(relocation), std::move(holds)); |
| } |
| |
| nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> |
| ExecutionBurstController::executeInternal(const std::vector<FmqRequestDatum>& requestPacket, |
| const hal::utils::RequestRelocation& relocation, |
| FallbackFunction fallback) const { |
| NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, |
| "ExecutionBurstController::executeInternal"); |
| |
| // Ensure that at most one execution is in flight at any given time. |
| const bool alreadyInFlight = mExecutionInFlight.test_and_set(); |
| if (alreadyInFlight) { |
| return NN_ERROR() << "IBurst already has an execution in flight"; |
| } |
| const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); }); |
| |
| if (relocation.input) { |
| relocation.input->flush(); |
| } |
| |
| // send request packet |
| const auto sendStatus = mRequestChannelSender->sendPacket(requestPacket); |
| if (!sendStatus.ok()) { |
| // fallback to another execution path if the packet could not be sent |
| if (fallback) { |
| return fallback(); |
| } |
| return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error(); |
| } |
| |
| // get result packet |
| const auto [status, outputShapes, timing] = NN_TRY(mResultChannelReceiver->getBlocking()); |
| |
| if (relocation.output) { |
| relocation.output->flush(); |
| } |
| return executionCallback(status, outputShapes, timing); |
| } |
| |
| nn::GeneralResult<std::shared_ptr<const BurstExecution>> BurstExecution::create( |
| std::shared_ptr<const ExecutionBurstController> controller, |
| std::vector<FmqRequestDatum> request, hal::utils::RequestRelocation relocation, |
| std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds) { |
| if (controller == nullptr) { |
| return NN_ERROR() << "V1_2::utils::BurstExecution::create must have non-null controller"; |
| } |
| |
| return std::make_shared<const BurstExecution>(PrivateConstructorTag{}, std::move(controller), |
| std::move(request), std::move(relocation), |
| std::move(cacheHolds)); |
| } |
| |
| BurstExecution::BurstExecution(PrivateConstructorTag /*tag*/, |
| std::shared_ptr<const ExecutionBurstController> controller, |
| std::vector<FmqRequestDatum> request, |
| hal::utils::RequestRelocation relocation, |
| std::vector<ExecutionBurstController::OptionalCacheHold> cacheHolds) |
| : kController(std::move(controller)), |
| kRequest(std::move(request)), |
| kRelocation(std::move(relocation)), |
| kCacheHolds(std::move(cacheHolds)) {} |
| |
| nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> BurstExecution::compute( |
| const nn::OptionalTimePoint& /*deadline*/) const { |
| return kController->executeInternal(kRequest, kRelocation, /*fallback=*/nullptr); |
| } |
| |
| nn::GeneralResult<std::pair<nn::SyncFence, nn::ExecuteFencedInfoCallback>> |
| BurstExecution::computeFenced(const std::vector<nn::SyncFence>& /*waitFor*/, |
| const nn::OptionalTimePoint& /*deadline*/, |
| const nn::OptionalDuration& /*timeoutDurationAfterFence*/) const { |
| return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE) |
| << "IExecution::computeFenced is not supported on burst object"; |
| } |
| |
| } // namespace android::hardware::neuralnetworks::V1_2::utils |