| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define LOG_TAG "ExecutionPlan" |
| |
| #include "ExecutionPlan.h" |
| |
| #include <ControlFlow.h> |
| #include <CpuExecutor.h> |
| #include <GraphDump.h> |
| #include <LegacyUtils.h> |
| #include <MetaModel.h> |
| #include <OperationsUtils.h> |
| #include <TokenHasher.h> |
| #include <Tracing.h> |
| #include <android-base/logging.h> |
| #include <fcntl.h> |
| #include <nnapi/IBurst.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <functional> |
| #include <map> |
| #include <memory> |
| #include <mutex> |
| #include <optional> |
| #include <queue> |
| #include <set> |
| #include <string> |
| #include <tuple> |
| #include <type_traits> |
| #include <unordered_set> |
| #include <utility> |
| #include <variant> |
| #include <vector> |
| |
| #include "BurstBuilder.h" |
| #include "CompilationBuilder.h" |
| #include "ExecutionBuilder.h" |
| #include "ExecutionCallback.h" |
| #include "Manager.h" |
| #include "ModelBuilder.h" |
| #include "TypeManager.h" |
| |
| namespace android { |
| namespace nn { |
| |
| namespace { |
| |
| // The index of the main model in SourceModels. |
| constexpr uint32_t kMainModelInSourceModels = 0; |
| |
| constexpr uint32_t kNoPadding = 1; |
| |
| static bool updateTokenFromMetaData(TokenHasher* token, |
| const std::vector<TokenValuePair>& metaData) { |
| // Combines each TokenValuePair with its corresponding extension name. |
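| // For example (illustrative, assuming the usual 16-bit extension prefix |
| // split of operand type codes): a TokenValuePair with token 0x1234ABCD |
| // yields prefix 0x1234 (resolved to an extension name below) and |
| // extensionEnum 0xABCD, contributing (extensionName, 0xABCD, value, size). |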
| std::vector<std::tuple<const char*, uint16_t, const uint8_t*, size_t>> metaDataWithExtension; |
| for (auto p : metaData) { |
| uint16_t prefix = static_cast<uint32_t>(p.token) >> kExtensionTypeBits; |
| uint16_t extensionEnum = static_cast<uint32_t>(p.token) & kTypeWithinExtensionMask; |
| const Extension* extension; |
| if (!TypeManager::get()->getExtensionInfo(prefix, &extension)) { |
| LOG(ERROR) << "Prefix " << prefix << " could not be found"; |
| return false; |
| } |
| metaDataWithExtension.push_back(std::make_tuple(extension->name.c_str(), extensionEnum, |
| p.value.data(), p.value.size())); |
| } |
| // Sort by extension name, then by extension enum. |
| std::sort(metaDataWithExtension.begin(), metaDataWithExtension.end(), |
| [](const auto& a, const auto& b) { |
| if (int r = strcmp(std::get<0>(a), std::get<0>(b))) { |
| return r < 0; |
| } else { |
| return std::get<1>(a) < std::get<1>(b); |
| } |
| }); |
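| // The sort makes the hash independent of the order in which the client |
| // supplied the metadata entries, so equivalent compilations produce the |
| // same cache token. |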
| // Update the cache token with the sorted array. |
| for (auto [extensionName, extensionEnum, value, valueSize] : metaDataWithExtension) { |
| if (!token->updateFromString(extensionName) || |
| !token->update(&extensionEnum, sizeof(uint16_t)) || !token->update(value, valueSize)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // Compiles the model on device. |
| // If compilation caching is available, then depending on ExecutionPlan::mState, the token may |
| // have been initialized only from the user-provided token (SIMPLE body), or may already have |
| // been re-hashed with the indices of the operations to be executed (COMPOUND body). This |
| // function further re-hashes the token with the device name, device version string, execution |
| // preference, compilation priority, and execution metadata. |
| int compile(const Device& device, const ModelBuilder& model, int executionPreference, |
| int compilationPriority, const OptionalTimePoint& deadline, const CacheInfo& cacheInfo, |
| TokenHasher* token, const std::vector<TokenValuePair>& metaData, |
| std::shared_ptr<RuntimePreparedModel>* preparedModel) { |
| CHECK(token != nullptr); |
| CHECK(preparedModel != nullptr); |
| *preparedModel = nullptr; |
| |
| std::optional<CacheToken> cacheToken; |
| if (device.isCachingSupported() && token->ok() && |
| token->updateFromString(device.getName().c_str()) && |
| token->updateFromString(device.getVersionString().c_str()) && |
| token->update(&executionPreference, sizeof(executionPreference)) && |
| token->update(&compilationPriority, sizeof(compilationPriority)) && |
| updateTokenFromMetaData(token, metaData) && token->finish()) { |
| cacheToken = CacheToken{}; |
| const uint8_t* tokenPtr = token->getCacheToken(); |
| std::copy(tokenPtr, tokenPtr + cacheToken->size(), cacheToken->begin()); |
| } |
| |
| const ModelFactory makeModel = [&model] { return model.makeModel(); }; |
| const ExecutionPreference preference = static_cast<ExecutionPreference>(executionPreference); |
| const Priority priority = convertToCanonicalPriority(compilationPriority); |
| std::vector<ExtensionNameAndPrefix> extensionNameAndPrefix = |
| TypeManager::get()->getExtensionNameAndPrefix(metaData); |
| const auto [n, returnedPreparedModel] = |
| device.prepareModel(makeModel, preference, priority, deadline, cacheInfo, cacheToken, |
| metaData, extensionNameAndPrefix); |
| *preparedModel = returnedPreparedModel; |
| return n; |
| } |
| |
| typedef std::function<void(uint32_t)> OperationReadyCallback; |
| |
| int copyOperandExtraParams(ModelBuilder& model, uint32_t toOperandIndex, |
| const Operand& fromOperand) { |
| if (fromOperand.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL && |
| std::holds_alternative<Operand::SymmPerChannelQuantParams>(fromOperand.extraParams)) { |
| auto& fromChannelQuant = |
| std::get<Operand::SymmPerChannelQuantParams>(fromOperand.extraParams); |
| ANeuralNetworksSymmPerChannelQuantParams toChannelQuant = { |
| .channelDim = fromChannelQuant.channelDim, |
| .scaleCount = static_cast<uint32_t>(fromChannelQuant.scales.size()), |
| .scales = fromChannelQuant.scales.data(), |
| }; |
| return model.setOperandSymmPerChannelQuantParams(toOperandIndex, toChannelQuant); |
| } else if (isExtension(fromOperand.type) && |
| std::holds_alternative<Operand::ExtensionParams>(fromOperand.extraParams)) { |
| auto extensionData = std::get<Operand::ExtensionParams>(fromOperand.extraParams); |
| return model.setOperandExtensionData(toOperandIndex, extensionData.data(), |
| extensionData.size()); |
| } else if (!std::holds_alternative<Operand::NoParams>(fromOperand.extraParams) || |
| fromOperand.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) { |
| LOG(ERROR) << "Type " << fromOperand.type |
| << " has an unexpected extraParams variant: " << fromOperand.extraParams.index(); |
| return ANEURALNETWORKS_BAD_DATA; |
| } else { |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| } |
| |
| // This class tracks whether we know the value of an operand as operations |
| // are processed. |
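| // |
| // A minimal usage sketch (hypothetical; the real call site is the |
| // partitioner): the callback fires once per operation, in a topological |
| // order driven by operand readiness. |
| // |
| // std::queue<uint32_t> ready; |
| // OperandTracker tracker(model, [&ready](uint32_t op) { ready.push(op); }); |
| // while (!ready.empty()) { |
| // uint32_t op = ready.front(); |
| // ready.pop(); |
| // /* ... assign 'op' to a partition ... */ |
| // tracker.markProcessed(op, [&ready](uint32_t next) { ready.push(next); }); |
| // } |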
| class OperandTracker { |
| public: |
| // Creates the tracker for this model. Figures out which operations can be |
| // executed right away and calls cb for each one of them. |
| OperandTracker(const ModelBuilder* model, OperationReadyCallback cb); |
| // Marks the specified operation as having been processed. The outputs of |
| // the operation now being known, this may make new operations ready to |
| // run. Calls cb for each one of them. |
| void markProcessed(uint32_t operationIndex, OperationReadyCallback cb); |
| |
| private: |
| const ModelBuilder* mModel; |
| std::multimap<uint32_t, uint32_t> mOperandToOperations; |
| std::vector<uint32_t> mUnknownInputCount; // For each operation |
| }; |
| |
| OperandTracker::OperandTracker(const ModelBuilder* model, OperationReadyCallback cb) |
| : mModel(model) { |
| const auto& operations = mModel->getOperations(); |
| mUnknownInputCount.resize(operations.size()); |
| for (uint32_t operationIndex = 0; operationIndex < operations.size(); operationIndex++) { |
| const Operation& operation = operations[operationIndex]; |
| uint32_t count = 0; |
| for (uint32_t operandIndex : operation.inputs) { |
| auto lifetime = mModel->getOperand(operandIndex).lifetime; |
| if (lifetime == Operand::LifeTime::TEMPORARY_VARIABLE || |
| lifetime == Operand::LifeTime::SUBGRAPH_OUTPUT) { |
| count++; |
| mOperandToOperations.emplace(operandIndex, operationIndex); |
| } |
| } |
| if (count == 0) { |
| cb(operationIndex); |
| } |
| mUnknownInputCount[operationIndex] = count; |
| } |
| } |
| |
| void OperandTracker::markProcessed(uint32_t operationIndex, OperationReadyCallback cb) { |
| // Mark all its outputs as known. |
| const Operation& operation = mModel->getOperations()[operationIndex]; |
| for (uint32_t operandIndex : operation.outputs) { |
| auto range = mOperandToOperations.equal_range(operandIndex); |
| for (auto i = range.first; i != range.second; i++) { |
| uint32_t& count = mUnknownInputCount[i->second]; |
| if (--count == 0) { |
| cb(i->second); |
| } |
| } |
| } |
| } |
| |
| StaticTemporaryLocation addTemporary(uint32_t* totalSizeOfTemporaries, uint32_t size, |
| uint32_t alignment, uint32_t padding) { |
| // TODO: what about overflow? |
| *totalSizeOfTemporaries = roundUp(*totalSizeOfTemporaries, alignment); |
| const uint32_t offset = *totalSizeOfTemporaries; |
| size = roundUp(size, padding); |
| *totalSizeOfTemporaries += size; |
| return {.offset = offset, .paddedLength = size}; |
| } |
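| // Worked example (illustrative): with *totalSizeOfTemporaries == 6, |
| // size == 10, alignment == 8, and padding == 4: roundUp(6, 8) raises the |
| // running total to 8, so offset == 8; roundUp(10, 4) pads size to 12 (the |
| // paddedLength); the running total becomes 8 + 12 == 20. |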
| |
| std::string toString(SourceOperandIndex sourceOperandIndex) { |
| return "(" + std::to_string(sourceOperandIndex.first) + ", " + |
| std::to_string(sourceOperandIndex.second) + ")"; |
| } |
| |
| // A helper class to analyze the step roles of all partition boundary operands. |
| // |
| // To use, call StepRoleAnalyzer::analyze and pass in a setup function that configures the analyzer |
| // with the following two methods: |
| // - addRole: Add a step role to a boundary operand |
| // - setUsedBy: Specify that the memory of the "source" operand may be directly used by the "dest" |
| // operand. All of the step roles of the "dest" operand are also possible step roles of the |
| // "source" operand. This is useful for interpreted control flow, e.g., the outer input operand |
| // of an interpreted IF operation may be directly used as all step roles of the corresponding |
| // input operand of the then and else models. Note that this relationship is directional -- |
| // (A->B && B->C) implies A->C, but (A->C && B->C) does not imply A->B or B->A (A->B is a |
| // shorthand for setUsedBy(A, B)). The setup function must guarantee that the final graph |
| // produced by the used-by relationship is acyclic. This is true for the partitioner algorithm |
| // because there must be a root operand of each step role for the memory to be allocated on |
| // behalf of. |
| // |
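| // A minimal usage sketch (hypothetical indices; "stepA" and "stepB" stand |
| // for ExecutionStep objects built by the partitioner): |
| // |
| // auto roles = StepRoleAnalyzer::analyze([&](StepRoleAnalyzer& analyzer) { |
| // analyzer.addRole(stepA, /*operandIndex=*/3, IOType::OUTPUT, 0); |
| // analyzer.addRole(stepB, /*operandIndex=*/3, IOType::INPUT, 1); |
| // // The memory of source operand (0, 7) may be used directly as |
| // // (1, 0), so (0, 7) inherits all of (1, 0)'s step roles: |
| // analyzer.setUsedBy({0, 7}, {1, 0}); |
| // }); |
| // |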
| class StepRoleAnalyzer { |
| public: |
| static std::map<SourceOperandIndex, std::set<StepRole>> analyze( |
| const std::function<void(StepRoleAnalyzer&)>& setup) { |
| StepRoleAnalyzer analyzer; |
| setup(analyzer); |
| return analyzer.finish(); |
| } |
| |
| void addRole(const ExecutionStep& step, uint32_t operandIndex, IOType type, |
| uint32_t stepIOIndex) { |
| SourceOperandIndex source = {step.getSourceModelIndex(), operandIndex}; |
| mRoles[source].emplace(step.getIndex(), type, stepIOIndex); |
| } |
| |
| void setUsedBy(const SourceOperandIndex& source, const SourceOperandIndex& dest) { |
| mUsedBy[source].emplace(dest); |
| } |
| |
| private: |
| StepRoleAnalyzer() = default; |
| |
| // Merges the step roles of the destination operands into the source |
| // operands and returns the final map. |
| std::map<SourceOperandIndex, std::set<StepRole>> finish() { |
| for (const auto& [source, _] : mUsedBy) { |
| finishHelper(source); |
| } |
| return std::move(mRoles); |
| } |
| |
| void finishHelper(SourceOperandIndex current) { |
| if (mProcessedOperands.count(current) > 0) return; |
| mProcessedOperands.insert(current); |
| const auto it = mUsedBy.find(current); |
| if (it != mUsedBy.end()) { |
| auto& roles = mRoles[current]; |
| // Merge the step roles of the destination operands. |
| for (const auto& dest : it->second) { |
| finishHelper(dest); |
| const auto& destRoles = mRoles[dest]; |
| roles.insert(destRoles.begin(), destRoles.end()); |
| } |
| } |
| } |
| |
| // A map from the source operand to its step roles. |
| std::map<SourceOperandIndex, std::set<StepRole>> mRoles; |
| // A map from the source operand to a set of destination operands that may directly |
| // use the memory of the source operand. |
| std::map<SourceOperandIndex, std::set<SourceOperandIndex>> mUsedBy; |
| // Used in finish() to track which operands have been processed. |
| std::set<SourceOperandIndex> mProcessedOperands; |
| }; |
| |
| } // namespace |
| |
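| // A typical DynamicTemporaries lifecycle sketch (hypothetical indices; the |
| // real call sites are in ExecutionPlan and ExecutionBuilder, and |
| // endDeclarations() is assumed to be the method that freezes declare()): |
| // |
| // DynamicTemporaries temps; |
| // temps.declare({0, 5}, /*stepIndex=*/2, initialDims, /*initialLength=*/60, |
| // /*alignment=*/64, /*padding=*/4); |
| // temps.endDeclarations(); |
| // temps.redeclare({0, 5}, newDims, /*newLength=*/120); // shape now known |
| // temps.allocate(2); // (re-)allocates backing memory for step 2 |
| // auto loc = temps.lookup({0, 5}); // memory, offset, dimensions, length |
| |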
| void DynamicTemporaries::vlogDump(const char* context) const { |
| if (empty()) { |
| return; |
| } |
| if (context) { |
| VLOG(EXECUTION) << "DynamicTemporaries: \"" << context << "\""; |
| } |
| for (const auto& temp : mSourceOperandToTemporary) { |
| VLOG(EXECUTION) << "DynamicTemporaries: sourceOperandIndex = " << toString(temp.first) |
| << ", stepIndex = " << temp.second.stepIndex |
| << ", offset = " << temp.second.offset |
| << ", dimensions = " << toString(temp.second.dimensions) |
| << ", paddedLength = " << temp.second.paddedLength |
| << ", alignment = " << temp.second.alignment |
| << ", padding = " << temp.second.padding; |
| } |
| } |
| |
| void DynamicTemporaries::declare(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex, |
| const Dimensions& initialDimensions, uint32_t initialLength, |
| uint32_t alignment, uint32_t padding) { |
| VLOG(EXECUTION) << "DynamicTemporaries::declare(sourceOperandIndex = " |
| << toString(sourceOperandIndex) << ", stepIndex = " << stepIndex |
| << ", initialDimensions = " << toString(initialDimensions) |
| << ", initialLength = " << initialLength << ", alignment = " << alignment |
| << ", padding = " << padding << ")"; |
| CHECK(!mDeclared); |
| CHECK_GT(initialLength, 0u); |
| const uint32_t paddedLength = roundUp(initialLength, padding); |
| auto [_, isNew] = mSourceOperandToTemporary.emplace( |
| sourceOperandIndex, InternalLocationAndShape{stepIndex, 0, initialDimensions, |
| paddedLength, alignment, padding}); |
| CHECK(isNew); |
| mStepIndexToSourceOperandIndexes[stepIndex].emplace_back(sourceOperandIndex); |
| } |
| |
| bool DynamicTemporaries::redeclare(SourceOperandIndex sourceOperandIndex, |
| const Dimensions& newDimensions, uint32_t newLength) { |
| auto createAndLogResult = [sourceOperandIndex, &newDimensions, newLength](bool changedShape) { |
| VLOG(EXECUTION) << "DynamicTemporaries::redeclare(sourceOperandIndex = " |
| << toString(sourceOperandIndex) |
| << ", newDimensions = " << toString(newDimensions) |
| << ", newLength = " << newLength << ") -> " << toString(changedShape); |
| return changedShape; |
| }; |
| |
| CHECK(mDeclared); |
| CHECK_GT(newLength, 0u); |
| |
| InternalLocationAndShape& temp = mSourceOperandToTemporary.at(sourceOperandIndex); |
| const uint32_t paddedLength = roundUp(newLength, temp.padding); |
| if (temp.paddedLength == paddedLength && temp.dimensions == newDimensions) { |
| return createAndLogResult(false); |
| } |
| if (temp.paddedLength < paddedLength) { |
| // Otherwise allocation remains valid, even if it may be suboptimal |
| // (because it uses more space than needed). Use case: Don't force |
| // client to allocate again just because the client reported more |
| // accurate shape information. |
| mAllocatedStepIndexes.erase(temp.stepIndex); |
| } |
| temp.paddedLength = paddedLength; |
| temp.dimensions = newDimensions; |
| return createAndLogResult(true); |
| } |
| |
| int DynamicTemporaries::allocate(uint32_t stepIndex) { |
| VLOG(EXECUTION) << "DynamicTemporaries::allocate(stepIndex = " << stepIndex << ")"; |
| |
| CHECK(mDeclared); |
| |
| const auto sourceOperandIndexesI = mStepIndexToSourceOperandIndexes.find(stepIndex); |
| if (sourceOperandIndexesI == mStepIndexToSourceOperandIndexes.end()) { |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| // perform layout |
| uint32_t newSize = 0; |
| for (const auto& sourceOperandIndex : sourceOperandIndexesI->second) { |
| InternalLocationAndShape& temp = mSourceOperandToTemporary.at(sourceOperandIndex); |
| // temp.paddedLength is already padded in declare and redeclare. |
| CHECK(temp.paddedLength % temp.padding == 0); |
| temp.offset = addTemporary(&newSize, temp.paddedLength, temp.alignment, kNoPadding).offset; |
| } |
| |
| // perform (re-)allocation |
| // TODO: Today we may shrink the allocation in order to avoid wasting memory. Is this important |
| // to conserve memory, or do we waste time reallocating? |
| const double kWaste = 0.2 /* arbitrary */; // Willing to waste space to avoid |
| // deallocation/reallocation overhead |
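| // For example (illustrative): with kWaste == 0.2, an existing 100-byte |
| // allocation is reused for any newSize from 84 to 100 bytes; a smaller |
| // newSize triggers reallocation so that no more than ~20% is wasted. |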
| auto& memory = mStepIndexToMemory[stepIndex]; |
| const uint32_t oldSize = (memory ? memory->getSize() : 0); |
| if ((oldSize >= newSize) && (oldSize <= newSize * (1 + kWaste))) { |
| // Suitable allocation already exists; nothing to do |
| } else { |
| int n; |
| std::tie(n, memory) = MemoryAshmem::create(newSize); |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| LOG(ERROR) << "Failed to allocate dynamic temporaries of size " << newSize |
| << " for step " << stepIndex; |
| mAllocatedStepIndexes.erase(stepIndex); |
| return n; |
| } |
| } |
| |
| mAllocatedStepIndexes.insert(stepIndex); |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
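| // Returns true either if stepIndex has no dynamic temporaries at all, or if |
| // its dynamic temporaries were allocated by a successful allocate(stepIndex). |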
| bool DynamicTemporaries::allocated(uint32_t stepIndex) const { |
| return (mStepIndexToSourceOperandIndexes.find(stepIndex) == |
| mStepIndexToSourceOperandIndexes.end()) || |
| mAllocatedStepIndexes.count(stepIndex); |
| } |
| |
| std::optional<DynamicTemporaries::LocationAndShape> DynamicTemporaries::lookup( |
| SourceOperandIndex sourceOperandIndex, bool mustBeAllocated) const { |
| CHECK(mDeclared); |
| if (auto it = mSourceOperandToTemporary.find(sourceOperandIndex); |
| it != mSourceOperandToTemporary.end()) { |
| const InternalLocationAndShape& temp = it->second; |
| const bool isAllocated = allocated(temp.stepIndex); |
| if (mustBeAllocated) { |
| CHECK(isAllocated) << "Source operand " << toString(sourceOperandIndex) |
| << " must be allocated"; |
| } |
| if (isAllocated) { |
| return LocationAndShape{mStepIndexToMemory.at(temp.stepIndex).get(), temp.offset, |
| &temp.dimensions, temp.paddedLength}; |
| } else { |
| return LocationAndShape{nullptr, ~uint32_t(0), &temp.dimensions, temp.paddedLength}; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| ExecutionStep::ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, uint32_t sourceModelIndex, |
| std::shared_ptr<Device> device) |
| : mPlan(plan), |
| mIndex(stepIndex), |
| mSourceModelIndex(sourceModelIndex), |
| mStepModel(), |
| mDevice(device), |
| mToken(plan->getCacheToken()) {} |
| |
| // Adds an operand if it has not been added already. |
| // Sets the index in the step model for the corresponding operand. |
| int ExecutionStep::addOperand(uint32_t sourceOperandIndex, uint32_t* stepOperandIndex, |
| OperandKind kind) { |
| // Have we added this operand already? |
| auto i = mOperandMap.find(sourceOperandIndex); |
| if (i != mOperandMap.end()) { |
| CHECK(kind == INPUT); |
| *stepOperandIndex = i->second; |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| // First time we add this operand. |
| *stepOperandIndex = mStepModel.operandCount(); |
| mOperandMap.emplace(sourceOperandIndex, *stepOperandIndex); |
| |
| // Add the operand to the step model. |
| const ModelBuilder& sourceModel = *getSourceModel(); |
| const Operand& operand = sourceModel.getOperand(sourceOperandIndex); |
| ANeuralNetworksOperandType type = { |
| .type = static_cast<int32_t>(operand.type), |
| .dimensionCount = static_cast<uint32_t>(operand.dimensions.size()), |
| .dimensions = operand.dimensions.size() > 0 ? operand.dimensions.data() : nullptr, |
| .scale = operand.scale, |
| .zeroPoint = operand.zeroPoint, |
| }; |
| |
| int n = mStepModel.addOperand(type); |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| LOG(ERROR) << "Previous error occurred when partitioning the graph"; |
| return n; |
| } |
| |
| n = copyOperandExtraParams(mStepModel, *stepOperandIndex, operand); |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| LOG(ERROR) << "Error when copying extra parameters to the operand"; |
| return n; |
| } |
| |
| // Sets its value. |
| switch (operand.lifetime) { |
| case Operand::LifeTime::CONSTANT_COPY: { |
| const uint8_t* data = sourceModel.getPointerToOperandValue(operand.location.offset); |
| n = mStepModel.setOperandValue(*stepOperandIndex, data, operand.location.length); |
| } break; |
| case Operand::LifeTime::CONSTANT_REFERENCE: { |
| const RuntimeMemory* memory = sourceModel.getMemories()[operand.location.poolIndex]; |
| n = mStepModel.setOperandValueFromMemory( |
| *stepOperandIndex, memory, operand.location.offset, operand.location.length); |
| } break; |
| case Operand::LifeTime::NO_VALUE: { |
| n = mStepModel.setOperandValue(*stepOperandIndex, nullptr, 0); |
| } break; |
| case Operand::LifeTime::TEMPORARY_VARIABLE: { // handled similarly to SUBGRAPH_OUTPUT |
| if (kind == INPUT) { |
| // The first time we've seen this operand is as an |
| // input. That means it must be defined by a |
| // different partition, and is an input to this one. |
| mTempsAsStepModelInputs.emplace_back(sourceOperandIndex, *stepOperandIndex); |
| } else { |
| // The first time we've seen this operand is as an |
| // output. It may be an input to a different |
| // partition, so keep track of it. |
| mPlan->recordTemporaryDef(SourceOperandIndex(mSourceModelIndex, sourceOperandIndex), |
| mIndex); |
| } |
| } break; |
| case Operand::LifeTime::SUBGRAPH_INPUT: { |
| mModelInputs.emplace_back(sourceOperandIndex, *stepOperandIndex); |
| } break; |
| case Operand::LifeTime::SUBGRAPH_OUTPUT: { // handled similarly to TEMPORARY_VARIABLE |
| if (kind == INPUT) { |
| // The first time we've seen this operand is as an |
| // input. That means it must be defined by a |
| // different partition, and is an input to this one. |
| mOutputsAsStepModelInputs.emplace_back(sourceOperandIndex, *stepOperandIndex); |
| } else { |
| // The first time we've seen this operand is as an |
| // output. |
| mModelOutputs.emplace_back(sourceOperandIndex, *stepOperandIndex); |
| // It may be an input to a different partition, so keep track of |
| // it. |
| mPlan->recordOutputDef(SourceOperandIndex(mSourceModelIndex, sourceOperandIndex), |
| mIndex); |
| } |
| } break; |
| case Operand::LifeTime::SUBGRAPH: { |
| const ModelBuilder* model = sourceModel.getReferencedModel(operand); |
| n = mStepModel.setOperandValueFromModel(*stepOperandIndex, model); |
| } break; |
| case Operand::LifeTime::POINTER: { |
| const void* data = std::get<const void*>(operand.location.pointer); |
| n = mStepModel.setOperandValue(*stepOperandIndex, data, operand.location.length); |
| } break; |
| } |
| |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| LOG(ERROR) << "Previous error occurred when partitioning the graph"; |
| } |
| return n; |
| } |
| |
| int ExecutionStep::addOperation(int operationIndex) { |
| const Operation& operation = getSourceModel()->getOperation(operationIndex); |
| if (mToken.ok()) { |
| mToken.update(&mSourceModelIndex, sizeof(mSourceModelIndex)); |
| mToken.update(&operationIndex, sizeof(operationIndex)); |
| } |
| |
| // Convert the input and output operand indexes. |
| // |
| // We expect operations to be added in topological order. Therefore: |
| // |
| // - We may not have seen an input if it is a model input, a |
| // constant, or an operand written by a different partition. |
| // |
| // - We should not have seen any outputs. |
| auto addOperands = [this](const std::vector<uint32_t>& sourceModelOperands, |
| std::vector<uint32_t>* stepModelOperands, OperandKind kind) -> int { |
| const uint32_t operandCount = static_cast<uint32_t>(sourceModelOperands.size()); |
| for (uint32_t i = 0; i < operandCount; i++) { |
| NN_RETURN_IF_ERROR(addOperand(sourceModelOperands[i], &stepModelOperands->at(i), kind)); |
| } |
| return ANEURALNETWORKS_NO_ERROR; |
| }; |
| |
| const uint32_t inputCount = static_cast<uint32_t>(operation.inputs.size()); |
| const uint32_t outputCount = static_cast<uint32_t>(operation.outputs.size()); |
| std::vector<uint32_t> inputs(inputCount); |
| std::vector<uint32_t> outputs(outputCount); |
| NN_RETURN_IF_ERROR(addOperands(operation.inputs, &inputs, INPUT)); |
| NN_RETURN_IF_ERROR(addOperands(operation.outputs, &outputs, OUTPUT)); |
| return mStepModel.addOperation(static_cast<uint32_t>(operation.type), inputCount, inputs.data(), |
| outputCount, outputs.data()); |
| } |
| |
| void ExecutionStep::mapInputsAndOutputs( |
| std::shared_ptr<StepExecutor> executor, |
| const std::vector<OutputShape>* mainModelOutputShapes, const RuntimeMemory* temporaryMemory, |
| const std::map<SourceOperandIndex, StaticTemporaryLocation>& |
| sourceOperandToLocationOfTemporary, |
| const DynamicTemporaries& dynamicTemporaries, |
| const std::map<SourceOperandIndex, uint32_t>& sourceOperandToInputIndex, |
| const std::map<SourceOperandIndex, uint32_t>& sourceOperandToOutputIndex, |
| const std::map<SourceOperandIndex, ConstantReferenceLocation>& |
| sourceOperandToConstantReference) const { |
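| // Each step input is resolved against the maps in a fixed priority order: |
| // static temporary, then dynamic temporary, then main model input, then |
| // main model output fed back as a step input, then constant at a control |
| // flow partition boundary. Step outputs skip the input-only cases. |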
| auto mapInput = [&](uint32_t stepModelOperandIndex, uint32_t stepInputIndex) { |
| SourceOperandIndex sourceOperandIndex(mSourceModelIndex, stepModelOperandIndex); |
| if (auto it = sourceOperandToLocationOfTemporary.find(sourceOperandIndex); |
| it != sourceOperandToLocationOfTemporary.end()) { |
| const auto& loc = it->second; |
| executor->setInputFromMemory(stepInputIndex, temporaryMemory, loc.offset, |
| loc.paddedLength); |
| } else if (auto loc = dynamicTemporaries.lookup(sourceOperandIndex); loc != std::nullopt) { |
| executor->setInputFromMemory(stepInputIndex, loc->memory, loc->offset, |
| loc->paddedLength, *loc->dimensions); |
| } else if (auto it = sourceOperandToInputIndex.find(sourceOperandIndex); |
| it != sourceOperandToInputIndex.end()) { |
| executor->mapInput(it->second, stepInputIndex); |
| } else if (auto it = sourceOperandToOutputIndex.find(sourceOperandIndex); |
| it != sourceOperandToOutputIndex.end()) { |
| executor->mapOutputToInput(it->second, stepInputIndex, |
| mainModelOutputShapes |
| ? &mainModelOutputShapes->at(it->second).dimensions |
| : nullptr); |
| } else if (auto it = sourceOperandToConstantReference.find(sourceOperandIndex); |
| it != sourceOperandToConstantReference.end()) { |
| // Constant partition boundary operand. This could be an IF branch |
| // model input or a WHILE variable initializer. |
| const auto& loc = it->second; |
| executor->setInputFromMemory(stepInputIndex, loc.memory, loc.offset, loc.length); |
| } else { |
| CHECK(false) << "Cannot map step input " << stepInputIndex << " from operand " |
| << toString(sourceOperandIndex); |
| } |
| }; |
| auto mapOutput = [&](uint32_t stepModelOperandIndex, uint32_t stepOutputIndex) { |
| SourceOperandIndex sourceOperandIndex(mSourceModelIndex, stepModelOperandIndex); |
| if (auto it = sourceOperandToLocationOfTemporary.find(sourceOperandIndex); |
| it != sourceOperandToLocationOfTemporary.end()) { |
| const auto& loc = it->second; |
| executor->setOutputFromMemory(stepOutputIndex, temporaryMemory, loc.offset, |
| loc.paddedLength); |
| } else if (auto loc = dynamicTemporaries.lookup(sourceOperandIndex); loc != std::nullopt) { |
| executor->setOutputFromMemory(stepOutputIndex, loc->memory, loc->offset, |
| loc->paddedLength, *loc->dimensions); |
| } else if (auto it = sourceOperandToOutputIndex.find(sourceOperandIndex); |
| it != sourceOperandToOutputIndex.end()) { |
| executor->mapOutput(it->second, stepOutputIndex); |
| } else { |
| CHECK(false) << "Cannot map step output " << stepOutputIndex << " from operand " |
| << toString(sourceOperandIndex); |
| } |
| }; |
| for (uint32_t i = 0, n = mStepModelInputs.size(); i < n; ++i) { |
| mapInput(mStepModelInputs[i].first, i); |
| } |
| for (uint32_t i = 0, n = mStepModelOutputs.size(); i < n; ++i) { |
| mapOutput(mStepModelOutputs[i].first, i); |
| } |
| } |
| |
| void ExecutionPlan::CompoundBody::findModelOutputsThatAreDownstreamInputs() { |
| auto declareModelOutputIsDownstreamInput = |
| [this](const SourceOperandIndex& sourceOperandIndex) { |
| const auto it = mOutputToDefiningExecutionStep.find(sourceOperandIndex); |
| CHECK(it != mOutputToDefiningExecutionStep.end()); |
| uint32_t stepIndex = it->second; |
| CHECK_LT(stepIndex, mSteps.size()); |
| VLOG(COMPILATION) |
| << "ExecutionStep(" << stepIndex |
| << ")->declareModelOutputIsDownstreamInput(mSourceOperandToOutputIndex.at" |
| << toString(sourceOperandIndex) << ")"; |
| CHECK(mSourceOperandToOutputIndex.find(sourceOperandIndex) != |
| mSourceOperandToOutputIndex.end()); |
| mSteps[stepIndex]->executionStep()->declareModelOutputIsDownstreamInput( |
| mSourceOperandToOutputIndex.at(sourceOperandIndex)); |
| }; |
| for (const auto& logicalStep : mSteps) { |
| if (const ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| for (const auto& output : step->getOutputsAsStepModelInputs()) { |
| SourceOperandIndex sourceOperandIndex(step->getSourceModelIndex(), output.first); |
| declareModelOutputIsDownstreamInput(sourceOperandIndex); |
| } |
| } |
| } |
| } |
| |
| void ExecutionPlan::CompoundBody::findTempsAsStepModelOutputs() { |
| auto recordAsOutputIfTemporary = [this](const SourceOperandIndex& sourceOperandIndex) { |
| const auto it = mTemporaryToDefiningExecutionStep.find(sourceOperandIndex); |
| if (it == mTemporaryToDefiningExecutionStep.end()) { |
| // The operand is not a temporary or is not defined by an |
| // ExecutionStep (i.e. it's an output of an IF or a WHILE). |
| // The latter case is handled by ExecutionPlan::makeController(). |
| return; |
| } |
| uint32_t stepIndex = it->second; |
| CHECK_LT(stepIndex, mSteps.size()); |
| mSteps[stepIndex]->executionStep()->recordTempAsStepModelOutput(sourceOperandIndex.second); |
| }; |
| for (const auto& logicalStep : mSteps) { |
| if (const ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| for (const auto& input : step->getTempsAsStepModelInputs()) { |
| SourceOperandIndex sourceOperandIndex(step->getSourceModelIndex(), input.first); |
| recordAsOutputIfTemporary(sourceOperandIndex); |
| } |
| } else if (const IfStep* step = logicalStep->tryIfStep()) { |
| recordAsOutputIfTemporary(step->conditionOperandIndex); |
| for (const SourceOperandIndex& sourceOperandIndex : step->outerInputOperands) { |
| recordAsOutputIfTemporary(sourceOperandIndex); |
| } |
| } else if (const WhileStep* step = logicalStep->tryWhileStep()) { |
| for (const SourceOperandIndex& sourceOperandIndex : step->outerInputOperands) { |
| recordAsOutputIfTemporary(sourceOperandIndex); |
| } |
| } else { |
| CHECK(logicalStep->isGoto()); |
| } |
| } |
| } |
| |
| void ExecutionStep::declareModelOutputIsDownstreamInput(uint32_t mainModelOutputIndex) { |
| VLOG(COMPILATION) << "ExecutionStep(" << mIndex << ")::declareModelOutputIsDownstreamInput(" |
| << mainModelOutputIndex << ")"; |
| const auto it = std::find(mOutputIndexStepModelToMainModel.begin(), |
| mOutputIndexStepModelToMainModel.end(), mainModelOutputIndex); |
| CHECK(it != mOutputIndexStepModelToMainModel.end()); |
| const uint32_t stepModelOutputIndex = it - mOutputIndexStepModelToMainModel.begin(); |
| CHECK(stepModelOutputIndex < mModelOutputs.size()); |
| mModelOutputsThatAreDownstreamInputs.insert(stepModelOutputIndex); |
| } |
| |
| void ExecutionStep::recordTempAsStepModelOutput(uint32_t stepOperandIndex) { |
| const auto it = mOperandMap.find(stepOperandIndex); |
| CHECK(it != mOperandMap.end()); |
| mTempsAsStepModelOutputs.emplace(stepOperandIndex, it->second); |
| } |
| |
| const ModelBuilder* ExecutionStep::getSourceModel() const { |
| return mPlan->getSourceModels().getModel(mSourceModelIndex); |
| } |
| |
| void ExecutionStep::logStepModel() const { |
| VLOG(COMPILATION) << "ExecutionStep::finishStepModel, step " << mIndex; |
| |
| auto logRemapEntry = [](std::string& toLog, const std::pair<uint32_t, uint32_t>& e) { |
| if (!toLog.empty()) { |
| toLog += ", "; |
| } |
| toLog += toString(e.first); |
| toLog += "->"; |
| toLog += toString(e.second); |
| }; |
| |
| auto logRemapVector = [&logRemapEntry](const char* name, const RemapVectorType& map) { |
| std::string toLog; |
| for (const auto& e : map) { |
| logRemapEntry(toLog, e); |
| } |
| VLOG(COMPILATION) << name << ": " << toLog; |
| }; |
| auto logRemapSet = [&logRemapEntry](const char* name, const StepModelOutputSetType& set) { |
| std::string toLog; |
| for (const auto& e : set) { |
| logRemapEntry(toLog, e); |
| } |
| VLOG(COMPILATION) << name << ": " << toLog; |
| }; |
| |
| logRemapVector("step model inputs", mStepModelInputs); |
| logRemapVector("step model outputs", mStepModelOutputs); |
| logRemapVector("model inputs", mModelInputs); |
| logRemapVector("model outputs", mModelOutputs); |
| logRemapVector("temps as step model inputs", mTempsAsStepModelInputs); |
| logRemapSet("temps as step model outputs", mTempsAsStepModelOutputs); |
| logRemapVector("outputs as step model inputs", mOutputsAsStepModelInputs); |
| } |
| |
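| // Examples (illustrative): a TENSOR_FLOAT32 operand with dimensions {} (rank |
| // not yet known) or {2, 0, 3} (an extent not yet known) has unknown size; a |
| // scalar INT32 operand with dimensions {} does not, since it is not a tensor |
| // type. |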
| static bool hasUnknownSize(const Operand& operand) { |
| if (operand.dimensions.empty()) { |
| return TypeManager::get()->isTensorType(operand.type); |
| } |
| for (const Dimension& dimension : operand.dimensions) { |
| if (dimension == 0) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| int ExecutionStep::finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize, |
| int32_t executionPreference, int32_t priority) { |
| CHECK(mDevice != nullptr); |
| |
| for (const auto& stepModelOutput : mTempsAsStepModelOutputs) { |
| const Operand& operand = mStepModel.getOperand(stepModelOutput.second); |
| if (hasUnknownSize(operand)) { |
| *hasOutputOfUnknownSize = true; |
| VLOG(COMPILATION) << "StepModelOutput (operand#" << stepModelOutput.first |
| << " of source graph) has unknown size: " << operand; |
| } |
| } |
| |
| mStepModel.relaxComputationFloat32toFloat16(mainModel->isComputationFloat32RelaxedToFloat16()); |
| |
| mStepModelInputs.insert(mStepModelInputs.end(), mModelInputs.begin(), mModelInputs.end()); |
| mStepModelInputs.insert(mStepModelInputs.end(), mTempsAsStepModelInputs.begin(), |
| mTempsAsStepModelInputs.end()); |
| mStepModelInputs.insert(mStepModelInputs.end(), mOutputsAsStepModelInputs.begin(), |
| mOutputsAsStepModelInputs.end()); |
| |
| mStepModelOutputs.insert(mStepModelOutputs.end(), mModelOutputs.begin(), mModelOutputs.end()); |
| mStepModelOutputs.insert(mStepModelOutputs.end(), mTempsAsStepModelOutputs.begin(), |
| mTempsAsStepModelOutputs.end()); |
| |
| // A step model with no inputs or no outputs is an invalid model. Note that we would like to |
| // attempt full CPU fallback if allowed, so we return OP_FAILED here rather than BAD_DATA from |
| // model validation. |
| if (hasNoInputsOrNoOutputs()) { |
| VLOG(COMPILATION) << "ExecutionStep::finishStepModel: finishing step model with no inputs " |
| "or no outputs"; |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| |
| if (mSourceModelIndex == kMainModelInSourceModels) { |
| std::map<uint32_t, uint32_t> mainModelOperandToInputIndex; |
| for (uint32_t i = 0, n = mainModel->inputCount(); i < n; ++i) { |
| mainModelOperandToInputIndex[mainModel->getInputOperandIndex(i)] = i; |
| } |
| std::map<uint32_t, uint32_t> mainModelOperandToOutputIndex; |
| for (uint32_t i = 0, n = mainModel->outputCount(); i < n; ++i) { |
| mainModelOperandToOutputIndex[mainModel->getOutputOperandIndex(i)] = i; |
| } |
| |
| // mInputIndexStepModelToMainModel is ordered by step model input index and relies on |
| // mModelInputs being the first inputs, as specified by mStepModelInputs. |
| mInputIndexStepModelToMainModel.resize(mModelInputs.size()); |
| std::transform(mModelInputs.begin(), mModelInputs.end(), |
| mInputIndexStepModelToMainModel.begin(), |
| [&mainModelOperandToInputIndex](auto& e) { |
| uint32_t sourceOperandIndex = e.first; |
| return mainModelOperandToInputIndex[sourceOperandIndex]; |
| }); |
| |
| // mOutputIndexStepModelToMainModel is ordered by step model output index and relies on |
| // mModelOutputs being the first outputs, as specified by mStepModelOutputs. |
| mOutputIndexStepModelToMainModel.resize(mModelOutputs.size()); |
| std::transform(mModelOutputs.begin(), mModelOutputs.end(), |
| mOutputIndexStepModelToMainModel.begin(), |
| [&mainModelOperandToOutputIndex](auto& e) { |
| uint32_t sourceOperandIndex = e.first; |
| return mainModelOperandToOutputIndex[sourceOperandIndex]; |
| }); |
| |
| // mOutputsAsStepModelInputsIndexToMainModel maps each entry of |
| // mOutputsAsStepModelInputs, in order, to the index of the corresponding |
| // main model output. |
| mOutputsAsStepModelInputsIndexToMainModel.resize(mOutputsAsStepModelInputs.size()); |
| std::transform(mOutputsAsStepModelInputs.begin(), mOutputsAsStepModelInputs.end(), |
| mOutputsAsStepModelInputsIndexToMainModel.begin(), |
| [&mainModelOperandToOutputIndex](auto& e) { |
| uint32_t sourceOperandIndex = e.first; |
| return mainModelOperandToOutputIndex[sourceOperandIndex]; |
| }); |
| } |
| |
| if (VLOG_IS_ON(COMPILATION)) { |
| logStepModel(); |
| } |
| |
| std::vector<uint32_t> inputs(mStepModelInputs.size()); |
| std::vector<uint32_t> outputs(mStepModelOutputs.size()); |
| std::transform(mStepModelInputs.begin(), mStepModelInputs.end(), inputs.begin(), |
| [](auto& e) { return e.second; }); |
| std::transform(mStepModelOutputs.begin(), mStepModelOutputs.end(), outputs.begin(), |
| [](auto& e) { return e.second; }); |
| NN_RETURN_IF_ERROR(mStepModel.identifyInputsAndOutputs(inputs.size(), inputs.data(), |
| outputs.size(), outputs.data())); |
| NN_RETURN_IF_ERROR(mStepModel.finish()); |
| |
| // TODO: Move compilation elsewhere? |
| VLOG(COMPILATION) << "ExecutionStep::finishStepModel, compilation on " << mDevice->getName(); |
| return compile(*mDevice, mStepModel, executionPreference, priority, {}, *mPlan->getCacheInfo(), |
| &mToken, {}, &mPreparedStepModel); |
| } |
| |
| void ExecutionStep::dump() const { |
| if (VLOG_IS_ON(COMPILATION)) { |
| VLOG(COMPILATION) << "Step#" << mIndex << ": execute on " << mDevice->getName(); |
| logModelToInfo(mStepModel.makeModel()); |
| } |
| } |
| |
| std::ostream& operator<<(std::ostream& os, const IfStep& step) { |
| return os << "Step#" << step.index << ": if " << toString(step.conditionOperandIndex) |
| << " then=" << step.thenStepIndex << " else=" << step.elseStepIndex; |
| } |
| |
| std::ostream& operator<<(std::ostream& os, const WhileStep& step) { |
| return os << "Step#" << step.index << ": while cond=" << step.condStepIndex |
| << " body=" << step.bodyStepIndex << " exit=" << step.exitStepIndex; |
| } |
| |
| std::ostream& operator<<(std::ostream& os, const GotoStep& step) { |
| return os << "Step#" << step.index << ": goto " << step.gotoStepIndex; |
| } |
| |
| void LogicalStep::dump() const { |
| if (VLOG_IS_ON(COMPILATION)) { |
| if (const IfStep* step = tryIfStep()) { |
| VLOG(COMPILATION) << *step; |
| } else if (const WhileStep* step = tryWhileStep()) { |
| VLOG(COMPILATION) << *step; |
| } else if (const GotoStep* step = tryGotoStep()) { |
| VLOG(COMPILATION) << *step; |
| } else { |
| executionStep()->dump(); |
| } |
| } |
| } |
| |
| int ExecutionPlan::CompoundBody::finish(const SourceModels* sourceModels, |
| int32_t executionPreference, int32_t priority, |
| const OptionalTimePoint& deadline, |
| const std::vector<TokenValuePair>& metadata, |
| int simulateFailureResultCode) { |
| CHECK(!mSuccessfulFinish); |
| CHECK(!deadline.has_value()); |
| CHECK(metadata.empty()); |
| |
| const ModelBuilder* mainModel = sourceModels->getModel(kMainModelInSourceModels); |
| |
| auto containsUnknownSize = [sourceModels](const std::vector<SourceOperandIndex>& operands) { |
| for (const auto& sourceOperandIndex : operands) { |
| const ModelBuilder* sourceModel = sourceModels->getModel(sourceOperandIndex.first); |
| const Operand& operand = sourceModel->getOperand(sourceOperandIndex.second); |
| if (hasUnknownSize(operand)) { |
| return true; |
| } |
| } |
| return false; |
| }; |
| |
| findTempsAsStepModelOutputs(); |
| for (const auto& logicalStep : mSteps) { |
| if (ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| bool stepHasDynamicTemporaries = false; |
| int n = step->finishStepModel(mainModel, &stepHasDynamicTemporaries, |
| executionPreference, priority); |
| if (stepHasDynamicTemporaries) { |
| mHasDynamicTemporaries = true; |
| if (!isCompliantVersion(kHalVersionV1_2ToApi.canonical, |
| step->getDevice()->getFeatureLevel())) { |
| // Until HAL 1.2, an Operand with lifetime SUBGRAPH_OUTPUT |
| // must have fully specified dimensions either in the |
| // Operand or in the RequestArgument. In the case of a |
| // dynamic temporary, we won't be able to supply fully |
| // specified dimensions in either. |
| VLOG(COMPILATION) |
| << "ExecutionPlan::CompoundBody::finish -- step#" << step->getIndex() |
| << " defines dynamic temporaries but is scheduled on pre-1.2 device " |
| << step->getDevice()->getName(); |
| if (n == ANEURALNETWORKS_NO_ERROR) { |
| n = ANEURALNETWORKS_OP_FAILED; |
| } |
| } |
| } |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| VLOG(COMPILATION) |
| << "ExecutionPlan::CompoundBody::finish -- finishStepModel failed"; |
| return n; |
| } |
| } else if (IfStep* step = logicalStep->tryIfStep()) { |
| // The partitioner does not support dynamic temporaries (b/132458982). |
| CHECK(!containsUnknownSize(step->outerInputOperands)); |
| CHECK(!containsUnknownSize(step->outerOutputOperands)); |
| // step->conditionOperandIndex has a static shape. See b/158557728. |
| CHECK(!containsUnknownSize(step->thenBranchInputOperands)); |
| CHECK(!containsUnknownSize(step->thenBranchOutputOperands)); |
| CHECK(!containsUnknownSize(step->elseBranchInputOperands)); |
| CHECK(!containsUnknownSize(step->elseBranchOutputOperands)); |
| } else if (WhileStep* step = logicalStep->tryWhileStep()) { |
| // The partitioner does not support dynamic temporaries (b/132458982). |
| CHECK(!containsUnknownSize(step->outerInputOperands)); |
| CHECK(!containsUnknownSize(step->outerOutputOperands)); |
| CHECK(!containsUnknownSize(step->condInputOperands)); |
| // step->condOutputOperand has a static shape. See b/158557728. |
| CHECK(!containsUnknownSize(step->bodyInputOperands)); |
| CHECK(!containsUnknownSize(step->bodyOutputOperands)); |
| } else { |
| CHECK(logicalStep->isGoto()); |
| } |
| } |
| |
| if (simulateFailureResultCode != ANEURALNETWORKS_NO_ERROR) { |
| VLOG(COMPILATION) << "ExecutionPlan::CompoundBody::finish: simulating failure, ResultCode " |
| << simulateFailureResultCode; |
| return simulateFailureResultCode; |
| } |
| |
| for (uint32_t i = 0, n = mainModel->inputCount(); i < n; ++i) { |
| SourceOperandIndex index(kMainModelInSourceModels, mainModel->getInputOperandIndex(i)); |
| mSourceOperandToInputIndex[index] = i; |
| } |
| for (uint32_t i = 0, n = mainModel->outputCount(); i < n; ++i) { |
| SourceOperandIndex index(kMainModelInSourceModels, mainModel->getOutputOperandIndex(i)); |
| mSourceOperandToOutputIndex[index] = i; |
| } |
| |
| findControlFlowBoundaryConstants(sourceModels); |
| findModelOutputsThatAreDownstreamInputs(); |
| findMemoryStepRoles(); |
| |
| mSuccessfulFinish = true; |
| LOG(INFO) << "ExecutionPlan::CompoundBody::finish: compilation finished successfully"; |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| void ExecutionPlan::CompoundBody::findControlFlowBoundaryConstants( |
| const SourceModels* sourceModels) { |
| auto handleBoundaryConstants = [this, |
| sourceModels](const SourceOperandIndex& sourceOperandIndex) { |
| const ModelBuilder* sourceModel = sourceModels->getModel(sourceOperandIndex.first); |
| const Operand& operand = sourceModel->getOperand(sourceOperandIndex.second); |
| const DataLocation& location = operand.location; |
| if (operand.lifetime == Operand::LifeTime::CONSTANT_COPY) { |
| mSourceOperandToBoundaryConstantCopy[sourceOperandIndex] = { |
| .buffer = sourceModel->getPointerToOperandValue(location.offset), |
| .length = location.length, |
| }; |
| } else if (operand.lifetime == Operand::LifeTime::POINTER) { |
| mSourceOperandToBoundaryConstantCopy[sourceOperandIndex] = { |
| .buffer = static_cast<const uint8_t*>(std::get<const void*>(location.pointer)), |
| .length = location.length, |
| }; |
| } else if (operand.lifetime == Operand::LifeTime::CONSTANT_REFERENCE) { |
| mSourceOperandToBoundaryConstantReference[sourceOperandIndex] = { |
| .memory = sourceModel->getMemories()[location.poolIndex], |
| .offset = location.offset, |
| .length = location.length, |
| }; |
| } |
| }; |
| for (const auto& logicalStep : mSteps) { |
| if (const IfStep* step = logicalStep->tryIfStep()) { |
| handleBoundaryConstants(step->conditionOperandIndex); |
| for (const auto& sourceOperandIndex : step->outerInputOperands) { |
| handleBoundaryConstants(sourceOperandIndex); |
| } |
| } else if (const WhileStep* step = logicalStep->tryWhileStep()) { |
| for (const auto& sourceOperandIndex : step->outerInputOperands) { |
| handleBoundaryConstants(sourceOperandIndex); |
| } |
| } |
| } |
| } |
| |
| void ExecutionPlan::CompoundBody::findMemoryStepRoles() { |
| mSourceOperandToStepRoles = StepRoleAnalyzer::analyze([this](StepRoleAnalyzer& analyzer) { |
| for (const auto& logicalStep : mSteps) { |
| if (const ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| const auto& stepModelInputs = step->getStepModelInputs(); |
| for (uint32_t i = 0; i < stepModelInputs.size(); i++) { |
| const auto& [sourceIndex, stepIndex] = stepModelInputs[i]; |
| analyzer.addRole(*step, sourceIndex, IOType::INPUT, i); |
| } |
| const auto& stepModelOutputs = step->getStepModelOutputs(); |
| for (uint32_t i = 0; i < stepModelOutputs.size(); i++) { |
| const auto& [sourceIndex, stepIndex] = stepModelOutputs[i]; |
| analyzer.addRole(*step, sourceIndex, IOType::OUTPUT, i); |
| } |
| } else if (const IfStep* step = logicalStep->tryIfStep()) { |
| // See ExecutionPlan::nextCompound(const IfStep*, ...). |
| // |
| // For interpreted IF operation, the outer input memories may be directly used by |
| // the SUBGRAPH_INPUTs of the then and else model. |
| CHECK_EQ(step->thenBranchInputOperands.size(), step->outerInputOperands.size()); |
| CHECK_EQ(step->elseBranchInputOperands.size(), step->outerInputOperands.size()); |
| for (uint32_t i = 0; i < step->outerInputOperands.size(); i++) { |
| analyzer.setUsedBy(step->outerInputOperands[i], |
| step->thenBranchInputOperands[i]); |
| analyzer.setUsedBy(step->outerInputOperands[i], |
| step->elseBranchInputOperands[i]); |
| } |
| // For interpreted IF operation, the outer output memories may be directly used by |
| // the SUBGRAPH_OUTPUTs of the then and else model. |
| CHECK_EQ(step->thenBranchOutputOperands.size(), step->outerOutputOperands.size()); |
| CHECK_EQ(step->elseBranchOutputOperands.size(), step->outerOutputOperands.size()); |
| for (uint32_t i = 0; i < step->outerOutputOperands.size(); i++) { |
| analyzer.setUsedBy(step->outerOutputOperands[i], |
| step->thenBranchOutputOperands[i]); |
| analyzer.setUsedBy(step->outerOutputOperands[i], |
| step->elseBranchOutputOperands[i]); |
| } |
| } else if (const WhileStep* step = logicalStep->tryWhileStep()) { |
| // See ExecutionPlan::nextCompound(const WhileStep*, ...). |
| // |
| // For interpreted WHILE operation, the following memories are involved: |
| // a. the outer input memories to the WHILE operation |
| // b. the outer output memories to the WHILE operation |
| // c. the output memory of the condition model |
| // d. one set of output memories of the body model |
| // e. another set of output memories of the body model |
| // |
| // The memories are used in the following ways: |
| // |
| // - Condition model: |
| // * In the first iteration: inputs use (a); output uses (c) |
| // * In the following iterations: inputs use (d) or (e) for input-output and |
| // state-only operands, and (a) for input-only operands; output uses (c) |
| // |
| // - Body model: |
| // * In all iterations: inputs are the same as the condition model; outputs use |
| // (d) or (e) |
| // |
| // Therefore, we configure the analyzer with the following used-by relationships: |
| // - The outer input memories (a) may be directly used by the SUBGRAPH_INPUTs of |
| // the condition model for all inputs in the first iteration, as well as the |
| // input-only operands in the following iterations. |
| CHECK_EQ(step->condInputOperands.size(), step->outerInputOperands.size()); |
| for (uint32_t i = 0; i < step->outerInputOperands.size(); i++) { |
| analyzer.setUsedBy(step->outerInputOperands[i], step->condInputOperands[i]); |
| } |
| // - The output memories of the body model (d) and (e) may be directly used by the |
| // SUBGRAPH_INPUTs of the condition model for input-output and state-only operands |
| // after the first iteration. |
| CHECK_GE(step->condInputOperands.size(), step->bodyOutputOperands.size()); |
| for (uint32_t i = 0; i < step->bodyOutputOperands.size(); i++) { |
| analyzer.setUsedBy(step->bodyOutputOperands[i], step->condInputOperands[i]); |
| } |
| // - The SUBGRAPH_INPUTs of the condition model are directly used by the |
| // SUBGRAPH_INPUTs of the body model for all inputs in all iterations. |
| CHECK_EQ(step->bodyInputOperands.size(), step->condInputOperands.size()); |
| for (uint32_t i = 0; i < step->bodyInputOperands.size(); i++) { |
| analyzer.setUsedBy(step->condInputOperands[i], step->bodyInputOperands[i]); |
| } |
| } else if (logicalStep->isGoto()) { |
| // Nothing to do. |
| } else { |
| CHECK(false) << "Unexpected LogicalStep kind"; |
| } |
| } |
| }); |
| } |
| |
| int ExecutionPlan::SimpleBody::finish(const SourceModels*, int32_t executionPreference, |
| int32_t priority, const OptionalTimePoint& deadline, |
| const std::vector<TokenValuePair>& metadata, |
| int simulateFailureResultCode) { |
| CHECK(!mSuccessfulFinish); |
| CHECK(mDevice != nullptr); |
| VLOG(COMPILATION) << "ExecutionPlan::SimpleBody::finish, compilation"; |
| int n = compile(*mDevice, *mModel, executionPreference, priority, deadline, *mCacheInfo, |
| &mToken, metadata, &mPreparedModel); |
| if (n == ANEURALNETWORKS_NO_ERROR && simulateFailureResultCode != ANEURALNETWORKS_NO_ERROR) { |
| VLOG(COMPILATION) << "ExecutionPlan::SimpleBody::finish: simulating failure, ResultCode " |
| << simulateFailureResultCode; |
| n = simulateFailureResultCode; |
| } |
| mSuccessfulFinish = (n == ANEURALNETWORKS_NO_ERROR); |
| if (mSuccessfulFinish) { |
| LOG(INFO) << "ExecutionPlan::SimpleBody::finish: compilation finished successfully on " |
| << mDevice->getName(); |
| } |
| return n; |
| } |
| |
| int ExecutionPlan::finish(int32_t executionPreference, int32_t priority, |
| const OptionalTimePoint& deadline, |
| const std::vector<TokenValuePair>& metadata, |
| int simulateFailureResultCode) { |
| CHECK(mBody != nullptr); |
| return mBody->finish(&getSourceModels(), executionPreference, priority, deadline, metadata, |
| simulateFailureResultCode); |
| } |
| |
| ExecutionPlan::Controller::Controller( |
| const ExecutionPlan* plan, ExecutionBuilder* executionBuilder, |
| const BurstBuilder* burstBuilder, uint32_t totalSizeOfTemporaries, |
| std::map<SourceOperandIndex, StaticTemporaryLocation> sourceOperandToLocationOfTemporary, |
| std::map<SourceOperandIndex, StaticTemporaryLocation> sourceOperandToLocationOfTemporary2, |
| std::map<SourceOperandIndex, uint32_t> sourceOperandToInputIndex, |
| std::map<SourceOperandIndex, uint32_t> sourceOperandToOutputIndex, |
| const std::map<SourceOperandIndex, ConstantCopyLocation>& sourceOperandToConstantCopy, |
| std::map<SourceOperandIndex, ConstantReferenceLocation> sourceOperandToConstantReference, |
| DynamicTemporaries dynamicTemporaries) |
| : mPlan(plan), |
| mExecutionBuilder(executionBuilder), |
| mBurstBuilder(burstBuilder), |
| mSourceOperandToLocationOfTemporary(std::move(sourceOperandToLocationOfTemporary)), |
| mSourceOperandToLocationOfTemporary2(std::move(sourceOperandToLocationOfTemporary2)), |
| mSourceOperandToInputIndex(std::move(sourceOperandToInputIndex)), |
| mSourceOperandToOutputIndex(std::move(sourceOperandToOutputIndex)), |
| mSourceOperandToConstantReference(std::move(sourceOperandToConstantReference)), |
| mDynamicTemporaries(std::move(dynamicTemporaries)), |
| mNextStepIndex(0), |
| mFallbackNextStepIndex(kBadStepIndex), |
| mLastStepSyncFd(-1) { |
| if (totalSizeOfTemporaries == 0) { |
| return; |
| } |
| int n; |
| std::tie(n, mTemporaries) = MemoryAshmem::create(totalSizeOfTemporaries); |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| LOG(ERROR) << "ExecutionPlan::Controller failed to allocate temporaries"; |
| mNextStepIndex = kBadStepIndex; |
| return; // do not attempt the constant copies below with a null mTemporaries |
| } |
| for (const auto& [sourceOperandIndex, location] : sourceOperandToConstantCopy) { |
| memcpy(mTemporaries->getPointer() + |
| mSourceOperandToLocationOfTemporary[sourceOperandIndex].offset, |
| location.buffer, location.length); |
| } |
| } |
| |
| // Attempt to create a burst object for each PreparedModel/Partition. If the |
| // burst controller object cannot be made, return a nullptr in its place to |
| // indicate the regular execution path should be used. This can occur either |
| // because the PreparedModel was nullptr (the CPU was the best choice), or because the |
| // IPreparedModel was of insufficient version or failed to configure the burst. |
| std::vector<SharedBurst> ExecutionPlan::makeBursts() const { |
| switch (mState) { |
| // burst object for each partition in the compound case |
| case COMPOUND: { |
| std::vector<SharedBurst> bursts; |
| bursts.reserve(compound()->mSteps.size()); |
| for (const auto& logicalStep : compound()->mSteps) { |
| if (!logicalStep->isExecution()) { |
| bursts.push_back(nullptr); |
| continue; |
| } |
| if (const auto preparedModel = |
| logicalStep->executionStep()->getPreparedStepModel()) { |
| const auto maybeBurst = preparedModel->configureExecutionBurst(); |
| if (!maybeBurst.has_value()) { |
| LOG(ERROR) << "preparedModel->configureExecutionBurst() failed with " |
| << maybeBurst.error().code << ": " << maybeBurst.error().message; |
| } |
| bursts.push_back(maybeBurst.value_or(nullptr)); |
| } else { |
| bursts.push_back(nullptr); |
| } |
| } |
| return bursts; |
| } |
| // single burst object for the simple case |
| case SIMPLE: { |
| std::vector<SharedBurst> burst; |
| auto simpleBody = simple(); |
| if (const auto preparedModel = simpleBody->mPreparedModel) { |
| const auto maybeBurst = preparedModel->configureExecutionBurst(); |
| if (!maybeBurst.has_value()) { |
| LOG(ERROR) << "preparedModel->configureExecutionBurst() failed with " |
| << maybeBurst.error().code << ": " << maybeBurst.error().message; |
| } |
| burst.push_back(maybeBurst.value_or(nullptr)); |
| } else { |
| burst.push_back(nullptr); |
| } |
| return burst; |
| } |
| // no burst objects made |
| default: |
| return {}; |
| } |
| } |
| |
| std::shared_ptr<ExecutionPlan::Controller> ExecutionPlan::makeController( |
| ExecutionBuilder* executionBuilder, const BurstBuilder* burstBuilder) const { |
| CHECK(isValid()); |
| CHECK(mState != SIMPLE); |
| const auto* body = compound(); |
| // Create the layout for a RuntimeMemory object big enough to hold |
| // - every partition boundary TEMPORARY operand that is not a dynamic temporary, and |
| // - buffers required by the control flow implementation. |
| // |
| // TODO: Rethink this approach for managing temporaries. Some |
| // alternatives: |
| // |
| // 1) Adopt a memory layout scheme analogous to stack allocation, |
| // where objects of non-overlapping lifetime can occupy the same |
| // storage. We would still have a single Memory object in this |
| // case. |
| // |
| // 2) Do something like what CpuExecutor does, and do allocations |
| // and deallocations on the fly (during execution) before first |
| // reference and after last reference, respectively. This would |
| // mean having one Memory object per TEMPORARY; or, in a more |
| // complicated implementation, one Memory object per set of |
| // temporaries that have the same lifetime. Note that the Android |
| // system limits the number of shared memory objects, which are |
| // what our Memory objects represent. |
| // |
| uint32_t totalSizeOfTemporaries = 0; |
| // The mapTemporary lambda below has two modes of operation: |
| // 1. When lifetime is TEMPORARY_VARIABLE, we allocate memory for |
| // TEMPORARY_VARIABLE source operands that are not dynamic temporaries, |
| // skip TEMPORARY_VARIABLE source operands that are dynamic temporaries, |
| // skip SUBGRAPH_OUTPUT source operands, and panic if we see a source |
| // operand of another lifetime. |
| // 2. When lifetime is SUBGRAPH_OUTPUT, we allocate memory for |
| // SUBGRAPH_OUTPUT source operands and panic if we see a source operand |
| // of another lifetime. |
| auto mapTemporary = [body, executionBuilder, &totalSizeOfTemporaries]( |
| const SourceOperandIndex& sourceOperandIndex, |
| std::map<SourceOperandIndex, StaticTemporaryLocation>* |
| sourceOperandToLocationOfTemporary, |
| Operand::LifeTime lifetime = |
| Operand::LifeTime::TEMPORARY_VARIABLE) { |
| CHECK(lifetime == Operand::LifeTime::TEMPORARY_VARIABLE || |
| lifetime == Operand::LifeTime::SUBGRAPH_OUTPUT); |
| const Operand& sourceOperand = executionBuilder->getSourceOperand(sourceOperandIndex); |
| if (lifetime == Operand::LifeTime::TEMPORARY_VARIABLE && |
| sourceOperand.lifetime == Operand::LifeTime::SUBGRAPH_OUTPUT) { |
| // See the caller for explanation. |
| return; |
| } |
| CHECK_EQ(sourceOperand.lifetime, lifetime); |
| const uint32_t size = TypeManager::get()->getSizeOfData(sourceOperand); |
| if (size != 0u) { |
| const auto memoryPreference = |
| body->getMemoryPreferenceOfSourceOperand(sourceOperandIndex); |
| const auto loc = addTemporary(&totalSizeOfTemporaries, size, memoryPreference.alignment, |
| memoryPreference.padding); |
| auto [_, isNew] = sourceOperandToLocationOfTemporary->emplace(sourceOperandIndex, loc); |
| CHECK(isNew); |
| VLOG(EXECUTION) << "temp: operand " << toString(sourceOperandIndex) |
| << " offset = " << loc.offset << " paddedLength = " << loc.paddedLength; |
| } else { |
| // Unknown size, hence dynamic temporary. The mapping will |
| // be established elsewhere (DynamicTemporaries::allocate()). |
| CHECK_EQ(lifetime, Operand::LifeTime::TEMPORARY_VARIABLE); |
| CHECK_EQ(sourceOperand.lifetime, Operand::LifeTime::TEMPORARY_VARIABLE); |
| } |
| }; |
| std::map<SourceOperandIndex, StaticTemporaryLocation> sourceOperandToLocationOfTemporary; |
| std::map<SourceOperandIndex, StaticTemporaryLocation> sourceOperandToLocationOfTemporary2; |
| for (const auto& logicalStep : body->mSteps) { |
| if (const ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| // Allocate memory for ExecutionStep temporary outputs that are |
| // inputs to other steps, as determined by |
| // ExecutionPlan::CompoundBody::findTempsAsStepModelOutputs(). |
| // |
| // We don't allocate memory for step model output operands with |
| // source operand lifetime SUBGRAPH_OUTPUT because they will be |
| // - managed by the client (main model outputs), |
| // - assigned a location of another operand (when this step model |
| // output is a branch model output of an IF; see |
| // ExecutionPlan::nextCompound(const IfStep*, ...)), or |
| // - allocated by a WHILE (when this step model output |
| // is a condition or body model output of a WHILE; see the |
| // step->bodyOutputOperands and step->condOutputOperand handling |
| // below). |
| for (const auto& output : step->getTempsAsStepModelOutputs()) { |
| mapTemporary(SourceOperandIndex(step->getSourceModelIndex(), output.first), |
| &sourceOperandToLocationOfTemporary); |
| } |
| } else if (const IfStep* step = logicalStep->tryIfStep()) { |
            // Allocate memory for all temporary outputs of an IfStep because
            // they are going to be written to by a branch model. We don't
            // perform unused output operand optimization for referenced models.
| // |
| // We don't allocate memory for branch output operands because they |
| // use the same location as the corresponding outer output operands, |
| // as established in ExecutionPlan::nextCompound(const IfStep*, ...) |
| // |
| // We don't allocate memory for outer output operands with source |
| // operand lifetime SUBGRAPH_OUTPUT because they will be |
| // - managed by the client (main model outputs), |
| // - assigned a location of another operand (when this IF outer |
| // output is a branch model output of another IF; see |
| // ExecutionPlan::nextCompound(const IfStep*, ...)), or |
| // - allocated by a WHILE (when this IF outer output |
| // is a condition or body model output of a WHILE; see the |
| // step->bodyOutputOperands and step->condOutputOperand handling |
| // below). |
| for (const auto& sourceOperandIndex : step->outerOutputOperands) { |
| mapTemporary(sourceOperandIndex, &sourceOperandToLocationOfTemporary); |
| } |
| } else if (const WhileStep* step = logicalStep->tryWhileStep()) { |
            // Allocate memory for all temporary outputs of a WhileStep because
| // they are going to be written to by the WHILE loop. |
| // |
| // We don't allocate memory for outer output operands with source |
| // operand lifetime SUBGRAPH_OUTPUT because they will be |
| // - managed by the client (main model outputs), |
| // - assigned a location of another operand (when this WHILE outer |
| // output is a branch model output of an IF; see |
| // ExecutionPlan::nextCompound(const IfStep*, ...)), or |
| // - allocated by another WHILE (when this WHILE outer output |
| // is a condition or body model output of another WHILE; see the |
| // step->bodyOutputOperands and step->condOutputOperand handling |
| // below). |
| for (const auto& sourceOperandIndex : step->outerOutputOperands) { |
| mapTemporary(sourceOperandIndex, &sourceOperandToLocationOfTemporary); |
| } |
| // Allocate memory for body model outputs. Note that we could use |
| // the outer output operand memory instead but we currently don't do |
| // so (b/148206073). |
| for (const auto& sourceOperandIndex : step->bodyOutputOperands) { |
| mapTemporary(sourceOperandIndex, &sourceOperandToLocationOfTemporary, |
| Operand::LifeTime::SUBGRAPH_OUTPUT); |
| // Allocate another set of temporaries for double buffering. |
| mapTemporary(sourceOperandIndex, &sourceOperandToLocationOfTemporary2, |
| Operand::LifeTime::SUBGRAPH_OUTPUT); |
| } |
| // Allocate memory for condition model output. |
| // TODO: Share one condition output memory region between all loops. |
| mapTemporary(step->condOutputOperand, &sourceOperandToLocationOfTemporary, |
| Operand::LifeTime::SUBGRAPH_OUTPUT); |
| } else { |
| CHECK(logicalStep->isGoto()); |
| } |
| } |
| // Allocate temporary memory for boundary CONSTANT_COPY operands. |
| for (const auto& [sourceOperandIndex, location] : body->mSourceOperandToBoundaryConstantCopy) { |
| const auto memoryPreference = body->getMemoryPreferenceOfSourceOperand(sourceOperandIndex); |
| const auto loc = addTemporary(&totalSizeOfTemporaries, location.length, |
| memoryPreference.alignment, memoryPreference.padding); |
| sourceOperandToLocationOfTemporary.emplace(sourceOperandIndex, loc); |
| VLOG(EXECUTION) << "temp (boundary constant): operand " << toString(sourceOperandIndex) |
| << " offset = " << loc.offset << " paddedLength = " << loc.paddedLength; |
| } |
| // Collect dynamic temporaries. |
| // TODO(b/157236079): Move some or all of this work to compilation time? |
| DynamicTemporaries dynamicTemporaries; |
| const TypeManager* typeManager = TypeManager::get(); |
| forEachDynamicTemporary([body, typeManager, &dynamicTemporaries]( |
| SourceOperandIndex sourceOperandIndex, |
| const Operand& sourceOperand, uint32_t definingStepIndex) { |
| CHECK(typeManager->isTensorType(sourceOperand.type)); |
| const auto memoryPreference = body->getMemoryPreferenceOfSourceOperand(sourceOperandIndex); |
        // TODO: For now we guess an initial size equal to the element
        // size, which is overly conservative.
| const uint32_t size = typeManager->getSizeOfData(sourceOperand.type, {1}); |
| dynamicTemporaries.declare(sourceOperandIndex, definingStepIndex, sourceOperand.dimensions, |
| size, memoryPreference.alignment, memoryPreference.padding); |
| }); |
| dynamicTemporaries.endDeclarations(); |
| dynamicTemporaries.vlogDump("finished declarations"); |
| |
| return std::shared_ptr<Controller>(new Controller( |
| this, executionBuilder, burstBuilder, totalSizeOfTemporaries, |
| std::move(sourceOperandToLocationOfTemporary), |
| std::move(sourceOperandToLocationOfTemporary2), body->mSourceOperandToInputIndex, |
| body->mSourceOperandToOutputIndex, body->mSourceOperandToBoundaryConstantCopy, |
| body->mSourceOperandToBoundaryConstantReference, std::move(dynamicTemporaries))); |
| } |
| |
| // TODO: Find a better way to provide this functionality. |
| int ExecutionPlan::fallback(std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes) const { |
| *executor = nullptr; |
| if (burstController != nullptr) { |
| *burstController = nullptr; |
| } |
| |
| VLOG(EXECUTION) << "ExecutionPlan::fallback(" << SHOW_IF_DEBUG(controller << ", " << executor) |
| << "): mFallbackNextStepIndex = " << controller->mFallbackNextStepIndex; |
| |
| if (controller->mFallbackNextStepIndex == Controller::kBadStepIndex) { |
| // We haven't called next(). |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| |
| if (controller->mNextStepIndex == Controller::kBadStepIndex) { |
| // The last call to next() did not produce an executor. |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| |
| controller->mNextStepIndex = controller->mFallbackNextStepIndex; |
| return next(controller, executor, burstController, mainModelOutputShapes); |
| } |
| |
| ExecutionPlan::Buffer::Buffer(void* pointer, uint32_t size) |
| : mInfo(RunTimePoolInfo::createFromExistingBuffer(static_cast<uint8_t*>(pointer), size)), |
| mOffset(0) {} |
| |
| ExecutionPlan::Buffer::Buffer(RunTimePoolInfo info, uint32_t offset) |
| : mInfo(std::move(info)), mOffset(offset) {} |
| |
| void* ExecutionPlan::Buffer::getPointer() const { |
| return mInfo.getBuffer() + mOffset; |
| } |
| |
| uint32_t ExecutionPlan::Buffer::getSize() const { |
| return mInfo.getSize() - mOffset; |
| } |
| |
| void ExecutionPlan::Buffer::flush() const { |
| mInfo.flush(); |
| } |
| |
| std::optional<ExecutionPlan::Buffer> ExecutionPlan::getBufferFromModelArgumentInfo( |
| const ModelArgumentInfo& info, const ExecutionBuilder* executionBuilder) const { |
| switch (info.state()) { |
| case ModelArgumentInfo::POINTER: { |
| return Buffer(info.buffer(), info.length()); |
| } break; |
| case ModelArgumentInfo::MEMORY: { |
| if (std::optional<RunTimePoolInfo> poolInfo = |
| executionBuilder->getRunTimePoolInfo(info.locationAndLength().poolIndex)) { |
| return Buffer(*poolInfo, info.locationAndLength().offset); |
| } else { |
| LOG(ERROR) << "Unable to map operand memory pool"; |
| return std::nullopt; |
| } |
| } break; |
| case ModelArgumentInfo::HAS_NO_VALUE: { |
| LOG(ERROR) << "Attempting to read an operand that has no value"; |
| return std::nullopt; |
| } break; |
| default: { |
| LOG(ERROR) << "Unexpected operand memory state: " << static_cast<int>(info.state()); |
| return std::nullopt; |
| } break; |
| } |
| } |
| |
| std::optional<ExecutionPlan::Buffer> ExecutionPlan::getBuffer( |
| std::shared_ptr<Controller> controller, SourceOperandIndex operandIndex) const { |
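    // Resolve the operand's location by consulting each map in turn: static
    // temporaries, main model inputs, main model outputs, and finally boundary
    // constant references.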
| const auto& sourceOperandToLocationOfTemporary = |
| controller->mSourceOperandToLocationOfTemporary; |
| const auto& sourceOperandToInputIndex = controller->mSourceOperandToInputIndex; |
| const auto& sourceOperandToOutputIndex = controller->mSourceOperandToOutputIndex; |
| const auto& sourceOperandToConstantReference = controller->mSourceOperandToConstantReference; |
| if (auto it = sourceOperandToLocationOfTemporary.find(operandIndex); |
| it != sourceOperandToLocationOfTemporary.end()) { |
| const uint32_t offset = it->second.offset; |
| const std::unique_ptr<MemoryAshmem>& memory = controller->mTemporaries; |
| return Buffer(memory->getPointer() + offset, memory->getSize() - offset); |
| } else if (auto it = sourceOperandToInputIndex.find(operandIndex); |
| it != sourceOperandToInputIndex.end()) { |
| const ModelArgumentInfo& info = controller->mExecutionBuilder->getInputInfo(it->second); |
| return getBufferFromModelArgumentInfo(info, controller->mExecutionBuilder); |
| } else if (auto it = sourceOperandToOutputIndex.find(operandIndex); |
| it != sourceOperandToOutputIndex.end()) { |
| const ModelArgumentInfo& info = controller->mExecutionBuilder->getOutputInfo(it->second); |
| return getBufferFromModelArgumentInfo(info, controller->mExecutionBuilder); |
| } else if (auto it = sourceOperandToConstantReference.find(operandIndex); |
| it != sourceOperandToConstantReference.end()) { |
| const ConstantReferenceLocation& location = it->second; |
| const std::optional<RunTimePoolInfo> info = location.memory->getRunTimePoolInfo(); |
| if (info == std::nullopt) { |
| return std::nullopt; |
| } |
| return Buffer(info->getBuffer() + location.offset, location.length); |
| } |
| return std::nullopt; |
| } |
| |
| int ExecutionPlan::readConditionValue(std::shared_ptr<Controller> controller, |
| SourceOperandIndex operandIndex, bool* value) const { |
| std::optional<ExecutionPlan::Buffer> buffer = getBuffer(controller, operandIndex); |
| if (buffer == std::nullopt) { |
| LOG(ERROR) << "Unable to read operand " << toString(operandIndex); |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| CHECK_GE(buffer->getSize(), sizeof(bool8)); |
| bool8 value8 = *static_cast<bool8*>(buffer->getPointer()); |
| *value = static_cast<bool>(value8); |
| VLOG(EXECUTION) << "readConditionValue: " << *value; |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| int ExecutionPlan::next(std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes, |
| int syncFdOfLastStep) const { |
| CHECK(mState == COMPOUND); |
| |
| controller->mLastStepSyncFd = syncFdOfLastStep; |
| *executor = nullptr; |
| if (burstController != nullptr) { |
| *burstController = nullptr; |
| } |
| |
| VLOG(EXECUTION) << "ExecutionPlan::next(" << SHOW_IF_DEBUG(controller << ", " << executor) |
| << "): mNextStepIndex = " << controller->mNextStepIndex; |
| |
| if (controller->mNextStepIndex == Controller::kBadStepIndex) { |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| |
| return nextCompound(controller, executor, burstController, mainModelOutputShapes); |
| } |
| |
| int ExecutionPlan::nextCompound(std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, |
| SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes) const { |
| if (controller->mNextStepIndex == Controller::kBadStepIndex) { |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| |
| auto compoundBody = compound(); |
| if (controller->mNextStepIndex == compoundBody->mSteps.size()) { |
| controller->mNextStepIndex = Controller::kBadStepIndex; // end |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| const auto& logicalStep = compoundBody->mSteps[controller->mNextStepIndex]; |
| if (const IfStep* step = logicalStep->tryIfStep()) { |
| return nextCompound(step, controller, executor, burstController, mainModelOutputShapes); |
| } else if (const WhileStep* step = logicalStep->tryWhileStep()) { |
| return nextCompound(step, controller, executor, burstController, mainModelOutputShapes); |
| } else if (const GotoStep* step = logicalStep->tryGotoStep()) { |
| return nextCompound(step, controller, executor, burstController, mainModelOutputShapes); |
| } else if (const ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| return nextCompound(step, controller, executor, burstController, mainModelOutputShapes); |
| } else { |
| CHECK(false) << "Unknown step variant"; |
| return ANEURALNETWORKS_BAD_STATE; |
| } |
| } |
| |
| int ExecutionPlan::nextCompound(const ExecutionStep* step, std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, |
| SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes) const { |
| VLOG(EXECUTION) << "next: Step#" << controller->mNextStepIndex << ": execute on " |
| << step->getDevice()->getName(); |
| |
| NN_RETURN_IF_ERROR(controller->mDynamicTemporaries.allocate(step->getIndex())); |
| controller->mDynamicTemporaries.vlogDump("finished allocating for a step"); |
| |
| *executor = std::make_shared<StepExecutor>(controller->mExecutionBuilder, step->getStepModel(), |
| step->getDevice(), step->getPreparedStepModel(), |
| /*reusable=*/false, step, |
| &controller->mDynamicTemporaries); |
| |
| step->mapInputsAndOutputs( |
| *executor, mainModelOutputShapes, controller->mTemporaries.get(), |
| controller->mSourceOperandToLocationOfTemporary, controller->mDynamicTemporaries, |
| controller->mSourceOperandToInputIndex, controller->mSourceOperandToOutputIndex, |
| controller->mSourceOperandToConstantReference); |
| if (burstController != nullptr && controller->mBurstBuilder != nullptr) { |
| *burstController = controller->mBurstBuilder->getControllerAt(controller->mNextStepIndex); |
| } |
| |
| controller->mFallbackNextStepIndex = controller->mNextStepIndex; |
| controller->mNextStepIndex++; |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
// The first argument is the "source" operand, the second is the "destination".
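// For example, ExecutionPlan::nextCompound(const IfStep*, ...) calls
// setInput(step->outerInputOperands[i], branchInputOperands[i]) to redirect
// each branch model input to the location of the corresponding IF input.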
| void ExecutionPlan::Controller::setInput(const SourceOperandIndex& outerOperand, |
| const SourceOperandIndex& innerOperand) { |
| VLOG(EXECUTION) << "mapping input " << toString(innerOperand) << " from " |
| << toString(outerOperand); |
| #ifdef NN_DEBUGGABLE |
| CHECK_LE(mSourceOperandToLocationOfTemporary.count(innerOperand) + |
| mSourceOperandToInputIndex.count(innerOperand) + |
| mSourceOperandToOutputIndex.count(innerOperand) + |
| mSourceOperandToConstantReference.count(innerOperand), |
| 1u); |
| #endif |
| mSourceOperandToLocationOfTemporary.erase(innerOperand); |
| mSourceOperandToInputIndex.erase(innerOperand); |
| mSourceOperandToOutputIndex.erase(innerOperand); |
| mSourceOperandToConstantReference.erase(innerOperand); |
| if (auto it = mSourceOperandToLocationOfTemporary.find(outerOperand); |
| it != mSourceOperandToLocationOfTemporary.end()) { |
| mSourceOperandToLocationOfTemporary.emplace(innerOperand, it->second); |
| } else if (auto it = mSourceOperandToInputIndex.find(outerOperand); |
| it != mSourceOperandToInputIndex.end()) { |
| mSourceOperandToInputIndex.emplace(innerOperand, it->second); |
| } else if (auto it = mSourceOperandToOutputIndex.find(outerOperand); |
| it != mSourceOperandToOutputIndex.end()) { |
| mSourceOperandToOutputIndex.emplace(innerOperand, it->second); |
| } else if (auto it = mSourceOperandToConstantReference.find(outerOperand); |
| it != mSourceOperandToConstantReference.end()) { |
| mSourceOperandToConstantReference.emplace(innerOperand, it->second); |
| } else { |
| CHECK(false) << "Cannot set step model input operand " << toString(innerOperand) |
| << " from operand " << toString(outerOperand); |
| } |
| } |
| |
// The first argument is the "source" operand, the second is the "destination".
| void ExecutionPlan::Controller::setOutput(const SourceOperandIndex& outerOperand, |
| const SourceOperandIndex& innerOperand) { |
| VLOG(EXECUTION) << "mapping output " << toString(innerOperand) << " from " |
| << toString(outerOperand); |
| #ifdef NN_DEBUGGABLE |
| CHECK_LE(mSourceOperandToLocationOfTemporary.count(innerOperand) + |
| mSourceOperandToOutputIndex.count(innerOperand), |
| 1u); |
| #endif |
| mSourceOperandToLocationOfTemporary.erase(innerOperand); |
| mSourceOperandToOutputIndex.erase(innerOperand); |
| if (auto it = mSourceOperandToLocationOfTemporary.find(outerOperand); |
| it != mSourceOperandToLocationOfTemporary.end()) { |
| mSourceOperandToLocationOfTemporary.emplace(innerOperand, it->second); |
| } else if (auto it = mSourceOperandToOutputIndex.find(outerOperand); |
| it != mSourceOperandToOutputIndex.end()) { |
| mSourceOperandToOutputIndex.emplace(innerOperand, it->second); |
| } else { |
| CHECK(false) << "Cannot set step model output operand " << toString(innerOperand) |
| << " from operand " << toString(outerOperand); |
| } |
| } |
| |
| int ExecutionPlan::Controller::waitForLastStepSyncFence() const { |
| if (mLastStepSyncFd == -1) { |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| VLOG(EXECUTION) << "wait for mLastStepSyncFd " << mLastStepSyncFd; |
| auto r = syncWait(mLastStepSyncFd, -1); |
| int n = ANEURALNETWORKS_NO_ERROR; |
| if (r != FenceState::SIGNALED) { |
| LOG(ERROR) << "syncWait failed, fd: " << mLastStepSyncFd; |
| n = ANEURALNETWORKS_OP_FAILED; |
| } |
| return n; |
| } |
| |
| // Invocations of Controller::setInput/setOutput in this function must match with invocations of |
| // StepRoleAnalyzer::setUsedBy in the IfStep branch in |
| // ExecutionPlan::CompoundBody::findMemoryStepRoles. |
| int ExecutionPlan::nextCompound(const IfStep* step, std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, |
| SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes) const { |
| VLOG(EXECUTION) << "next: " << *step; |
| // If the last step has a sync fence, wait for it to signal before reading the condition value. |
| // This is safe because the steps are serialized when doing fenced compute. |
| NN_RETURN_IF_ERROR(controller->waitForLastStepSyncFence()); |
| bool condValue; |
| NN_RETURN_IF_ERROR(readConditionValue(controller, step->conditionOperandIndex, &condValue)); |
| controller->mNextStepIndex = condValue ? step->thenStepIndex : step->elseStepIndex; |
| const std::vector<SourceOperandIndex>& branchInputOperands = |
| condValue ? step->thenBranchInputOperands : step->elseBranchInputOperands; |
| const std::vector<SourceOperandIndex>& branchOutputOperands = |
| condValue ? step->thenBranchOutputOperands : step->elseBranchOutputOperands; |
| CHECK_EQ(branchInputOperands.size(), step->outerInputOperands.size()); |
| CHECK_EQ(branchOutputOperands.size(), step->outerOutputOperands.size()); |
| for (uint32_t i = 0, n = step->outerInputOperands.size(); i < n; ++i) { |
| // We have to do this assignment just before executing this step to |
| // accommodate cases when the IF resides within a WHILE condition or |
| // body model and for some j the i-th input of the IF branch model is |
| // - an input of the WHILE condition model (whileStep->condInputOperands[j]), |
| // - an input of the WHILE body model (whileStep->bodyInputOperands[j]), or |
| // - an output of the WHILE body model (whileStep->bodyOutputOperands[j]). |
| // In such cases, the WhileStep modifies the location of |
| // step->outerInputOperands[i] to implement double buffering. |
| controller->setInput(step->outerInputOperands[i], branchInputOperands[i]); |
| } |
| for (uint32_t i = 0, n = step->outerOutputOperands.size(); i < n; ++i) { |
| // We have to do this assignment just before executing this step to |
| // accommodate the case when the IF resides within a WHILE body |
| // model and the i-th output of the IF branch model is an |
| // output of the WHILE body model (whileStep->bodyOutputOperands[j] for |
| // some j). In that case, the WhileStep modifies the location of |
| // step->outerOutputOperands[i] to implement double buffering. |
| controller->setOutput(step->outerOutputOperands[i], branchOutputOperands[i]); |
| } |
| return nextCompound(controller, executor, burstController, mainModelOutputShapes); |
| } |
| |
| // Invocations of Controller::setInput in this function must match with invocations of |
| // StepRoleAnalyzer::setUsedBy in the WhileStep branch in |
| // ExecutionPlan::CompoundBody::findMemoryStepRoles. |
| int ExecutionPlan::nextCompound(const WhileStep* step, std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, |
| SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes) const { |
| WhileState& state = controller->mWhileState[controller->mNextStepIndex]; |
| if (state.stage == WhileState::EVALUATE_CONDITION) { |
| state.iteration = state.iteration == WhileState::kOutsideLoop ? 0 : state.iteration + 1; |
| VLOG(EXECUTION) << "next: " << *step << ": iteration " << state.iteration |
| << ": evaluating condition"; |
| controller->mNextStepIndex = step->condStepIndex; |
| |
| if (state.iteration == 0) { |
| state.startTime = Clock::now(); |
| } |
| |
| // iteration = 0 cond inputs = outer inputs |
| // iteration = 1 cond inputs = body outputs |
| // iteration = 2 cond inputs = body outputs |
| // iteration = 3 cond inputs = ... |
| uint32_t loopBodyOutputCount = step->bodyOutputOperands.size(); |
| CHECK_EQ(step->condInputOperands.size(), step->outerInputOperands.size()); |
| CHECK_GE(step->condInputOperands.size(), loopBodyOutputCount); |
| for (uint32_t i = 0, n = step->condInputOperands.size(); i < n; ++i) { |
| bool operandIsInputOnly = i >= loopBodyOutputCount; |
| controller->setInput((state.iteration == 0 || operandIsInputOnly) |
| ? step->outerInputOperands[i] |
| : step->bodyOutputOperands[i], |
| step->condInputOperands[i]); |
| } |
| |
| state.stage = WhileState::EVALUATE_BODY; |
| return nextCompound(controller, executor, burstController, mainModelOutputShapes); |
| } |
| |
| CHECK(state.stage == WhileState::EVALUATE_BODY); |
| std::chrono::nanoseconds timeoutDuration( |
| controller->mExecutionBuilder->getLoopTimeoutDuration()); |
| auto duration = Clock::now() - state.startTime; |
| if (duration > timeoutDuration) { |
| LOG(ERROR) << "WHILE loop timed out after " |
| << std::chrono::duration_cast<std::chrono::milliseconds>(duration).count() |
| << " ms"; |
| return ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT; |
| } |
| |
| // If the last step has a sync fence, wait for it to signal before reading the condition value. |
| // This is safe because the steps are serialized when doing fenced compute. |
| NN_RETURN_IF_ERROR(controller->waitForLastStepSyncFence()); |
| bool condValue; |
| NN_RETURN_IF_ERROR(readConditionValue(controller, step->condOutputOperand, &condValue)); |
| if (condValue) { |
| VLOG(EXECUTION) << "next: " << *step << ": iteration " << state.iteration |
| << ": evaluating body"; |
| controller->mNextStepIndex = step->bodyStepIndex; |
| |
| // iteration = 0 body inputs = cond inputs = outer inputs body outputs = tmp1 |
| // iteration = 1 body inputs = cond inputs = tmp1 body outputs = tmp2 |
| // iteration = 2 body inputs = cond inputs = tmp2 body outputs = tmp1 |
| // iteration = 3 body inputs = cond inputs = ... body outputs = ... |
| #ifdef NN_DEBUGGABLE |
| CHECK_GE(step->bodyInputOperands.size(), step->bodyOutputOperands.size()); |
| CHECK_EQ(step->bodyInputOperands.size(), step->outerInputOperands.size()); |
| CHECK_EQ(step->bodyInputOperands.size(), step->condInputOperands.size()); |
| CHECK_GE(step->bodyOutputOperands.size(), step->outerOutputOperands.size()); |
| #endif |
| for (uint32_t i = 0, n = step->bodyInputOperands.size(); i < n; ++i) { |
| controller->setInput(step->condInputOperands[i], step->bodyInputOperands[i]); |
| } |
| if (state.iteration != 0) { |
| for (const SourceOperandIndex& outputOperand : step->bodyOutputOperands) { |
| #ifdef NN_DEBUGGABLE |
| CHECK_EQ(controller->mSourceOperandToInputIndex.count(outputOperand), 0u); |
| CHECK_EQ(controller->mSourceOperandToOutputIndex.count(outputOperand), 0u); |
| CHECK_EQ(controller->mSourceOperandToLocationOfTemporary.count(outputOperand), 1u); |
| CHECK_EQ(controller->mSourceOperandToLocationOfTemporary2.count(outputOperand), 1u); |
| #endif |
| std::swap(controller->mSourceOperandToLocationOfTemporary[outputOperand], |
| controller->mSourceOperandToLocationOfTemporary2[outputOperand]); |
| } |
| } |
| } else { |
| VLOG(EXECUTION) << "next: " << *step << ": iteration " << state.iteration |
| << ": exiting loop"; |
| controller->mNextStepIndex = step->exitStepIndex; |
| |
| // Copy body outputs to outer outputs. |
| // TODO: Use outer outputs instead of tmp2 to avoid copying? |
| CHECK_LE(step->outerOutputOperands.size(), step->bodyOutputOperands.size()); |
| for (uint32_t i = 0, n = step->outerOutputOperands.size(); i < n; ++i) { |
| // condInputOperands[i] points to a body output operand from the |
| // last iteration if we've executed at least one iteration and to a |
| // WHILE operation input operand otherwise. |
| const SourceOperandIndex& innerOperand = step->condInputOperands[i]; |
| const SourceOperandIndex& outerOperand = step->outerOutputOperands[i]; |
| std::optional<Buffer> outerBuffer = getBuffer(controller, outerOperand); |
| if (outerBuffer == std::nullopt) { |
| // This should never happen. |
| LOG(ERROR) << "Unable to get outerBuffer for operand " << toString(outerOperand); |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| const Operand& sourceOperand = |
| controller->mExecutionBuilder->getSourceOperand(outerOperand); |
| const uint32_t size = TypeManager::get()->getSizeOfData(sourceOperand); |
| CHECK_NE(size, 0u); |
| std::optional<Buffer> innerBuffer = getBuffer(controller, innerOperand); |
| if (innerBuffer == std::nullopt) { |
| // This should never happen. |
| LOG(ERROR) << "Unable to get innerBuffer for operand " << toString(innerOperand); |
| return ANEURALNETWORKS_OP_FAILED; |
| } |
| CHECK_LE(size, innerBuffer->getSize()); |
| CHECK_LE(size, outerBuffer->getSize()); |
| memcpy(outerBuffer->getPointer(), innerBuffer->getPointer(), size); |
| outerBuffer->flush(); |
| } |
| state.iteration = WhileState::kOutsideLoop; |
| } |
| |
| state.stage = WhileState::EVALUATE_CONDITION; |
| return nextCompound(controller, executor, burstController, mainModelOutputShapes); |
| } |
| |
| int ExecutionPlan::nextCompound(const GotoStep* step, std::shared_ptr<Controller> controller, |
| std::shared_ptr<StepExecutor>* executor, |
| SharedBurst* burstController, |
| const std::vector<OutputShape>* mainModelOutputShapes) const { |
| VLOG(EXECUTION) << "next: " << *step; |
| controller->mNextStepIndex = step->gotoStepIndex; |
| return nextCompound(controller, executor, burstController, mainModelOutputShapes); |
| } |
| |
| std::shared_ptr<StepExecutor> ExecutionPlan::makeStepExecutor( |
| bool reusable, ExecutionBuilder* executionBuilder) const { |
| auto simpleBody = simple(); |
| auto executor = std::make_shared<StepExecutor>(executionBuilder, simpleBody->mModel, |
| simpleBody->mDevice, simpleBody->mPreparedModel, |
| reusable); |
| executor->mapInputsAndOutputsTrivially(); |
| return executor; |
| } |
| |
| void ExecutionPlan::becomeCompoundIfEmpty() { |
| CHECK(mState != SIMPLE); |
| if (mState == EMPTY) { |
| mBody = new CompoundBody(this); |
| mState = COMPOUND; |
| } |
| } |
| |
| ExecutionStep* ExecutionPlan::createNewExecutionStep(uint32_t sourceModelIndex, |
| const std::shared_ptr<Device> device) { |
| becomeCompoundIfEmpty(); |
| auto step = std::make_shared<LogicalStep>(std::in_place_type<ExecutionStep>, this, |
| compound()->mSteps.size(), sourceModelIndex, device); |
| compound()->mSteps.push_back(step); |
| return step->executionStep(); |
| } |
| |
| IfStep* ExecutionPlan::createNewIfStep() { |
| becomeCompoundIfEmpty(); |
| auto step = std::make_shared<LogicalStep>(std::in_place_type<IfStep>); |
| step->ifStep()->index = compound()->mSteps.size(); |
| compound()->mSteps.push_back(step); |
| return step->ifStep(); |
| } |
| |
| WhileStep* ExecutionPlan::createNewWhileStep() { |
| becomeCompoundIfEmpty(); |
| auto step = std::make_shared<LogicalStep>(std::in_place_type<WhileStep>); |
| step->whileStep()->index = compound()->mSteps.size(); |
| compound()->mSteps.push_back(step); |
| return step->whileStep(); |
| } |
| |
| GotoStep* ExecutionPlan::createNewGotoStep() { |
| becomeCompoundIfEmpty(); |
| auto step = std::make_shared<LogicalStep>(std::in_place_type<GotoStep>); |
| step->gotoStep()->index = compound()->mSteps.size(); |
| compound()->mSteps.push_back(step); |
| return step->gotoStep(); |
| } |
| |
| void ExecutionPlan::becomeSingleStep(const std::shared_ptr<Device> device, |
| const ModelBuilder* model) { |
| CHECK(mState == EMPTY); |
| mBody = new SimpleBody(device, model, mCacheInfo, mToken); |
| mState = SIMPLE; |
| } |
| |
| void ExecutionPlan::recordOutputDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex) { |
| auto [it, isNew] = |
| compound()->mOutputToDefiningExecutionStep.emplace(sourceOperandIndex, stepIndex); |
| CHECK(isNew) << "Step " << stepIndex << " redefines output operand " |
| << toString(sourceOperandIndex) << " already defined by step " << it->second; |
| } |
| |
| void ExecutionPlan::recordTemporaryDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex) { |
| auto [it, isNew] = |
| compound()->mTemporaryToDefiningExecutionStep.emplace(sourceOperandIndex, stepIndex); |
| CHECK(isNew) << "Step " << stepIndex << " redefines temporary operand " |
| << toString(sourceOperandIndex) << " already defined by step " << it->second; |
| } |
| |
| void ExecutionPlan::dump() const { |
| if (mBody) { |
| mBody->dump(); |
| } else { |
| VLOG(COMPILATION) << "EMPTY"; |
| } |
| } |
| |
| void ExecutionPlan::reset() { |
| if (mBody) { |
| delete mBody; |
| mBody = nullptr; |
| } |
| mState = EMPTY; |
| } |
| |
| bool ExecutionPlan::isSimpleCpu() const { |
| return isSimple() && simple()->mDevice == DeviceManager::getCpuDevice(); |
| } |
| |
| ExecutionPlan::Kind ExecutionPlan::forTest_getKind() const { |
| switch (mState) { |
| case EMPTY: |
| return Kind::EMPTY; |
| case SIMPLE: |
| CHECK(mBody); |
| return mBody->mSuccessfulFinish ? Kind::SIMPLE : Kind::ERROR; |
| case COMPOUND: |
| CHECK(mBody); |
| return mBody->mSuccessfulFinish ? Kind::COMPOUND : Kind::ERROR; |
| default: |
| LOG(FATAL) << "unexpected state"; |
| return Kind::ERROR; |
| } |
| } |
| |
| std::shared_ptr<const Device> ExecutionPlan::forTest_simpleGetDevice() const { |
| return simple()->mDevice; |
| } |
| |
| const std::vector<std::shared_ptr<LogicalStep>>& ExecutionPlan::forTest_compoundGetSteps() const { |
| return compound()->mSteps; |
| } |
| |
| std::set<uint32_t> ExecutionPlan::forTest_flatGetDynamicTemporaries() const { |
| CHECK_EQ(getSourceModels().size(), size_t(1)); |
| std::set<uint32_t> ret; |
| forEachDynamicTemporary([&ret](SourceOperandIndex dynTemp, const Operand&, uint32_t) { |
| ret.insert(dynTemp.second); |
| }); |
| return ret; |
| } |
| |
| bool ExecutionPlan::hasDynamicTemporaries() const { |
| return mBody == nullptr ? false : mBody->hasDynamicTemporaries(); |
| } |
| |
| bool ExecutionPlan::forTest_hasStepModelWithNoInputsOrNoOutputs() const { |
| return mBody == nullptr ? false : mBody->hasStepModelWithNoInputsOrNoOutputs(); |
| } |
| |
| bool ExecutionPlan::CompoundBody::hasStepModelWithNoInputsOrNoOutputs() const { |
| return std::any_of(mSteps.begin(), mSteps.end(), [](const auto& logicalStep) { |
| const ExecutionStep* step = logicalStep->tryExecutionStep(); |
| return step != nullptr && step->hasNoInputsOrNoOutputs(); |
| }); |
| } |
| |
| const uint8_t* ExecutionPlan::forTest_simpleGetCacheToken() const { |
| return simple()->mToken.getCacheToken(); |
| } |
| |
| void ExecutionPlan::SimpleBody::dump() const { |
| VLOG(COMPILATION) << "SIMPLE for " << mDevice->getName(); |
| } |
| |
| void ExecutionPlan::CompoundBody::dump() const { |
| for (const auto& step : mSteps) { |
| step->dump(); |
| } |
| } |
| |
| SourceOperandIndex ExecutionPlan::getInputSourceOperand(uint32_t index) const { |
| const auto* mainModel = getSourceModels().getModel(kMainModelInSourceModels); |
| CHECK_LT(index, mainModel->inputCount()); |
| const auto operandIndex = mainModel->getInputOperandIndex(index); |
| return {kMainModelInSourceModels, operandIndex}; |
| } |
| |
| SourceOperandIndex ExecutionPlan::getOutputSourceOperand(uint32_t index) const { |
| const auto* mainModel = getSourceModels().getModel(kMainModelInSourceModels); |
| CHECK_LT(index, mainModel->outputCount()); |
| const auto operandIndex = mainModel->getOutputOperandIndex(index); |
| return {kMainModelInSourceModels, operandIndex}; |
| } |
| |
| void ExecutionPlan::SimpleBody::forEachStepRoleOfInput(uint32_t index, |
| const StepRoleCallback& callback) const { |
| callback(mPreparedModel.get(), IOType::INPUT, index); |
| } |
| |
| void ExecutionPlan::SimpleBody::forEachStepRoleOfOutput(uint32_t index, |
| const StepRoleCallback& callback) const { |
| callback(mPreparedModel.get(), IOType::OUTPUT, index); |
| } |
| |
| // Map an input role of the main model to the input/output roles in the step models. |
| void ExecutionPlan::CompoundBody::forEachStepRoleOfInput(uint32_t index, |
| const StepRoleCallback& callback) const { |
| const auto sourceOperandIndex = mPlan->getInputSourceOperand(index); |
| forEachStepRoleOfSourceOperand(sourceOperandIndex, callback); |
| } |
| |
| // Map an output role of the main model to the input/output roles in the step models. |
| void ExecutionPlan::CompoundBody::forEachStepRoleOfOutput(uint32_t index, |
| const StepRoleCallback& callback) const { |
| const auto sourceOperandIndex = mPlan->getOutputSourceOperand(index); |
| forEachStepRoleOfSourceOperand(sourceOperandIndex, callback); |
| } |
| |
| void ExecutionPlan::CompoundBody::forEachStepRoleOfSourceOperand( |
| const SourceOperandIndex& index, const StepRoleCallback& callback) const { |
| const auto it = mSourceOperandToStepRoles.find(index); |
| if (it == mSourceOperandToStepRoles.end()) return; |
| for (const auto& [stepIndex, type, ioIndex] : it->second) { |
| CHECK_LT(stepIndex, mSteps.size()); |
| const auto* step = mSteps[stepIndex]->executionStep(); |
| callback(step->getPreparedStepModel().get(), type, ioIndex); |
| } |
| } |
| |
| MemoryPreference ExecutionPlan::getMemoryPreference(IOType type, uint32_t index) const { |
| CHECK(mState == SIMPLE || mState == COMPOUND); |
| if (mState == SIMPLE) { |
| return simple()->mPreparedModel->getMemoryPreference(); |
| } else { |
| const auto sourceOperandIndex = type == IOType::INPUT ? getInputSourceOperand(index) |
| : getOutputSourceOperand(index); |
| return compound()->getMemoryPreferenceOfSourceOperand(sourceOperandIndex); |
| } |
| } |
| |
| MemoryPreference ExecutionPlan::CompoundBody::getMemoryPreferenceOfSourceOperand( |
| const SourceOperandIndex& index) const { |
| uint32_t alignment = kMinMemoryAlignment, padding = kMinMemoryPadding; |
| forEachStepRoleOfSourceOperand( |
| index, [&alignment, &padding](const auto* preparedModel, IOType, uint32_t) { |
| const auto preference = preparedModel->getMemoryPreference(); |
| alignment = std::max(alignment, preference.alignment); |
| padding = std::max(padding, preference.padding); |
| }); |
| return {alignment, padding}; |
| } |
| |
| void ExecutionPlan::forEachDynamicTemporary( |
| const std::function<void(SourceOperandIndex, const Operand&, uint32_t definingStepIndex)>& |
| fn) const { |
| if (mState != COMPOUND) { |
| return; |
| } |
| |
| for (const auto& logicalStep : compound()->mSteps) { |
| if (const ExecutionStep* step = logicalStep->tryExecutionStep()) { |
| const uint32_t stepIndex = step->getIndex(); |
| const uint32_t sourceModelIndex = step->getSourceModelIndex(); |
| for (const auto& entry : step->getTempsAsStepModelOutputs()) { |
| const auto sourceOperandIndex = SourceOperandIndex(sourceModelIndex, entry.first); |
| const auto& sourceOperand = getSourceOperand(sourceOperandIndex); |
| if (hasUnknownSize(sourceOperand)) { |
| fn(sourceOperandIndex, sourceOperand, stepIndex); |
| } |
| } |
| } |
| } |
| } |
| |
| int ModelBuilder::partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, |
| uint32_t preference, uint32_t priority, |
| const OptionalTimePoint& deadline, ExecutionPlan* plan, |
| const std::vector<TokenValuePair>& metaData, |
| int simulateFailureResultCode) const { |
| uint32_t sourceModelIndex = plan->getSourceModels().addModel(this); |
| NN_RETURN_IF_ERROR(partitionTheWorkInternal(sourceModelIndex, devices, preference, priority, |
| deadline, plan)); |
| int n = plan->finish(preference, priority, deadline, metaData, simulateFailureResultCode); |
| if (VLOG_IS_ON(COMPILATION)) { |
| VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: source model: "; |
| logModelToInfo(makeModel()); |
| plan->dump(); |
| } |
| return n; |
| } |
| |
| int ModelBuilder::partitionTheWorkInternal(uint32_t sourceModelIndex, |
| const std::vector<std::shared_ptr<Device>>& devices, |
| uint32_t preference, uint32_t priority, |
| const OptionalTimePoint& deadline, |
| ExecutionPlan* plan) const { |
| // This function uses a heuristic approach to partitioning the graph. |
| // It should be good enough for the first release. |
| |
| SourceModels* sourceModels = &plan->getSourceModels(); |
| const size_t deviceCount = devices.size(); |
| const size_t operationCount = mOperations.size(); |
| |
| VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: " |
| << "sourceModelIndex = " << sourceModelIndex << ", " |
| << "deviceCount = " << deviceCount << ", " |
| << "operationCount = " << operationCount; |
| |
| // Figure out where each operation will best execute. |
| // The value of the vector is the index in the devices vector. |
| std::vector<int> bestDeviceForOperation(operationCount); |
| NN_RETURN_IF_ERROR( |
| findBestDeviceForEachOperation(preference, devices, &bestDeviceForOperation)); |
| |
| // A special value produced by findBestDeviceForEachOperation meaning that |
| // this is a control flow operation scheduled for interpreted execution |
| // (see LogicalStep). |
| const int kControlFlowInterpreter = deviceCount; |
| |
| // If one device will run all the operations, we don't need to split the |
| // work. This shortcut does not apply when recursively partitioning |
| // referenced models because our plan representation is flat. |
| if (sourceModelIndex == kMainModelInSourceModels && |
| std::adjacent_find(bestDeviceForOperation.begin(), bestDeviceForOperation.end(), |
| std::not_equal_to<int>()) == bestDeviceForOperation.end()) { |
| const int bestDeviceIndex = bestDeviceForOperation[0]; |
| // Bypass the partitioning process unless the only operation is a |
| // control flow operation scheduled for interpreted execution. |
| if (bestDeviceIndex != kControlFlowInterpreter) { |
| VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: only one best device: " |
| << bestDeviceIndex << " = " << devices[bestDeviceIndex]->getName(); |
| plan->becomeSingleStep(devices[bestDeviceIndex], this); |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| } |
| |
| // No easy solution, we need to split the work. |
| |
| // We keep track of the operations that are ready to run for each device. |
| // perDeviceQueue[deviceCount] is for interpreted execution of control flow |
| // (see LogicalStep). |
| std::vector<std::queue<uint32_t>> perDeviceQueue(deviceCount + 1); |
| |
| // This helper function produces a device name. |
| auto deviceName = [&devices, kControlFlowInterpreter, |
| deviceCount](int deviceIndex) -> std::string { |
| if (deviceIndex == kControlFlowInterpreter) { |
| return "NNAPI"; |
| } else if (deviceIndex < 0 || size_t(deviceIndex) >= deviceCount) { |
| return "{unknown}"; |
| } else { |
| return devices.at(deviceIndex)->getName(); |
| } |
| }; |
| |
| // This helper function enqueues the operation on the appropriate queue. |
| auto enqueueOnAppropriateDevice = [&](uint32_t operationIndex) { |
| int deviceIndex = bestDeviceForOperation[operationIndex]; |
| perDeviceQueue[deviceIndex].push(operationIndex); |
| VLOG(COMPILATION) << "enqueueOnAppropriateDevice " << operationIndex << " onto " |
| << deviceIndex << " (" << deviceName(deviceIndex) << ")"; |
| }; |
| |
| // This helper function finds a device that has operations ready to process. |
| // We start by looking at the control flow queue, and then look at the |
| // devices in reverse order (i.e., starting at the end of the devices |
| // vector). Earlier devices have a chance to prepare more of the inputs |
| // required by other devices. This function returns -1 if all queues are |
| // empty. |
| auto findNextDeviceToProcess = [&]() -> int { |
| for (int i = perDeviceQueue.size() - 1; i >= 0; i--) { |
| if (!perDeviceQueue[i].empty()) { |
| return i; |
| } |
| } |
| return -1; |
| }; |
| |
| OperandTracker tracker(this, enqueueOnAppropriateDevice); |
| // For each iteration of this loop, we'll create either an execution step or |
| // an interpreted control flow construct (including nested execution steps |
| // and interpreted control flow constructs). |
| while (true) { |
| // Find the device we'll do this step for. |
| int deviceIndex = findNextDeviceToProcess(); |
| VLOG(COMPILATION) << "findNextDeviceToProcess: " << deviceIndex << " (" |
| << deviceName(deviceIndex) << ")"; |
| if (deviceIndex < 0) { |
| break; |
| } |
| |
| // Assign as much as possible to this device. |
| auto& queue = perDeviceQueue[deviceIndex]; |
| if (deviceIndex != kControlFlowInterpreter) { |
| ExecutionStep* step = |
| plan->createNewExecutionStep(sourceModelIndex, devices[deviceIndex]); |
| while (!queue.empty()) { |
| uint32_t operationIndex = queue.front(); |
| queue.pop(); |
| int n = step->addOperation(operationIndex); |
| if (n != ANEURALNETWORKS_NO_ERROR) { |
| LOG(ERROR) << "failed to add operation " << operationIndex << " to step"; |
| return n; |
| } |
| tracker.markProcessed(operationIndex, enqueueOnAppropriateDevice); |
| } |
| } else { |
| while (!queue.empty()) { |
| uint32_t operationIndex = queue.front(); |
| queue.pop(); |
| const Operation& operation = getOperation(operationIndex); |
| if (operation.type == OperationType::IF) { |
| namespace op = operation_if; |
| const Operand& thenOperand = |
| getOperand(operation.inputs[op::kThenModelOperand]); |
| const Operand& elseOperand = |
| getOperand(operation.inputs[op::kElseModelOperand]); |
| const ModelBuilder* thenModel = getReferencedModel(thenOperand); |
| const ModelBuilder* elseModel = getReferencedModel(elseOperand); |
| uint32_t thenModelIndex = sourceModels->addModel(thenModel); |
| uint32_t elseModelIndex = sourceModels->addModel(elseModel); |
| |
| // Emits the following: |
| // Index Step |
| // i if then=(i + 1) else=(j + 1) |
| // ... (then model steps) |
| // j goto k |
| // ... (else model steps) |
| // k (steps after the IF) |
| IfStep* ifStep = plan->createNewIfStep(); |
| ifStep->conditionOperandIndex = SourceOperandIndex( |
| sourceModelIndex, operation.inputs[op::kCondBoolOperand]); |
| ifStep->thenStepIndex = plan->getNextStepIndex(); |
| NN_RETURN_IF_ERROR(thenModel->partitionTheWorkInternal( |
| thenModelIndex, devices, preference, priority, deadline, plan)); |
| GotoStep* afterThenBranch = plan->createNewGotoStep(); |
| ifStep->elseStepIndex = plan->getNextStepIndex(); |
| NN_RETURN_IF_ERROR(elseModel->partitionTheWorkInternal( |
| elseModelIndex, devices, preference, priority, deadline, plan)); |
| afterThenBranch->gotoStepIndex = plan->getNextStepIndex(); |
| |
| // Outer model operands. |
| for (uint32_t i = op::kFirstInput, n = operation.inputs.size(); i < n; ++i) { |
| ifStep->outerInputOperands.emplace_back(sourceModelIndex, |
| operation.inputs[i]); |
| } |
| for (uint32_t i = 0, n = operation.outputs.size(); i < n; ++i) { |
| ifStep->outerOutputOperands.emplace_back(sourceModelIndex, |
| operation.outputs[i]); |
| } |
| // Then model operands. |
| for (uint32_t i = 0, n = thenModel->inputCount(); i < n; ++i) { |
| ifStep->thenBranchInputOperands.emplace_back( |
| thenModelIndex, thenModel->getInputOperandIndex(i)); |
| } |
| for (uint32_t i = 0, n = thenModel->outputCount(); i < n; ++i) { |
| ifStep->thenBranchOutputOperands.emplace_back( |
| thenModelIndex, thenModel->getOutputOperandIndex(i)); |
| } |
| // Else model operands. |
| for (uint32_t i = 0, n = elseModel->inputCount(); i < n; ++i) { |
| ifStep->elseBranchInputOperands.emplace_back( |
| elseModelIndex, elseModel->getInputOperandIndex(i)); |
| } |
| for (uint32_t i = 0, n = elseModel->outputCount(); i < n; ++i) { |
| ifStep->elseBranchOutputOperands.emplace_back( |
| elseModelIndex, elseModel->getOutputOperandIndex(i)); |
| } |
| } else if (operation.type == OperationType::WHILE) { |
| namespace op = operation_while; |
| const Operand& condOperand = |
| getOperand(operation.inputs[op::kCondModelOperand]); |
| const Operand& bodyOperand = |
| getOperand(operation.inputs[op::kBodyModelOperand]); |
| const ModelBuilder* condModel = getReferencedModel(condOperand); |
| const ModelBuilder* bodyModel = getReferencedModel(bodyOperand); |
| uint32_t condModelIndex = sourceModels->addModel(condModel); |
| uint32_t bodyModelIndex = sourceModels->addModel(bodyModel); |
| |
| // Emits the following: |
| // Index Step |
| // i while cond=(i + 1) body=(j + 1) exit=(k + 1) |
| // ... (cond model steps) |
| // j goto i |
| // ... (body model steps) |
| // k goto i |
| // ... (steps after the WHILE) |
| // |
                    // Note that each WhileStep has a WhileState associated with it.
| WhileStep* whileStep = plan->createNewWhileStep(); |
| whileStep->condStepIndex = plan->getNextStepIndex(); |
| NN_RETURN_IF_ERROR(condModel->partitionTheWorkInternal( |
| condModelIndex, devices, preference, priority, deadline, plan)); |
| GotoStep* afterCond = plan->createNewGotoStep(); |
| afterCond->gotoStepIndex = whileStep->index; |
| whileStep->bodyStepIndex = plan->getNextStepIndex(); |
| NN_RETURN_IF_ERROR(bodyModel->partitionTheWorkInternal( |
| bodyModelIndex, devices, preference, priority, deadline, plan)); |
| GotoStep* afterBody = plan->createNewGotoStep(); |
| afterBody->gotoStepIndex = whileStep->index; |
| whileStep->exitStepIndex = plan->getNextStepIndex(); |
| |
| // Outer model operands. |
| for (uint32_t i = op::kFirstInput, n = operation.inputs.size(); i < n; ++i) { |
| whileStep->outerInputOperands.emplace_back(sourceModelIndex, |
| operation.inputs[i]); |
| } |
| for (uint32_t i = 0, n = operation.outputs.size(); i < n; ++i) { |
| whileStep->outerOutputOperands.emplace_back(sourceModelIndex, |
| operation.outputs[i]); |
| } |
| // Cond model operands. |
| for (uint32_t i = 0, n = condModel->inputCount(); i < n; ++i) { |
| whileStep->condInputOperands.emplace_back( |
| condModelIndex, condModel->getInputOperandIndex(i)); |
| } |
| whileStep->condOutputOperand = |
| SourceOperandIndex(condModelIndex, condModel->getOutputOperandIndex(0)); |
| // Body model operands. |
| for (uint32_t i = 0, n = bodyModel->inputCount(); i < n; ++i) { |
| whileStep->bodyInputOperands.emplace_back( |
| bodyModelIndex, bodyModel->getInputOperandIndex(i)); |
| } |
| for (uint32_t i = 0, n = bodyModel->outputCount(); i < n; ++i) { |
| whileStep->bodyOutputOperands.emplace_back( |
| bodyModelIndex, bodyModel->getOutputOperandIndex(i)); |
| } |
| } else { |
| CHECK(false) << operation.type << " is not a control flow operation"; |
| } |
| tracker.markProcessed(operationIndex, enqueueOnAppropriateDevice); |
| } |
| } |
| } |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| float ModelBuilder::getPerformance(uint32_t preference, |
| const std::shared_ptr<Device> device) const { |
| // Note that we will call this method multiple times per compilation with |
| // the same arguments if there are nested control flow operations and we |
| // decide to execute the outer operation on the ExecutionPlan::next() |
| // interpreter. |
| // |
| // This is a potential compilation performance problem. To work around it, |
| // the performance value could be cached for the duration of a compilation. |
| float perf = 0; |
| const size_t operationCount = mOperations.size(); |
| for (size_t operationIndex = 0; operationIndex < operationCount; operationIndex++) { |
| perf += getPerformance(preference, device, operationIndex); |
| } |
| return perf; |
| } |
| |
| float ModelBuilder::getPerformance(uint32_t preference, const std::shared_ptr<Device> device, |
| uint32_t operationIndex) const { |
| auto applyPreference = [preference](const Capabilities::PerformanceInfo& perf) { |
| return preference == ANEURALNETWORKS_PREFER_LOW_POWER ? perf.powerUsage : perf.execTime; |
| }; |
| |
| const Operation& operation = getOperation(operationIndex); |
| |
| if (operation.type == OperationType::IF) { |
| namespace op = operation_if; |
| const Operand& thenOperand = getOperand(operation.inputs[op::kThenModelOperand]); |
| const Operand& elseOperand = getOperand(operation.inputs[op::kElseModelOperand]); |
| const ModelBuilder* thenModel = getReferencedModel(thenOperand); |
| const ModelBuilder* elseModel = getReferencedModel(elseOperand); |
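        // Weight each branch by 0.5, i.e., assume the two branches are equally
        // likely to be taken.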
| return applyPreference(device->getIfPerformance()) + |
| 0.5 * (thenModel->getPerformance(preference, device) + |
| elseModel->getPerformance(preference, device)); |
| } |
| |
| if (operation.type == OperationType::WHILE) { |
| namespace op = operation_while; |
| const Operand& condOperand = getOperand(operation.inputs[op::kCondModelOperand]); |
| const Operand& bodyOperand = getOperand(operation.inputs[op::kBodyModelOperand]); |
| const ModelBuilder* condModel = getReferencedModel(condOperand); |
| const ModelBuilder* bodyModel = getReferencedModel(bodyOperand); |
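        // The number of loop iterations is unknown at compilation time, so
        // count a single evaluation of the condition and body models.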
| return applyPreference(device->getWhilePerformance()) + |
| condModel->getPerformance(preference, device) + |
| bodyModel->getPerformance(preference, device); |
| } |
| |
    // TODO: This assumes that the type is dictated by the first operand. This is
| // currently the case but is not a safe assumption to make in the long term. |
| const uint32_t operandIndex = operation.inputs[0]; |
| const OperandType operandType = mOperands[operandIndex].type; |
| switch (operandType) { |
| case OperandType::FLOAT32: |
| if (mRelaxComputationFloat32toFloat16) { |
| return applyPreference(device->getRelaxedFloat32toFloat16PerformanceScalar()); |
| } |
| break; |
| case OperandType::TENSOR_FLOAT32: |
| if (mRelaxComputationFloat32toFloat16) { |
| return applyPreference(device->getRelaxedFloat32toFloat16PerformanceTensor()); |
| } |
| break; |
| default: |
| break; |
| } |
| |
| return applyPreference(device->getPerformance(operandType)); |
| } |
| |
| bool ModelBuilder::isControlFlowOperationWithOperandOfUnknownSize(uint32_t operationIndex) const { |
| auto containsUnknownSize = [](const ModelBuilder* model, |
| const std::vector<uint32_t>& operandIndexes) { |
| for (uint32_t operandIndex : operandIndexes) { |
| if (hasUnknownSize(model->getOperand(operandIndex))) { |
| return true; |
| } |
| } |
| return false; |
| }; |
| |
| const Operation& operation = getOperation(operationIndex); |
| |
| if (operation.type == OperationType::IF) { |
| namespace op = operation_if; |
| const Operand& thenOperand = getOperand(operation.inputs[op::kThenModelOperand]); |
| const Operand& elseOperand = getOperand(operation.inputs[op::kElseModelOperand]); |
| const ModelBuilder* thenModel = getReferencedModel(thenOperand); |
| const ModelBuilder* elseModel = getReferencedModel(elseOperand); |
| return containsUnknownSize(this, operation.inputs) || |
| containsUnknownSize(this, operation.outputs) || |
| containsUnknownSize(thenModel, thenModel->getInputOperandIndexes()) || |
| containsUnknownSize(thenModel, thenModel->getOutputOperandIndexes()) || |
| containsUnknownSize(elseModel, elseModel->getInputOperandIndexes()) || |
| containsUnknownSize(elseModel, elseModel->getOutputOperandIndexes()); |
| } |
| |
| if (operation.type == OperationType::WHILE) { |
| namespace op = operation_while; |
| const Operand& condOperand = getOperand(operation.inputs[op::kCondModelOperand]); |
| const Operand& bodyOperand = getOperand(operation.inputs[op::kBodyModelOperand]); |
| const ModelBuilder* condModel = getReferencedModel(condOperand); |
| const ModelBuilder* bodyModel = getReferencedModel(bodyOperand); |
| return containsUnknownSize(this, operation.inputs) || |
| containsUnknownSize(this, operation.outputs) || |
| containsUnknownSize(condModel, condModel->getInputOperandIndexes()) || |
| containsUnknownSize(condModel, condModel->getOutputOperandIndexes()) || |
| containsUnknownSize(bodyModel, bodyModel->getInputOperandIndexes()) || |
| containsUnknownSize(bodyModel, bodyModel->getOutputOperandIndexes()); |
| } |
| |
| // Not a control flow operation. |
| return false; |
| } |
| |
| bool ModelBuilder::supportedByControlFlowInterpreter(uint32_t operationIndex) const { |
| const Operation& operation = getOperation(operationIndex); |
| return (operation.type == OperationType::IF || operation.type == OperationType::WHILE) && |
| // The partitioner does not support dynamic temporaries (b/132458982). |
| !isControlFlowOperationWithOperandOfUnknownSize(operationIndex); |
| } |
| |
| namespace { |
| |
// This class determines whether a given device can execute a given operation.
| class CanDo { |
| public: |
| CanDo() {} |
| |
| void initialize(const MetaModel& metaModel, std::shared_ptr<Device> device) { |
| mSupportsOperationByIndex = device->getSupportedOperations(metaModel); |
| } |
| |
| bool check(size_t operationIndex) const { return mSupportsOperationByIndex[operationIndex]; } |
| |
| private: |
| std::vector<bool> mSupportsOperationByIndex; |
| }; |
| |
| } // anonymous namespace |
| |
| int ModelBuilder::findBestDeviceForEachOperation( |
| uint32_t preference, const std::vector<std::shared_ptr<Device>>& devices, |
| std::vector<int>* bestDeviceForOperation) const { |
| const MetaModel metaModel(makeModel(), DeviceManager::get()->strictSlicing()); |
| |
| const size_t deviceCount = devices.size(); |
| std::vector<CanDo> canDo(deviceCount); |
| for (size_t deviceIndex = 0; deviceIndex < deviceCount; deviceIndex++) { |
| canDo[deviceIndex].initialize(metaModel, devices[deviceIndex]); |
| } |
| |
| // Figure out the best driver for each operation. |
| const size_t operationCount = mOperations.size(); |
| for (size_t operationIndex = 0; operationIndex < operationCount; operationIndex++) { |
| const Operation& operation = getOperation(operationIndex); |
| // Find which device, including CPU fallback, gives the best performance for this operation. |
| int bestChoice = -1; |
| |
| if (isControlFlowOperationWithOperandOfUnknownSize(operationIndex)) { |
| // Do not schedule control flow operations with unknown size to |
| // non-CPU devices because this is not supported by the 1.3 HAL. |
| // See http://b/159076604#comment5. |
| auto cpuDeviceIterator = |
| std::find(devices.begin(), devices.end(), DeviceManager::getCpuDevice()); |
| if (cpuDeviceIterator != devices.end()) { |
| int cpuDeviceIndex = cpuDeviceIterator - devices.begin(); |
| if (canDo[cpuDeviceIndex].check(operationIndex)) { |
| bestChoice = cpuDeviceIndex; |
| } |
| } |
| } else { |
| float bestPerfVal = 0.0; // Do not check bestPerfVal if bestChoice < 0. |
| for (size_t deviceIndex = 0; deviceIndex < deviceCount; deviceIndex++) { |
| const auto& device = devices[deviceIndex]; |
| if (canDo[deviceIndex].check(operationIndex)) { |
| const float perfVal = getPerformance(preference, device, operationIndex); |
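                    // Break performance ties in favor of the CPU device.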
| const bool deviceIsPreferred = (device == DeviceManager::getCpuDevice()); |
| if (bestChoice < 0 || perfVal < bestPerfVal || |
| (perfVal == bestPerfVal && deviceIsPreferred)) { |
| bestChoice = deviceIndex; |
| bestPerfVal = perfVal; |
| } |
| } else { |
| // Somewhat noisy logging, but only place where the user of NNAPI can get |
| // feedback on why an operation was not run on a specific device. |
| // |
| // Logs O(operationCount * deviceCount) times, but typically deviceCount is |
| // very small. |
| VLOG(COMPILATION) << "Device " << device->getName() << " can't do operation " |
| << operation.type << ":" << operationIndex; |
| } |
| } |
| } |
| |
| if (bestChoice < 0) { |
| LOG(ERROR) << "No driver can do operation " << operation.type; |
| return ANEURALNETWORKS_BAD_DATA; |
| } else if (devices[bestChoice] == DeviceManager::getCpuDevice() && |
| supportedByControlFlowInterpreter(operationIndex)) { |
| // Run control flow on the ExecutionPlan::next() interpreter and try |
| // to delegate referenced models. |
| const int kControlFlowInterpreter = deviceCount; |
| (*bestDeviceForOperation)[operationIndex] = kControlFlowInterpreter; |
            VLOG(COMPILATION) << "ModelBuilder::findBestDeviceForEachOperation(" << operation.type
                              << ":" << operationIndex << ") = " << kControlFlowInterpreter
                              << " (NNAPI)";
| } else { |
| (*bestDeviceForOperation)[operationIndex] = bestChoice; |
| VLOG(COMPILATION) << "ModelBuilder::findBestDeviceForEachOperation(" << operation.type |
| << ":" << operationIndex << ") = " << bestChoice << " (" |
| << devices[bestChoice]->getName() << ")"; |
| } |
| } |
| return ANEURALNETWORKS_NO_ERROR; |
| } |
| |
| } // namespace nn |
| } // namespace android |