| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <ControlFlow.h> |
| #include <HalInterfaces.h> |
| #include <SampleDriver.h> |
| #include <Utils.h> |
| #include <ValidateHal.h> |
| #include <gtest/gtest.h> |
| |
| #include <algorithm> |
| #include <filesystem> |
| #include <functional> |
| #include <iostream> |
| #include <map> |
| #include <memory> |
| #include <numeric> |
| #include <queue> |
| #include <set> |
| #include <string> |
| #include <tuple> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include "CompilationBuilder.h" |
| #include "ExecutionPlan.h" |
| #include "HalUtils.h" |
| #include "Manager.h" |
| #include "ModelBuilder.h" |
| #include "NeuralNetworks.h" |
| #include "NeuralNetworksOEM.h" |
| #include "TestNeuralNetworksWrapper.h" |
| #include "TmpDirectoryUtils.h" |
| |
| // Uncomment the following line to generate some debugging output that |
| // may be useful when analyzing failures: |
| // |
| // #define VERBOSE VERBOSE |
| |
| // These tests do whitebox testing of the graph partitioning |
| // algorithm. It is "whitebox" in the sense that we're not evaluating |
| // whether a particular partitioning is legal, or "good enough" |
| // according to some metric, but whether it exactly matches the |
| // expected behavior of the current partitioning algorithm. |
| // |
| // A key part of the current partitioning algorithm is to determine |
| // which device among the available devices should be the one to |
| // execute a particular operation from the graph. This determination |
| // is made "locally" -- i.e., it does not depend on the graph |
| // topology, only on the properties of the operation in question. |
| // IDevice::getSupportedOperations() indicates which operations in a |
| // graph can be executed on a device, and IDevice::getCapabilities() |
| // indicates how "good" that device is for executing particular kinds |
| // of operations. For each operation, the partitioning algorithm |
| // picks the "best" device that is capable of executing that |
| // operation; if no device can do so, then the algorithm picks the |
| // cpu. |
| // |
| // As part of this testing approach, we want to make it easy to |
| // specify which operations in a test graph can be executed on which |
| // devices. We accomplish this in the following way: |
| // - A unary OEM operation is available. |
| // - There is a collection of operations (each of which has two inputs |
| // and one output): |
| // - Eight kinds of operations available at driver version V1_0 or |
| // later. They are represented in the graph as ADD or MUL with a |
| // particular activation function -- two opcodes times four |
| // activation functions means eight available operation kinds. |
| // This is a low-level representation detail -- when we specify the |
| // behavior of the device or build a graph, we do so in terms of |
| // operation encodings 0..7. |
| // - Eight kinds of operations available at driver version V1_1 or |
| // later. They are represented in the graph as DIV or SUB with |
| // a particular activation function, exactly analogous to ADD |
| // and MUL above. We use operation encodings 8..15 for them. |
| // - Four kinds of operations available at driver version V1_2 or |
| // later. They are represented in the graph as MAXIMUM, |
| // MINIMUM, POW, or PRELU. These operations take no activation |
| // function, so we only get 4 operation kinds, for which we |
| // use operation encodings 16..19. |
| // - There is another collection of operations (each of which has one input |
| // and one output): |
| // - Single operation available at driver version V1_3 or |
| // later. It is represented in the graph as HARD_SWISH. |
| // These operations take no activation function, for which we |
| // use operation encodings 20..20. |
| |
| // When we instantiate a device for testing purposes, we specify what subset of |
| // those operations the device is able to execute. |
| // |
| // In order to determine whether or not a partitioning matches the |
| // expected partitioning, we check the number of partitions, check |
| // which device each partition targets, and compare each partition's |
| // subgraph, model inputs, model outputs, step model inputs, and |
| // step model outputs against what is expected. In order to perform |
| // that comparison, we build a model to compare against a partition's |
| // step model and run a graph comparison algorithm on it. The graph |
| // comparison and the inputs and outputs comparisons are syntactic |
| // rather than semantic comparisons -- they don't allow for |
| // reorderings of inputs and outputs. Because of this, we need to |
| // know exactly how the partitioning algorithm orders inputs and |
| // outputs in order to construct the models and operand lists to |
| // compare against. Here are some relevant behaviors of the |
| // partitioning algorithm: |
| // |
| // - It builds a subgraph by walking operations in forward topological |
| // order, and adding each operation's input operands and output |
| // operands in index order (input followed by output) when that |
| // operation is added. (It does not add an input that has already |
| // been added.) |
| // - It finds model inputs, model outputs, and step model inputs in |
| // the order the corresponding operands were added to the subgraph |
| // (see ExecutionStep methods getModelInputs(), getModelOutputs(), |
| // getTempsAsStepModelInputs(), getOutputsAsStepModelInputs()). |
| // - It finds temps as step model outputs in numerical order of corresponding |
| // operand number in the original model (see ExecutionStep method |
| // getTempsAsStepModelOutputs()). |
| // - When it calls identifyInputsAndOutputs() on the step model, it |
| // passes inputs from getModelInputs() in order, followed by temps as |
| // step model inputs from getTempsAsStepModelInputs() in order, |
| // followed by outputs as step model inputs from |
| // getOutputsAsStepModelInputs() in order; and it passes outputs from |
| // getModelOutputs() in order followed by step model outputs from |
| // getTempsAsStepModelOutputs() in order. |
| // |
| // TODO: Maybe the logic for comparing a partition to an expected |
| // model should be changed to tolerate reorderings of inputs and |
| // outputs, so that when we build models and lists to compare |
| // against, we don't need to worry about input and output |
| // orderings. But is there a way to do this that still lets us |
| // verify that we have the correct relationships between |
| // an (original) model's inputs and outputs and each step model's |
| // inputs and outputs, as well as the correct relationship |
| // between step model inputs and outputs across partitions? |
| |
| namespace { |
| |
| namespace hardware = android::hardware; |
| namespace V1_0 = ::android::hardware::neuralnetworks::V1_0; |
| namespace V1_1 = ::android::hardware::neuralnetworks::V1_1; |
| namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; |
| namespace V1_3 = ::android::hardware::neuralnetworks::V1_3; |
| using CompilationBuilder = ::android::nn::CompilationBuilder; |
| using Device = ::android::nn::Device; |
| using DeviceManager = ::android::nn::DeviceManager; |
| using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference; |
| using ExecutePriority = ::android::nn::test_wrapper::ExecutePriority; |
| using ExecutionPlan = ::android::nn::ExecutionPlan; |
| using ExecutionStep = ::android::nn::ExecutionStep; |
| using HalCacheToken = ::android::nn::HalCacheToken; |
| using HalVersion = ::android::nn::HalVersion; |
| using HidlModel = V1_3::Model; |
| using IOType = ::android::nn::IOType; |
| using LogicalStep = ::android::nn::LogicalStep; |
| using ModelBuilder = ::android::nn::ModelBuilder; |
| using Operand = ::android::nn::Operand; |
| using Operation = ::android::nn::Operation; |
| using OptionalTimePoint = ::android::nn::OptionalTimePoint; |
| using Result = ::android::nn::test_wrapper::Result; |
| using SampleDriver = ::android::nn::sample_driver::SampleDriver; |
| using SharedDevice = ::android::nn::SharedDevice; |
| using SourceOperandIndex = ::android::nn::SourceOperandIndex; |
| using StepRole = ::android::nn::StepRole; |
| using WrapperCompilation = ::android::nn::test_wrapper::Compilation; |
| using WrapperExecution = ::android::nn::test_wrapper::Execution; |
| using WrapperModel = ::android::nn::test_wrapper::Model; |
| using WrapperOperandType = ::android::nn::test_wrapper::OperandType; |
| using WrapperSymmPerChannelQuantParams = ::android::nn::test_wrapper::SymmPerChannelQuantParams; |
| using WrapperType = ::android::nn::test_wrapper::Type; |
| using android::sp; |
| |
| void update(V1_3::Capabilities* capabilities, V1_3::OperandType type, float perf) { |
| V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf}; |
| ::android::nn::update(&capabilities->operandPerformance, type, perfInfo); |
| } |
| |
| float lookupExecTime(const V1_3::Capabilities& capabilities, V1_3::OperandType type) { |
| return ::android::nn::lookup(capabilities.operandPerformance, type).execTime; |
| } |
| |
| HalVersion min(HalVersion a, HalVersion b) { |
| return int32_t(a) < int32_t(b) ? a : b; |
| } |
| |
| const uint32_t kNumFuseCodes = 4; |
| const uint32_t kBadOperation = ~0; |
| |
| // V1_0 operations |
| const uint32_t kFirstEncodingADD = 0; |
| const uint32_t kFirstEncodingMUL = kFirstEncodingADD + kNumFuseCodes; |
| const uint32_t kFirstEncodingV1_0 = kFirstEncodingADD; |
| const uint32_t kLastEncodingV1_0 = kFirstEncodingMUL + kNumFuseCodes - 1; |
| |
| // V1_1 operations |
| const uint32_t kFirstEncodingDIV = kLastEncodingV1_0 + 1; |
| const uint32_t kFirstEncodingSUB = kFirstEncodingDIV + kNumFuseCodes; |
| const uint32_t kFirstEncodingV1_1 = kFirstEncodingDIV; |
| const uint32_t kLastEncodingV1_1 = kFirstEncodingSUB + kNumFuseCodes - 1; |
| |
| // V1_2 operations |
| const uint32_t kFirstEncodingMAXIMUM = kLastEncodingV1_1 + 1; |
| const uint32_t kFirstEncodingMINIMUM = kFirstEncodingMAXIMUM + 1; |
| const uint32_t kFirstEncodingPOW = kFirstEncodingMINIMUM + 1; |
| const uint32_t kFirstEncodingPRELU = kFirstEncodingPOW + 1; |
| const uint32_t kFirstEncodingV1_2 = kFirstEncodingMAXIMUM; |
| const uint32_t kLastEncodingV1_2 = kFirstEncodingPRELU; |
| |
| // V1_3 operations |
| const uint32_t kFirstEncodingHARD_SWISH = kLastEncodingV1_2 + 1; |
| const uint32_t kFirstEncodingV1_3 = kFirstEncodingHARD_SWISH; |
| const uint32_t kLastEncodingV1_3 = kFirstEncodingHARD_SWISH; |
| |
| const std::map<V1_3::OperationType, uint32_t> operationToFirstEncoding = { |
| {V1_3::OperationType::ADD, kFirstEncodingADD}, |
| {V1_3::OperationType::MUL, kFirstEncodingMUL}, |
| {V1_3::OperationType::DIV, kFirstEncodingDIV}, |
| {V1_3::OperationType::SUB, kFirstEncodingSUB}, |
| {V1_3::OperationType::MAXIMUM, kFirstEncodingMAXIMUM}, |
| {V1_3::OperationType::MINIMUM, kFirstEncodingMINIMUM}, |
| {V1_3::OperationType::POW, kFirstEncodingPOW}, |
| {V1_3::OperationType::PRELU, kFirstEncodingPRELU}, |
| {V1_3::OperationType::HARD_SWISH, kFirstEncodingHARD_SWISH}, |
| }; |
| |
| // Sorted in reverse order (std::greater) so that we can use map::lower_bound to |
| // find an entry whose key is numerically less than or equal to a search value. |
| // mapped_type is (OperandCode, hasFuseCode). |
| const std::map<uint32_t, std::pair<uint32_t, bool>, std::greater<>> firstEncodingToOperation = { |
| {kFirstEncodingADD, {ANEURALNETWORKS_ADD, true}}, |
| {kFirstEncodingMUL, {ANEURALNETWORKS_MUL, true}}, |
| {kFirstEncodingDIV, {ANEURALNETWORKS_DIV, true}}, |
| {kFirstEncodingSUB, {ANEURALNETWORKS_SUB, true}}, |
| {kFirstEncodingMAXIMUM, {ANEURALNETWORKS_MAXIMUM, false}}, |
| {kFirstEncodingMINIMUM, {ANEURALNETWORKS_MINIMUM, false}}, |
| {kFirstEncodingPOW, {ANEURALNETWORKS_POW, false}}, |
| {kFirstEncodingPRELU, {ANEURALNETWORKS_PRELU, false}}, |
| {kFirstEncodingHARD_SWISH, {ANEURALNETWORKS_HARD_SWISH, false}}, |
| }; |
| |
| // Look up the operation with the specified index in a graph, and return the |
| // operation encoding; or, if for some reason this is not one of the encoded |
| // operations, then return kBadOperation. |
| uint32_t lookupOperation(std::function<const V1_3::Operation&(uint32_t)> getOperation, |
| std::function<const V1_3::Operand&(uint32_t)> getOperand, |
| std::function<const uint8_t*(uint32_t)> getValue, |
| uint32_t operationIndex) { |
| const V1_3::Operation& operation = getOperation(operationIndex); |
| switch (operation.type) { |
| case V1_3::OperationType::ADD: |
| case V1_3::OperationType::MUL: |
| case V1_3::OperationType::DIV: |
| case V1_3::OperationType::SUB: { |
| // input2 is the fused activation function |
| const V1_3::Operand& input2 = getOperand(operation.inputs[2]); |
| if ((input2.type == V1_3::OperandType::INT32) && |
| (input2.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY)) { |
| int32_t value; |
| CHECK_EQ(sizeof(value), input2.location.length); |
| memcpy(&value, getValue(input2.location.offset), input2.location.length); |
| return value + operationToFirstEncoding.at(operation.type); |
| } |
| break; |
| } |
| default: { |
| auto it = operationToFirstEncoding.find(operation.type); |
| if (it != operationToFirstEncoding.end()) { |
| return it->second; |
| } |
| break; |
| } |
| } |
| return kBadOperation; |
| } |
| |
| uint32_t lookupOperation(const HidlModel& model, const V1_3::Subgraph& subgraph, |
| uint32_t operationIndex) { |
| return lookupOperation( |
| [&subgraph](uint32_t index) -> const V1_3::Operation& { |
| return subgraph.operations[index]; |
| }, |
| [&subgraph](uint32_t index) -> const V1_3::Operand& { |
| return subgraph.operands[index]; |
| }, |
| [&model](uint32_t offset) { return &model.operandValues[offset]; }, operationIndex); |
| } |
| |
| #ifdef VERBOSE |
| // This is a debugging utility function |
| void dump(const char* name, const ModelBuilder* model) { |
| const HidlModel hidlModel = model->makeHidlModel(); |
| std::cout << name << ": " << hidlModel << std::endl; |
| std::cout << "inputs: " << hidlModel.main.inputIndexes << std::endl; |
| std::cout << "outputs: " << hidlModel.main.outputIndexes << std::endl; |
| for (size_t i = 0, e = hidlModel.main.operations.size(); i < e; i++) { |
| std::cout << "operation[" << i << "]: " << hidlModel.main.operations[i] << std::endl; |
| } |
| } |
| #endif |
| |
| // This is an IDevice for testing purposes. It only has a few interesting |
| // properties, all of which are specified as constructor arguments: device |
| // capabilities; which subset of operation kinds (0..19) does the device |
| // support; does the device support the OEM operation; does the device support |
| // other operations. The subset is represented with a bitmask, in which |
| // operation kind K corresponds to the bit (1 << K). The other operations are |
| // represented by a set of OperationType. |
| class PartitioningDriver : public SampleDriver { |
| public: |
| enum OEM { |
| OEMNo, // rejected by getSupportedOperations and prepareModel |
| OEMIndecisive, // accepted by getSupportedOperations but not prepareModel |
| OEMYes, // accepted by getSupportedOperations and prepareModel |
| }; |
| |
| PartitioningDriver(const char* name, const char* version, V1_3::Capabilities capabilities, |
| uint32_t operationMask, OEM oem = OEMNo, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : SampleDriver(name), |
| mVersionString(version), |
| mCapabilities(capabilities), |
| mOperationMask(operationMask), |
| mOEM(oem), |
| mOperationTypes(std::move(operationTypes)) { |
| CHECK_EQ(mOperationTypes.count(V1_3::OperationType::OEM_OPERATION), size_t(0)); |
| if (operationMask) { |
| std::for_each(mOperationTypes.begin(), mOperationTypes.end(), |
| [](V1_3::OperationType type) { |
| CHECK_EQ(operationToFirstEncoding.count(type), size_t(0)); |
| }); |
| } |
| } |
| ~PartitioningDriver() override {} |
| |
| hardware::Return<void> getVersionString(getVersionString_cb cb) override { |
| cb(V1_0::ErrorStatus::NONE, mVersionString); |
| return hardware::Void(); |
| } |
| |
| hardware::Return<V1_3::ErrorStatus> prepareModel_1_3( |
| const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority, |
| const V1_3::OptionalTimePoint& deadline, |
| const hardware::hidl_vec<hardware::hidl_handle>& modelCache, |
| const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token, |
| const sp<V1_3::IPreparedModelCallback>& callback) override { |
| if (mOEM == OEMIndecisive) { |
| for (const auto& operation : model.main.operations) { |
| if (operation.type == V1_3::OperationType::OEM_OPERATION) { |
| callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr); |
| return V1_3::ErrorStatus::INVALID_ARGUMENT; |
| } |
| } |
| } |
| |
| // NOTE: We verify that all operations in the model are supported. |
| V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT; |
| auto ret = getSupportedOperations_1_3( |
| model, [&outStatus](V1_3::ErrorStatus inStatus, |
| const hardware::hidl_vec<bool>& supportedOperations) { |
| if (inStatus == V1_3::ErrorStatus::NONE) { |
| if (std::all_of(supportedOperations.begin(), supportedOperations.end(), |
| [](bool v) { return v; })) { |
| outStatus = V1_3::ErrorStatus::NONE; |
| } |
| } |
| }); |
| if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) { |
| return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache, |
| dataCache, token, callback); |
| } else { |
| callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr); |
| return V1_3::ErrorStatus::INVALID_ARGUMENT; |
| } |
| } |
| |
| hardware::Return<V1_0::DeviceStatus> getStatus() override { |
| return V1_0::DeviceStatus::AVAILABLE; |
| } |
| |
| hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override { |
| cb(V1_3::ErrorStatus::NONE, mCapabilities); |
| return hardware::Void(); |
| } |
| |
| hardware::Return<void> getSupportedOperations_1_3(const V1_3::Model& model, |
| getSupportedOperations_1_3_cb cb) override { |
| if (!android::nn::validateModel(model)) { |
| cb(V1_3::ErrorStatus::INVALID_ARGUMENT, std::vector<bool>()); |
| return hardware::Void(); |
| } |
| cb(V1_3::ErrorStatus::NONE, getSupportedOperationsForSubgraph(model, model.main)); |
| return hardware::Void(); |
| } |
| |
| hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb) override { |
| cb(V1_0::ErrorStatus::NONE, /*numModelCache=*/1, /*numDataCache=*/1); |
| return hardware::Void(); |
| } |
| |
| private: |
| std::vector<bool> getSupportedOperationsForSubgraph(const V1_3::Model& model, |
| const V1_3::Subgraph& subgraph) { |
| CHECK(&subgraph == &model.main || |
| std::find_if(model.referenced.begin(), model.referenced.end(), |
| [&subgraph](const V1_3::Subgraph& refSubgraph) { |
| return &subgraph == &refSubgraph; |
| }) != model.referenced.end()); |
| auto supportsEntireSubgraph = [this, &model, &subgraph](uint32_t refSubgraphOperandIndex) { |
| CHECK_LT(refSubgraphOperandIndex, subgraph.operands.size()); |
| const V1_3::Operand& refSubgraphOperand = subgraph.operands[refSubgraphOperandIndex]; |
| CHECK(refSubgraphOperand.lifetime == V1_3::OperandLifeTime::SUBGRAPH); |
| CHECK_LT(refSubgraphOperand.location.offset, model.referenced.size()); |
| const V1_3::Subgraph& refSubgraph = |
| model.referenced[refSubgraphOperand.location.offset]; |
| std::vector<bool> supported = getSupportedOperationsForSubgraph(model, refSubgraph); |
| return std::all_of(supported.begin(), supported.end(), [](bool x) { return x; }); |
| }; |
| const size_t count = subgraph.operations.size(); |
| std::vector<bool> supported(count); |
| for (size_t i = 0; i < count; i++) { |
| const V1_3::Operation& operation = subgraph.operations[i]; |
| if (mOperationTypes.count(operation.type)) { |
| if (operation.type == V1_3::OperationType::IF) { |
| namespace op = android::nn::operation_if; |
| CHECK_GE(operation.inputs.size(), op::kFirstInput); |
| supported[i] = |
| supportsEntireSubgraph(operation.inputs[op::kThenModelOperand]) && |
| supportsEntireSubgraph(operation.inputs[op::kElseModelOperand]); |
| } else if (operation.type == V1_3::OperationType::WHILE) { |
| namespace op = android::nn::operation_while; |
| CHECK_GE(operation.inputs.size(), op::kFirstInput); |
| supported[i] = |
| supportsEntireSubgraph(operation.inputs[op::kCondModelOperand]) && |
| supportsEntireSubgraph(operation.inputs[op::kBodyModelOperand]); |
| } else { |
| supported[i] = true; |
| } |
| continue; |
| } |
| if (operation.type == V1_3::OperationType::OEM_OPERATION) { |
| supported[i] = (mOEM != OEMNo); |
| continue; |
| } |
| supported[i] = false; |
| uint32_t operationEncoding = lookupOperation(model, subgraph, i); |
| if ((operationEncoding != kBadOperation) && |
| (mOperationMask & (1 << operationEncoding))) { |
| supported[i] = true; |
| } |
| } |
| return supported; |
| } |
| |
| std::string mVersionString; |
| V1_3::Capabilities mCapabilities; |
| uint32_t mOperationMask; |
| OEM mOEM; |
| std::set<V1_3::OperationType> mOperationTypes; |
| }; |
| |
| // Like PartitioningDriver, but implementing 1.2 |
| class PartitioningDriverV1_2 : public V1_2::IDevice { |
| public: |
| PartitioningDriverV1_2(const char* name, const char* version, V1_3::Capabilities capabilities, |
| uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem, |
| operationTypes)) {} |
| hardware::Return<void> getCapabilities_1_2(getCapabilities_1_2_cb _hidl_cb) override { |
| return mLatestDriver->getCapabilities_1_2(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedOperations_1_2( |
| const V1_2::Model& model, getSupportedOperations_1_2_cb _hidl_cb) override { |
| return mLatestDriver->getSupportedOperations_1_2(model, _hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModel_1_2( |
| const V1_2::Model& model, V1_1::ExecutionPreference preference, |
| const hardware::hidl_vec<hardware::hidl_handle>& modelCache, |
| const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token, |
| const sp<V1_2::IPreparedModelCallback>& actualCallback) override { |
| return mLatestDriver->prepareModel_1_2(model, preference, modelCache, dataCache, token, |
| actualCallback); |
| } |
| hardware::Return<void> getVersionString(getVersionString_cb _hidl_cb) override { |
| return mLatestDriver->getVersionString(_hidl_cb); |
| } |
| hardware::Return<void> getType(getType_cb _hidl_cb) override { |
| return mLatestDriver->getType(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedExtensions(getSupportedExtensions_cb _hidl_cb) { |
| return mLatestDriver->getSupportedExtensions(_hidl_cb); |
| } |
| hardware::Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb _hidl_cb) { |
| return mLatestDriver->getNumberOfCacheFilesNeeded(_hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModelFromCache( |
| const hardware::hidl_vec<hardware::hidl_handle>& modelCache, |
| const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token, |
| const sp<V1_2::IPreparedModelCallback>& callback) { |
| return mLatestDriver->prepareModelFromCache(modelCache, dataCache, token, callback); |
| } |
| hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override { |
| return mLatestDriver->getCapabilities_1_1(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedOperations_1_1( |
| const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override { |
| return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModel_1_1( |
| const V1_1::Model& model, V1_1::ExecutionPreference preference, |
| const sp<V1_0::IPreparedModelCallback>& actualCallback) override { |
| return mLatestDriver->prepareModel_1_1(model, preference, actualCallback); |
| } |
| hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); } |
| hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override { |
| return mLatestDriver->getCapabilities(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedOperations(const V1_0::Model& model, |
| getSupportedOperations_cb _hidl_cb) override { |
| return mLatestDriver->getSupportedOperations(model, _hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModel( |
| const V1_0::Model& model, |
| const sp<V1_0::IPreparedModelCallback>& actualCallback) override { |
| return mLatestDriver->prepareModel(model, actualCallback); |
| } |
| |
| private: |
| const sp<V1_3::IDevice> mLatestDriver; |
| }; |
| |
| // Like PartitioningDriver, but implementing 1.1 |
| class PartitioningDriverV1_1 : public V1_1::IDevice { |
| public: |
| PartitioningDriverV1_1(const char* name, const char* version, V1_3::Capabilities capabilities, |
| uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem, |
| operationTypes)) {} |
| hardware::Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override { |
| return mLatestDriver->getCapabilities_1_1(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedOperations_1_1( |
| const V1_1::Model& model, getSupportedOperations_1_1_cb _hidl_cb) override { |
| return mLatestDriver->getSupportedOperations_1_1(model, _hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModel_1_1( |
| const V1_1::Model& model, V1_1::ExecutionPreference preference, |
| const sp<V1_0::IPreparedModelCallback>& actualCallback) override { |
| return mLatestDriver->prepareModel_1_1(model, preference, actualCallback); |
| } |
| hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); } |
| hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override { |
| return mLatestDriver->getCapabilities(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedOperations(const V1_0::Model& model, |
| getSupportedOperations_cb _hidl_cb) override { |
| return mLatestDriver->getSupportedOperations(model, _hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModel( |
| const V1_0::Model& model, |
| const sp<V1_0::IPreparedModelCallback>& actualCallback) override { |
| return mLatestDriver->prepareModel(model, actualCallback); |
| } |
| |
| private: |
| const sp<V1_3::IDevice> mLatestDriver; |
| }; |
| |
| // Like PartitioningDriver, but implementing 1.0 |
| class PartitioningDriverV1_0 : public V1_0::IDevice { |
| public: |
| PartitioningDriverV1_0(const char* name, const char* version, V1_3::Capabilities capabilities, |
| uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : mLatestDriver(new PartitioningDriver(name, version, capabilities, operationMask, oem, |
| operationTypes)) {} |
| hardware::Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override { |
| return mLatestDriver->getCapabilities(_hidl_cb); |
| } |
| hardware::Return<void> getSupportedOperations(const V1_0::Model& model, |
| getSupportedOperations_cb _hidl_cb) override { |
| return mLatestDriver->getSupportedOperations(model, _hidl_cb); |
| } |
| hardware::Return<V1_0::ErrorStatus> prepareModel( |
| const V1_0::Model& model, |
| const sp<V1_0::IPreparedModelCallback>& actualCallback) override { |
| return mLatestDriver->prepareModel(model, actualCallback); |
| } |
| hardware::Return<V1_0::DeviceStatus> getStatus() override { return mLatestDriver->getStatus(); } |
| |
| private: |
| const sp<V1_3::IDevice> mLatestDriver; |
| }; |
| |
| enum class Dimensioned { |
| NO, // either a scalar, or a tensor of either unspecified rank (usually) |
| // or specified rank but with no specified dimensions (where |
| // specifically stated) |
| RANK_1, // tensor of shape { 0 } -- i.e., rank 1, unspecified dimensions |
| RANK_2, // tensor of shape { 0, 0 } -- i.e., rank 2, unspecified dimensions |
| YES_1, // tensor of shape { 1 } |
| YES_2, // tensor of shape { 2 } |
| YES_4, // tensor of shape { 4 } |
| YES = YES_1 |
| }; |
| |
| std::vector<uint32_t> dimensions(Dimensioned dimensioned) { |
| switch (dimensioned) { |
| default: |
| EXPECT_TRUE(false) << "Unknown value"; |
| FALLTHROUGH_INTENDED; |
| case Dimensioned::NO: |
| return {}; |
| case Dimensioned::RANK_1: |
| return {0}; |
| case Dimensioned::RANK_2: |
| return {0, 0}; |
| case Dimensioned::YES_1: |
| return {1}; |
| case Dimensioned::YES_2: |
| return {2}; |
| case Dimensioned::YES_4: |
| return {4}; |
| } |
| } |
| |
| // "dimensioned" must be a fully specified kind |
| uint32_t numberOfElements(Dimensioned dimensioned) { |
| auto dims = dimensions(dimensioned); |
| uint32_t result = std::reduce(dims.begin(), dims.end(), 1u, std::multiplies<>()); |
| CHECK_GT(result, 0u); |
| return result; |
| } |
| |
| std::string toString(Dimensioned dimensioned) { |
| switch (dimensioned) { |
| default: |
| return "<Unknown value>"; |
| case Dimensioned::NO: |
| return "NO"; |
| case Dimensioned::RANK_1: |
| return "RANK_1"; |
| case Dimensioned::RANK_2: |
| return "RANK_2"; |
| case Dimensioned::YES_1: |
| return "YES_1"; |
| case Dimensioned::YES_2: |
| return "YES_2"; |
| case Dimensioned::YES_4: |
| return "YES_4"; |
| } |
| } |
| |
| // This class adds some simple abstractions and utilities on top of |
| // WrapperModel. For example, it provides methods that work in terms of |
| // operation kind (0..7); and because we care about graph topology rather than |
| // details of operand types and values, it greatly simplifies the process of |
| // creating operands. |
| class PartitioningModel : private WrapperModel { |
| public: |
| using WrapperModel::finish; |
| using WrapperModel::getHandle; |
| using WrapperModel::identifyInputsAndOutputs; |
| using WrapperModel::isValid; |
| using WrapperModel::relaxComputationFloat32toFloat16; |
| using WrapperModel::setOperandValue; |
| |
| // Create a tensor operand of the specified type, and return the |
| // corresponding operand index. |
| uint32_t addIntOperand(Dimensioned dimensioned = Dimensioned::YES) { |
| return addOperand(WrapperType::TENSOR_INT32, dimensioned); |
| } |
| uint32_t addIntScalarOperand(std::optional<int> v = std::nullopt) { |
| uint32_t opnd = addOperand(WrapperType::INT32); |
| if (v.has_value()) { |
| setOperandValue(opnd, &v.value()); |
| } |
| return opnd; |
| } |
| uint32_t addFloatOperand(Dimensioned dimensioned = Dimensioned::YES) { |
| return addOperand(WrapperType::TENSOR_FLOAT32, dimensioned); |
| } |
| uint32_t addQuantOperand(Dimensioned dimensioned = Dimensioned::YES) { |
| return addOperand(WrapperType::TENSOR_QUANT8_ASYMM, dimensioned); |
| } |
| uint32_t addBooleanOperand(Dimensioned dimensioned = Dimensioned::YES) { |
| return addOperand(WrapperType::TENSOR_BOOL8, dimensioned); |
| } |
| uint32_t addFloatZeroOperand(Dimensioned dimensioned = Dimensioned::YES) { |
| uint32_t opnd = addFloatOperand(dimensioned); |
| std::vector<float> values(numberOfElements(dimensioned), 0.0f); |
| uint32_t size = values.size() * sizeof(float); |
| // Make sure the values are immediately copied so that it is safe to free the buffer after |
| // the setOperandValue call |
| CHECK_LE(size, ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES); |
| setOperandValue(opnd, values.data(), size); |
| return opnd; |
| } |
| |
| // Create an operand of the specified type, and return the corresponding |
| // operand index. |
| uint32_t addOperand(WrapperType wrapperType, Dimensioned dimensioned = Dimensioned::YES) { |
| switch (static_cast<int>(wrapperType)) { |
| case ANEURALNETWORKS_BOOL: |
| case ANEURALNETWORKS_FLOAT16: |
| case ANEURALNETWORKS_FLOAT32: |
| case ANEURALNETWORKS_INT32: |
| case ANEURALNETWORKS_UINT32: |
| case ANEURALNETWORKS_MODEL: |
| case ANEURALNETWORKS_OEM_SCALAR: |
| return addOperand(WrapperOperandType{wrapperType, {}}); |
| |
| case ANEURALNETWORKS_TENSOR_BOOL8: |
| case ANEURALNETWORKS_TENSOR_FLOAT16: |
| case ANEURALNETWORKS_TENSOR_FLOAT32: |
| case ANEURALNETWORKS_TENSOR_OEM_BYTE: |
| return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned)}); |
| |
| case ANEURALNETWORKS_TENSOR_INT32: |
| case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM: |
| case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED: |
| case ANEURALNETWORKS_TENSOR_QUANT8_SYMM: |
| case ANEURALNETWORKS_TENSOR_QUANT16_ASYMM: |
| case ANEURALNETWORKS_TENSOR_QUANT16_SYMM: |
| return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned), 1.0f}); |
| |
| case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL: |
| return addOperand(WrapperOperandType{wrapperType, dimensions(dimensioned), |
| WrapperSymmPerChannelQuantParams({1.0f}, 0)}); |
| |
| default: |
| ADD_FAILURE() << "Unexpected type " << static_cast<uint32_t>(wrapperType); |
| return ~uint32_t(0); |
| } |
| } |
| |
| // Create an operand of the specified operand type, and return the |
| // corresponding operand index. |
| uint32_t addOperand(const WrapperOperandType& wrapperOperandType) { |
| mWrapperOperandType.push_back(wrapperOperandType); |
| return WrapperModel::addOperand(&wrapperOperandType); |
| } |
| |
| // Create an operation with any number of inputs and one output, specifying |
| // the operation type (e.g., ANEURALNETWORKS_ADD), the input operand |
| // indexes, and the output type (e.g., WrapperType::TENSOR_FLOAT32). |
| // Returns the output operand index. |
| uint32_t addExplicitOperationXTo1(ANeuralNetworksOperationType operationType, |
| const std::vector<uint32_t>& inputs, WrapperType outputType, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| uint32_t output = addOperand(outputType, dimensionedOutput); |
| addOperation(operationType, inputs, {output}); |
| return output; |
| } |
| |
| // Create a V1_0 operation with two inputs and one output, specifying the |
| // operation kind (where 0 is the first V1_0 operation) and the input |
| // operand indexes. |
| // Returns the output operand index. |
| uint32_t addOperation2To1V1_0(uint32_t operation, const uint32_t input0, const uint32_t input1, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| CHECK_LE(operation, kLastEncodingV1_0 - kFirstEncodingV1_0); |
| return addOperation2To1(operation + kFirstEncodingV1_0, input0, input1, dimensionedOutput); |
| } |
| |
| // Create a V1_1 operation with two inputs and one output, specifying the |
| // operation kind (where 0 is the first V1_1 operation) and the input |
| // operand indexes. |
| // Returns the output operand index. |
| uint32_t addOperation2To1V1_1(uint32_t operation, const uint32_t input0, const uint32_t input1, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| CHECK_LE(operation, kLastEncodingV1_1 - kFirstEncodingV1_1); |
| return addOperation2To1(operation + kFirstEncodingV1_1, input0, input1, dimensionedOutput); |
| } |
| |
| // Create a V1_2 operation with two inputs and one output, specifying the |
| // operation kind (where 0 is the first V1_2 operation) and the input |
| // operand indexes. |
| // Returns the output operand index. |
| uint32_t addOperation2To1V1_2(uint32_t operation, const uint32_t input0, const uint32_t input1, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| CHECK_LE(operation, kLastEncodingV1_2 - kFirstEncodingV1_2); |
| return addOperation2To1(operation + kFirstEncodingV1_2, input0, input1, dimensionedOutput); |
| } |
| |
| // Create a V1_3 operation with two inputs and one output, specifying the |
| // operation kind (where 0 is the first V1_3 operation) and the input |
| // operand indexes. |
| // Returns the output operand index. |
| uint32_t addOperation1To1V1_3(uint32_t operation, const uint32_t input0, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| CHECK_LE(operation, kLastEncodingV1_3 - kFirstEncodingV1_3); |
| return addOperation1To1(operation + kFirstEncodingV1_3, input0, dimensionedOutput); |
| } |
| |
| // Create an OEM operation with one input and one output, |
| // specifying the input operand index. Returns the output operand |
| // index. |
| uint32_t addOperationOEM1To1(const uint32_t input, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| uint32_t output = addOperandOfSameType(input, dimensionedOutput); |
| addOperation(ANEURALNETWORKS_OEM_OPERATION, {input}, {output}); |
| return output; |
| } |
| |
| // Create an IF operation with the given condition operand and two |
| // referenced models for the true and false cases. |
| void addIfOperation(const uint32_t cond, const PartitioningModel& trueModel, |
| const PartitioningModel& falseModel, const std::vector<uint32_t>& inputs, |
| const std::vector<uint32_t>& outputs) { |
| const uint32_t opndTrue = addRefModelOperand(trueModel); |
| const uint32_t opndFalse = addRefModelOperand(falseModel); |
| std::vector<uint32_t> ifInputs = {cond, opndTrue, opndFalse}; |
| ifInputs.insert(ifInputs.end(), inputs.begin(), inputs.end()); |
| addOperation(ANEURALNETWORKS_IF, ifInputs, outputs); |
| } |
| |
| // Create a WHILE operation with the given condition and body referenced models. |
| void addWhileOperation(const PartitioningModel& condModel, const PartitioningModel& bodyModel, |
| const std::vector<uint32_t>& inputs, |
| const std::vector<uint32_t>& outputs) { |
| const uint32_t condOperand = addRefModelOperand(condModel); |
| const uint32_t bodyOperand = addRefModelOperand(bodyModel); |
| std::vector<uint32_t> whileInputs = {condOperand, bodyOperand}; |
| whileInputs.insert(whileInputs.end(), inputs.begin(), inputs.end()); |
| addOperation(ANEURALNETWORKS_WHILE, whileInputs, outputs); |
| } |
| |
| // Run the partitioning algorithm to create an ExecutionPlan. |
| int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, |
| ExecutePreference preference, ExecutePriority priority, |
| const OptionalTimePoint& deadline, ExecutionPlan* plan) { |
| return reinterpret_cast<ModelBuilder*>(getHandle()) |
| ->partitionTheWork(devices, static_cast<uint32_t>(preference), |
| static_cast<int32_t>(priority), deadline, plan, {}); |
| } |
| |
| #ifdef VERBOSE |
| // This is a debugging utility function. |
| void dump(const char* name) const { |
| const ModelBuilder* mb = reinterpret_cast<const ModelBuilder*>(getHandle()); |
| ::dump(name, mb); |
| } |
| #endif |
| |
| private: |
| // Create an operation with two inputs and one output, specifying |
| // the operation kind and the input operand indexes. |
| // Returns the output operand index. |
| uint32_t addOperation2To1(uint32_t operation, const uint32_t input0, const uint32_t input1, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| auto it = firstEncodingToOperation.lower_bound(operation); |
| CHECK(it != firstEncodingToOperation.end()); |
| ANeuralNetworksOperationType type = it->second.first; |
| if (it->second.second) { |
| int32_t fuseCode = operation - it->first; |
| uint32_t input2 = addIntOperand(fuseCode); |
| uint32_t output = addOperandOfSameType(input0, dimensionedOutput); |
| addOperation(type, {input0, input1, input2}, {output}); |
| return output; |
| } else { |
| uint32_t output = addOperandOfSameType(input0, dimensionedOutput); |
| addOperation(type, {input0, input1}, {output}); |
| return output; |
| } |
| } |
| |
| // Create an operation with one inputs and one output, specifying |
| // the operation kind and the input operand indexes. |
| // Returns the output operand index. |
| uint32_t addOperation1To1(uint32_t operation, const uint32_t input0, |
| Dimensioned dimensionedOutput = Dimensioned::YES) { |
| auto it = firstEncodingToOperation.lower_bound(operation); |
| CHECK(it != firstEncodingToOperation.end()); |
| ANeuralNetworksOperationType type = it->second.first; |
| |
| uint32_t output = addOperandOfSameType(input0, dimensionedOutput); |
| addOperation(type, {input0}, {output}); |
| return output; |
| } |
| |
| // Create a scalar integer operand of the specified value, and |
| // return the corresponding operand index. |
| uint32_t addIntOperand(int32_t value) { |
| uint32_t operand = addOperand(WrapperType::INT32); |
| setOperandValue(operand, &value, sizeof(value)); |
| return operand; |
| } |
| |
| // Create an operand from a model for control flow graphs. |
| uint32_t addRefModelOperand(const PartitioningModel& model) { |
| const uint32_t index = addOperand(WrapperType::MODEL); |
| WrapperModel::setOperandValueFromModel(index, &model); |
| return index; |
| } |
| |
| // Create an operand of the same type as the specified operand, |
| // and return the operand index of the new operand. |
| // |
| // If a tensor, the new operand will have the same rank as the specified |
| // operand. If dimensioned == Dimensioned::NO, then all dimensions of a new |
| // tensor operand will be unspecified. If dimensioned != Dimensioned::NO, |
| // then all dimensions of a new tensor operand will have the implied value |
| // (e.g., YES_1 means each dimension will have the value "1"). |
| uint32_t addOperandOfSameType(uint32_t operand, Dimensioned dimensioned = Dimensioned::YES) { |
| WrapperOperandType type = mWrapperOperandType.at(operand); |
| |
| const auto d = dimensions(dimensioned); |
| EXPECT_TRUE(d.size() <= 1); |
| for (auto& dimension : type.dimensions) { |
| dimension = (dimensioned == Dimensioned::NO ? 0 : d[0]); |
| } |
| |
| mWrapperOperandType.push_back(type); |
| return WrapperModel::addOperand(&type); |
| } |
| |
| // operand index to operand type |
| std::vector<WrapperOperandType> mWrapperOperandType; |
| }; |
| |
| // This class adds some utilities on top of WrapperCompilation. |
| class PartitioningCompilation : public WrapperCompilation { |
| public: |
| PartitioningCompilation(const PartitioningModel* model, |
| const std::vector<std::shared_ptr<Device>>& devices) { |
| ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle()); |
| CompilationBuilder* c = nullptr; |
| int result = m->createCompilation(&c, devices); |
| EXPECT_EQ(result, 0); |
| mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c); |
| } |
| |
| Result setPartitioning(uint32_t partitioning) { |
| return static_cast<Result>(builder()->forTest_setPartitioning(partitioning)); |
| } |
| |
| // Simulate recoverable partitioning failure. |
| Result failPartitioning() { |
| return static_cast<Result>( |
| builder()->forTest_failPartitioning(static_cast<int>(Result::OP_FAILED))); |
| } |
| |
| using WrapperCompilation::finish; |
| |
| const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); } |
| |
| private: |
| CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); } |
| |
| const CompilationBuilder* builder() const { |
| return reinterpret_cast<const CompilationBuilder*>(getHandle()); |
| } |
| }; |
| |
| #ifdef VERBOSE |
| #define RETURN_TRUE() \ |
| { \ |
| std::cerr << "returning true from " << __LINE__ << std::endl; \ |
| return true; \ |
| } |
| #else |
| #define RETURN_TRUE() \ |
| { return true; } |
| #endif |
| #ifdef VERBOSE |
| #define RETURN_FALSE(MESSAGE) \ |
| { \ |
| std::cerr << "returning false from " << __LINE__ MESSAGE << std::endl; \ |
| return false; \ |
| } |
| #else |
| #define RETURN_FALSE(MESSAGE) \ |
| { return false; } |
| #endif |
| |
| class PartitioningTest : public ::testing::Test { |
| protected: |
| using DynamicTemporariesType = decltype(ExecutionPlan().forTest_flatGetDynamicTemporaries()); |
| using RemapVectorType = ExecutionStep::RemapVectorType; |
| using StepModelOutputSetType = ExecutionStep::StepModelOutputSetType; |
| |
| // Used for PartitioningTest::checkExecutionPlanSteps. |
| static constexpr const char* kIfStep = "IF"; |
| static constexpr const char* kWhileStep = "WHILE"; |
| static constexpr const char* kGotoStep = "GOTO"; |
| |
| virtual void SetUp() {} |
| |
| // From a vector of DeviceSpecification, create a vector of |
| // Devices. |
| struct DeviceSpecification { |
| DeviceSpecification(const std::string& name, const V1_3::Capabilities& capabilities, |
| uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo) |
| : mName(name), |
| mVersionString(kVersionString), |
| mCapabilities(capabilities), |
| mOperationMask(operationMask), |
| mOEM(oem) {} |
| DeviceSpecification(const std::string& name, float perf, uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| HalVersion halVersion = HalVersion::LATEST, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : DeviceSpecification(name, perf, perf, operationMask, oem, halVersion, |
| operationTypes) {} |
| DeviceSpecification(const std::string& name, float perf, float perfRelaxed, |
| uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| HalVersion halVersion = HalVersion::LATEST, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : DeviceSpecification(name, kVersionString, perf, perfRelaxed, operationMask, oem, |
| halVersion, operationTypes) {} |
| DeviceSpecification(const std::string& name, const std::string& version, float perf, |
| uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| HalVersion halVersion = HalVersion::LATEST, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : DeviceSpecification(name, version, perf, perf, operationMask, oem, halVersion, |
| operationTypes) {} |
| DeviceSpecification(const std::string& name, const std::string& version, float perf, |
| float perfRelaxed, uint32_t operationMask, |
| PartitioningDriver::OEM oem = PartitioningDriver::OEMNo, |
| HalVersion halVersion = HalVersion::LATEST, |
| std::set<V1_3::OperationType> operationTypes = {}) |
| : mName(name), |
| mVersionString(version), |
| mHalVersion(halVersion), |
| mOperationMask(operationMask), |
| mOEM(oem), |
| mOperationTypes(std::move(operationTypes)) { |
| V1_0::PerformanceInfo perfInfo = {.execTime = perf, .powerUsage = perf}; |
| V1_0::PerformanceInfo perfRelaxedInfo = {.execTime = perfRelaxed, |
| .powerUsage = perfRelaxed}; |
| mCapabilities = { |
| .relaxedFloat32toFloat16PerformanceScalar = perfRelaxedInfo, |
| .relaxedFloat32toFloat16PerformanceTensor = perfRelaxedInfo, |
| .operandPerformance = |
| ::android::nn::nonExtensionOperandPerformance<HalVersion::V1_3>( |
| perfInfo), |
| .ifPerformance = perfInfo, |
| .whilePerformance = perfInfo}; |
| } |
| DeviceSpecification(const std::string& name, float perf, HalVersion halVersion, |
| uint32_t operationMaskV1_0, uint32_t operationMaskV1_1 = 0, |
| uint32_t operationMaskV1_2 = 0, uint32_t operationMaskV1_3 = 0) |
| : DeviceSpecification( |
| name, perf, perf, |
| makeOperationMask(halVersion, operationMaskV1_0, operationMaskV1_1, |
| operationMaskV1_2, operationMaskV1_3)) { |
| mHalVersion = halVersion; |
| } |
| |
| std::string mName; |
| std::string mVersionString; |
| V1_3::Capabilities mCapabilities; |
| HalVersion mHalVersion = HalVersion::LATEST; |
| uint32_t mOperationMask; |
| PartitioningDriver::OEM mOEM = PartitioningDriver::OEMNo; |
| std::set<V1_3::OperationType> mOperationTypes; |
| |
| static constexpr char kVersionString[] = "JUST_AN_EXAMPLE"; |
| |
| private: |
| // This function takes three operation masks aligned at the low-order |
| // bit -- one mask each for V1_0, V1_1, and V1_2 -- and produces a single |
| // composite operation mask, formed by shifting each of the input |
| // operation masks appropriately and ORing the results together. |
| // |
| // For convenience, any bits of an input mask that are too high order |
| // for that mask are discarded -- this allows ~0 to be a legal input |
| // mask. |
| // |
| // For the sake of example, assume that each low order mask is 4 bits |
| // wide, and take some artistic license to write literals in binary. |
| // Then: |
| // |
| // assert(makeOperationMask(HalVersion::V1_2, 0b0110, 0b1001, 0b0101) == |
| // 0b 0101 1001 0110); |
| // |
| // This is used by a DeviceSpecification constructor to build a mask of |
| // operations to be supported by the device. |
| static uint32_t makeOperationMask(HalVersion halVersion, uint32_t operationMaskV1_0, |
| uint32_t operationMaskV1_1, uint32_t operationMaskV1_2, |
| uint32_t operationMaskV1_3) { |
| if (halVersion < HalVersion::V1_3) { |
| CHECK(!operationMaskV1_3); |
| } |
| if (halVersion < HalVersion::V1_2) { |
| CHECK(!operationMaskV1_2); |
| } |
| if (halVersion < HalVersion::V1_1) { |
| CHECK(!operationMaskV1_1); |
| } |
| auto maskOfWidth = [](uint32_t width) -> uint32_t { return (1U << width) - 1; }; |
| static const uint32_t kOperationMaskV1_0 = |
| maskOfWidth(kLastEncodingV1_0 - kFirstEncodingV1_0 + 1); |
| static const uint32_t kOperationMaskV1_1 = |
| maskOfWidth(kLastEncodingV1_1 - kFirstEncodingV1_1 + 1); |
| static const uint32_t kOperationMaskV1_2 = |
| maskOfWidth(kLastEncodingV1_2 - kFirstEncodingV1_2 + 1); |
| static const uint32_t kOperationMaskV1_3 = |
| maskOfWidth(kLastEncodingV1_3 - kFirstEncodingV1_3 + 1); |
| return ((operationMaskV1_0 & kOperationMaskV1_0) << kFirstEncodingV1_0) | |
| ((operationMaskV1_1 & kOperationMaskV1_1) << kFirstEncodingV1_1) | |
| ((operationMaskV1_2 & kOperationMaskV1_2) << kFirstEncodingV1_2) | |
| ((operationMaskV1_3 & kOperationMaskV1_3) << kFirstEncodingV1_3); |
| } |
| }; |
| static std::vector<std::shared_ptr<Device>> makeDevices( |
| std::vector<DeviceSpecification> specifications) { |
| std::vector<std::shared_ptr<Device>> devices; |
| for (const auto& specification : specifications) { |
| SharedDevice device = nullptr; |
| switch (specification.mHalVersion) { |
| case HalVersion::V1_3: |
| device = android::nn::makeSharedDevice( |
| specification.mName, |
| new PartitioningDriver(specification.mName.c_str(), |
| specification.mVersionString.c_str(), |
| specification.mCapabilities, |
| specification.mOperationMask, specification.mOEM, |
| specification.mOperationTypes)); |
| break; |
| case HalVersion::V1_2: |
| device = android::nn::makeSharedDevice( |
| specification.mName, |
| new PartitioningDriverV1_2( |
| specification.mName.c_str(), |
| specification.mVersionString.c_str(), |
| specification.mCapabilities, specification.mOperationMask, |
| specification.mOEM, specification.mOperationTypes)); |
| break; |
| case HalVersion::V1_1: |
| device = android::nn::makeSharedDevice( |
| specification.mName, |
| new PartitioningDriverV1_1( |
| specification.mName.c_str(), |
| specification.mVersionString.c_str(), |
| specification.mCapabilities, specification.mOperationMask, |
| specification.mOEM, specification.mOperationTypes)); |
| break; |
| case HalVersion::V1_0: |
| device = android::nn::makeSharedDevice( |
| specification.mName, |
| new PartitioningDriverV1_0( |
| specification.mName.c_str(), |
| specification.mVersionString.c_str(), |
| specification.mCapabilities, specification.mOperationMask, |
| specification.mOEM, specification.mOperationTypes)); |
| break; |
| default: |
| ADD_FAILURE() << "Unexpected"; |
| } |
| auto driverDevice = DeviceManager::forTest_makeDriverDevice(device); |
| devices.push_back(std::move(driverDevice)); |
| } |
| devices.push_back(DeviceManager::getCpuDevice()); |
| return devices; |
| } |
| |
| static std::string stepsToString(const std::vector<std::string>& steps) { |
| std::stringstream ss; |
| ss << "[ "; |
| for (const auto& step : steps) { |
| ss << step << " "; |
| } |
| ss << "]"; |
| return ss.str(); |
| } |
| |
| // Checks the type of each logical step in an execution plan. |
| // Each entry of "expected" is either: kIfStep for IfStep, kWhileStep for WhileStep, |
| // kGotoStep for GotoStep, or the device name for ExecutionStep. |
| void checkExecutionPlanSteps(const ExecutionPlan& plan, |
| const std::vector<std::string>& expected) { |
| ASSERT_GT(expected.size(), 0u); |
| |
| std::vector<std::string> actual; |
| if (expected.size() == 1) { |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| actual.emplace_back(plan.forTest_simpleGetDevice()->getName()); |
| } else { |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| const auto& steps = plan.forTest_compoundGetSteps(); |
| for (const auto& step : steps) { |
| if (step->isIf()) { |
| actual.emplace_back(kIfStep); |
| } else if (step->isWhile()) { |
| actual.emplace_back(kWhileStep); |
| } else if (step->isGoto()) { |
| actual.emplace_back(kGotoStep); |
| } else if (step->isExecution()) { |
| actual.emplace_back(step->executionStep()->getDevice()->getName()); |
| } else { |
| ASSERT_FALSE(true) << "Unknown LogicalStep"; |
| } |
| } |
| } |
| ASSERT_TRUE(actual == expected) |
| << "expected: " << stepsToString(expected) << ", actual: " << stepsToString(actual); |
| } |
| |
| /*-- Graph comparision ----------------------------------------------------------------*/ |
| |
| // An operand with certain values for its lifetime does not have a |
| // defining operation in the graph. For the purposes of the graph |
| // comparison algorithm, we encode the "defining operation" index of |
| // such an operand as follows: |
| // - NO_VALUE kPseudoDefiningOperationNoValue |
| // - SUBGRAPH_INPUT kPseudoDefiningOperationModelInput0 + (position in list of inputs) |
| // - CONSTANT_COPY kPseudoDefiningOperationConstantCopy0 + (constant value) |
| // Note: For the graphs we build in this test, we |
| // only expect to see 4-byte constants within |
| // a very restricted range, so we only make |
| // room for such constants in our encoding |
| // space. |
| // We do not expect to see CONSTANT_REFERENCE, and so we do not handle |
| // it. |
| // |
| // The encoding is intended to be relatively human readable; it is not |
| // designed to represent some optimal balance of ranges for the items |
| // within its scope (actual operations, inputs, constants). |
| |
| enum PseudoDefiningOperationEncodings : uint32_t { |
| kPseudoDefiningOperationModelInput0 = 0x80000000U, |
| kPseudoDefiningOperationConstantCopy0 = 0x90000000U, |
| kPseudoDefiningOperationNoValue = 0xeeeeeeeeU, |
| |
| // lowest value for special encoding |
| kPseudoDefiningOperationBase = 0x80000000U, |
| |
| // range of encoded input or constant |
| kPseudoDefiningOperationRange = 0x10000000U, |
| }; |
| |
| // Build a map from operand to defining operation. |
| // TODO: Replace map with vector? |
| void buildDefinitionMap(const ModelBuilder* model, std::map<uint32_t, uint32_t>* defMap) { |
| // actual definitions |
| ASSERT_LT(model->operationCount(), kPseudoDefiningOperationBase); |
| for (uint32_t i = 0, e = model->operationCount(); i < e; i++) { |
| const V1_3::Operation& operation = android::nn::convertToV1_3(model->getOperation(i)); |
| for (uint32_t output : operation.outputs) { |
| (*defMap)[output] = i; |
| } |
| } |
| // inputs |
| ASSERT_LT(model->inputCount(), kPseudoDefiningOperationRange); |
| for (uint32_t i = 0, e = model->inputCount(); i < e; i++) { |
| (*defMap)[model->getInputOperandIndex(i)] = kPseudoDefiningOperationModelInput0 + i; |
| } |
| // look for NO_VALUE and CONSTANT_COPY |
| for (uint32_t i = 0, e = model->operandCount(); i < e; i++) { |
| const V1_3::Operand& operand = android::nn::convertToV1_3(model->getOperand(i)); |
| switch (operand.lifetime) { |
| case V1_3::OperandLifeTime::NO_VALUE: |
| (*defMap)[i] = kPseudoDefiningOperationNoValue; |
| break; |
| case V1_3::OperandLifeTime::CONSTANT_COPY: { |
| ASSERT_EQ(operand.location.length, sizeof(uint32_t)); |
| uint32_t value; |
| memcpy(&value, model->getPointerToOperandValue(operand.location.offset), |
| sizeof(uint32_t)); |
| ASSERT_LT(value, kPseudoDefiningOperationNoValue); |
| (*defMap)[i] = kPseudoDefiningOperationConstantCopy0 + value; |
| break; |
| } |
| case V1_3::OperandLifeTime::TEMPORARY_VARIABLE: |
| case V1_3::OperandLifeTime::SUBGRAPH_INPUT: |
| case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT: |
| // already handled |
| break; |
| default: |
| FAIL(); |
| break; |
| } |
| } |
| // validity check |
| ASSERT_EQ(model->operandCount(), defMap->size()); |
| } |
| |
| #ifdef VERBOSE |
| void dump(const char* name, const std::map<uint32_t, uint32_t>* aMap) { |
| auto writeNum = [](uint32_t num) { |
| if (num >= kPseudoDefiningOperationBase) { |
| std::cout << "0x" << std::hex << num << std::dec; |
| } else { |
| std::cout << num; |
| } |
| }; |
| |
| std::cout << name << ": { "; |
| bool gotOne = false; |
| for (const auto& entry : *aMap) { |
| if (gotOne) { |
| std::cout << ", "; |
| } else { |
| gotOne = true; |
| } |
| std::cout << "("; |
| writeNum(entry.first); |
| std::cout << ", "; |
| writeNum(entry.second); |
| std::cout << ")"; |
| } |
| std::cout << " }" << std::endl; |
| } |
| #endif |
| |
| bool compare(const Operand& operandA, const Operand& operandB) { |
| if (operandA.type != operandB.type || operandA.dimensions != operandB.dimensions || |
| operandA.scale != operandB.scale || operandA.zeroPoint != operandB.zeroPoint) { |
| return false; |
| } |
| return true; |
| } |
| |
| // Compare two graphs. We ignore operand and operation indexes (i.e., |
| // two nodes can be the same even if they are numbered differently) |
| // but we also ignore semantics (e.g., even if an operation kind is |
| // such that the operand is commutative, we still pay attention to the |
| // order of its input operands). |
| // |
| // The comparison algorithm works by walking modelA from outputs |
| // towards inputs, along the edge from each operand to its |
| // defining operation, and then along the edges to the operation's |
| // input operands. At each step along the way, we try to match up |
| // operands and operations from modelA with equivalent operands |
| // and operations from modelB. |
| // |
| // We start by assuming that modelA's outputs and modelB's outputs |
| // match positionally (e.g., modelA's first output operand is |
| // equivalent to modelB's first output operand). Once we've |
| // discovered two equivalent operands (such as those outputs), we |
| // place them in a work queue. We repeatedly pull operands off |
| // the queue and compare their defining operations and those |
| // operations' input operands, to discover more pairs of |
| // equivalent operands. If we ever find operations that do not |
| // match (e.g., because operation kind differs), or operands that |
| // do not match (e.g., because operand type differs); or if we |
| // ever find a conflict (we've already decided that operand A's |
| // equivalent operand is B0, but it looks like we need its |
| // equivalent operand to be B1); then the graphs compare unequal. |
| // Otherwise, we'll eventually exhaust the work queue, and |
| // conclude that the graphs compare equal. |
| // |
| // As a side effect of the comparison, we produce a map |
| // *inputsAndOutputsBToA that maps from each of the model input and output |
| // operand numbers of modelB to the corresponding operand numbers of modelA. |
| // If the comparison returns false, the contents of the map are undefined. |
| bool compare(const ModelBuilder* modelA, const ModelBuilder* modelB, |
| std::map<uint32_t, uint32_t>* inputsAndOutputsBToA) { |
| CHECK(inputsAndOutputsBToA != nullptr); |
| EXPECT_TRUE(inputsAndOutputsBToA->empty()); |
| |
| #ifdef VERBOSE |
| ::dump("compare(A)", modelA); |
| ::dump("compare(B)", modelB); |
| #endif |
| |
| if (modelA->operandCount() != modelB->operandCount() || |
| modelA->operationCount() != modelB->operationCount() || |
| modelA->inputCount() != modelB->inputCount() || |
| modelA->outputCount() != modelB->outputCount()) { |
| RETURN_FALSE(); |
| } |
| |
| // Maps from operand index to index of defining operation. |
| std::map<uint32_t, uint32_t> defsA, defsB; |
| buildDefinitionMap(modelA, &defsA); |
| buildDefinitionMap(modelB, &defsB); |
| if (HasFatalFailure()) return false; |
| |
| // Maps from operand index in modelA to equivalent operand index |
| // in modelB; and from operation index in modelA to equivalent |
| // operation index in modelB. |
| std::map<uint32_t, uint32_t> equivalentOperandsAToB; |
| std::map<uint32_t, uint32_t> equivalentOperationsAToB; |
| |
| // Queue of operand indexes from modelA, each of whose defining |
| // operations are to be checked for equivalence with modelB. |
| std::queue<uint32_t> workQueueOperandsA; |
| |
| // Seed operand equivalence map and work queue from model outputs. |
| for (uint32_t i = 0, e = modelA->outputCount(); i < e; i++) { |
| uint32_t outputA = modelA->getOutputOperandIndex(i); |
| uint32_t outputB = modelB->getOutputOperandIndex(i); |
| if (!compare(modelA->getOperand(outputA), modelB->getOperand(outputB))) { |
| #ifdef VERBOSE |
| std::cout << "modelA.output[" << i << "] = operand[" << outputA |
| << "] = " << toString(modelA->getOperand(outputA)) << std::endl; |
| std::cout << "modelB.output[" << i << "] = operand[" << outputB |
| << "] = " << toString(modelB->getOperand(outputB)) << std::endl; |
| #endif |
| RETURN_FALSE(); |
| } |
| equivalentOperandsAToB[outputA] = outputB; |
| workQueueOperandsA.push(outputA); |
| } |
| |
| #ifdef VERBOSE |
| dump("defsA", &defsA); |
| dump("defsB", &defsB); |
| #endif |
| |
| // Process the queue. |
| uint32_t pseudoDefinitionCount = 0; |
| while (!workQueueOperandsA.empty()) { |
| #ifdef VERBOSE |
| dump("equivalentOperandsAToB", &equivalentOperandsAToB); |
| dump("equivalentOperationsAToB", &equivalentOperationsAToB); |
| #endif |
| uint32_t operandIndexA = workQueueOperandsA.front(); |
| #ifdef VERBOSE |
| std::cout << "operandIndexA: " << operandIndexA << std::endl; |
| #endif |
| workQueueOperandsA.pop(); |
| uint32_t operandIndexB = equivalentOperandsAToB.at(operandIndexA); |
| |
| uint32_t operationIndexA = defsA.at(operandIndexA); |
| uint32_t operationIndexB = defsB.at(operandIndexB); |
| auto it = equivalentOperationsAToB.find(operationIndexA); |
| if (it != equivalentOperationsAToB.end()) { |
| if (it->second != operationIndexB) { |
| RETURN_FALSE(); |
| } |
| continue; |
| } |
| |
| // We haven't identified an equivalent operation for |
| // operationIndexA. |
| |
| if ((operationIndexA >= kPseudoDefiningOperationBase) != |
| (operationIndexB >= kPseudoDefiningOperationBase)) { |
| RETURN_FALSE(); |
| } |
| // Either both operands have pseudo-definitions, or neither |
| // does. |
| if (operationIndexA >= kPseudoDefiningOperationBase) { |
| // Both operands have pseudo-definitions. |
| if (operationIndexA != operationIndexB) { |
| RETURN_FALSE(); |
| } |
| equivalentOperationsAToB[operationIndexA] = operationIndexB; |
| ++pseudoDefinitionCount; |
| continue; |
| } |
| |
| // If we get here, neither operation A nor operation B is a |
| // pseudo-definition. |
| |
| const Operation& operationA = modelA->getOperation(operationIndexA); |
| const Operation& operationB = modelB->getOperation(operationIndexB); |
| if (operationA.type != operationB.type || |
| operationA.inputs.size() != operationB.inputs.size() || |
| operationA.outputs.size() != operationB.outputs.size()) { |
| RETURN_FALSE(); |
| } |
| equivalentOperationsAToB[operationIndexA] = operationIndexB; |
| for (uint32_t i = 0, e = operationA.inputs.size(); i < e; i++) { |
| uint32_t inputA = operationA.inputs[i]; |
| uint32_t inputB = operationB.inputs[i]; |
| auto it = equivalentOperandsAToB.find(inputA); |
| if (it != equivalentOperandsAToB.end()) { |
| if (it->second != inputB) { |
| RETURN_FALSE(); |
| } |
| continue; |
| } |
| // We haven't identified an equivalent operand for inputA. |
| if (!compare(modelA->getOperand(inputA), modelB->getOperand(inputB))) { |
| #ifdef VERBOSE |
| std::cout << "modelA.input[" << i << "] = operand[" << inputA |
| << "] = " << toString(modelA->getOperand(inputA)) << std::endl; |
| std::cout << "modelB.input[" << i << "] = operand[" << inputB |
| << "] = " << toString(modelB->getOperand(inputB)) << std::endl; |
| #endif |
| RETURN_FALSE(); |
| } |
| equivalentOperandsAToB[inputA] = inputB; |
| workQueueOperandsA.push(inputA); |
| } |
| } |
| |
| // Validity check |
| if (modelA->operandCount() != defsA.size() || modelA->operandCount() != defsB.size() || |
| modelA->operandCount() != equivalentOperandsAToB.size() || |
| modelA->operationCount() + pseudoDefinitionCount != equivalentOperationsAToB.size()) { |
| RETURN_FALSE(); |
| } |
| |
| // Build *inputsAndOutputsBToA |
| for (uint32_t aInputIndex : modelA->getInputOperandIndexes()) { |
| (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aInputIndex)] = aInputIndex; |
| } |
| for (uint32_t aOutputIndex : modelA->getOutputOperandIndexes()) { |
| (*inputsAndOutputsBToA)[equivalentOperandsAToB.at(aOutputIndex)] = aOutputIndex; |
| } |
| |
| RETURN_TRUE(); |
| } |
| |
| /*-------------------------------------------------------------------------------------*/ |
| |
| // As a side effect of the comparison, we produce a map |
| // *inputsAndOutputsModelToStep that maps from each of the model input and |
| // output operand numbers of "model" to the corresponding operand numbers of |
| // the step model from "step". If the comparison returns false, the contents |
| // of the map are undefined. |
| bool compare(const ExecutionStep* step, const PartitioningModel* model, |
| std::shared_ptr<Device> device, |
| std::map<uint32_t, uint32_t>* inputsAndOutputsModelToStep) { |
| return (step->getDevice() == device) && |
| compare(step->getStepModel(), |
| reinterpret_cast<const ModelBuilder*>(model->getHandle()), |
| inputsAndOutputsModelToStep); |
| } |
| |
| void compare(const std::shared_ptr<LogicalStep> logicalStep, const PartitioningModel* model, |
| std::shared_ptr<Device> device, const RemapVectorType& modelInputs, |
| const RemapVectorType& modelOutputs, const RemapVectorType& tempsAsStepModelInputs, |
| const StepModelOutputSetType& tempsAsStepModelOutputs, |
| const RemapVectorType& outputsAsStepModelInputs, |
| const std::set<uint32_t>& modelOutputsThatAreDownstreamInputs) { |
| ASSERT_TRUE(logicalStep->isExecution()); |
| const ExecutionStep* step = logicalStep->executionStep(); |
| std::map<uint32_t, uint32_t> inputsAndOutputsModelToStep; |
| ASSERT_NO_FATAL_FAILURE( |
| ASSERT_TRUE(compare(step, model, device, &inputsAndOutputsModelToStep))); |
| ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelInputs(), |
| modelInputs)); |
| ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, step->getModelOutputs(), |
| modelOutputs)); |
| ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, |
| step->getTempsAsStepModelInputs(), tempsAsStepModelInputs)); |
| ASSERT_TRUE(compareStepModelOutputSets(inputsAndOutputsModelToStep, |
| step->getTempsAsStepModelOutputs(), |
| tempsAsStepModelOutputs)); |
| ASSERT_TRUE(compareRemapVectors(inputsAndOutputsModelToStep, |
| step->getOutputsAsStepModelInputs(), |
| outputsAsStepModelInputs)); |
| ASSERT_TRUE(modelOutputsThatAreDownstreamInputs == |
| step->getModelOutputsThatAreDownstreamInputs()); |
| } |
| |
| private: |
| static bool compareRemapVectors(const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep, |
| const RemapVectorType& step, RemapVectorType model) { |
| std::transform(model.begin(), model.end(), model.begin(), |
| [&inputsAndOutputsModelToStep](const RemapVectorType::value_type& val) { |
| return std::make_pair(val.first, |
| inputsAndOutputsModelToStep.at(val.second)); |
| }); |
| return step == model; |
| } |
| |
| static bool compareStepModelOutputSets( |
| const std::map<uint32_t, uint32_t>& inputsAndOutputsModelToStep, |
| const StepModelOutputSetType& step, const StepModelOutputSetType& model) { |
| StepModelOutputSetType modelTransformed; |
| std::transform( |
| model.begin(), model.end(), std::inserter(modelTransformed, modelTransformed.end()), |
| [&inputsAndOutputsModelToStep](const StepModelOutputSetType::value_type& val) { |
| return std::make_pair(val.first, inputsAndOutputsModelToStep.at(val.second)); |
| }); |
| return step == modelTransformed; |
| } |
| }; |
| |
| TEST_F(PartitioningTest, SimpleModel) { |
| PartitioningModel model; |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addFloatOperand(); |
| uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1); |
| uint32_t opnd3 = model.addFloatOperand(); |
| uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3); |
| model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| // Simple partition (two devices are each capable of everything, one is the best). |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| const auto devicesA = makeDevices({{"bad", 0.9, ~0U}, {"good", 0.5, ~0U}}); |
| ExecutionPlan planA; |
| ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &planA), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr); |
| ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "good"); |
| |
| // Simple partition (two devices are each capable of everything, none better than CPU). |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| const auto devicesC = makeDevices({{"bad", 1.1, ~0U}, {"bad2", 1.0, ~0U}}); |
| ExecutionPlan planC; |
| ASSERT_EQ(model.partitionTheWork(devicesC, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &planC), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(planC.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(planC.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(planC.forTest_simpleGetDevice(), DeviceManager::getCpuDevice()); |
| |
| // Compound partition (two devices, each is capable of one of the |
| // two operations). We could do more extensive checking here -- |
| // for example, verify that each step within the plan has the |
| // correct (model and step model)x(inputs and outputs). |
| const auto devicesB = makeDevices({{"0", 0.9, 1 << 0}, {"1", 0.5, 1 << 1}}); |
| ExecutionPlan planB; |
| ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &planB), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| const auto& stepsB = planB.forTest_compoundGetSteps(); |
| ASSERT_EQ(stepsB.size(), size_t(2)); |
| { |
| // Build a model to compare against the step model from stepsB[0]. |
| PartitioningModel modelB0; |
| uint32_t b0Opnd0 = modelB0.addFloatOperand(); |
| uint32_t b0Opnd1 = modelB0.addFloatOperand(); |
| uint32_t b0Opnd2 = modelB0.addOperation2To1V1_0(0, b0Opnd0, b0Opnd1); |
| modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2}); |
| modelB0.finish(); |
| ASSERT_TRUE(modelB0.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(stepsB[0], &modelB0, devicesB[0], |
| RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}}, // modelInputs |
| RemapVectorType{}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{{opnd2, b0Opnd2}}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| // Build a model to compare against the step model from stepsB[1]. |
| PartitioningModel modelB1; |
| uint32_t b1Opnd2 = modelB1.addFloatOperand(); |
| uint32_t b1Opnd3 = modelB1.addFloatOperand(); |
| uint32_t b1Opnd4 = modelB1.addOperation2To1V1_0(1, b1Opnd2, b1Opnd3); |
| // Note: In the partitioning algorithm, step model inputs follow |
| // model inputs. In the original model "model", opnd2 is not |
| // an input; so in the step model "modelB1", the corresponding |
| // input b1Opnd2 is a step model input, and must follow the |
| // model input b1Opnd3. |
| modelB1.identifyInputsAndOutputs({b1Opnd3, b1Opnd2}, {b1Opnd4}); |
| modelB1.finish(); |
| ASSERT_TRUE(modelB1.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE(compare( |
| stepsB[1], &modelB1, devicesB[1], RemapVectorType{{opnd3, b1Opnd3}}, // modelInputs |
| RemapVectorType{{opnd4, b1Opnd4}}, // modelOutputs |
| RemapVectorType{{opnd2, b1Opnd2}}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| } |
| |
| TEST_F(PartitioningTest, SliceModel) { |
| PartitioningModel model; |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addFloatOperand(); |
| uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1); |
| uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd0, opnd1); |
| uint32_t opnd4 = model.addOperation2To1V1_1(0, opnd0, opnd1); |
| uint32_t opnd5 = model.addOperation2To1V1_2(0, opnd2, opnd3); |
| uint32_t opnd6 = model.addOperation1To1V1_3(0, opnd2); |
| model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4, opnd5, opnd6}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| // Simple partition (V1_0, V1_1, V1_2, V1_3 devices are available; V1_3 has best perf). |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| const auto devicesA = makeDevices({{"V1_0", 0.8, HalVersion::V1_0, ~0U}, |
| {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U}, |
| {"V1_2", 0.6, HalVersion::V1_2, ~0U, ~0U, ~0U}, |
| {"V1_3", 0.5, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}}); |
| ExecutionPlan planA; |
| ASSERT_EQ(model.partitionTheWork(devicesA, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &planA), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_NE(planA.forTest_simpleGetDevice().get(), nullptr); |
| ASSERT_EQ(planA.forTest_simpleGetDevice()->getName(), "V1_3"); |
| |
| // Compound partition (V1_0, V1_1, V1_2 devices are available, in decreasing |
| // order of performance; model is distributed across all three devices). |
| const auto devicesB = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U}, |
| {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U}, |
| {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}, |
| {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}}); |
| ExecutionPlan planB; |
| ASSERT_EQ(model.partitionTheWork(devicesB, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &planB), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(planB.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(planB.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| const auto& stepsB = planB.forTest_compoundGetSteps(); |
| ASSERT_EQ(stepsB.size(), size_t(4)); |
| { |
| // Build a model to compare against the step model from stepsB[0]. |
| PartitioningModel modelB0; |
| uint32_t b0Opnd0 = modelB0.addFloatOperand(); |
| uint32_t b0Opnd1 = modelB0.addFloatOperand(); |
| uint32_t b0Opnd2 = modelB0.addOperation2To1V1_1(0, b0Opnd0, b0Opnd1); |
| modelB0.identifyInputsAndOutputs({b0Opnd0, b0Opnd1}, {b0Opnd2}); |
| modelB0.finish(); |
| ASSERT_TRUE(modelB0.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(stepsB[0], &modelB0, devicesB[1], |
| RemapVectorType{{opnd0, b0Opnd0}, {opnd1, b0Opnd1}}, // modelInputs |
| RemapVectorType{{opnd4, b0Opnd2}}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| // Build a model to compare against the step model from stepsB[1]. |
| PartitioningModel modelB1; |
| uint32_t b1Opnd0 = modelB1.addFloatOperand(); |
| uint32_t b1Opnd1 = modelB1.addFloatOperand(); |
| uint32_t b1Opnd2 = modelB1.addOperation2To1V1_0(0, b1Opnd0, b1Opnd1); |
| uint32_t b1Opnd3 = modelB1.addOperation2To1V1_0(1, b1Opnd0, b1Opnd1); |
| modelB1.identifyInputsAndOutputs({b1Opnd0, b1Opnd1}, {b1Opnd2, b1Opnd3}); |
| modelB1.finish(); |
| ASSERT_TRUE(modelB1.isValid()); |
| |
| // Note that this is also an important test that we can detect |
| // modelOutputsThatAreDownstreamInputs. |
| ASSERT_NO_FATAL_FAILURE( |
| compare(stepsB[1], &modelB1, devicesB[0], |
| RemapVectorType{{opnd0, b1Opnd0}, {opnd1, b1Opnd1}}, // modelInputs |
| RemapVectorType{{opnd2, b1Opnd2}}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{{opnd3, b1Opnd3}}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {0u})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| // Build a model to compare against the step model from stepsB[2]. |
| PartitioningModel modelB2; |
| uint32_t b2Opnd0 = modelB2.addFloatOperand(); |
| uint32_t b2Opnd1 = modelB2.addOperation1To1V1_3(0, b2Opnd0); |
| // Note: In the partitioning algorithm, temps that are |
| // step model inputs precede model outputs that are step model |
| // inputs. |
| modelB2.identifyInputsAndOutputs({b2Opnd0}, {b2Opnd1}); |
| modelB2.finish(); |
| ASSERT_TRUE(modelB2.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(stepsB[2], &modelB2, devicesB[3], RemapVectorType{}, // modelInputs |
| RemapVectorType{{opnd6, b2Opnd1}}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{{opnd2, b2Opnd0}}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| // Build a model to compare against the step model from stepsB[3]. |
| PartitioningModel modelB3; |
| uint32_t b3Opnd0 = modelB3.addFloatOperand(); |
| uint32_t b3Opnd1 = modelB3.addFloatOperand(); |
| uint32_t b3Opnd2 = modelB3.addOperation2To1V1_2(0, b3Opnd0, b3Opnd1); |
| // Note: In the partitioning algorithm, temps that are |
| // step model inputs precede model outputs that are step model |
| // inputs. In the original model "model", opnd3 is a temp and |
| // opnd2 is a model output; so in the step model "modelB3", the |
| // corresponding inputs b3Opnd1 and b3Opnd0 must appear in |
| // that order. |
| modelB3.identifyInputsAndOutputs({b3Opnd1, b3Opnd0}, {b3Opnd2}); |
| modelB3.finish(); |
| ASSERT_TRUE(modelB3.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(stepsB[3], &modelB3, devicesB[2], RemapVectorType{}, // modelInputs |
| RemapVectorType{{opnd5, b3Opnd2}}, // modelOutputs |
| RemapVectorType{{opnd3, b3Opnd1}}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{{opnd2, b3Opnd0}}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| |
| // TODO: Make sure this still works when we have multiple devices |
| // of same version available for slicing. An easy (?) choice would |
| // be to route the two different V1_0 operations to different |
| // devices. |
| } |
| |
| TEST_F(PartitioningTest, SliceModelToEmpty) { |
| PartitioningModel model; |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0); |
| model.identifyInputsAndOutputs({opnd0}, {opnd1}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| // Only the V1_3 device can handle any operations in the model. |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| const auto devices = makeDevices({{"V1_0", 0.6, HalVersion::V1_0, ~0U}, |
| {"V1_1", 0.7, HalVersion::V1_1, ~0U, ~0U}, |
| {"V1_2", 0.8, HalVersion::V1_2, ~0U, ~0U, ~0U}, |
| {"V1_3", 0.9, HalVersion::V1_3, ~0U, ~0U, ~0U, ~0U}}); |
| ExecutionPlan plan; |
| ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_NE(plan.forTest_simpleGetDevice().get(), nullptr); |
| ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "V1_3"); |
| } |
| |
| TEST_F(PartitioningTest, Cpu) { |
| // Here's a model where some operations execute only on the Cpu. |
| // To make things interesting, we produce three partitions -- |
| // device, cpu, same-device. |
| |
| static const uint32_t kCpuOp = 1; |
| static const uint32_t kDevOp = 2; |
| |
| const auto devices = makeDevices({{"1", 0.5, 1 << kDevOp}}); |
| |
| PartitioningModel model; |
| |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addFloatOperand(); |
| |
| uint32_t opnd2 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd1); |
| uint32_t opnd3 = model.addOperation2To1V1_0(kDevOp, opnd0, opnd2); |
| |
| uint32_t opnd4 = model.addOperation2To1V1_0(kCpuOp, opnd0, opnd3); |
| uint32_t opnd5 = model.addOperation2To1V1_0(kCpuOp, opnd2, opnd4); |
| |
| uint32_t opnd6 = model.addFloatOperand(); |
| |
| uint32_t opnd7 = model.addOperation2To1V1_0(kDevOp, opnd3, opnd5); |
| uint32_t opnd8 = model.addOperation2To1V1_0(kDevOp, opnd6, opnd7); |
| |
| model.identifyInputsAndOutputs({opnd0, opnd1, opnd6}, {opnd4, opnd8}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| const auto& steps = plan.forTest_compoundGetSteps(); |
| ASSERT_EQ(steps.size(), size_t(3)); |
| { |
| const auto& step0 = steps[0]; |
| |
| // Build a model to compare against the step model from steps[0]. |
| PartitioningModel model0; |
| uint32_t m0Opnd0 = model0.addFloatOperand(); |
| uint32_t m0Opnd1 = model0.addFloatOperand(); |
| uint32_t m0Opnd2 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd1); |
| uint32_t m0Opnd3 = model0.addOperation2To1V1_0(kDevOp, m0Opnd0, m0Opnd2); |
| model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2, m0Opnd3}); |
| model0.finish(); |
| ASSERT_TRUE(model0.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(step0, &model0, devices[0], |
| RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}}, // modelInputs |
| RemapVectorType{}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{{opnd2, m0Opnd2}, |
| {opnd3, m0Opnd3}}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| const auto& step1 = steps[1]; |
| |
| // Build a model to compare against the step model from steps[1]. |
| PartitioningModel model1; |
| uint32_t m1Opnd0 = model1.addFloatOperand(); |
| uint32_t m1Opnd3 = model1.addFloatOperand(); |
| uint32_t m1Opnd4 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd0, m1Opnd3); |
| uint32_t m1Opnd2 = model1.addFloatOperand(); |
| uint32_t m1Opnd5 = model1.addOperation2To1V1_0(kCpuOp, m1Opnd2, m1Opnd4); |
| model1.identifyInputsAndOutputs({m1Opnd0, m1Opnd3, m1Opnd2}, {m1Opnd4, m1Opnd5}); |
| model1.finish(); |
| ASSERT_TRUE(model1.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE(compare( |
| step1, &model1, DeviceManager::getCpuDevice(), |
| RemapVectorType{{opnd0, m1Opnd0}}, // modelInputs |
| RemapVectorType{{opnd4, m1Opnd4}}, // modelOutputs |
| RemapVectorType{{opnd3, m1Opnd3}, {opnd2, m1Opnd2}}, // tempsAsStepModelInputs |
| StepModelOutputSetType{{opnd5, m1Opnd5}}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| const auto& step2 = steps[2]; |
| |
| // Build a model to compare against the step model from steps[2]. |
| PartitioningModel model2; |
| uint32_t m2Opnd3 = model2.addFloatOperand(); |
| uint32_t m2Opnd5 = model2.addFloatOperand(); |
| uint32_t m2Opnd7 = model2.addOperation2To1V1_0(kDevOp, m2Opnd3, m2Opnd5); |
| uint32_t m2Opnd6 = model2.addFloatOperand(); |
| uint32_t m2Opnd8 = model2.addOperation2To1V1_0(kDevOp, m2Opnd6, m2Opnd7); |
| model2.identifyInputsAndOutputs({m2Opnd6, m2Opnd3, m2Opnd5}, {m2Opnd8}); |
| model2.finish(); |
| ASSERT_TRUE(model2.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE(compare( |
| step2, &model2, devices[0], RemapVectorType{{opnd6, m2Opnd6}}, // modelInputs |
| RemapVectorType{{opnd8, m2Opnd8}}, // modelOutputs |
| RemapVectorType{{opnd3, m2Opnd3}, {opnd5, m2Opnd5}}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| } |
| |
| TEST_F(PartitioningTest, SetPartitioning) { |
| PartitioningModel model; |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addFloatOperand(); |
| uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1, Dimensioned::NO); |
| uint32_t opnd3 = model.addFloatOperand(); |
| uint32_t opnd4 = model.addOperation2To1V1_0(1, opnd2, opnd3); |
| model.identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| // One device that can and should execute operation 0. |
| const auto devices = makeDevices({{"hw", 0.5, (1 << 0)}}); |
| |
| // Test kPartitioningNo. We should not even attempt partitioning, |
| // so there should be a SIMPLE plan on CPU. |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| PartitioningCompilation cPNo(&model, devices); |
| ASSERT_EQ(cPNo.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR); |
| ASSERT_EQ(cPNo.failPartitioning(), Result::NO_ERROR); |
| ASSERT_EQ(cPNo.finish(), Result::NO_ERROR); |
| ASSERT_EQ(cPNo.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(cPNo.getExecutionPlan().forTest_simpleGetDevice(), DeviceManager::getCpuDevice()); |
| |
| // Test kPartitioningWithFallback. We should attempt partitioning, simulate |
| // a recoverable failure, then fallback to CPU with a SIMPLE plan, and |
| // finally return success. No need to compare the original model to the |
| // model from the plan -- we didn't actually do any partitioning. |
| PartitioningCompilation cPWithFallback(&model, devices); |
| ASSERT_EQ(cPWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback), |
| Result::NO_ERROR); |
| ASSERT_EQ(cPWithFallback.failPartitioning(), Result::NO_ERROR); |
| ASSERT_EQ(cPWithFallback.finish(), Result::NO_ERROR); |
| ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(cPWithFallback.getExecutionPlan().forTest_simpleGetDevice(), |
| DeviceManager::getCpuDevice()); |
| |
| // Test kPartitioningWithoutFallback. We should attempt partitioning, |
| // simulate a recoverable failure, and fail. |
| PartitioningCompilation cPWithoutFallback(&model, devices); |
| ASSERT_EQ(cPWithoutFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), |
| Result::NO_ERROR); |
| ASSERT_EQ(cPWithoutFallback.failPartitioning(), Result::NO_ERROR); |
| ASSERT_EQ(cPWithoutFallback.finish(), Result::OP_FAILED); |
| ASSERT_EQ(cPWithoutFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::ERROR); |
| } |
| |
| // Regression test for http://b/69166603: |
| // "partitioned compilation and execution yields wrong results when model output is step model |
| // input" |
| TEST_F(PartitioningTest, ModelOutputAsStepModelInput) { |
| PartitioningModel model; |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addFloatOperand(); |
| uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1); |
| uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd2, opnd2); |
| model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd3}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| // Compound partition (two devices, each is capable of one of the |
| // two operations). We could do more extensive checking here -- |
| // for example, verify that each step within the plan has the |
| // correct (model and step model)x(inputs and outputs). |
| const auto devices = makeDevices({{"0", 0.5, 1 << 0}, {"1", 0.5, 1 << 1}}); |
| ExecutionPlan plan; |
| ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| const auto& steps = plan.forTest_compoundGetSteps(); |
| ASSERT_EQ(steps.size(), size_t(2)); |
| { |
| // Build a model to compare against the step model from steps[0]. |
| PartitioningModel model0; |
| uint32_t m0Opnd0 = model0.addFloatOperand(); |
| uint32_t m0Opnd1 = model0.addFloatOperand(); |
| uint32_t m0Opnd2 = model0.addOperation2To1V1_0(0, m0Opnd0, m0Opnd1); |
| model0.identifyInputsAndOutputs({m0Opnd0, m0Opnd1}, {m0Opnd2}); |
| model0.finish(); |
| ASSERT_TRUE(model0.isValid()); |
| ASSERT_NO_FATAL_FAILURE( |
| compare(steps[0], &model0, devices[0], |
| RemapVectorType{{opnd0, m0Opnd0}, {opnd1, m0Opnd1}}, // modelInputs |
| RemapVectorType{{opnd2, m0Opnd2}}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {0u})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| // Build a model to compare against the step model from steps[1]. |
| PartitioningModel model1; |
| uint32_t m1Opnd2 = model1.addFloatOperand(); |
| uint32_t m1Opnd3 = model1.addOperation2To1V1_0(1, m1Opnd2, m1Opnd2); |
| model1.identifyInputsAndOutputs({m1Opnd2}, {m1Opnd3}); |
| model1.finish(); |
| ASSERT_TRUE(model1.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(steps[1], &model1, devices[1], RemapVectorType{}, // modelInputs |
| RemapVectorType{{opnd3, m1Opnd3}}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{{opnd2, m1Opnd2}}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| } |
| |
| TEST_F(PartitioningTest, OemOperations) { |
| // Trivial model consisting solely of OEM operation. |
| PartitioningModel model; |
| uint32_t opndIn = model.addFloatOperand(); |
| uint32_t opndOut = model.addOperationOEM1To1(opndIn); |
| model.identifyInputsAndOutputs({opndIn}, {opndOut}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| // Verify that the best driver than can run an OEM operation is |
| // used, even if it is not better than the CPU. |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| const auto devicesBestOEM = makeDevices({{"badOEM", 1.5, ~0U, PartitioningDriver::OEMYes}, |
| {"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}, |
| {"goodOEM", 1.2, ~0U, PartitioningDriver::OEMYes}}); |
| PartitioningCompilation compilationBestOEM(&model, devicesBestOEM); |
| ASSERT_EQ(compilationBestOEM.finish(), Result::NO_ERROR); |
| const auto& planBestOEM = compilationBestOEM.getExecutionPlan(); |
| ASSERT_EQ(planBestOEM.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_NE(planBestOEM.forTest_simpleGetDevice().get(), nullptr); |
| ASSERT_EQ(planBestOEM.forTest_simpleGetDevice()->getName(), "goodOEM"); |
| |
| // Verify that we get an error if no driver can run an OEM operation. |
| const auto devicesNoOEM = makeDevices({{"noOEM", 0.5, ~0U, PartitioningDriver::OEMNo}}); |
| PartitioningCompilation compilationNoOEM(&model, devicesNoOEM); |
| ASSERT_EQ(compilationNoOEM.finish(), Result::BAD_DATA); |
| |
| // Verify that we get an error if a driver can SUPPORT but not PREPARE an OEM operation. |
| const auto devicesIndecisiveOEM = |
| makeDevices({{"indecisiveOEM", 0.5, ~0U, PartitioningDriver::OEMIndecisive}}); |
| PartitioningCompilation compilationIndecisiveOEM(&model, devicesIndecisiveOEM); |
| ASSERT_NE(compilationIndecisiveOEM.finish(), Result::NO_ERROR); |
| |
| // Verify that we get an error if there are no drivers (only CPU fallback). |
| PartitioningCompilation compilationNoDrivers(&model, makeDevices({}) /* no drivers */); |
| ASSERT_EQ(compilationNoDrivers.finish(), Result::BAD_DATA); |
| } |
| |
| TEST_F(PartitioningTest, RelaxedFP) { |
| const auto devices = makeDevices({// Best choice for non-relaxed model. |
| {"f32", 0.8, 0.9 /* relaxed */, ~0U}, |
| // Best choice for relaxed model. |
| {"f16", 0.9, 0.8 /* relaxed */, ~0U}}); |
| |
| auto TrivialTest = [&devices](bool doRelax, const char* expectDevice) { |
| // Trivial model consisting solely of one operation. |
| SCOPED_TRACE(expectDevice); |
| PartitioningModel model; |
| uint32_t opnd0 = model.addFloatOperand(); |
| uint32_t opnd1 = model.addFloatOperand(); |
| uint32_t opnd2 = model.addOperation2To1V1_0(0, opnd0, opnd1); |
| model.identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); |
| model.relaxComputationFloat32toFloat16(doRelax); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| // Verify that the model will be executed on the appropriate device. |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| ExecutionPlan plan; |
| ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), expectDevice); |
| }; |
| |
| ASSERT_NO_FATAL_FAILURE(TrivialTest(false, "f32")); |
| ASSERT_NO_FATAL_FAILURE(TrivialTest(true, "f16")); |
| } |
| |
| TEST_F(PartitioningTest, Perf) { |
| // The various type names used here are confusing. |
| // |
| // OperandType (from HAL file), WrapperType (from NeuralNetworksWrapper.h), |
| // and OperandCode (from NeuralNetworks.h) are different enums representing |
| // the same type kind -- e.g., OperandType::FLOAT32, WrapperType::FLOAT32, |
| // ANEURALNETWORKS_FLOAT32. Corresponding enumerators have the same value. |
| // |
| // WrapperOperandType is the NeuralNetworksWrapper.h representation of a |
| // full operand type (WrapperType plus dimensions plus other attributes). |
| |
| auto TestType = [](V1_3::OperandType operandType) { |
| if (operandType == V1_3::OperandType::SUBGRAPH) { |
| // SUBGRAPH capabilities are handled differently. |
| return; |
| } |
| SCOPED_TRACE(toString(operandType)); |
| // Trivial model consisting solely of OEM operation. We |
| // pick OEM operation because this allows us to use |
| // inputs and outputs of any number and type. |
| PartitioningModel model; |
| uint32_t opndIn = model.addOperand(static_cast<WrapperType>(operandType)); |
| uint32_t opndOut = model.addOperationOEM1To1(opndIn); |
| model.identifyInputsAndOutputs({opndIn}, {opndOut}); |
| model.finish(); |
| ASSERT_TRUE(model.isValid()); |
| |
| const V1_3::Capabilities baseCapabilities = ::android::nn::makeCapabilities(0.5); |
| |
| { |
| // better than base |
| V1_3::Capabilities goodCapabilities = baseCapabilities; |
| update(&goodCapabilities, operandType, 0.25); |
| |
| const auto devices = |
| makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes}, |
| {"good", goodCapabilities, ~0U, PartitioningDriver::OEMYes}}); |
| |
| // Verify that model will be executed on "good". |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| ExecutionPlan plan; |
| ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "good"); |
| } |
| |
| { |
| // worse than base |
| V1_3::Capabilities badCapabilities = baseCapabilities; |
| update(&badCapabilities, operandType, 0.75); |
| const auto devices = |
| makeDevices({{"base", baseCapabilities, ~0U, PartitioningDriver::OEMYes}, |
| {"bad", badCapabilities, ~0U, PartitioningDriver::OEMYes}}); |
| |
| // Verify that model will be executed on "base". |
| // No need to compare the original model to the model from the plan -- we |
| // didn't actually do any partitioning. |
| ExecutionPlan plan; |
| ASSERT_EQ(model.partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| EXPECT_TRUE(plan.forTest_flatGetDynamicTemporaries().empty()); |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), "base"); |
| } |
| }; |
| |
| for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN); |
| type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) { |
| TestType(static_cast<V1_3::OperandType>(type)); |
| } |
| for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN); |
| type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) { |
| TestType(static_cast<V1_3::OperandType>(type)); |
| } |
| } |
| |
| TEST_F(PartitioningTest, ZeroInputStepModel) { |
| PartitioningModel model; |
| const uint32_t opnd0 = model.addFloatZeroOperand(); |
| const uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0); |
| const uint32_t opnd2 = model.addFloatOperand(); |
| const uint32_t opnd3 = model.addOperation2To1V1_0(1, opnd1, opnd2); |
| model.identifyInputsAndOutputs({opnd2}, {opnd3}); |
| ASSERT_EQ(model.finish(), Result::NO_ERROR); |
| |
| // This will result in 2 partitions: deviceA handles op0, deviceB handles op1. |
| // The partition for deviceA does not have any model input, and should result in full CPU |
| // fallback. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| PartitioningCompilation compilation(&model, devices); |
| ASSERT_EQ(compilation.finish(), Result::NO_ERROR); |
| const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName(); |
| checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName}); |
| } |
| |
| TEST_F(PartitioningTest, ZeroOutputStepModel) { |
| PartitioningModel model; |
| const uint32_t opnd0 = model.addFloatOperand(); |
| const uint32_t opnd1 = model.addOperation1To1V1_3(0, opnd0); |
| const uint32_t opnd2 = model.addFloatOperand(); |
| model.addOperation2To1V1_0(1, opnd1, opnd2); |
| model.identifyInputsAndOutputs({opnd0, opnd2}, {opnd1}); |
| ASSERT_EQ(model.finish(), Result::NO_ERROR); |
| |
| // This will result in 2 partitions: deviceA handles op0, deviceB handles op1. |
| // The partition for deviceB does not have any model output, and should result in full CPU |
| // fallback. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| PartitioningCompilation compilation(&model, devices); |
| ASSERT_EQ(compilation.finish(), Result::NO_ERROR); |
| const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName(); |
| checkExecutionPlanSteps(compilation.getExecutionPlan(), {cpuDeviceName}); |
| } |
| |
| // Test dynamic temporaries and related parts of the partitioning implementation. |
| // |
| // opnd0 = model input // tensor to pad |
| // opnd1 = model input // padding |
| // opnd2 = PAD(opnd1, opnd0) // model output |
| // opnd3 = PAD(opnd1, opnd0) |
| // opnd4 = ADD(opnd2, opnd3, FUSED_NONE) // model output |
| class DynamicTemporariesTest : public PartitioningTest { |
| protected: |
| // Call these functions in sequence in order to perform the test. |
| // Call to declareOutputDimensions() can be omitted (see the default values below). |
| // Call to declareHalVersions() can be omitted (defaults to HalVersion::LATEST). |
| void declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified, |
| bool opnd3PartitionOutputSpecified, |
| bool opnd4ModelOutputSpecified); |
| void declareHalVersions(HalVersion padDeviceVersion, HalVersion addDeviceVersion); |
| void makeModelAndValidate(); |
| void compileModelAndComparePlan(bool noFallback = true); |
| void executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough, |
| bool opnd4ModelOutputBigEnough); |
| |
| // set by declareOutputDimensions() |
| bool mOpnd2ModelAndPartitionOutputSpecified = false; |
| bool mOpnd3PartitionOutputSpecified = false; |
| bool mOpnd4ModelOutputSpecified = false; |
| |
| // set by declareHalVersions() |
| HalVersion mPadDeviceVersion = HalVersion::LATEST; |
| HalVersion mAddDeviceVersion = HalVersion::LATEST; |
| HalVersion mMinDeviceVersion = HalVersion::LATEST; // minimum of the other two device versions |
| |
| // created by makeModelAndValidate() |
| std::optional<PartitioningModel> mModel; |
| std::vector<uint32_t> mOpnds; |
| |
| // created by compileModelAndComparePlan(); |
| std::optional<PartitioningCompilation> mCompilation; |
| |
| static bool supportsOutputOfUnknownRank(HalVersion version) { |
| return version >= HalVersion::V1_2; |
| } |
| |
| static Dimensioned dimensionedOutput(HalVersion version, bool specified) { |
| return specified ? Dimensioned::YES_4 |
| : supportsOutputOfUnknownRank(version) ? Dimensioned::NO |
| : Dimensioned::RANK_1; |
| } |
| }; |
| |
| void DynamicTemporariesTest::declareOutputDimensions(bool opnd2ModelAndPartitionOutputSpecified, |
| bool opnd3PartitionOutputSpecified, |
| bool opnd4ModelOutputSpecified) { |
| ASSERT_FALSE(mModel.has_value()); |
| mOpnd2ModelAndPartitionOutputSpecified = opnd2ModelAndPartitionOutputSpecified; |
| mOpnd3PartitionOutputSpecified = opnd3PartitionOutputSpecified; |
| mOpnd4ModelOutputSpecified = opnd4ModelOutputSpecified; |
| } |
| |
| void DynamicTemporariesTest::declareHalVersions(HalVersion padDeviceVersion, |
| HalVersion addDeviceVersion) { |
| ASSERT_FALSE(mModel.has_value()); |
| mPadDeviceVersion = padDeviceVersion; |
| mAddDeviceVersion = addDeviceVersion; |
| mMinDeviceVersion = min(padDeviceVersion, addDeviceVersion); |
| } |
| |
| void DynamicTemporariesTest::makeModelAndValidate() { |
| ASSERT_FALSE(mModel.has_value()); |
| mModel = PartitioningModel(); |
| |
| uint32_t opndActivation = mModel->addIntScalarOperand(ANEURALNETWORKS_FUSED_NONE); |
| |
| uint32_t opnd0 = mModel->addFloatOperand(Dimensioned::YES_2); // tensor to pad |
| uint32_t opnd1 = mModel->addIntOperand(Dimensioned::RANK_2); // paddings |
| uint32_t opnd2 = mModel->addExplicitOperationXTo1( |
| ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32, |
| dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified)); |
| uint32_t opnd3 = mModel->addExplicitOperationXTo1( |
| ANEURALNETWORKS_PAD, {opnd0, opnd1}, WrapperType::TENSOR_FLOAT32, |
| dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified)); |
| uint32_t opnd4 = mModel->addExplicitOperationXTo1( |
| ANEURALNETWORKS_ADD, {opnd2, opnd3, opndActivation}, WrapperType::TENSOR_FLOAT32, |
| dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified)); |
| mModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2, opnd4}); |
| mModel->finish(); |
| ASSERT_TRUE(mModel->isValid()); |
| |
| mOpnds = {opnd0, opnd1, opnd2, opnd3, opnd4}; |
| } |
| |
| void DynamicTemporariesTest::compileModelAndComparePlan(bool noFallback) { |
| ASSERT_TRUE(mModel.has_value()); |
| ASSERT_TRUE(!mCompilation.has_value()); |
| |
| auto devices = makeDevices({{"pad", |
| 0.9, |
| 0U, |
| PartitioningDriver::OEMNo, |
| mPadDeviceVersion, |
| {V1_3::OperationType::PAD}}, |
| {"add", |
| 0.9, |
| 0U, |
| PartitioningDriver::OEMNo, |
| mAddDeviceVersion, |
| {V1_3::OperationType::ADD}}}); |
| |
| mCompilation = PartitioningCompilation(&mModel.value(), devices); |
| ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithoutFallback), |
| Result::NO_ERROR); |
| if (noFallback) { |
| ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR); |
| const ExecutionPlan& planA = mCompilation->getExecutionPlan(); |
| EXPECT_TRUE(planA.forTest_flatGetDynamicTemporaries() == |
| (mOpnd3PartitionOutputSpecified ? DynamicTemporariesType{} |
| : DynamicTemporariesType{mOpnds[3]})); |
| ASSERT_EQ(planA.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| const auto& stepsA = planA.forTest_compoundGetSteps(); |
| ASSERT_EQ(stepsA.size(), size_t(2)); |
| { |
| // Build a model to compare against the step model from stepsA[0]. |
| PartitioningModel modelA0; |
| uint32_t a0Opnd0 = modelA0.addFloatOperand(Dimensioned::YES_2); |
| uint32_t a0Opnd1 = modelA0.addIntOperand(Dimensioned::RANK_2); |
| uint32_t a0Opnd2 = modelA0.addExplicitOperationXTo1( |
| ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32, |
| dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified)); |
| uint32_t a0Opnd3 = modelA0.addExplicitOperationXTo1( |
| ANEURALNETWORKS_PAD, {a0Opnd0, a0Opnd1}, WrapperType::TENSOR_FLOAT32, |
| dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified)); |
| modelA0.identifyInputsAndOutputs({a0Opnd0, a0Opnd1}, {a0Opnd3, a0Opnd2}); |
| modelA0.finish(); |
| ASSERT_TRUE(modelA0.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE(compare( |
| stepsA[0], &modelA0, devices[0], |
| RemapVectorType{{mOpnds[0], a0Opnd0}, {mOpnds[1], a0Opnd1}}, // modelInputs |
| RemapVectorType{{mOpnds[2], a0Opnd3}}, // modelOutputs |
| RemapVectorType{}, // tempsAsStepModelInputs |
| StepModelOutputSetType{{mOpnds[3], a0Opnd2}}, // tempsAsStepModelOutputs |
| RemapVectorType{}, // outputsAsStepModelInputs |
| {0u})); // modelOutputsThatAreDownstreamInputs |
| } |
| { |
| // Build a model to compare against the step model from stepsA[1]. |
| PartitioningModel modelA1; |
| uint32_t a1Opnd2 = modelA1.addFloatOperand( |
| dimensionedOutput(mMinDeviceVersion, mOpnd2ModelAndPartitionOutputSpecified)); |
| uint32_t a1Opnd3 = modelA1.addFloatOperand( |
| dimensionedOutput(mMinDeviceVersion, mOpnd3PartitionOutputSpecified)); |
| uint32_t a1Opnd4 = modelA1.addOperation2To1V1_0( |
| 0, a1Opnd2, a1Opnd3, |
| dimensionedOutput(mMinDeviceVersion, mOpnd4ModelOutputSpecified)); |
| modelA1.identifyInputsAndOutputs({a1Opnd3, a1Opnd2}, {a1Opnd4}); |
| modelA1.finish(); |
| ASSERT_TRUE(modelA1.isValid()); |
| |
| ASSERT_NO_FATAL_FAILURE( |
| compare(stepsA[1], &modelA1, devices[1], RemapVectorType{}, // modelInputs |
| RemapVectorType{{mOpnds[4], a1Opnd4}}, // modelOutputs |
| RemapVectorType{{mOpnds[3], a1Opnd3}}, // tempsAsStepModelInputs |
| StepModelOutputSetType{}, // tempsAsStepModelOutputs |
| RemapVectorType{{mOpnds[2], a1Opnd2}}, // outputsAsStepModelInputs |
| {})); // modelOutputsThatAreDownstreamInputs |
| } |
| } else { |
| ASSERT_EQ(mCompilation->finish(), Result::OP_FAILED); |
| // Try again, expecting fallback. |
| mCompilation = PartitioningCompilation(&mModel.value(), devices); |
| ASSERT_EQ(mCompilation->setPartitioning(DeviceManager::kPartitioningWithFallback), |
| Result::NO_ERROR); |
| ASSERT_EQ(mCompilation->finish(), Result::NO_ERROR); |
| ASSERT_EQ(mCompilation->getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(mCompilation->getExecutionPlan().forTest_simpleGetDevice(), |
| DeviceManager::getCpuDevice()); |
| } |
| } |
| |
| void DynamicTemporariesTest::executeCompilationAndCompareOutput(bool opnd2ModelOutputBigEnough, |
| bool opnd4ModelOutputBigEnough) { |
| ASSERT_TRUE(opnd2ModelOutputBigEnough || !mOpnd2ModelAndPartitionOutputSpecified); |
| ASSERT_TRUE(opnd4ModelOutputBigEnough || !mOpnd4ModelOutputSpecified); |
| |
| ASSERT_TRUE(mCompilation.has_value()); |
| WrapperExecution e(&mCompilation.value()); |
| |
| WrapperOperandType padTensorValueType(WrapperType::TENSOR_FLOAT32, {2}); |
| const float padTensorValue[] = {3.0f, 5.0f}; |
| e.setInput(0, &padTensorValue, &padTensorValueType.operandType); |
| |
| WrapperOperandType paddingsType(WrapperType::TENSOR_INT32, {1, 2}); |
| const int paddings[1][2] = {{1, 1}}; |
| e.setInput(1, &paddings, &paddingsType.operandType); |
| |
| auto setOutput = [&e](uint32_t index, float* buffer, bool bigEnough, bool specified, |
| HalVersion version) { |
| const uint32_t elts = bigEnough ? 4 : 3; |
| std::fill(buffer, buffer + elts, -1.0f); |
| using DimsType = std::vector<uint32_t>; |
| WrapperOperandType outputType( |
| WrapperType::TENSOR_FLOAT32, |
| specified ? DimsType{elts} |
| : supportsOutputOfUnknownRank(version) ? DimsType{} : DimsType{0}); |
| e.setOutput(index, buffer, elts * sizeof(float), &outputType.operandType); |
| }; |
| float opnd2ModelOutput[4], opnd4ModelOutput[4]; |
| setOutput(0, opnd2ModelOutput, opnd2ModelOutputBigEnough, |
| mOpnd2ModelAndPartitionOutputSpecified, mPadDeviceVersion); |
| setOutput(1, opnd4ModelOutput, opnd4ModelOutputBigEnough, mOpnd4ModelOutputSpecified, |
| mAddDeviceVersion); |
| |
| const Result expectResult = opnd2ModelOutputBigEnough && opnd4ModelOutputBigEnough |
| ? Result::NO_ERROR |
| : Result::OUTPUT_INSUFFICIENT_SIZE; |
| ASSERT_EQ(e.compute(), expectResult); |
| if (expectResult == Result::NO_ERROR) { |
| float expected[4] = {0.0f, padTensorValue[0], padTensorValue[1], 0.0f}; |
| ASSERT_TRUE(std::equal(std::begin(opnd2ModelOutput), std::end(opnd2ModelOutput), |
| std::begin(expected))); |
| for (auto& elt : expected) { |
| elt *= 2; |
| } |
| ASSERT_TRUE(std::equal(std::begin(opnd4ModelOutput), std::end(opnd4ModelOutput), |
| std::begin(expected))); |
| } |
| } |
| |
| TEST_F(DynamicTemporariesTest, ModelOutputsSufficientSize) { |
| // The purpose of this test is to confirm that the partitioner and the |
| // runtime can handle a model output of unspecified dimensions but |
| // sufficient size that is written by one partition and read by another. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false, |
| /*opnd3PartitionOutputSpecified=*/true, |
| /*opnd4ModelOutputSpecified=*/false)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true)); |
| } |
| |
| // TODO(b/174851714): Fix the partitioner and re-enable this test. |
| TEST_F(DynamicTemporariesTest, DISABLED_ModelOutputsSufficientSize_V1_1) { |
| // The purpose of this test is to confirm that the partitioner and the |
| // runtime can handle a model output of unspecified dimensions but |
| // sufficient size that is written by one partition and read by another. |
| // Regression test for http://b/174851714. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false, |
| /*opnd3PartitionOutputSpecified=*/true, |
| /*opnd4ModelOutputSpecified=*/false)); |
| ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1, |
| /*addDeviceVersion=*/HalVersion::V1_1)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, DynamicTemporariesUnspecifiedOutputs) { |
| // The purpose of this test is to confirm that the partitioner can produce |
| // dynamic temporaries and that the runtime can handle them properly. Note |
| // that all model outputs are of unspecified dimensions but sufficient size. |
| |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs) { |
| // The purpose of this test is to confirm that the partitioner can produce |
| // dynamic temporaries and that the runtime can handle them properly. Note |
| // that all model outputs are of specified dimensions. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true, |
| /*opnd3PartitionOutputSpecified=*/false, |
| /*opnd4ModelOutputSpecified=*/true)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_2) { |
| // The purpose of this test is to confirm that the partitioner can produce |
| // dynamic temporaries and that the runtime can handle them properly. Note |
| // that all model outputs are of specified dimensions. |
| // Regression test for http://b/174851714. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true, |
| /*opnd3PartitionOutputSpecified=*/false, |
| /*opnd4ModelOutputSpecified=*/true)); |
| ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_2, |
| /*addDeviceVersion=*/HalVersion::V1_2)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, DynamicTemporariesSpecifiedOutputs_V1_1) { |
| // The purpose of this test is to confirm that the partitioner cannot produce |
| // dynamic temporaries for V1_1 but instead does whole-model CPU fallback. Note |
| // that all model outputs are of specified dimensions. |
| // Regression test for http://b/174851714. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/true, |
| /*opnd3PartitionOutputSpecified=*/false, |
| /*opnd4ModelOutputSpecified=*/true)); |
| ASSERT_NO_FATAL_FAILURE(declareHalVersions(/*padDeviceVersion=*/HalVersion::V1_1, |
| /*addDeviceVersion=*/HalVersion::V1_1)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan(false)); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, true)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithDynamicTemporary) { |
| // The purpose of this test is to confirm that the runtime can detect a |
| // model output of insufficient size in the presence of a dynamic temporary. |
| |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, ModelOutputsInsufficientSizeWithoutDynamicTemporary) { |
| // The purpose of this test is to confirm that the runtime can detect a |
| // model output of insufficient size in the absence of a dynamic temporary. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false, |
| /*opnd3PartitionOutputSpecified=*/true, |
| /*opnd4ModelOutputSpecified=*/false)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, false)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, ModelOutput2InsufficientSizeWithoutDynamicTemporary) { |
| // The purpose of this test is to confirm that the runtime can detect a |
| // model output of insufficient size in the absence of a dynamic temporary. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false, |
| /*opnd3PartitionOutputSpecified=*/true, |
| /*opnd4ModelOutputSpecified=*/false)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(false, true)); |
| } |
| |
| TEST_F(DynamicTemporariesTest, ModelOutput4InsufficientSizeWithoutDynamicTemporary) { |
| // The purpose of this test is to confirm that the runtime can detect a |
| // model output of insufficient size in the absence of a dynamic temporary. |
| |
| ASSERT_NO_FATAL_FAILURE(declareOutputDimensions(/*opnd2ModelAndPartitionOutputSpecified=*/false, |
| /*opnd3PartitionOutputSpecified=*/true, |
| /*opnd4ModelOutputSpecified=*/false)); |
| ASSERT_NO_FATAL_FAILURE(makeModelAndValidate()); |
| ASSERT_NO_FATAL_FAILURE(compileModelAndComparePlan()); |
| ASSERT_NO_FATAL_FAILURE(executeCompilationAndCompareOutput(true, false)); |
| } |
| |
| // Test token rehashing during the compilation step. |
| class CacheTest : public PartitioningTest { |
| protected: |
| virtual void SetUp() override { |
| PartitioningTest::SetUp(); |
| char cacheDirTemp[] = NN_TMP_DIR "/TestCompilationCachingXXXXXX"; |
| char* cacheDir = mkdtemp(cacheDirTemp); |
| ASSERT_NE(cacheDir, nullptr); |
| mCacheDir = cacheDir; |
| } |
| |
| virtual void TearDown() override { |
| if (!::testing::Test::HasFailure()) { |
| std::filesystem::remove_all(mCacheDir); |
| } |
| PartitioningTest::TearDown(); |
| } |
| |
| void expectUniqueTokens(const std::vector<std::vector<uint8_t>>& tokens) { |
| for (uint32_t i = 0; i < tokens.size(); i++) { |
| SCOPED_TRACE(i); |
| for (uint32_t j = i + 1; j < tokens.size(); j++) { |
| SCOPED_TRACE(j); |
| EXPECT_NE(tokens[i], tokens[j]); |
| } |
| } |
| } |
| |
| // Launch a single run of the partitioner against the provided model and device list with |
| // cache token privided as tokenIn. Find the partition for the device with deviceName. |
| // Record the transformed token into tokenOut. Two or more partitions may be on the same device. |
| // "devicePartitionIndex" specifies the index of the ExecutionStep corresponding to the |
| // partition of interest, within the sequence of ExecutionSteps on the target device. |
| // If tokenIn is empty, no caching information will be provided to the partitioner. |
| void getTransformedCacheTokenSingle(const PartitioningModel& model, |
| const std::vector<std::shared_ptr<Device>>& devices, |
| const char* deviceName, const std::vector<uint8_t>& tokenIn, |
| ExecutePreference preference, ExecutePriority priority, |
| uint32_t devicePartitionIndex, |
| std::vector<uint8_t>* tokenOut) { |
| // Compile the model and get the execution plan. |
| PartitioningCompilation compilation(&model, devices); |
| if (!tokenIn.empty()) { |
| compilation.setCaching(mCacheDir.c_str(), tokenIn); |
| } |
| compilation.setPreference(preference); |
| compilation.setPriority(priority); |
| ASSERT_EQ(compilation.finish(), Result::NO_ERROR); |
| const ExecutionPlan& plan = compilation.getExecutionPlan(); |
| |
| // Find the cache info for the device. |
| const uint8_t* token = nullptr; |
| if (plan.forTest_getKind() == ExecutionPlan::Kind::SIMPLE) { |
| ASSERT_EQ(devicePartitionIndex, 0u); |
| ASSERT_EQ(plan.forTest_simpleGetDevice()->getName(), deviceName); |
| token = plan.forTest_simpleGetCacheToken(); |
| } else if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) { |
| const auto& steps = plan.forTest_compoundGetSteps(); |
| uint32_t executionStepCount = 0; |
| for (const auto& step : steps) { |
| if (step->isExecution() && |
| step->executionStep()->getDevice()->getName() == deviceName) { |
| if (devicePartitionIndex == executionStepCount) { |
| token = step->executionStep()->forTest_getCacheToken(); |
| break; |
| } |
| executionStepCount++; |
| } |
| } |
| } else { |
| FAIL(); |
| } |
| |
| // Retrieve the transformed token from the cache info. |
| if (token == nullptr) { |
| tokenOut->clear(); |
| } else { |
| tokenOut->resize(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN); |
| std::copy(token, token + ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, tokenOut->begin()); |
| } |
| } |
| |
| // A wrapper of getTransformedCacheTokenSingle, which runs getTransformedCacheTokenSingle |
| // multiple times and checks if the transformation provides consistent result. |
| // Two or more partitions may be on the same device. "devicePartitionIndex" specifies the index |
| // of the ExecutionStep corresponding to the partition of interest, within the sequence of |
| // ExecutionSteps on the target device. |
| void getTransformedCacheToken(const PartitioningModel& model, |
| const std::vector<std::shared_ptr<Device>>& devices, |
| const char* deviceName, const std::vector<uint8_t>& tokenIn, |
| ExecutePreference preference, ExecutePriority priority, |
| std::vector<uint8_t>* tokenOut, |
| uint32_t devicePartitionIndex = 0) { |
| getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, priority, |
| devicePartitionIndex, tokenOut); |
| |
| // Test if the runtime maps to the same cache token every time for the same compilation |
| // setup. |
| for (uint32_t i = 0; i < 10; i++) { |
| std::vector<uint8_t> token; |
| SCOPED_TRACE(i); |
| getTransformedCacheTokenSingle(model, devices, deviceName, tokenIn, preference, |
| priority, devicePartitionIndex, &token); |
| EXPECT_EQ(*tokenOut, token); |
| } |
| } |
| |
| void createModelForCachingTests(PartitioningModel* model) { |
| uint32_t opnd0 = model->addFloatOperand(); |
| uint32_t opnd1 = model->addFloatOperand(); |
| uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1); |
| uint32_t opnd3 = model->addFloatOperand(); |
| uint32_t opnd4 = model->addOperation2To1V1_0(1, opnd2, opnd3); |
| model->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd4}); |
| model->finish(); |
| ASSERT_TRUE(model->isValid()); |
| } |
| |
| // The first model returned in "models" is the main model. |
| void createControlFlowModelForCachingTests( |
| std::vector<std::unique_ptr<PartitioningModel>>* models) { |
| CHECK(models != nullptr); |
| |
| auto trueModel = std::make_unique<PartitioningModel>(); |
| { |
| const uint32_t opnd0 = trueModel->addFloatOperand(); |
| const uint32_t opnd1 = trueModel->addFloatOperand(); |
| const uint32_t opnd2 = trueModel->addOperation2To1V1_0(0, opnd0, opnd1); |
| trueModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); |
| trueModel->finish(); |
| ASSERT_TRUE(trueModel->isValid()); |
| } |
| |
| auto falseModel = std::make_unique<PartitioningModel>(); |
| { |
| const uint32_t opnd0 = falseModel->addFloatOperand(); |
| const uint32_t opnd1 = falseModel->addFloatOperand(); |
| const uint32_t opnd2 = falseModel->addOperation2To1V1_0(0, opnd0, opnd1); |
| falseModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); |
| falseModel->finish(); |
| ASSERT_TRUE(falseModel->isValid()); |
| } |
| |
| auto mainModel = std::make_unique<PartitioningModel>(); |
| { |
| const uint32_t opnd0 = mainModel->addBooleanOperand(); |
| const uint32_t opnd1 = mainModel->addFloatOperand(); |
| const uint32_t opnd2 = mainModel->addFloatOperand(); |
| const uint32_t opnd3 = mainModel->addFloatOperand(); |
| mainModel->addIfOperation(opnd0, *trueModel, *falseModel, {opnd1, opnd2}, {opnd3}); |
| mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3}); |
| mainModel->finish(); |
| ASSERT_TRUE(mainModel->isValid()); |
| } |
| |
| models->clear(); |
| models->push_back(std::move(mainModel)); |
| models->push_back(std::move(trueModel)); |
| models->push_back(std::move(falseModel)); |
| } |
| |
| std::string mCacheDir; |
| }; |
| |
| // Test the case when no token is provided by the application and the execution plan has a |
| // simple body. |
| TEST_F(CacheTest, CacheTokenNoneSimpleBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // deviceA can execute the whole model. |
| const auto deviceA = makeDevices({ |
| {"deviceA", 0.5, ~0U}, |
| }); |
| |
| std::vector<uint8_t> tokenIn, tokenOut; |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut); |
| EXPECT_TRUE(tokenOut.empty()); |
| } |
| |
| // Test if the runtime maps to different cache tokens for devices with different names in |
| // execution plan with a simple body. |
| TEST_F(CacheTest, CacheTokenDifferentDeviceNamesSimpleBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // Two devices that can both execute the whole model. |
| const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}}); |
| const auto deviceB = makeDevices({{"deviceB", 0.5, ~0U}}); |
| |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> deviceAToken, deviceBToken; |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceAToken); |
| getTransformedCacheToken(model, deviceB, "deviceB", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceBToken); |
| expectUniqueTokens({deviceAToken, deviceBToken}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for devices with different version strings in |
| // execution plan with a simple body. |
| TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsSimpleBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // Two devices that can both execute the whole model. |
| const auto deviceA_1_0 = makeDevices({{"deviceA", "1.0", 0.5, ~0U}}); |
| const auto deviceA_1_1 = makeDevices({{"deviceA", "1.1", 0.5, ~0U}}); |
| |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token; |
| getTransformedCacheToken(model, deviceA_1_0, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceA_1_0_Token); |
| getTransformedCacheToken(model, deviceA_1_1, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceA_1_1_Token); |
| expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for compilations with different preferences |
| // in execution plan with a simple body. |
| TEST_F(CacheTest, CacheTokenDifferentPreferencesSimpleBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // One device that can execute the whole model. |
| const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}}); |
| |
| std::vector<uint8_t> fastToken, powerToken, sustainedToken; |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &fastToken); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, |
| &powerToken); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT, |
| &sustainedToken); |
| expectUniqueTokens({fastToken, powerToken, sustainedToken}); |
| } |
| |
| // TODO (b/207721221): add test for AIDL compilation hints. |
| // Test if the runtime maps to different cache tokens for compilations with different priorities |
| // in execution plan with a simple body. |
| TEST_F(CacheTest, CacheTokenDifferentPrioritiesSimpleBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // One device that can execute the whole model. |
| const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}}); |
| |
| std::vector<uint8_t> lowToken, mediumToken, highToken; |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW, |
| &lowToken); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM, |
| &mediumToken); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH, |
| &highToken); |
| expectUniqueTokens({lowToken, mediumToken, highToken}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for compilations with different tokens |
| // provided by application in execution plan with a simple body. |
| TEST_F(CacheTest, CacheTokenDifferentTokensSimpleBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // One device that can execute the whole model. |
| const auto deviceA = makeDevices({{"deviceA", 0.5, ~0U}}); |
| |
| std::vector<uint8_t> tokenOut1, tokenOut2; |
| std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn1, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut1); |
| getTransformedCacheToken(model, deviceA, "deviceA", tokenIn2, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut2); |
| expectUniqueTokens({tokenOut1, tokenOut2}); |
| } |
| |
| // Test the case when no token is provided by the application and the execution plan has a |
| // compound body. |
| TEST_F(CacheTest, CacheTokenNoneCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the first operation only. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| |
| std::vector<uint8_t> tokenIn, tokenOut; |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut); |
| EXPECT_TRUE(tokenOut.empty()); |
| getTransformedCacheToken(model, devices, "deviceB", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut); |
| EXPECT_TRUE(tokenOut.empty()); |
| } |
| |
| // Test if the runtime maps to different cache tokens for devices with different names in |
| // execution plan with a compound body. |
| TEST_F(CacheTest, CacheTokenDifferentDeviceNamesCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the first operation only. |
| const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}}); |
| // DeviceB executes the first operation only. |
| const auto devices2 = makeDevices({{"deviceB", 0.8, ~0U}, {"deviceC", 0.5, 1 << 1}}); |
| |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> deviceAToken, deviceBToken; |
| getTransformedCacheToken(model, devices1, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceAToken); |
| getTransformedCacheToken(model, devices2, "deviceB", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceBToken); |
| expectUniqueTokens({deviceAToken, deviceBToken}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for devices with different names in |
| // execution plan with a compound body. |
| TEST_F(CacheTest, CacheTokenDifferentDeviceVersionStringsCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the first operation only. |
| const auto devices1 = makeDevices({{"deviceA", "1.0", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| // DeviceB executes the first operation only. |
| const auto devices2 = makeDevices({{"deviceA", "1.1", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> deviceA_1_0_Token, deviceA_1_1_Token; |
| getTransformedCacheToken(model, devices1, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceA_1_0_Token); |
| getTransformedCacheToken(model, devices2, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &deviceA_1_1_Token); |
| expectUniqueTokens({deviceA_1_0_Token, deviceA_1_1_Token}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for compilations with different preferences |
| // in execution plan with a compound body. |
| TEST_F(CacheTest, CacheTokenDifferentPreferencesCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the first operation only. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| |
| std::vector<uint8_t> fastToken, powerToken, sustainedToken; |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &fastToken); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_LOW_POWER, ExecutePriority::DEFAULT, |
| &powerToken); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_SUSTAINED_SPEED, ExecutePriority::DEFAULT, |
| &sustainedToken); |
| expectUniqueTokens({fastToken, powerToken, sustainedToken}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for compilations with different priorities |
| // in execution plan with a compound body. |
| TEST_F(CacheTest, CacheTokenDifferentPrioritiesCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the first operation only. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| |
| std::vector<uint8_t> lowToken, mediumToken, highToken; |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::LOW, |
| &lowToken); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::MEDIUM, |
| &mediumToken); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::HIGH, |
| &highToken); |
| expectUniqueTokens({lowToken, mediumToken, highToken}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for compilations with different tokens |
| // provided by application in execution plan with a compound body. |
| TEST_F(CacheTest, CacheTokenDifferentTokensCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the first operation only. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| |
| std::vector<uint8_t> tokenOut1, tokenOut2; |
| std::vector<uint8_t> tokenIn1(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> tokenIn2(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 1); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn1, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut1); |
| getTransformedCacheToken(model, devices, "deviceA", tokenIn2, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut2); |
| expectUniqueTokens({tokenOut1, tokenOut2}); |
| } |
| |
| // Test if the runtime maps to different cache tokens for compilations with different partitioning |
| // outcome in execution plan with a compound body. |
| TEST_F(CacheTest, CacheTokenDifferentPartitionsCompoundBody) { |
| PartitioningModel model; |
| createModelForCachingTests(&model); |
| |
| // DeviceA executes the whole model. |
| const auto devices1 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 0U}}); |
| // DeviceA executes the first operation only. |
| const auto devices2 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| // DeviceA executes the second operation only. |
| const auto devices3 = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 0}}); |
| |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> tokenOut1, tokenOut2, tokenOut3; |
| getTransformedCacheToken(model, devices1, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut1); |
| getTransformedCacheToken(model, devices2, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut2); |
| getTransformedCacheToken(model, devices3, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut3); |
| expectUniqueTokens({tokenOut1, tokenOut2, tokenOut3}); |
| } |
| |
| // Test if the runtime maps different referenced models to different cache tokens. |
| TEST_F(CacheTest, CacheTokenDifferentReferenceModelPartitions) { |
| std::vector<std::unique_ptr<PartitioningModel>> models; |
| createControlFlowModelForCachingTests(&models); |
| const auto& main = *models[0]; |
| |
| // DeviceA executes the two referenced models but does not support IF. |
| // There will be two partitions on deviceA. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}}); |
| |
| std::vector<uint8_t> tokenIn(ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN, 0); |
| std::vector<uint8_t> tokenOut1, tokenOut2; |
| getTransformedCacheToken(main, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut1, /*devicePartitionIndex=*/0); |
| getTransformedCacheToken(main, devices, "deviceA", tokenIn, |
| ExecutePreference::PREFER_FAST_SINGLE_ANSWER, ExecutePriority::DEFAULT, |
| &tokenOut2, /*devicePartitionIndex=*/1); |
| expectUniqueTokens({tokenOut1, tokenOut2}); |
| } |
| |
| // Very basic tests of some of the PerformanceInfo functionality. |
| // Placed in this file because partitioning is the consumer of this functionality. |
| class PerfTest : public ::testing::Test {}; |
| |
| TEST_F(PerfTest, Lookup) { |
| // Derive an arbitrary (but reproducible) performance value from an OperandType. |
| // We'll use this to ensure that we can save and then recover a type's performance. |
| auto typePerf = [](V1_3::OperandType type) { return float(static_cast<uint32_t>(type)); }; |
| |
| V1_3::Capabilities capabilities = ::android::nn::makeCapabilities(-1.0f); |
| |
| for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN); |
| type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) { |
| V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type); |
| update(&capabilities, operandType, typePerf(operandType)); |
| } |
| for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN); |
| type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) { |
| V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type); |
| update(&capabilities, operandType, typePerf(operandType)); |
| } |
| |
| // Make sure lookup retrieves the values stored by update |
| |
| for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MIN); |
| type <= static_cast<uint32_t>(V1_3::OperandTypeRange::FUNDAMENTAL_MAX); ++type) { |
| V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type); |
| if (operandType == V1_3::OperandType::SUBGRAPH) { |
| // SUBGRAPH capabilities are handled differently. |
| continue; |
| } |
| SCOPED_TRACE(toString(operandType)); |
| EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType)); |
| } |
| for (uint32_t type = static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MIN); |
| type <= static_cast<uint32_t>(V1_3::OperandTypeRange::OEM_MAX); ++type) { |
| V1_3::OperandType operandType = static_cast<V1_3::OperandType>(type); |
| SCOPED_TRACE(toString(operandType)); |
| EXPECT_EQ(lookupExecTime(capabilities, operandType), typePerf(operandType)); |
| } |
| |
| // Check the behavior of a missing type |
| |
| V1_3::OperandType operandType = static_cast<V1_3::OperandType>( |
| static_cast<uint32_t>(V1_3::OperandTypeRange::BASE_MAX) + 1); |
| EXPECT_EQ(lookupExecTime(capabilities, operandType), FLT_MAX); |
| } |
| |
| class ControlFlowPartitioningTest : public PartitioningTest { |
| protected: |
| // opnd0 --> +-----+ |
| // | op0 | --> opnd2 |
| // opnd1 --> +-----+ |
| std::unique_ptr<PartitioningModel> createBranchOrBodyModel(Dimensioned dimensioned) { |
| auto model = std::make_unique<PartitioningModel>(); |
| const uint32_t opnd0 = model->addFloatOperand(dimensioned); |
| const uint32_t opnd1 = model->addFloatOperand(dimensioned); |
| const uint32_t opnd2 = model->addOperation2To1V1_0(0, opnd0, opnd1, dimensioned); |
| model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); |
| model->finish(); |
| EXPECT_TRUE(model->isValid()); |
| return model; |
| } |
| |
| // opnd0 --> +-------+ |
| // | EQUAL | --> opnd2 |
| // opnd1 --> +-------+ |
| std::unique_ptr<PartitioningModel> createCondModel(Dimensioned dimensioned) { |
| auto model = std::make_unique<PartitioningModel>(); |
| const uint32_t opnd0 = model->addFloatOperand(dimensioned); |
| const uint32_t opnd1 = model->addFloatOperand(dimensioned); |
| const uint32_t opnd2 = model->addExplicitOperationXTo1( |
| ANEURALNETWORKS_EQUAL, {opnd0, opnd1}, WrapperType::TENSOR_BOOL8); |
| model->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); |
| model->finish(); |
| EXPECT_TRUE(model->isValid()); |
| return model; |
| } |
| |
| // opnd0 --> +----+ |
| // opnd1 --> | IF | --> opnd3 |
| // opnd2 --> +----+ |
| std::vector<std::unique_ptr<PartitioningModel>> createIfModel( |
| Dimensioned dimensionedMain = Dimensioned::YES, |
| Dimensioned dimensionedThen = Dimensioned::YES, |
| Dimensioned dimensionedElse = Dimensioned::YES) { |
| auto thenModel = createBranchOrBodyModel(dimensionedThen); |
| auto elseModel = createBranchOrBodyModel(dimensionedElse); |
| |
| auto mainModel = std::make_unique<PartitioningModel>(); |
| const uint32_t opnd0 = mainModel->addBooleanOperand(); |
| const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain); |
| const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain); |
| const uint32_t opnd3 = mainModel->addFloatOperand(dimensionedMain); |
| mainModel->addIfOperation(opnd0, *thenModel, *elseModel, {opnd1, opnd2}, {opnd3}); |
| mainModel->identifyInputsAndOutputs({opnd0, opnd1, opnd2}, {opnd3}); |
| mainModel->finish(); |
| EXPECT_TRUE(mainModel->isValid()); |
| |
| std::vector<std::unique_ptr<PartitioningModel>> models; |
| models.push_back(std::move(mainModel)); |
| models.push_back(std::move(thenModel)); |
| models.push_back(std::move(elseModel)); |
| return std::move(models); |
| } |
| |
| // opnd0 --> +-------+ |
| // | WHILE | --> opnd2 |
| // opnd1 --> +-------+ |
| std::vector<std::unique_ptr<PartitioningModel>> createWhileModel( |
| Dimensioned dimensionedMain = Dimensioned::YES, |
| Dimensioned dimensionedCond = Dimensioned::YES, |
| Dimensioned dimensionedBody = Dimensioned::YES) { |
| auto condModel = createCondModel(dimensionedCond); |
| auto bodyModel = createBranchOrBodyModel(dimensionedBody); |
| |
| auto mainModel = std::make_unique<PartitioningModel>(); |
| const uint32_t opnd0 = mainModel->addFloatOperand(dimensionedMain); |
| const uint32_t opnd1 = mainModel->addFloatOperand(dimensionedMain); |
| const uint32_t opnd2 = mainModel->addFloatOperand(dimensionedMain); |
| mainModel->addWhileOperation(*condModel, *bodyModel, {opnd0, opnd1}, {opnd2}); |
| mainModel->identifyInputsAndOutputs({opnd0, opnd1}, {opnd2}); |
| mainModel->finish(); |
| EXPECT_TRUE(mainModel->isValid()); |
| |
| std::vector<std::unique_ptr<PartitioningModel>> models; |
| models.push_back(std::move(mainModel)); |
| models.push_back(std::move(condModel)); |
| models.push_back(std::move(bodyModel)); |
| return std::move(models); |
| } |
| |
| void testIfUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen, |
| Dimensioned dimensionedElse); |
| void testWhileUnknownSize(Dimensioned dimensionedMain, Dimensioned dimensionedThen, |
| Dimensioned dimensionedElse); |
| }; |
| |
| TEST_F(ControlFlowPartitioningTest, IF_Interpreted) { |
| const auto models = createIfModel(); |
| |
| // The device supports the referenced models but does not support IF. |
| const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}}); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| checkExecutionPlanSteps(plan, {kIfStep, "V1_0", kGotoStep, "V1_0"}); |
| } |
| |
| TEST_F(ControlFlowPartitioningTest, WHILE_Interpreted) { |
| const auto models = createWhileModel(); |
| |
| // The device supports the body model but does not support WHILE or the |
| // condition model (because of EQUAL). |
| const auto devices = makeDevices({{"V1_0", 0.9, HalVersion::V1_0, ~0U}}); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| const auto& cpuDeviceName = DeviceManager::getCpuDevice()->getName(); |
| checkExecutionPlanSteps(plan, {kWhileStep, cpuDeviceName, kGotoStep, "V1_0", kGotoStep}); |
| } |
| |
| TEST_F(ControlFlowPartitioningTest, IF_SimplePlan) { |
| const auto models = createIfModel(); |
| |
| // The device supports all operations. |
| const auto devices = makeDevices({{"ALL", |
| 0.9, |
| ~0U, |
| PartitioningDriver::OEMNo, |
| HalVersion::LATEST, |
| {V1_3::OperationType::IF}}}); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| checkExecutionPlanSteps(plan, {"ALL"}); |
| } |
| |
| TEST_F(ControlFlowPartitioningTest, WHILE_SimplePlan) { |
| const auto models = createWhileModel(); |
| |
| // The device supports all operations. |
| const auto devices = makeDevices({{"ALL", |
| 0.9, |
| ~0U, |
| PartitioningDriver::OEMNo, |
| HalVersion::LATEST, |
| {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}}); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| checkExecutionPlanSteps(plan, {"ALL"}); |
| } |
| |
| void ControlFlowPartitioningTest::testIfUnknownSize(Dimensioned dimensionedMain, |
| Dimensioned dimensionedThen, |
| Dimensioned dimensionedElse) { |
| if (dimensionedMain != Dimensioned::NO && dimensionedThen != Dimensioned::NO && |
| dimensionedElse != Dimensioned::NO) { |
| // No unknown size. |
| return; |
| } |
| |
| const auto models = createIfModel(dimensionedMain, dimensionedThen, dimensionedElse); |
| |
| // The device supports all operations but the partitioner ignores its IF |
| // support due to http://b/159076604#comment5. |
| const auto devices = makeDevices({{"ALL", |
| 0.9, |
| ~0U, |
| PartitioningDriver::OEMNo, |
| HalVersion::LATEST, |
| {V1_3::OperationType::IF}}}); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| // The control flow interpreter does not support unknown size (b/132458982). |
| checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()}); |
| } |
| |
| TEST_F(ControlFlowPartitioningTest, IF_UnknownSize) { |
| const std::vector<Dimensioned> configurations = {Dimensioned::NO, Dimensioned::YES}; |
| for (Dimensioned dimensionedMain : configurations) { |
| SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain)); |
| for (Dimensioned dimensionedThen : configurations) { |
| SCOPED_TRACE(testing::Message() << "dimensionedThen: " << toString(dimensionedThen)); |
| for (Dimensioned dimensionedElse : configurations) { |
| SCOPED_TRACE(testing::Message() |
| << "dimensionedElse: " << toString(dimensionedElse)); |
| testIfUnknownSize(dimensionedMain, dimensionedThen, dimensionedElse); |
| } |
| } |
| } |
| } |
| |
| void ControlFlowPartitioningTest::testWhileUnknownSize(Dimensioned dimensionedMain, |
| Dimensioned dimensionedCond, |
| Dimensioned dimensionedBody) { |
| if (dimensionedMain != Dimensioned::NO && dimensionedCond != Dimensioned::NO && |
| dimensionedBody != Dimensioned::NO) { |
| // No unknown size. |
| return; |
| } |
| |
| const auto models = createWhileModel(dimensionedMain, dimensionedCond, dimensionedBody); |
| |
| // The device supports all operations but the partitioner ignores its WHILE |
| // support due to http://b/159076604#comment5. |
| const auto devices = makeDevices({{"ALL", |
| 0.9, |
| ~0U, |
| PartitioningDriver::OEMNo, |
| HalVersion::LATEST, |
| {V1_3::OperationType::WHILE, V1_3::OperationType::EQUAL}}}); |
| |
| ExecutionPlan plan; |
| ASSERT_EQ(models[0]->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &plan), |
| ANEURALNETWORKS_NO_ERROR); |
| // The control flow interpreter does not support unknown size (b/132458982). |
| checkExecutionPlanSteps(plan, {DeviceManager::getCpuDevice()->getName()}); |
| } |
| |
| TEST_F(ControlFlowPartitioningTest, WHILE_UnknownSize) { |
| const std::vector<Dimensioned> configurations = {Dimensioned::NO, Dimensioned::YES}; |
| for (Dimensioned dimensionedMain : configurations) { |
| SCOPED_TRACE(testing::Message() << "dimensionedMain: " << toString(dimensionedMain)); |
| for (Dimensioned dimensionedCond : configurations) { |
| SCOPED_TRACE(testing::Message() << "dimensionedCond: " << toString(dimensionedCond)); |
| for (Dimensioned dimensionedBody : configurations) { |
| SCOPED_TRACE(testing::Message() |
| << "dimensionedBody: " << toString(dimensionedBody)); |
| testWhileUnknownSize(dimensionedMain, dimensionedCond, dimensionedBody); |
| } |
| } |
| } |
| } |
| |
| // Test the memory step role analysis of the partitioning implementation. |
| class MemoryStepRoleTest : public PartitioningTest { |
| protected: |
| // A tuple of {device_name, input/output} |
| using TestStepRole = std::tuple<std::string, IOType>; |
| |
| void SetUp() override { |
| PartitioningTest::SetUp(); |
| mModel = std::make_unique<PartitioningModel>(); |
| } |
| |
| static std::string toString(SourceOperandIndex index) { |
| return "{" + std::to_string(index.first) + ", " + std::to_string(index.second) + "}"; |
| } |
| |
| static std::string toString(const std::set<TestStepRole>& roles) { |
| std::stringstream ss; |
| ss << "[ "; |
| for (const auto& [deviceName, type] : roles) { |
| ss << "{" << deviceName << ", " << (type == IOType::INPUT ? "INPUT" : "OUTPUT") << "} "; |
| } |
| ss << "]"; |
| return ss.str(); |
| } |
| |
| void finishAndPartitionModelForDevices(const std::vector<std::shared_ptr<Device>>& devices) { |
| mModel->finish(); |
| ASSERT_TRUE(mModel->isValid()); |
| ASSERT_EQ(mModel->partitionTheWork(devices, ExecutePreference::PREFER_LOW_POWER, |
| ExecutePriority::DEFAULT, {}, &mPlan), |
| ANEURALNETWORKS_NO_ERROR); |
| } |
| |
| void checkStepRolesOfInput(uint32_t index, const std::set<TestStepRole>& expected) const { |
| SCOPED_TRACE("Input: " + std::to_string(index)); |
| std::set<TestStepRole> actual; |
| mPlan.forEachStepRoleOfInput( |
| index, [&actual](const auto* preparedModel, IOType type, uint32_t) { |
| actual.emplace(preparedModel->getDevice()->getName(), type); |
| }); |
| EXPECT_TRUE(expected == actual) |
| << "expected: " << toString(expected) << ", actual: " << toString(actual); |
| } |
| |
| void checkStepRolesOfOutput(uint32_t index, const std::set<TestStepRole>& expected) const { |
| SCOPED_TRACE("Output: " + std::to_string(index)); |
| std::set<TestStepRole> actual; |
| mPlan.forEachStepRoleOfOutput( |
| index, [&actual](const auto* preparedModel, IOType type, uint32_t) { |
| actual.emplace(preparedModel->getDevice()->getName(), type); |
| }); |
| EXPECT_TRUE(expected == actual) |
| << "expected: " << toString(expected) << ", actual: " << toString(actual); |
| } |
| |
| void checkStepRolesOfSourceOperand(SourceOperandIndex index, |
| const std::set<TestStepRole>& expected) const { |
| SCOPED_TRACE("SourceOperandIndex: " + toString(index)); |
| std::set<TestStepRole> actual; |
| mPlan.forTest_compoundForEachStepRoleOfSourceOperand( |
| index, [&actual](const auto* preparedModel, IOType type, uint32_t) { |
| actual.emplace(preparedModel->getDevice()->getName(), type); |
| }); |
| EXPECT_TRUE(expected == actual) |
| << "expected: " << toString(expected) << ", actual: " << toString(actual); |
| } |
| |
| std::unique_ptr<PartitioningModel> mModel; |
| ExecutionPlan mPlan; |
| }; |
| |
| // Test a graph with 3 operations, each operation in a separate partition: |
| // opnd2 = OP0(opnd0, opnd1) |
| // opnd4 = OP1(opnd1, opnd3) |
| // opnd5 = OP2(opnd2, opnd4) |
| TEST_F(MemoryStepRoleTest, NoControlFlow) { |
| const uint32_t opnd0 = mModel->addFloatOperand(); |
| const uint32_t opnd1 = mModel->addFloatOperand(); |
| const uint32_t opnd2 = mModel->addOperation2To1V1_0(0, opnd0, opnd1); |
| const uint32_t opnd3 = mModel->addFloatOperand(); |
| const uint32_t opnd4 = mModel->addOperation2To1V1_0(1, opnd1, opnd3); |
| const uint32_t opnd5 = mModel->addOperation2To1V1_0(2, opnd2, opnd4); |
| mModel->identifyInputsAndOutputs({opnd0, opnd1, opnd3}, {opnd2, opnd5}); |
| |
| // This will result in 3 partitions: |
| // deviceA handles op0, deviceB handles op1, deviceC handles op2. |
| const auto devices = makeDevices( |
| {{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}, {"deviceC", 0.5, 1 << 2}}); |
| finishAndPartitionModelForDevices(devices); |
| checkExecutionPlanSteps(mPlan, {"deviceB", "deviceA", "deviceC"}); |
| |
| // Check the step roles of the main model inputs and outputs: |
| // |
| // input0 and input2 are each exclusive for a single partition. |
| checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}}); |
| checkStepRolesOfInput(2, {{"deviceB", IOType::INPUT}}); |
| // input1 is shared by two operations in different partitions. |
| checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}}); |
| // output0 is a model output that is a downstream input. |
| checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceC", IOType::INPUT}}); |
| // output1 is only used in a single partition. |
| checkStepRolesOfOutput(1, {{"deviceC", IOType::OUTPUT}}); |
| |
| // Check the step roles of the partition boundary temporaries that we will allocate memory on |
| // behalf of (see ExecutionPlan::makeController for the allocation logic): |
| // |
| // opnd4 is a partition boundary temporary. |
| checkStepRolesOfSourceOperand({0, opnd4}, |
| {{"deviceB", IOType::OUTPUT}, {"deviceC", IOType::INPUT}}); |
| } |
| |
| // Test a graph with an interpreted IF operation. |
| TEST_F(MemoryStepRoleTest, InterpretedIf) { |
| auto thenModel = std::make_unique<PartitioningModel>(); |
| const uint32_t thenOpnd0 = thenModel->addFloatOperand(); |
| const uint32_t thenOpnd1 = thenModel->addFloatOperand(); |
| const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1); |
| thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2}); |
| thenModel->finish(); |
| EXPECT_TRUE(thenModel->isValid()); |
| |
| auto elseModel = std::make_unique<PartitioningModel>(); |
| const uint32_t elseOpnd0 = elseModel->addFloatOperand(); |
| const uint32_t elseOpnd1 = elseModel->addFloatOperand(); |
| const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1); |
| elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2}); |
| elseModel->finish(); |
| EXPECT_TRUE(elseModel->isValid()); |
| |
| const uint32_t mainOpnd0 = mModel->addBooleanOperand(); |
| const uint32_t mainOpnd1 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd2 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd3 = mModel->addFloatOperand(); |
| mModel->addIfOperation(mainOpnd0, *thenModel, *elseModel, {mainOpnd1, mainOpnd2}, {mainOpnd3}); |
| mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3}); |
| |
| // deviceA handles op0, deviceB handles op1. |
| const auto devices = makeDevices({{"deviceA", 0.8, ~0U}, {"deviceB", 0.5, 1 << 1}}); |
| finishAndPartitionModelForDevices(devices); |
| checkExecutionPlanSteps(mPlan, {kIfStep, "deviceA", kGotoStep, "deviceB"}); |
| |
| // Check the step roles of the main model inputs and outputs: |
| // |
| // input0 is a condition operand of the interpreted IF that will only be read by the runtime. |
| checkStepRolesOfInput(0, {}); |
| // input1 and input2 are outer inputs of the interpreted IF. The memories may be directly used |
| // by the input operands of the then and else model. |
| checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}}); |
| checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}}); |
| // output0 is the outer output of the interpreted IF. The memory may be directly |
| // used by the output operands of the then and else model. |
| checkStepRolesOfOutput(0, {{"deviceA", IOType::OUTPUT}, {"deviceB", IOType::OUTPUT}}); |
| |
| // There is no partition boundary temporary in this model that we will allocate memory on |
| // behalf of (see ExecutionPlan::makeController for the allocation logic). |
| } |
| |
| // Test a graph with an interpreted WHILE operation. |
| TEST_F(MemoryStepRoleTest, InterpretedWhile) { |
| // Condition model: |
| // condOpnd3 = OP0(condOpnd0, condOpnd1) |
| // condOpnd4 = EQUAL(condOpnd2, condOpnd3) |
| auto condModel = std::make_unique<PartitioningModel>(); |
| const uint32_t condOpnd0 = condModel->addFloatOperand(); |
| const uint32_t condOpnd1 = condModel->addFloatOperand(); |
| const uint32_t condOpnd2 = condModel->addFloatOperand(); |
| const uint32_t condOpnd3 = condModel->addOperation2To1V1_0(0, condOpnd0, condOpnd1); |
| const uint32_t condOpnd4 = condModel->addExplicitOperationXTo1( |
| ANEURALNETWORKS_EQUAL, {condOpnd2, condOpnd3}, WrapperType::TENSOR_BOOL8); |
| condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd4}); |
| condModel->finish(); |
| EXPECT_TRUE(condModel->isValid()); |
| |
| // Body model: |
| // bodyOpnd3 = OP1(bodyOpnd0, bodyOpnd1) |
| // bodyOpnd4 = OP1(bodyOpnd0, bodyOpnd2) |
| auto bodyModel = std::make_unique<PartitioningModel>(); |
| const uint32_t bodyOpnd0 = bodyModel->addFloatOperand(); |
| const uint32_t bodyOpnd1 = bodyModel->addFloatOperand(); |
| const uint32_t bodyOpnd2 = bodyModel->addFloatOperand(); |
| const uint32_t bodyOpnd3 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd1); |
| const uint32_t bodyOpnd4 = bodyModel->addOperation2To1V1_0(1, bodyOpnd0, bodyOpnd2); |
| bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3, bodyOpnd4}); |
| bodyModel->finish(); |
| EXPECT_TRUE(bodyModel->isValid()); |
| |
| const uint32_t mainOpnd0 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd1 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd2 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd3 = mModel->addFloatOperand(); |
| mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2}, |
| {mainOpnd3}); |
| mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3}); |
| |
| // deviceA handles the cond model, deviceB handles the body model. |
| const auto devices = makeDevices({{"deviceA", |
| 0.8, |
| ~0U, |
| PartitioningDriver::OEMNo, |
| HalVersion::LATEST, |
| {V1_3::OperationType::EQUAL}}, |
| {"deviceB", 0.5, 1 << 1}}); |
| finishAndPartitionModelForDevices(devices); |
| checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, "deviceB", kGotoStep}); |
| |
| // The subgraph indexes of the condition and body models of the WHILE operation. |
| const uint32_t condModelIndex = 1; |
| const uint32_t bodyModelIndex = 2; |
| |
| // Check the step roles of the main model inputs and outputs: |
| // |
| // input0 (input-output), input1 (state-only), and input2 (input-only) are outer inputs of the |
| // interpreted WHILE. The memories may be directly used by the input operands of the condition |
| // and body models. |
| checkStepRolesOfInput(0, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}}); |
| checkStepRolesOfInput(1, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}}); |
| checkStepRolesOfInput(2, {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}}); |
| // output0 is an outer output of the interpreted WHILE that will only be written by the runtime. |
| checkStepRolesOfOutput(0, {}); |
| |
| // Check the step roles of the partition boundary temporaries that we will allocate memory on |
| // behalf of (see ExecutionPlan::makeController for the allocation logic): |
| // |
| // condOpnd4 is output of the interpreted WHILE condition model. |
| checkStepRolesOfSourceOperand({condModelIndex, condOpnd4}, {{"deviceA", IOType::OUTPUT}}); |
| // bodyOpnd3 (input-output) and bodyOpnd4 (state-only) are outputs of the interpreted WHILE body |
| // model. The memories may be directly used by the input operands of the condition and body |
| // models. |
| checkStepRolesOfSourceOperand( |
| {bodyModelIndex, bodyOpnd3}, |
| {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}}); |
| checkStepRolesOfSourceOperand( |
| {bodyModelIndex, bodyOpnd4}, |
| {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceB", IOType::OUTPUT}}); |
| } |
| |
| // Test a graph with nested interpreted control flow operations: a WHILE operation with IF operation |
| // in the body model. |
| TEST_F(MemoryStepRoleTest, NestedInterpretedControlFlow) { |
| auto condModel = std::make_unique<PartitioningModel>(); |
| const uint32_t condOpnd0 = condModel->addFloatOperand(); |
| const uint32_t condOpnd1 = condModel->addFloatOperand(); |
| const uint32_t condOpnd2 = condModel->addBooleanOperand(); |
| const uint32_t condOpnd3 = condModel->addExplicitOperationXTo1( |
| ANEURALNETWORKS_EQUAL, {condOpnd0, condOpnd1}, WrapperType::TENSOR_BOOL8); |
| condModel->identifyInputsAndOutputs({condOpnd0, condOpnd1, condOpnd2}, {condOpnd3}); |
| condModel->finish(); |
| EXPECT_TRUE(condModel->isValid()); |
| |
| auto thenModel = std::make_unique<PartitioningModel>(); |
| const uint32_t thenOpnd0 = thenModel->addFloatOperand(); |
| const uint32_t thenOpnd1 = thenModel->addFloatOperand(); |
| const uint32_t thenOpnd2 = thenModel->addOperation2To1V1_0(0, thenOpnd0, thenOpnd1); |
| thenModel->identifyInputsAndOutputs({thenOpnd0, thenOpnd1}, {thenOpnd2}); |
| thenModel->finish(); |
| EXPECT_TRUE(thenModel->isValid()); |
| |
| auto elseModel = std::make_unique<PartitioningModel>(); |
| const uint32_t elseOpnd0 = elseModel->addFloatOperand(); |
| const uint32_t elseOpnd1 = elseModel->addFloatOperand(); |
| const uint32_t elseOpnd2 = elseModel->addOperation2To1V1_0(1, elseOpnd0, elseOpnd1); |
| elseModel->identifyInputsAndOutputs({elseOpnd0, elseOpnd1}, {elseOpnd2}); |
| elseModel->finish(); |
| EXPECT_TRUE(elseModel->isValid()); |
| |
| auto bodyModel = std::make_unique<PartitioningModel>(); |
| const uint32_t bodyOpnd0 = bodyModel->addFloatOperand(); |
| const uint32_t bodyOpnd1 = bodyModel->addFloatOperand(); |
| const uint32_t bodyOpnd2 = bodyModel->addBooleanOperand(); |
| const uint32_t bodyOpnd3 = bodyModel->addFloatOperand(); |
| bodyModel->addIfOperation(bodyOpnd2, *thenModel, *elseModel, {bodyOpnd0, bodyOpnd1}, |
| {bodyOpnd3}); |
| bodyModel->identifyInputsAndOutputs({bodyOpnd0, bodyOpnd1, bodyOpnd2}, {bodyOpnd3}); |
| bodyModel->finish(); |
| EXPECT_TRUE(bodyModel->isValid()); |
| |
| const uint32_t mainOpnd0 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd1 = mModel->addFloatOperand(); |
| const uint32_t mainOpnd2 = mModel->addBooleanOperand(); |
| const uint32_t mainOpnd3 = mModel->addFloatOperand(); |
| mModel->addWhileOperation(*condModel, *bodyModel, {mainOpnd0, mainOpnd1, mainOpnd2}, |
| {mainOpnd3}); |
| mModel->identifyInputsAndOutputs({mainOpnd0, mainOpnd1, mainOpnd2}, {mainOpnd3}); |
| |
| // deviceA handles the cond model, deviceB handles the then model, |
| // deviceC handles the else model. |
| const auto devices = makeDevices({{"deviceA", |
| 0.8, |
| ~0U, |
| PartitioningDriver::OEMNo, |
| HalVersion::LATEST, |
| {V1_3::OperationType::EQUAL}}, |
| {"deviceB", 0.5, 1 << 0}, |
| {"deviceC", 0.5, 1 << 1}}); |
| finishAndPartitionModelForDevices(devices); |
| checkExecutionPlanSteps(mPlan, {kWhileStep, "deviceA", kGotoStep, kIfStep, "deviceB", kGotoStep, |
| "deviceC", kGotoStep}); |
| |
| // The subgraph indexes of the condition and body models of the WHILE operation. |
| const uint32_t condModelIndex = 1; |
| const uint32_t bodyModelIndex = 2; |
| |
| // Check the step roles of the main model inputs and outputs: |
| // |
| // input0 and input1 are outer inputs of the interpreted WHILE. The memories may be directly |
| // used by the input operands of the condition and body models, and then be directly used by the |
| // input operands of the then and else model of the interpreted IF in the body model. |
| checkStepRolesOfInput( |
| 0, |
| {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}}); |
| checkStepRolesOfInput( |
| 1, |
| {{"deviceA", IOType::INPUT}, {"deviceB", IOType::INPUT}, {"deviceC", IOType::INPUT}}); |
| // input2 is also an outer input of the interpreted WHILE. The memory has no step role in the |
| // condition model. In the body model, the memory will be used by the condition operand of the |
| // interpreted IF that will only be read by the runtime. |
| checkStepRolesOfInput(2, {}); |
| // output0 is an outer output of the interpreted WHILE that will only be written by the runtime. |
| checkStepRolesOfOutput(0, {}); |
| |
| // Check the step roles of the partition boundary temporaries that we will allocate memory on |
| // behalf of (see ExecutionPlan::makeController for the allocation logic): |
| // |
| // condOpnd2 is output of the interpreted WHILE condition model. |
| checkStepRolesOfSourceOperand({condModelIndex, condOpnd3}, {{"deviceA", IOType::OUTPUT}}); |
| // bodyOpnd3 is output of the interpreted WHILE body model. The memories may be directly used by |
| // the input operands of the condition and body models, and then be directly used by the |
| // input operands of the then and else model of the interpreted IF in the body model. |
| checkStepRolesOfSourceOperand({bodyModelIndex, bodyOpnd3}, {{"deviceA", IOType::INPUT}, |
| {"deviceB", IOType::INPUT}, |
| {"deviceB", IOType::OUTPUT}, |
| {"deviceC", IOType::INPUT}, |
| {"deviceC", IOType::OUTPUT}}); |
| } |
| |
| } // namespace |