| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <HalInterfaces.h> |
| #include <SampleDriver.h> |
| #include <ValidateHal.h> |
| #include <android-base/logging.h> |
| #include <gtest/gtest.h> |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <cassert> |
#include <cstdio>
#include <iomanip>
#include <iostream>
| #include <iterator> |
| #include <map> |
| #include <memory> |
| #include <random> |
| #include <set> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| #include "CompilationBuilder.h" |
| #include "HalUtils.h" |
| #include "Manager.h" |
| #include "ModelBuilder.h" |
| #include "NeuralNetworks.h" |
| #include "TestNeuralNetworksWrapper.h" |
| |
| // Uncomment the following line to generate some debugging output that |
| // may be useful when analyzing failures: |
| // |
| // #define VERBOSE VERBOSE |
| |
| // Uncomment the following line to generate some debugging output that |
| // may be useful to determine test coverage for support of dynamic |
| // temporaries (http://b/132458982): |
| // |
| // #define TRACE_DYNTEMP TRACE_DYNTEMP |
| |
| // We randomly generate tests (model + input data) at runtime, and verify |
| // that we get the same results whether we do partitioned compilation/execution |
// or non-partitioned compilation/execution. We perform a test as follows:
| // |
| // (1) Randomly generate a model (graph and weights), randomly generate input |
| // data, randomly assign inputs and outputs to CPU memory or to shared |
| // memory. |
| // |
| // Randomly leaves dimensions unset for intermediate operands. |
| // |
| // (2) Randomly generate drivers based on the sample driver, each of which |
| // executes models on the CPU. They differ according to which operations |
| // they support. |
| // |
| // (3) Compile and execute without partitioning, saving off the results. |
| // |
| // (4) Compile and execute with partitioning. |
| // |
| // (5) Verify that the saved results from (3) match the results from (4). |
| // |
| // For simplicity, all data (model inputs, model outputs, weights, |
| // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two |
| // dimensions are fixed throughout a particular test case (and |
| // randomly determined). This prevents us from having to find a |
| // mechanism to "resize" data (e.g., if ADD#a operates on data of size |
| // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a |
| // and ADD#b become inputs of ADD#c, do we need to insert one or more |
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
// from size 2x2 to size 3x3 in order to match ADD#b?). In the few
| // cases where an operand cannot be of this type, it is a constant |
| // (e.g., activation functions and RNN bias). |
| // |
| // Each operation we generate has a signature (described in more |
| // detail later). The randomly generated drivers decide which |
| // operations they can execute by checking operation signatures. Once |
| // we have built the model and know the set of signatures, we randomly |
| // assign each signature to a driver. No signature is supported by |
| // multiple drivers -- we're not testing the logic that the |
| // partitioning algorithm uses to select the best driver for an |
| // operation. |
| |
| namespace android { |
| |
| namespace V1_0 = ::android::hardware::neuralnetworks::V1_0; |
| namespace V1_1 = ::android::hardware::neuralnetworks::V1_1; |
| namespace V1_2 = ::android::hardware::neuralnetworks::V1_2; |
| namespace V1_3 = ::android::hardware::neuralnetworks::V1_3; |
| using CompilationBuilder = nn::CompilationBuilder; |
| using DeviceManager = nn::DeviceManager; |
| using Device = nn::Device; |
| using SharedDevice = nn::SharedDevice; |
| using ExecutionPlan = nn::ExecutionPlan; |
| using ExecutionStep = nn::ExecutionStep; |
| using HalCacheToken = nn::HalCacheToken; |
| using HalVersion = nn::HalVersion; |
| using HidlModel = V1_3::Model; |
| using LogicalStep = nn::LogicalStep; |
| using ModelBuilder = nn::ModelBuilder; |
| using Result = nn::test_wrapper::Result; |
| using SampleDriver = nn::sample_driver::SampleDriver; |
| using WrapperCompilation = nn::test_wrapper::Compilation; |
| using WrapperExecution = nn::test_wrapper::Execution; |
| using WrapperMemory = nn::test_wrapper::Memory; |
| using WrapperModel = nn::test_wrapper::Model; |
| using WrapperOperandType = nn::test_wrapper::OperandType; |
| using WrapperType = nn::test_wrapper::Type; |
| |
| namespace { |
| |
| /// Configure test size ////////////////////////////////////////////////////////// |
| |
| // We may exceed this in order to connect otherwise disjoint subgraphs. |
| static const unsigned kMaxNumOperations = 100; |
| |
// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 bytes to by-reference,
// so we want to ensure that we can pick both sizes that exceed and sizes that do
// not exceed this threshold.
| static const unsigned kMaxProblemSize = 8; |
| |
| // First seed for pseudorandom test generation. |
| static const unsigned kFirstSeed = 0; |
| |
| // Number of test cases. |
| static const unsigned kNumTestCases = 225; |
| |
| // Force all graph weights into a single pool (as we recommend to users) |
| // or allow them to be distributed across multiple pools (more stress |
| // on the partitioning algorithm and the rest of the runtime)? |
| // Forcing all graph weights into a single pool may be necessary to |
| // prevent large graphs from running up against http://b/70302693 |
| // "NNAPI overuses (?) fds". |
| static const bool kAllWeightsInOnePool = false; |
| |
| ////////////////////////////////////////////////////////////////////////////////// |
| |
| // The signature of an operation consists of the operation type (e.g., |
| // ADD) and the activation function (use -1 in the case of an |
| // operation type for which the activation function is inapplicable). |
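//
// For example, an ADD operation fused with RELU has the signature
// Signature(ANEURALNETWORKS_ADD, ANEURALNETWORKS_FUSED_RELU), while a
// LOGISTIC operation, which takes no activation function, has the signature
// Signature(ANEURALNETWORKS_LOGISTIC, -1).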
| typedef std::pair<ANeuralNetworksOperationType, int> Signature; |
| |
| // This class adds some simple utilities on top of WrapperModel. For example, |
| // it provides access to certain features from ModelBuilder that are not exposed |
| // by the base class (such as inputCount() and operation index). |
| class TestModel : public WrapperModel { |
| public: |
| uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs, |
| const std::vector<uint32_t>& outputs) { |
| const uint32_t operationIndex = operationCount(); |
| mOperations.push_back(outputs); |
| WrapperModel::addOperation(type, inputs, outputs); |
| return operationIndex; |
| } |
| |
| uint32_t operationCount() const { return mOperations.size(); } |
| |
| uint32_t inputCount() const { return builder()->inputCount(); } |
| uint32_t outputCount() const { return builder()->outputCount(); } |
| |
| const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const { |
| CHECK(index < mOperations.size()); |
| return mOperations[index]; |
| } |
| |
| // All values are immediately copied into the model (we need to do |
| // this ourselves in cases where the underlying NNAPI does not). |
| void setOperandValue(uint32_t index, const std::vector<float>& value) { |
| const size_t length = value.size() * sizeof(float); |
| |
| if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) { |
| WrapperModel::setOperandValue(index, value.data(), length); |
| } else { |
| mOperandValues.push_back(value); |
| WrapperModel::setOperandValue(index, mOperandValues.back().data(), length); |
| } |
| } |
| |
| void setOperandValue(uint32_t index, const std::vector<int32_t>& value) { |
| const size_t length = value.size() * sizeof(int32_t); |
| |
| CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES); |
| WrapperModel::setOperandValue(index, value.data(), length); |
| } |
| |
| void setOperandValue(uint32_t index, int32_t value) { |
| CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES); |
| WrapperModel::setOperandValue(index, &value, sizeof(value)); |
| } |
| |
| private: |
| const ModelBuilder* builder() const { |
| return reinterpret_cast<const ModelBuilder*>(getHandle()); |
| } |
| |
| // Representation of operations: vector index is operation number, |
| // vector value is operation's output operands. |
| std::vector<std::vector<uint32_t>> mOperations; |
| |
| // Large operand values -- not immediately copied into the |
| // WrapperModel, so remembered here instead. |
| std::vector<std::vector<float>> mOperandValues; |
| }; |
| |
| // This class adds some simple utilities on top of WrapperCompilation in order |
| // to provide access to certain features from CompilationBuilder that are not |
| // exposed by the base class. |
| class TestCompilation : public WrapperCompilation { |
| public: |
| TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {} |
| |
| TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) { |
| ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle()); |
| CompilationBuilder* c = nullptr; |
| int result = m->createCompilation(&c, devices); |
| EXPECT_EQ(result, 0); |
| mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c); |
| } |
| |
| using WrapperCompilation::finish; |
| |
| Result setPartitioning(uint32_t partitioning) { |
| return static_cast<Result>(builder()->forTest_setPartitioning(partitioning)); |
| } |
| |
| const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); } |
| |
| private: |
| const CompilationBuilder* builder() const { |
| return reinterpret_cast<const CompilationBuilder*>(getHandle()); |
| } |
| CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); } |
| }; |
| |
| // This class is used to manage a collection of memory regions, |
| // disjoint windows onto a set of Memory instances, each of which is |
| // associated with a single shared memory region. Each region and |
| // Memory instance is assigned a number. The usage pattern is as |
| // follows: |
| // - Call addMemory() and addRegion() as many times as needed to |
| // declare (but not define) Memory instances and declare region |
| // instances. |
| // - Call layout() to define the Memory instances. |
| // - Call getRegion() as many times as needed to get the details |
| // of memory regions (such as address, or Memory/offset/length). |
| // The Memory instances created by layout() are owned by the |
| // TestMemories instance, and are destroyed when the TestMemories |
| // instance is destroyed. |
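//
// A minimal usage sketch (the region size here is illustrative only):
//
//     TestMemories memories;
//     const unsigned memoryIndex = memories.addMemory();
//     const unsigned regionIndex = memories.addRegion(memoryIndex, 16 * sizeof(float));
//     memories.layout();
//     float* data = static_cast<float*>(memories.getRegion(regionIndex));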
| class TestMemories { |
| public: |
| TestMemories() = default; |
| |
| TestMemories(const TestMemories&) = delete; |
| TestMemories& operator=(const TestMemories&) = delete; |
| |
| unsigned addMemory() { |
| CHECK(!mLayoutDone); |
| mMemorySizes.push_back(0); |
| return memoryCount() - 1; |
| } |
| unsigned memoryCount() const { return mMemorySizes.size(); } |
| |
| unsigned addRegion(unsigned memoryIndex, uint32_t length) { |
| CHECK(!mLayoutDone); |
| CHECK(memoryIndex < memoryCount()); |
| uint32_t& memorySize = mMemorySizes[memoryIndex]; |
| auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length); |
| mRegions.push_back(desc); |
| memorySize += length; |
| return regionCount() - 1; |
| } |
| unsigned regionCount() const { return mRegions.size(); } |
| |
| void layout(); |
| |
| void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset, |
| uint32_t* pLength) { |
| CHECK(mLayoutDone); |
| CHECK(regionIndex < regionCount()); |
| const auto& regionDescriptor = mRegions[regionIndex]; |
| const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)]; |
| uint32_t offset = std::get<1>(regionDescriptor); |
| uint32_t length = std::get<2>(regionDescriptor); |
| |
| uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer(); |
| CHECK(buffer != nullptr); |
| |
| if (pMemory) *pMemory = memory; |
| if (pOffset) *pOffset = offset; |
| if (pLength) *pLength = length; |
| |
| return buffer + offset; |
| } |
| |
| void* getRegion(unsigned regionIndex) { |
| return getRegion(regionIndex, nullptr, nullptr, nullptr); |
| } |
| |
| private: |
| // Index is the memory index; value is the size of the memory |
| // (aggregate size of all regions in the memory). |
| std::vector<uint32_t> mMemorySizes; |
| |
| // Index is the memory index. |
| std::vector<WrapperMemory> mMemories; |
| |
| // Index is the region index; tuple represents memory index, |
| // region offset within memory, region length. |
| std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions; |
| |
| // For validity checking. |
| bool mLayoutDone = false; |
| }; |
| |
| void TestMemories::layout() { |
| CHECK(!mLayoutDone); |
| for (uint32_t memorySize : mMemorySizes) { |
| auto [n, ashmem] = nn::MemoryAshmem::create(memorySize); |
| CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR); |
| CHECK(ashmem != nullptr); |
| |
| ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release()); |
| mMemories.emplace_back(memory); |
| } |
| mLayoutDone = true; |
| } |
| |
| class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> { |
| public: |
| RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {} |
| |
| static Signature getSignature(const HidlModel& model, const V1_3::Operation& operation); |
| |
| protected: |
| static SharedDevice makeTestDriver(HalVersion version, const char* name, |
| std::set<Signature> signatures); |
| |
| static HalVersion getMinHalVersion(ANeuralNetworksOperationType type); |
| |
| static std::string to_string(HalVersion version); |
| |
| bool randBool() { return randUInt(2) == 1; } |
| |
| double randFrac() { // [0.0, 1.0) |
| return mRandNumUnitDist(mRandNumEng); |
| } |
| |
| unsigned randUInt(unsigned limit) { // [0, limit) |
| return unsigned(randFrac() * limit); |
| } |
| |
| // Represents an operation in which every input and output operand |
| // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except: |
| // - One input operand may be an activation function. |
| // - Any number of input operands may be "special" in some other way |
| // (and in this implementation, not produced by any other operation). |
| // We require that: |
| // - There be at least one input operand that is neither an |
| // activation function nor "special". |
| struct OperationPattern { |
| HalVersion mMinHalVersion; |
| int mOperationType; |
| unsigned mNumInputs; |
| unsigned mNumOutputs; |
| int mActivationFunctionInputIndex; // <0 if none |
| |
| // Returns operand index, or <0 if input is normal (must not |
| // be called for an activation function operand). Function |
| // should have the following prototype: |
| // |
| // int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex); |
| // |
| int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned); |
| }; |
| |
| static const OperationPattern kOperationPatterns[]; |
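    // For example, the ADD entry in kOperationPatterns (defined below) is
    // {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr}: three inputs
    // (two data tensors plus an activation function scalar at input index 2),
    // one output, and no special inputs.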
| |
| // See OperationPattern::mMakeSpecialInput. This function is used to |
| // manufacture an ELU input operand that doesn't fit the general operand |
| // pattern known to the graph generator infrastructure. |
| int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model, |
| unsigned inputIndex) { |
| if (inputIndex != 1) { |
| return -1; |
| } |
| |
| // input operand 1 is alpha, a scalar |
| const WrapperOperandType alphaType(WrapperType::FLOAT32, {}); |
| return int(model->addConstantOperand(&alphaType, 1.0f)); |
| } |
| |
| // See OperationPattern::mMakeSpecialInput. This function is used to |
| // manufacture an RNN input operand that doesn't fit the general operand |
| // pattern known to the graph generator infrastructure. |
| int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) { |
| if (inputIndex != 3) { |
| return -1; |
| } |
| |
| // input operand 3 is bias, a 1-D tensor |
| const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize}); |
| const uint32_t operandIndex = model->addOperand(&biasType); |
| std::vector<float> biasValue(problemSize); |
| std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); }); |
| model->setOperandValue(operandIndex, biasValue); |
| return int(operandIndex); |
| } |
| |
| // See OperationPattern::mMakeSpecialInput. This function is used to |
| // manufacture a TRANSPOSE input operand that doesn't fit the general operand |
| // pattern known to the graph generator infrastructure. |
| int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model, |
| unsigned inputIndex) { |
| if (inputIndex != 1) { |
| return -1; |
| } |
| |
| // input operand 1 is perm, a 1-D tensor |
| const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2}); |
| const uint32_t operandIndex = model->addOperand(&permType); |
| std::vector<int32_t> permValue = {1, 0}; |
| model->setOperandValue(operandIndex, permValue); |
| return int(operandIndex); |
| } |
| |
| #ifdef VERBOSE |
| class ModelStats { |
| public: |
| ModelStats(const ModelBuilder* model) : mBuilder(model) {} |
| ModelStats(const WrapperModel* model) |
| : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {} |
| friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) { |
| const uint32_t operandCount = stats.mBuilder->operandCount(); |
| const uint32_t inputCount = stats.mBuilder->inputCount(); |
| const uint32_t outputCount = stats.mBuilder->outputCount(); |
| out << "operationCount = " << stats.mBuilder->operationCount() |
| << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " (" |
| << (double(inputCount) / operandCount) << ")" |
| << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount) |
| << ")"; |
| return out; |
| } |
| |
| private: |
| const ModelBuilder* mBuilder; |
| }; |
| |
| template <typename T_iterator> |
| static void dump(T_iterator I, T_iterator E) { |
| std::cout << "{"; |
| for (; I != E; I++) { |
| std::cout << " " << *I; |
| } |
| std::cout << " }" << std::endl; |
| } |
| #endif |
| |
| std::mt19937 mRandNumEng; |
| |
| private: |
| std::uniform_real_distribution<double> mRandNumUnitDist; |
| }; |
| |
| const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = { |
| {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr}, |
| {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr}, |
| {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr}, |
| {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5, |
| &RandomPartitioningTest::makeRnnSpecialInput}, |
| {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr}, |
| |
| {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr}, |
| {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1, |
| &RandomPartitioningTest::makeTransposeSpecialInput}, |
| |
| {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr}, |
| {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr}, |
| {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr}, |
| |
| {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1, |
| &RandomPartitioningTest::makeEluSpecialInput}, |
| {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr}, |
| }; |
| |
| HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) { |
| static const auto kOperationToVersion = [] { |
| std::map<ANeuralNetworksOperationType, HalVersion> result; |
| for (const auto& pattern : kOperationPatterns) { |
| result[pattern.mOperationType] = pattern.mMinHalVersion; |
| } |
| return result; |
| }(); |
| |
| return kOperationToVersion.at(type); |
| } |
| |
| Signature RandomPartitioningTest::getSignature(const HidlModel& model, |
| const V1_3::Operation& operation) { |
| static const auto kOperationToActivation = [] { |
| std::map<ANeuralNetworksOperationType, int> result; |
| for (const auto& pattern : kOperationPatterns) { |
| result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex; |
| } |
| return result; |
| }(); |
| |
| const ANeuralNetworksOperationType operationType = |
| static_cast<ANeuralNetworksOperationType>(operation.type); |
| const int activationFunctionInputIndex = kOperationToActivation.at(operationType); |
| if (activationFunctionInputIndex < 0) { |
| return Signature(operationType, -1); |
| } |
| |
| const V1_3::Operand& operand = |
| model.main.operands[operation.inputs[activationFunctionInputIndex]]; |
| CHECK(operand.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY); |
| CHECK(operand.type == V1_3::OperandType::INT32); |
| int32_t value; |
| memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length); |
| return Signature(operationType, value); |
| } |
| |
| std::string RandomPartitioningTest::to_string(HalVersion version) { |
| switch (version) { |
| case HalVersion::V1_0: |
| return "V1_0"; |
| case HalVersion::V1_1: |
| return "V1_1"; |
| case HalVersion::V1_2: |
| return "V1_2"; |
| case HalVersion::V1_3: |
| return "V1_3"; |
| default: |
| return "V_UNKNOWN"; |
| } |
| }; |
| |
| class TestDriver : public SampleDriver { |
| public: |
| // Behaves like SampleDriver, except that it only supports |
| // operations with the specified signatures. |
| TestDriver(const char* name, std::set<Signature> signatures) |
| : SampleDriver(name), mSignatures(std::move(signatures)) {} |
| |
| hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override { |
| android::nn::initVLogMask(); |
| const V1_0::PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f}; |
| V1_3::Capabilities capabilities = { |
| .relaxedFloat32toFloat16PerformanceScalar = kPerf, |
| .relaxedFloat32toFloat16PerformanceTensor = kPerf, |
| .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf), |
| .ifPerformance = kPerf, |
| .whilePerformance = kPerf}; |
| _hidl_cb(V1_3::ErrorStatus::NONE, capabilities); |
| return hardware::Void(); |
| } |
| |
| hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model, |
| getSupportedOperations_1_3_cb cb) override { |
| if (nn::validateModel(model)) { |
| const size_t count = model.main.operations.size(); |
| std::vector<bool> supported(count); |
| for (size_t i = 0; i < count; i++) { |
| supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature( |
| model, model.main.operations[i])) != 0); |
| } |
| cb(V1_3::ErrorStatus::NONE, supported); |
| } else { |
| cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {}); |
| } |
| return hardware::Void(); |
| } |
| |
| hardware::Return<V1_3::ErrorStatus> prepareModel_1_3( |
| const HidlModel& model, V1_1::ExecutionPreference preference, V1_3::Priority priority, |
| const V1_3::OptionalTimePoint& deadline, |
| const hardware::hidl_vec<hardware::hidl_handle>& modelCache, |
| const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token, |
| const sp<V1_3::IPreparedModelCallback>& callback) override { |
| // NOTE: We verify that all operations in the model are supported. |
| V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT; |
| auto ret = getSupportedOperations_1_3( |
| model, [&outStatus](V1_3::ErrorStatus inStatus, |
| const hardware::hidl_vec<bool>& supportedOperations) { |
| if (inStatus == V1_3::ErrorStatus::NONE) { |
| if (std::all_of(supportedOperations.begin(), supportedOperations.end(), |
| [](bool v) { return v; })) { |
| outStatus = V1_3::ErrorStatus::NONE; |
| } |
| } |
| }); |
| if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) { |
| return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache, |
| dataCache, token, callback); |
| } else { |
| callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr); |
| return V1_3::ErrorStatus::INVALID_ARGUMENT; |
| } |
| } |
| |
| private: |
| const std::set<Signature> mSignatures; |
| }; |
| |
| SharedDevice RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name, |
| std::set<Signature> signatures) { |
| switch (version) { |
| case HalVersion::V1_0: |
| return V1_0::utils::Device::create(name, new TestDriver(name, std::move(signatures))) |
| .value(); |
| case HalVersion::V1_1: |
| return V1_1::utils::Device::create(name, new TestDriver(name, std::move(signatures))) |
| .value(); |
| case HalVersion::V1_2: |
| return V1_2::utils::Device::create(name, new TestDriver(name, std::move(signatures))) |
| .value(); |
| case HalVersion::V1_3: |
| return V1_3::utils::Device::create(name, new TestDriver(name, std::move(signatures))) |
| .value(); |
| default: |
| ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version); |
| return nullptr; |
| } |
| } |
| |
| INSTANTIATE_TEST_SUITE_P(Seed, RandomPartitioningTest, |
| ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases)); |
| |
| TEST_P(RandomPartitioningTest, Test) { |
| LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam(); |
| |
| #ifdef VERBOSE |
| std::cout << std::setprecision(2) << std::fixed << std::setw(4); |
| #endif |
| |
| const unsigned problemSize = 1 + randUInt(kMaxProblemSize); |
| const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize}); |
| const WrapperOperandType unknownDimensionsTypes[] = { |
| {WrapperType::TENSOR_FLOAT32, {}}, |
| {WrapperType::TENSOR_FLOAT32, {0, 0}}, |
| {WrapperType::TENSOR_FLOAT32, {0, problemSize}}, |
| {WrapperType::TENSOR_FLOAT32, {problemSize, 0}}, |
| }; |
    const unsigned kUnknownDimensionsTypesCount = std::size(unknownDimensionsTypes);
| |
| static const WrapperOperandType activationFunctionType(WrapperType::INT32, {}); |
| |
| const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1); |
| const bool allowDeadOperations = (randFrac() < 0.2); |
| const bool allowUnknownDimensions = (randFrac() < 0.25); |
| |
| // TODO: The current algorithm builds the graph in a forward |
| // direction (i.e., later-generated operations consume outputs |
| // from earlier-generated operations). In order to get more |
| // variation in graph topology, perhaps we should also create an |
| // algorithm to build the graph in a backward direction (i.e., |
| // later-generated operations produce outputs to be consumed by |
| // earlier-generated operations). |
| [[maybe_unused]] const bool buildForward = randBool(); |
| |
| // TODO: Add a form of forced connectivity that operates by |
| // joining disjoint subgraphs rather than by forcing a root. |
| const bool forceCommonRoot = (randFrac() < 0.75); |
| |
| auto computeMode = WrapperExecution::getComputeMode(); |
    // We call randFrac() independently of compute mode, because we don't want
    // the random number sequence to change depending on compute mode: compute
    // mode should only affect how we perform the inference, not how we build
    // the Model, the Compilation, or the Execution.
| if (randFrac() < 0.5 && computeMode == WrapperExecution::ComputeMode::ASYNC) { |
| computeMode = WrapperExecution::ComputeMode::FENCED; |
| } |
| |
| TestModel model; |
| std::vector<uint32_t> modelInputs; |
| std::vector<uint32_t> modelOutputs; |
| |
| std::set<uint32_t> operandsWithUnknownDimensions; |
| |
| // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32. |
| TestMemories weights; |
| |
| // Keep track of all normal (i.e., not activation function and not |
| // "special") operands that are values (from setOperandValue*()). |
| // .first: operand index |
| // .second: if the operand is already defined (via setOperandValue*()) then ~0U; |
| // otherwise, the operand has yet to be defined, and this is the corresponding |
| // region index in "weights" |
| std::vector<std::pair<uint32_t, unsigned>> valueOperands; |
| |
| // An operand is "dead" if it is not consumed by another operation |
    // and is not a model output. Key is operand index; value is the
    // index of the operation that produces the operand.
| std::map<uint32_t, uint32_t> deadOperands; |
| |
| // An operation is "dead" if all of its outputs are dead. |
| std::set<uint32_t> deadOperations; |
| |
| // Collect the signatures of operations in this model. |
| std::set<Signature> signatures; |
| |
| // For reporting purposes, keep track of the number of root |
| // operations (those that do not consume results produced by other |
| // operations). |
| unsigned rootOperationCount = 0; |
| |
| // Track whether we added operands with unknown dimensions. In this case, |
| // partitioned compilation will fail if such an operand is read in a |
| // different partition than it is written, and the partition that does the |
| // writing is scheduled on a pre-HAL 1.2 (pre-Android Q) device. |
| bool hasUnknownDimensions = false; |
| |
| // Generate operations. |
| for (unsigned i = 0; i < numOperations; i++) { |
| const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns)); |
| const auto& operationPattern = kOperationPatterns[operationPatternIndex]; |
| |
| // INPUTS ////////////////////////////////////////////////////////////////////////////////// |
| |
| std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U); |
| |
| // First, process activation function and special inputs, and |
| // keep track of which inputs remain. |
| std::vector<uint32_t> normalOperationInputIndexes; |
| int32_t activationFunction = -1; |
| for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs; |
| operationInputIndex++) { |
| if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) { |
| const uint32_t operandIndex = model.addOperand(&activationFunctionType); |
| activationFunction = randUInt(4); |
| if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) { |
| // workaround for http://b/69011131 |
| activationFunction = ANEURALNETWORKS_FUSED_NONE; |
| } |
| model.setOperandValue(operandIndex, activationFunction); |
| operationInputs[operationInputIndex] = operandIndex; |
| continue; |
| } |
| if (operationPattern.mMakeSpecialInput != nullptr) { |
| const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))( |
| problemSize, &model, operationInputIndex); |
| if (operandIndex >= 0) { |
| operationInputs[operationInputIndex] = operandIndex; |
| continue; |
| } |
| } |
| normalOperationInputIndexes.push_back(operationInputIndex); |
| } |
| CHECK(!normalOperationInputIndexes.empty()); |
| signatures.insert(Signature(operationPattern.mOperationType, activationFunction)); |
| |
| // A (normal) operation input can be one of: |
| // - a new or existing model input |
| // - an output of an existing operation |
| // - an OperandValue |
| // - an OperandValueFromMemory |
| // Some guidelines: |
| // - We generally don't want all of an operation's inputs to be values (constants) |
| const unsigned normalOperationInputCount = normalOperationInputIndexes.size(); |
| // How many of this operation's inputs are constants? |
| unsigned normalOperationInputConstantCount = 0; |
| // How many of this operation's inputs are model inputs? |
| unsigned normalOperationInputModelInputCount = 0; |
| // We begin by deciding what kind of input each (normal) operation will be; we don't |
| // actually pick input operand indexes at this time, because we might override this |
| // decision later. |
| enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE }; |
| std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount); |
| std::generate( |
| normalOperationInputKinds.begin(), normalOperationInputKinds.end(), |
| [this, &model, numOperations, normalOperationInputCount, |
| &normalOperationInputConstantCount, |
| &normalOperationInputModelInputCount]() -> InputKind { |
| // Constant? Becomes less likely the more |
| // constants we already have as inputs to |
| // this operation. |
| if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) / |
| normalOperationInputCount)) { |
| normalOperationInputConstantCount++; |
| return IK_VALUE; |
| } |
| |
| // Model input? Becomes less likely the |
| // more model inputs we already have as |
| // inputs to this operation, and the further |
| // along we are in generating this model |
| // (i.e., the more operations we have |
| // generated). |
| if ((model.operationCount() == 0) || |
| (randFrac() < 0.5 * |
| (1 - double(normalOperationInputModelInputCount) / |
| normalOperationInputCount) * |
| std::min(0.3, (1 - double(model.operationCount()) / |
| numOperations)))) { |
| normalOperationInputModelInputCount++; |
| return IK_SUBGRAPH_INPUT; |
| } |
| |
| // Else output of an existing operation. |
| return IK_OPERATION_OUTPUT; |
| }); |
| |
| // Now force common root or model input, if necessary. (A |
| // model must have at least one input.) |
| auto force = [this, &normalOperationInputKinds, |
| normalOperationInputCount](InputKind forceKind) { |
| if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(), |
| [forceKind](InputKind kind) { return kind == forceKind; })) { |
| normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind; |
| } |
| }; |
| if (forceCommonRoot && (model.operationCount() != 0)) { |
| force(IK_OPERATION_OUTPUT); |
| } |
| if (modelInputs.empty()) { |
| CHECK(model.operationCount() == 0); |
| force(IK_SUBGRAPH_INPUT); |
| } |
| |
| // Finally create the normal inputs. |
| bool isRootOperation = true; |
| for (unsigned i = 0; i < normalOperationInputCount; i++) { |
| uint32_t operandIndex = ~0U; |
| switch (normalOperationInputKinds[i]) { |
| case IK_SUBGRAPH_INPUT: { |
| if (!modelInputs.empty() && (randFrac() < 0.5)) { |
| operandIndex = modelInputs[randUInt(modelInputs.size())]; |
| } else { |
| operandIndex = model.addOperand(&problemType); |
| modelInputs.push_back(operandIndex); |
| } |
| break; |
| } |
| case IK_OPERATION_OUTPUT: { |
| decltype(deadOperands.begin()) deadOperandI; |
| if (!deadOperands.empty() && (randFrac() < 0.5)) { |
| deadOperandI = deadOperands.begin(); |
| std::advance(deadOperandI, randUInt(deadOperands.size())); |
| operandIndex = deadOperandI->first; |
| } else { |
| const uint32_t existingOperationIndex = randUInt(model.operationCount()); |
| const auto& existingOperationOutputs = |
| model.getOperationOutputs(existingOperationIndex); |
| operandIndex = |
| existingOperationOutputs[randUInt(existingOperationOutputs.size())]; |
| deadOperandI = deadOperands.find(operandIndex); |
| CHECK(deadOperandI == deadOperands.end() || |
| deadOperandI->second == existingOperationIndex); |
| } |
| if (deadOperandI != deadOperands.end()) { |
| const uint32_t correspondingOperation = deadOperandI->second; |
| deadOperands.erase(deadOperandI); |
| |
| auto deadOperationI = deadOperations.find(correspondingOperation); |
| if (deadOperationI != deadOperations.end()) { |
| deadOperations.erase(deadOperationI); |
| } |
| } |
| isRootOperation = false; |
| break; |
| } |
| case IK_VALUE: { |
| if (!valueOperands.empty() && (randFrac() < 0.25)) { |
| operandIndex = valueOperands[randUInt(valueOperands.size())].first; |
| } else { |
| operandIndex = model.addOperand(&problemType); |
| if (randFrac() < 0.5) { |
| std::vector<float> value(problemSize * problemSize); |
| std::generate(value.begin(), value.end(), |
| [this] { return randFrac(); }); |
| model.setOperandValue(operandIndex, value); |
| valueOperands.push_back(std::make_pair(operandIndex, ~0U)); |
| } else { |
| unsigned memoryIndex = ~0U; |
| if ((weights.memoryCount() != 0) && |
| (kAllWeightsInOnePool || (randFrac() < 0.5))) { |
| memoryIndex = randUInt(weights.memoryCount()); |
| } else { |
| memoryIndex = weights.addMemory(); |
| } |
| const size_t length = problemSize * problemSize * sizeof(float); |
| const unsigned regionIndex = weights.addRegion(memoryIndex, length); |
| valueOperands.push_back(std::make_pair(operandIndex, regionIndex)); |
| } |
| } |
| break; |
| } |
| default: |
| FAIL(); |
| } |
| operationInputs[normalOperationInputIndexes[i]] = operandIndex; |
| } |
| if (isRootOperation) { |
| rootOperationCount++; |
| } |
| |
| // OUTPUTS ///////////////////////////////////////////////////////////////////////////////// |
| |
| std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs); |
| std::generate( |
| operationOutputs.begin(), operationOutputs.end(), |
| [&operandsWithUnknownDimensions, &model, &problemType, &unknownDimensionsTypes, |
| &hasUnknownDimensions, allowUnknownDimensions, this] { |
| // Before the fix for http://b/132458982, 3% unknowns causes |
| // ~35% of partitionings to fail. After the fix, 3% |
| // unknowns causes ~3% of partitionings to fail. (This is |
| // determined by removing the fallback code and noting the |
| // number of failures.) |
| if (allowUnknownDimensions && randFrac() < 0.03) { |
| hasUnknownDimensions = true; |
| uint32_t opndIdx = model.addOperand( |
| &unknownDimensionsTypes[randUInt(kUnknownDimensionsTypesCount)]); |
| operandsWithUnknownDimensions.insert(opndIdx); |
| return opndIdx; |
| } else { |
| return model.addOperand(&problemType); |
| } |
| }); |
| |
| // OPERATION /////////////////////////////////////////////////////////////////////////////// |
| |
| const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType, |
| operationInputs, operationOutputs); |
| deadOperations.insert(operationIndex); |
| std::for_each(operationOutputs.begin(), operationOutputs.end(), |
| [&deadOperands, operationIndex](uint32_t operandIndex) { |
| deadOperands.insert(std::make_pair(operandIndex, operationIndex)); |
| }); |
| } |
| |
| // Now finalize the weights. |
| weights.layout(); |
| for (const auto& valueOperand : valueOperands) { |
| const uint32_t operandIndex = valueOperand.first; |
| const unsigned regionIndex = valueOperand.second; |
| |
| if (regionIndex == ~0U) { |
| continue; |
| } |
| |
| const WrapperMemory* memory; |
| uint32_t offset, length; |
| float* region = |
| static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length)); |
| CHECK(length == problemSize * problemSize * sizeof(float)); |
| std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); }); |
| model.setOperandValueFromMemory(operandIndex, memory, offset, length); |
| } |
| |
| // Now select model outputs. |
| for (uint32_t operationIdx = 0, operationCount = model.operationCount(); |
| operationIdx < operationCount; operationIdx++) { |
| const auto& outputs = model.getOperationOutputs(operationIdx); |
| for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount; |
| outputIdx++) { |
| bool modelOutput = false; |
| const uint32_t operandIndex = outputs[outputIdx]; |
| const auto deadOperandI = deadOperands.find(operandIndex); |
| if (deadOperandI != deadOperands.end()) { |
| // This is not consumed within the model, so unless we |
| // make it an output of the model, it's dead. The |
| // further along we are in generating this model |
| // (i.e., the more operations we have generated), the |
| // more likely we are to classify this operation |
| // output as a model output. |
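                // For example, halfway through generation the probability
                // computed below is 0.50 * 0.5 * 0.5 = 0.125.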
                const double probabilityOfModelOutput =
                        0.50 * [](double x) { return x * x; }(double(operationIdx + 1) /
                                                              operationCount);
| modelOutput = (randFrac() < probabilityOfModelOutput); |
| } else { |
| // This is consumed within the model, so we'll rarely |
| // make it an output of the model. |
| modelOutput = (randFrac() < 0.05); |
| } |
| if (!modelOutput) { |
| continue; |
| } |
| modelOutputs.push_back(operandIndex); |
| if (deadOperandI != deadOperands.end()) { |
| deadOperands.erase(deadOperandI); |
| const auto deadOperationI = deadOperations.find(operationIdx); |
| if (deadOperationI != deadOperations.end()) { |
| deadOperations.erase(deadOperationI); |
| } |
| } |
| } |
| } |
| if (!allowDeadOperations) { |
| // For each dead operation, pick a random output to become a model output. |
| for (uint32_t deadOperationIndex : deadOperations) { |
| const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex); |
| const uint32_t deadOperandIndex = |
| deadOperationOutputs[randUInt(deadOperationOutputs.size())]; |
| modelOutputs.push_back(deadOperandIndex); |
| } |
| } |
| // A model must have at least one output. |
| if (modelOutputs.empty()) { |
| const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount())); |
| modelOutputs.push_back(outputs[randUInt(outputs.size())]); |
| } |
| if (computeMode == WrapperExecution::ComputeMode::FENCED) { |
| if (std::any_of(modelOutputs.begin(), modelOutputs.end(), |
| [&operandsWithUnknownDimensions](uint32_t opndIdx) { |
| return operandsWithUnknownDimensions.count(opndIdx) != 0; |
| })) { |
| // Workaround for http://b/162980246: Fenced execution is documented |
| // as requiring model outputs to have fully specified dimensions, |
| // either from Model or from Execution, but its implementation |
| // requires this to come from Model. This test only guarantees that |
| // they have fully specified dimensions from Execution. So in the |
| // case of a Model where some output does not have fully specified |
| // dimensions, perform asynchronous execution instead. |
| computeMode = WrapperExecution::ComputeMode::ASYNC; |
| } |
| } |
| |
| model.identifyInputsAndOutputs(modelInputs, modelOutputs); |
| #ifdef VERBOSE |
| { |
| std::cout << "Original model: " << ModelStats(&model) << std::endl; |
| std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = "; |
| if (allowDeadOperations) { |
| std::cout << deadOperations.size(); |
| } else { |
| std::cout << "forbidden (converted " << deadOperations.size() << ")"; |
| } |
| std::cout << std::endl; |
| } |
| #endif |
| ASSERT_EQ(model.finish(), Result::NO_ERROR); |
| |
| // Non-partitioned compilation. |
| TestCompilation c(&model); |
| ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR); |
| ASSERT_EQ(c.finish(), Result::NO_ERROR); |
| |
| // Create some drivers for partitioned compilation. |
| CHECK(!signatures.empty()); |
| std::vector<std::set<Signature>> signaturesForDriver(signatures.size()); |
| // First assign each signature to a random driver (a driver is |
| // just represented as an entry in the signaturesForDriver |
| // vector). |
| for (Signature signature : signatures) { |
| signaturesForDriver[randUInt(signatures.size())].insert(signature); |
| } |
| // Now remove each entry that has no signatures. |
| auto firstExtra = |
| std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(), |
| [](const std::set<Signature>& sigSet) { return sigSet.empty(); }); |
| if (firstExtra != signaturesForDriver.end()) { |
| signaturesForDriver.erase(firstExtra, signaturesForDriver.end()); |
| } |
| // Now actually create the drivers. |
| std::vector<std::shared_ptr<Device>> devices; |
| for (unsigned i = 0; i < signaturesForDriver.size(); i++) { |
| const auto& signaturesForThisDriver = signaturesForDriver[i]; |
        // The minimum HAL version for this driver is the highest minimum HAL
        // version of any operation supported by the driver.
| const HalVersion minHalVersion = getMinHalVersion( |
| std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(), |
| [](const Signature& a, const Signature& b) { |
| return getMinHalVersion(a.first) < getMinHalVersion(b.first); |
| }) |
| ->first); |
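        // For example, if minHalVersion is V1_1 and HalVersion::LATEST is V1_3,
        // actualHalVersion below is drawn uniformly from {V1_1, V1_2, V1_3}.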
| const HalVersion actualHalVersion = |
| static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) + |
| randUInt(static_cast<int32_t>(HalVersion::LATEST) - |
| static_cast<int32_t>(minHalVersion) + 1)); |
| const std::string name = |
| "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}"; |
| #ifdef VERBOSE |
| std::cout << "Creating " + name + " for collection of signatures that requires HAL " + |
| to_string(minHalVersion) |
| << std::endl; |
| #endif |
| auto device = DeviceManager::forTest_makeDriverDevice( |
| makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver)); |
| devices.push_back(device); |
| } |
| // CPU fallback device |
| devices.push_back(DeviceManager::getCpuDevice()); |
| |
| // Partitioned compilation. |
| // |
| // If a test case has both (a) unknown intermediate operand sizes and |
| // (b) partitions scheduled on pre-HAL 1.2 (pre-Android Q) devices, fallback |
| // is needed if the non-fallback partitioning fails. |
| // |
| // The issue is that prior to HAL 1.2, an output operand must have a known |
| // size provided either in the Model or in the Request; and in the case of |
| // partitioning, an intermediate operand of the original model that becomes |
| // an output operand of a partition won't have a known size provided in the |
| // Request. |
| // |
| // If a test case has a step model with no inputs or no outputs, fallback is needed. |
| // This is because our HAL specification requires a model to have at least one |
| // input and one output. |
| // |
| // If a fallback is needed, we retry the compilation with a fallback and require |
| // the fallback to succeed. Otherwise, we require the partitioning to succeed |
| // without CPU fallback. |
| TestCompilation cNoFallback(&model, devices); |
| TestCompilation cWithFallback(&model, devices); |
| ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), |
| Result::NO_ERROR); |
| auto compilationResult = cNoFallback.finish(); |
| const bool fallbackNeededForDynamicTemporaries = |
| compilationResult == Result::OP_FAILED && hasUnknownDimensions && |
| cNoFallback.getExecutionPlan().hasDynamicTemporaries() && |
| std::any_of(devices.begin(), devices.end(), [](const std::shared_ptr<Device>& device) { |
| return !isCompliantVersion(nn::kHalVersionV1_2ToApi.canonical, |
| device->getFeatureLevel()); |
| }); |
| const bool fallbackNeededForStepModelWithNoInputsOrNoOutputs = |
| cNoFallback.getExecutionPlan().forTest_hasStepModelWithNoInputsOrNoOutputs(); |
| const bool fallbackNeeded = fallbackNeededForDynamicTemporaries || |
| fallbackNeededForStepModelWithNoInputsOrNoOutputs; |
| if (fallbackNeeded) { |
| ASSERT_EQ(compilationResult, Result::OP_FAILED); |
| |
| ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback), |
| Result::NO_ERROR); |
| compilationResult = cWithFallback.finish(); |
| ASSERT_EQ(compilationResult, Result::NO_ERROR); |
| ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(), |
| DeviceManager::getCpuDevice()); |
| } else { |
| ASSERT_EQ(compilationResult, Result::NO_ERROR); |
| |
| const ExecutionPlan& plan = cNoFallback.getExecutionPlan(); |
| if (signaturesForDriver.size() == 1) { |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE); |
| ASSERT_TRUE(plan.forTest_simpleGetDevice() != DeviceManager::getCpuDevice()); |
| } else { |
| ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND); |
| auto stepToDeviceId = [](const std::shared_ptr<LogicalStep>& step) { |
| return step->executionStep()->getDevice(); |
| }; |
| std::set<decltype(stepToDeviceId(plan.forTest_compoundGetSteps()[0]))> deviceSet; |
| for (const auto& step : plan.forTest_compoundGetSteps()) { |
| deviceSet.insert(stepToDeviceId(step)); |
| } |
| // TODO(b/178517567): Figure out why we sometimes have 1 more |
| // signature than we have devices -- this means that we've scheduled |
| // one or more operations onto the CPU fallback device, which is not |
| // something we ever expect to do. |
| ASSERT_TRUE(deviceSet.size() == signaturesForDriver.size() || |
| deviceSet.size() == signaturesForDriver.size() + 1); |
| } |
| } |
| TestCompilation& c2 = (fallbackNeeded ? cWithFallback : cNoFallback); |
| #ifdef TRACE_DYNTEMP |
| { |
| const ExecutionPlan& plan = c2.getExecutionPlan(); |
| const size_t dynamicTemporaryCount = plan.forTest_flatGetDynamicTemporaries().size(); |
| std::cout << "TRACE_DYNTEMP: dynamic temporary count = " << dynamicTemporaryCount |
| << std::endl; |
| if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) { |
| size_t stepsWithModelOutputsThatAreDownstreamInputs = 0; |
| size_t countOfModelOutputsThatAreDownstreamInputs = 0; |
| for (const auto& step : plan.forTest_compoundGetSteps()) { |
| if (const size_t count = step->executionStep() |
| ->getModelOutputsThatAreDownstreamInputs() |
| .size()) { |
| ++stepsWithModelOutputsThatAreDownstreamInputs; |
| countOfModelOutputsThatAreDownstreamInputs += count; |
| } |
| } |
| if (countOfModelOutputsThatAreDownstreamInputs != 0) { |
| std::cout << "TRACE_DYNTEMP: model outputs that are downstream inputs: " |
| << countOfModelOutputsThatAreDownstreamInputs << " / " |
| << modelOutputs.size() << ", over " |
| << stepsWithModelOutputsThatAreDownstreamInputs << " / " |
| << plan.forTest_compoundGetSteps().size() << " steps" << std::endl; |
| EXPECT_LE(countOfModelOutputsThatAreDownstreamInputs, modelOutputs.size()); |
| } |
| } else { |
| EXPECT_EQ(dynamicTemporaryCount, size_t(0)) |
| << "Only COMPOUND plan should have dynamic temporaries"; |
| } |
| } |
| #endif |
| |
| #ifdef VERBOSE |
| { |
| std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size() |
| << std::endl; |
| // TODO: When dumping steps, include non-ExecutionSteps. |
| const ExecutionPlan& plan = c2.getExecutionPlan(); |
| switch (plan.forTest_getKind()) { |
| case ExecutionPlan::Kind::SIMPLE: |
| std::cout << "plan: simple" << std::endl; |
| break; |
| case ExecutionPlan::Kind::COMPOUND: { |
| const auto& steps = plan.forTest_compoundGetSteps(); |
| std::set<const Device*> devicesInPlan; |
| for (const auto& step : steps) { |
| if (const auto* executionStep = step->tryExecutionStep()) { |
| devicesInPlan.insert(executionStep->getDevice().get()); |
| } |
| } |
| std::cout << "plan: compound, " << steps.size() << " steps over " |
| << devicesInPlan.size() << " devices" << std::endl; |
| for (unsigned i = 0; i < steps.size(); i++) { |
| if (const auto executionStep = steps[i]->tryExecutionStep()) { |
| std::cout << "Step " << i << ": " |
| << ModelStats(executionStep->getStepModel()) |
| << ", device = " << executionStep->getDevice()->getName() |
| << std::endl; |
| } |
| } |
| break; |
| } |
| default: |
| std::cout << "Unexpected plan kind: " |
| << static_cast<unsigned>(plan.forTest_getKind()); |
| break; |
| } |
| } |
| #endif |
| |
| // For execution: |
| // - create golden inputs (one long vector) and golden output value |
| // - golden inputs will be copied to actual inputs before each |
| // of the two executions |
| // - golden output will be used to fill actual outputs before each |
| // of the two executions |
| // - create actual inputs and outputs |
| // - first execution (non-partitioned) |
| // - initialize inputs and (to avoid unrelated oddities) outputs |
| // - execute |
| // - copy outputs to a save area (one long vector) |
| // - second execution (partitioned) |
| // - (to avoid unrelated oddities) initialize inputs and outputs |
| // - execute |
| // - compare outputs to save area |
| |
| // If the runtime and drivers are working properly, execution |
| // should not change the inputs. Nonetheless, we reinitialize the |
| // inputs for each execution, so as to avoid unrelated problems |
| // appearing to be problems related to unpartitioned execution |
| // versus partitioned execution. Similarly, execution behavior |
| // should not be dependent on the outputs; but we'll initialize the |
| // outputs anyway. |
| std::vector<float> goldenInputs(problemSize * problemSize * model.inputCount()); |
| std::generate(goldenInputs.begin(), goldenInputs.end(), [this] { return randFrac(); }); |
| #ifdef VERBOSE |
| { |
| std::cout << "flat inputs = "; |
| dump(goldenInputs.begin(), goldenInputs.end()); |
| } |
| #endif |
| const float goldenOutput = randFrac(); |
| |
| // Create the memory for the actual inputs and outputs. |
| struct InputOutputDescriptor { |
| enum Kind { INPUT, OUTPUT }; |
| Kind mKind; |
| |
| // The input or output either resides in a local buffer |
| // (mVector, in which case mMemoryRegion is ignored); or in a |
| // shared memory region within a TestMemories instance |
| // (mMemoryRegion, in which case mVector is ignored). |
| enum Location { VECTOR, REGION }; |
| Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; } |
| |
| std::vector<float> mVector; |
| unsigned mMemoryRegion; |
| }; |
| std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount()); |
| for (unsigned i = 0; i < ioDescriptors.size(); i++) { |
| ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT |
| : InputOutputDescriptor::OUTPUT); |
| } |
| // We randomly interleave inputs and outputs in creation |
    // order, because when we create memory regions in a
| // TestMemories instance, the order in which regions are |
| // created within a single Memory is the order they'll be laid |
| // out in that memory; and when we have inputs and outputs |
| // within the same Memory, we want the possibility that |
| // they'll be interleaved. |
| std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng); |
| TestMemories ioMemories; |
| for (auto& desc : ioDescriptors) { |
| if (randFrac() < 0.5) { |
| desc.mVector.resize(problemSize * problemSize); |
| } else { |
            // TODO: share this logic with the way we create IK_VALUE inputs?
| unsigned memoryIndex = ~0U; |
| if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) { |
| memoryIndex = randUInt(ioMemories.memoryCount()); |
| } else { |
| memoryIndex = ioMemories.addMemory(); |
| } |
| const size_t length = problemSize * problemSize * sizeof(float); |
| desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length); |
| } |
| } |
| ioMemories.layout(); |
| |
| // Function to set up actual inputs and outputs (initializing them |
| // and telling the WrapperExecution about them). |
| auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &goldenInputs, &goldenOutput, |
| problemSize, &problemType](WrapperExecution* e) { |
| uint32_t inputIndex = 0, outputIndex = 0; |
| for (auto& desc : ioDescriptors) { |
| if (desc.getLocation() == InputOutputDescriptor::VECTOR) { |
| if (desc.mKind == InputOutputDescriptor::INPUT) { |
| const size_t inputOffset = inputIndex * problemSize * problemSize; |
| std::copy(goldenInputs.begin() + inputOffset, |
| goldenInputs.begin() + inputOffset + problemSize * problemSize, |
| desc.mVector.begin()); |
| e->setInput(inputIndex++, desc.mVector.data(), |
| desc.mVector.size() * sizeof(float)); |
| } else { |
| std::fill(desc.mVector.begin(), |
| desc.mVector.begin() + problemSize * problemSize, goldenOutput); |
| e->setOutput(outputIndex++, desc.mVector.data(), |
| desc.mVector.size() * sizeof(float), &problemType.operandType); |
| } |
| } else { |
| const WrapperMemory* memory; |
| uint32_t offset, length; |
| float* region = static_cast<float*>( |
| ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length)); |
| CHECK(length == problemSize * problemSize * sizeof(float)); |
| if (desc.mKind == InputOutputDescriptor::INPUT) { |
| const size_t inputOffset = inputIndex * problemSize * problemSize; |
| std::copy(goldenInputs.begin() + inputOffset, |
| goldenInputs.begin() + inputOffset + problemSize * problemSize, |
| region); |
| e->setInputFromMemory(inputIndex++, memory, offset, length); |
| } else { |
| std::fill(region, region + problemSize * problemSize, goldenOutput); |
| e->setOutputFromMemory(outputIndex++, memory, offset, length, |
| &problemType.operandType); |
| } |
| } |
| }; |
| CHECK(inputIndex == model.inputCount()); |
| CHECK(outputIndex == model.outputCount()); |
| }; |
| |
| // Non-partitioned execution. |
| WrapperExecution e(&c); |
| ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e)); |
| ASSERT_EQ(e.compute(computeMode), Result::NO_ERROR); |
| |
| // Copy the outputs of the non-partitioned execution to a save area. |
| std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount()); |
| { |
| uint32_t outputIndex = 0; |
| for (const auto& desc : ioDescriptors) { |
| if (desc.mKind != InputOutputDescriptor::OUTPUT) { |
| continue; |
| } |
| const size_t outputOffset = outputIndex * problemSize * problemSize; |
| if (desc.getLocation() == InputOutputDescriptor::VECTOR) { |
| std::copy(desc.mVector.begin(), desc.mVector.end(), |
| nonPartitionedOutputs.begin() + outputOffset); |
| } else { |
| float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion)); |
| std::copy(region, region + problemSize * problemSize, |
| nonPartitionedOutputs.begin() + outputOffset); |
| } |
| #ifdef VERBOSE |
| { |
| std::cout << "nonpartitioned output[" << outputIndex << "] = "; |
| dump(nonPartitionedOutputs.begin() + outputOffset, |
| nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize); |
| } |
| #endif |
| outputIndex++; |
| } |
| } |
| |
| // Partitioned execution. |
| WrapperExecution e2(&c2); |
| ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2)); |
| ASSERT_EQ(e2.compute(computeMode), Result::NO_ERROR); |
| |
| // Compare the outputs of the partitioned execution to the save |
    // area containing the outputs of the non-partitioned execution.
| { |
| uint32_t outputIndex = 0; |
| for (const auto& desc : ioDescriptors) { |
| if (desc.mKind != InputOutputDescriptor::OUTPUT) { |
| continue; |
| } |
| SCOPED_TRACE(outputIndex); |
| const size_t outputOffset = outputIndex * problemSize * problemSize; |
| if (desc.getLocation() == InputOutputDescriptor::VECTOR) { |
| #ifdef VERBOSE |
| std::cout << " partitioned output[" << outputIndex << "] = "; |
| dump(desc.mVector.begin(), desc.mVector.end()); |
| #endif |
| ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(), |
| nonPartitionedOutputs.begin() + outputOffset)); |
| } else { |
| float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion)); |
| #ifdef VERBOSE |
| std::cout << "part output[" << outputIndex << "] = "; |
| dump(region, region + problemSize * problemSize); |
| #endif |
| ASSERT_TRUE(std::equal(region, region + problemSize * problemSize, |
| nonPartitionedOutputs.begin() + outputOffset)); |
| } |
| outputIndex++; |
| } |
| } |
| } |
| |
| } // namespace |
| } // namespace android |