Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
David Gross | 7e03e90 | 2017-09-13 10:45:21 -0700 | [diff] [blame] | 17 | #define LOG_TAG "ExecutionBuilder" |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 18 | |
David Gross | 7e03e90 | 2017-09-13 10:45:21 -0700 | [diff] [blame] | 19 | #include "ExecutionBuilder.h" |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 20 | |
Slava Shklyaev | a75fa2c | 2021-01-13 16:12:02 +0000 | [diff] [blame] | 21 | #include <ControlFlow.h> |
| 22 | #include <CpuExecutor.h> |
Slava Shklyaev | a75fa2c | 2021-01-13 16:12:02 +0000 | [diff] [blame] | 23 | #include <LegacyUtils.h> |
| 24 | #include <Tracing.h> |
Slava Shklyaev | c958cd8 | 2020-12-10 16:55:55 +0000 | [diff] [blame] | 25 | #include <android-base/logging.h> |
Michael Butler | 7d1ae27 | 2021-02-17 18:00:31 -0800 | [diff] [blame] | 26 | #include <nnapi/IBurst.h> |
Slava Shklyaev | 20b9bd1 | 2020-11-11 17:01:11 +0000 | [diff] [blame] | 27 | #include <nnapi/IPreparedModel.h> |
Xusong Wang | aa1ac51 | 2021-03-03 16:30:03 -0800 | [diff] [blame] | 28 | #include <nnapi/Types.h> |
Slava Shklyaev | 20b9bd1 | 2020-11-11 17:01:11 +0000 | [diff] [blame] | 29 | |
Xusong Wang | 001be4b | 2019-07-02 13:53:25 -0700 | [diff] [blame] | 30 | #include <algorithm> |
Michael Butler | bf25823 | 2019-12-16 18:32:45 -0800 | [diff] [blame] | 31 | #include <limits> |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 32 | #include <map> |
Xusong Wang | 001be4b | 2019-07-02 13:53:25 -0700 | [diff] [blame] | 33 | #include <memory> |
| 34 | #include <mutex> |
| 35 | #include <optional> |
| 36 | #include <string> |
| 37 | #include <thread> |
Slava Shklyaev | bae4514 | 2019-10-22 18:21:57 +0100 | [diff] [blame] | 38 | #include <tuple> |
Xusong Wang | 001be4b | 2019-07-02 13:53:25 -0700 | [diff] [blame] | 39 | #include <utility> |
| 40 | #include <vector> |
| 41 | |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 42 | #include "BurstBuilder.h" |
David Gross | 67f891d | 2017-09-10 14:31:58 -0700 | [diff] [blame] | 43 | #include "CompilationBuilder.h" |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 44 | #include "Manager.h" |
Xusong Wang | 001be4b | 2019-07-02 13:53:25 -0700 | [diff] [blame] | 45 | #include "ModelArgumentInfo.h" |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 46 | #include "ModelBuilder.h" |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 47 | #include "Telemetry.h" |
Slava Shklyaev | 8ea8dae | 2019-02-11 18:26:29 +0000 | [diff] [blame] | 48 | #include "TypeManager.h" |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 49 | |
| 50 | namespace android { |
| 51 | namespace nn { |
| 52 | |
// Partial validation of output shapes returned from driver, to ensure they
// conform to a very specific set of rules.
//
// Returns true iff `shapes` is consistent with `executionStatus` for the given
// model; on violation, NN_RET_CHECK logs the reason and returns false.
static bool validateOutputShapesFromDriver(ErrorStatus executionStatus, const ModelBuilder* model,
                                           const std::vector<OutputShape>& shapes) {
    // Enforces the following rules (some of which are from b/154054474):
    // - shapes vector is empty except in the case of NONE or OUTPUT_INSUFFICIENT_SIZE.
    //   If the vector is not empty, it must have as many entries as the step model has outputs.
    // - If NONE, then either shapes vector is empty, or every shape is
    //   marked isSufficient and, if a tensor, has known rank.
    // - If OUTPUT_INSUFFICIENT_SIZE, then the vector is not empty.  At least one entry
    //   is marked !isSufficient.
    switch (executionStatus) {
        case ErrorStatus::NONE: {
            // On success, shapes may be omitted entirely; if present, one entry per output.
            NN_RET_CHECK(shapes.size() == 0 || shapes.size() == model->outputCount())
                    << "With execution ErrorStatus " << executionStatus
                    << " output shapes vector must be empty or of length " << model->outputCount()
                    << " but has length " << shapes.size();
            // A successful execution must not report any insufficient output buffer.
            NN_RET_CHECK(std::all_of(shapes.begin(), shapes.end(),
                                     [](const OutputShape& shape) { return shape.isSufficient; }))
                    << "With execution ErrorStatus " << executionStatus
                    << " at least one output shape is unexpectedly marked !isSufficient";

            // Every tensor-typed output must come back with known rank
            // (a non-empty dimensions vector).
            const TypeManager* tm = TypeManager::get();
            for (uint32_t outputIndex = 0, outputCount = shapes.size(); outputIndex < outputCount;
                 ++outputIndex) {
                const Operand& outputOperand = model->getOutputOperand(outputIndex);
                NN_RET_CHECK(!tm->isTensorType(outputOperand.type) ||
                             (shapes[outputIndex].dimensions.size() != 0))
                        << "With execution ErrorStatus " << executionStatus << " output#"
                        << outputIndex << " shape unexpectedly has zero rank";
            }

            break;
        }
        case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE: {
            // The driver must report a shape for every output...
            NN_RET_CHECK(shapes.size() == model->outputCount())
                    << "With execution ErrorStatus " << executionStatus
                    << " output shapes vector must be of length " << model->outputCount()
                    << " but has length " << shapes.size();
            // ...and at least one of them must justify the status by being insufficient.
            NN_RET_CHECK(std::any_of(shapes.begin(), shapes.end(),
                                     [](const OutputShape& shape) { return !shape.isSufficient; }))
                    << "With execution ErrorStatus " << executionStatus
                    << " at least one output shape must have been marked !isSufficient";
            break;
        }
        default: {
            // Any other error status: no output shapes may be reported at all.
            NN_RET_CHECK(shapes.size() == 0)
                    << "With execution ErrorStatus " << executionStatus
                    << " output shapes vector must be empty but has length " << shapes.size();
            break;
        }
    }
    return true;
}
| 107 | static bool validateOutputShapesFromDriver(int executionResultCode, const ModelBuilder* model, |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 108 | const std::vector<OutputShape>& shapes) { |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 109 | return validateOutputShapesFromDriver(convertResultCodeToErrorStatus(executionResultCode), |
| 110 | model, shapes); |
| 111 | } |
| 112 | |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 113 | static MeasureTiming measureTiming(const ExecutionBuilder* execution) { |
| 114 | return execution->measureTiming() ? MeasureTiming::YES : MeasureTiming::NO; |
| 115 | } |
| 116 | |
David Gross | 6ff8800 | 2018-06-01 11:01:12 -0700 | [diff] [blame] | 117 | static bool checkDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType, |
| 118 | const char* tag, bool allowUnspecified) { |
| 119 | if (newType != nullptr) { |
Slava Shklyaev | 8ea8dae | 2019-02-11 18:26:29 +0000 | [diff] [blame] | 120 | const Extension::OperandTypeInformation* info = nullptr; |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 121 | if (isExtension(operand.type)) { |
Slava Shklyaev | 8ea8dae | 2019-02-11 18:26:29 +0000 | [diff] [blame] | 122 | NN_RET_CHECK(TypeManager::get()->getExtensionOperandTypeInfo(operand.type, &info)); |
| 123 | } |
| 124 | if (validateOperandType(*newType, info, tag, allowUnspecified) != |
| 125 | ANEURALNETWORKS_NO_ERROR) { |
David Gross | 6ff8800 | 2018-06-01 11:01:12 -0700 | [diff] [blame] | 126 | LOG(ERROR) << tag << ": Invalid newType"; |
| 127 | return false; |
| 128 | } |
| 129 | if (operand.dimensions.size() == 0) { |
| 130 | return true; |
| 131 | } |
| 132 | if (operand.dimensions.size() != newType->dimensionCount) { |
David Gross | a18493a | 2021-01-26 15:12:22 -0800 | [diff] [blame] | 133 | LOG(ERROR) << tag << ": Setting with incompatible dimension count (existing = " |
| 134 | << operand.dimensions.size() << ", new = " << newType->dimensionCount << ")"; |
David Gross | 6ff8800 | 2018-06-01 11:01:12 -0700 | [diff] [blame] | 135 | return false; |
| 136 | } |
| 137 | for (uint32_t i = 0; i < newType->dimensionCount; i++) { |
| 138 | if (operand.dimensions[i] != newType->dimensions[i] && operand.dimensions[i] != 0) { |
| 139 | LOG(ERROR) << tag << ": Overriding a fully specified dimension is disallowed"; |
| 140 | return false; |
| 141 | } |
| 142 | } |
| 143 | } else { |
Slava Shklyaev | 8ea8dae | 2019-02-11 18:26:29 +0000 | [diff] [blame] | 144 | if (!allowUnspecified && TypeManager::get()->isTensorType(operand.type) && |
| 145 | tensorHasUnspecifiedDimensions(operand)) { |
David Gross | 6ff8800 | 2018-06-01 11:01:12 -0700 | [diff] [blame] | 146 | LOG(ERROR) << tag << ": Setting with operand type that is not fully specified"; |
| 147 | return false; |
| 148 | } |
| 149 | } |
| 150 | return true; |
| 151 | } |
| 152 | |
// Constructs an execution over a finished compilation. Input/output slots are
// sized from the model and start out unspecified until set*() calls fill them.
ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation)
    : mCompilation(compilation),
      mModel(compilation->mModel),
      mPlan(&compilation->mPlan),
      // CPU fallback is allowed only if the compilation's partitioning scheme permits it.
      mAllowCpuFallback(DeviceManager::partitioningAllowsFallback(compilation->mPartitioning)),
      mInputs(mModel->inputCount()),
      mOutputs(mModel->outputCount()) {
    VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder with " << mInputs.size()
                    << " inputs and " << mOutputs.size() << " outputs";
}
| 163 | |
// Execution over a single-step plan; the plan must be simple (CHECK-enforced).
SimpleExecutionBuilder::SimpleExecutionBuilder(const CompilationBuilder* compilation)
    : ExecutionBuilder(compilation) {
    CHECK(mPlan->isSimple());
}
| 168 | |
// Execution over a multi-step (partitioned) plan; the plan must be compound
// (CHECK-enforced).
CompoundExecutionBuilder::CompoundExecutionBuilder(const CompilationBuilder* compilation)
    : ExecutionBuilder(compilation) {
    CHECK(mPlan->isCompound());
}
| 173 | |
Slava Shklyaev | 20bd535 | 2019-12-13 16:46:14 +0000 | [diff] [blame] | 174 | const ModelBuilder* ExecutionBuilder::getSourceModel(uint32_t index) const { |
| 175 | return mPlan->getSourceModels().getModel(index); |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 176 | } |
| 177 | |
// Binds a caller-provided buffer to input #index. May only be called before the
// computation starts and at most once per input. Returns an ANEURALNETWORKS_*
// result code from ModelArgumentInfo::createFromPointer on success path.
int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
                               const void* buffer, size_t length) {
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mInputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    // A null buffer marks the input as omitted; in that case unspecified
    // dimensions are permitted (allowUnspecified == true).
    if (!checkDimensionInfo(mModel->getInputOperand(index), type,
                            "ANeuralNetworksExecution_setInput", buffer == nullptr)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // Lengths are carried as uint32_t from here on; reject anything larger.
    if (length > 0xFFFFFFFF) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
        return ANEURALNETWORKS_BAD_DATA;
    }
    uint32_t l = static_cast<uint32_t>(length);
    // Each input may be provided at most once per execution.
    if (!mInputs[index].unspecified()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput called when an input has already been "
                      "provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mInputs[index]) = ModelArgumentInfo::createFromPointer(
            mModel->getInputOperand(index), type, const_cast<void*>(buffer), l,
            mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}
| 211 | |
// Binds a region of a runtime memory object to input #index. May only be called
// before the computation starts and at most once per input. Returns an
// ANEURALNETWORKS_* result code.
int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                         const RuntimeMemory* memory, size_t offset,
                                         size_t length) {
    // Should be similar to StepExecutor::setInputOrOutputFromMemory()

    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mInputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!checkDimensionInfo(mModel->getInputOperand(index), type,
                            "ANeuralNetworksExecution_setInputFromMemory", false)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // The memory's own validator enforces its access rules for this role/region.
    if (!memory->getValidator().validate(mCompilation, IOType::INPUT, index, type, offset,
                                         length)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // For some types of memory, e.g. MemoryRuntimeAHWB allocated from ANNMemory_createFromDesc, we
    // allow the client to specify offset == 0 && length == 0 indicating that the entire memory
    // region is used. We update the length here because the drivers are still expecting a real
    // length. For other memories that do not allow this semantic, it is checked in
    // MemoryValidatorBase::validate before reaching here.
    if (validate(memory->getMemory()).ok() && offset == 0 && length == 0) {
        length = memory->getSize();
    }
    // TODO validate the rest
    uint32_t poolIndex = mMemories.add(memory);
    // Each input may be provided at most once per execution.
    if (!mInputs[index].unspecified()) {
        LOG(ERROR)
                << "ANeuralNetworksExecution_setInputFromMemory called when an input has already "
                   "been provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mInputs[index]) =
            ModelArgumentInfo::createFromMemory(mModel->getInputOperand(index), type, poolIndex,
                                                offset, length, mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}
| 259 | |
Miao Wang | 484e970 | 2019-01-16 13:42:15 -0800 | [diff] [blame] | 260 | int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, |
| 261 | void* buffer, size_t length) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 262 | if (computationStarted()) { |
David Gross | d665cf1 | 2019-03-28 13:38:16 -0700 | [diff] [blame] | 263 | LOG(ERROR) << "ANeuralNetworksExecution_setOutput called after the " |
| 264 | "execution has started."; |
| 265 | return ANEURALNETWORKS_BAD_STATE; |
| 266 | } |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 267 | uint32_t count = static_cast<uint32_t>(mOutputs.size()); |
| 268 | if (index >= count) { |
David Gross | 7e03e90 | 2017-09-13 10:45:21 -0700 | [diff] [blame] | 269 | LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count; |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 270 | return ANEURALNETWORKS_BAD_DATA; |
| 271 | } |
David Gross | 6ff8800 | 2018-06-01 11:01:12 -0700 | [diff] [blame] | 272 | if (!checkDimensionInfo(mModel->getOutputOperand(index), type, |
| 273 | "ANeuralNetworksExecution_setOutput", true)) { |
| 274 | return ANEURALNETWORKS_BAD_DATA; |
Jean-Luc Brouillet | d409e2c | 2017-09-27 23:59:20 -0700 | [diff] [blame] | 275 | } |
| 276 | if (length > 0xFFFFFFFF) { |
| 277 | LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length; |
| 278 | return ANEURALNETWORKS_BAD_DATA; |
| 279 | } |
| 280 | uint32_t l = static_cast<uint32_t>(length); |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 281 | if (!mOutputs[index].unspecified()) { |
| 282 | LOG(ERROR) << "ANeuralNetworksExecution_setOutput called when an output has already been " |
| 283 | "provided"; |
| 284 | return ANEURALNETWORKS_BAD_STATE; |
| 285 | } |
| 286 | int n; |
Xusong Wang | 811f94f | 2021-02-16 10:43:33 -0800 | [diff] [blame] | 287 | std::tie(n, mOutputs[index]) = ModelArgumentInfo::createFromPointer( |
| 288 | mModel->getOutputOperand(index), type, buffer, l, mInputAndOutputPaddingEnabled); |
| 289 | mHasCalledSetInputOutput = true; |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 290 | return n; |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 291 | } |
| 292 | |
// Binds a region of a runtime memory object to output #index. May only be
// called before the computation starts and at most once per output. Returns an
// ANEURALNETWORKS_* result code.
int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                          const RuntimeMemory* memory, size_t offset,
                                          size_t length) {
    // Should be similar to StepExecutor::setInputOrOutputFromMemory()

    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    // allowUnspecified == true: output dimensions may be resolved at execution time.
    if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
                            "ANeuralNetworksExecution_setOutputFromMemory", true)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // The memory's own validator enforces its access rules for this role/region.
    if (!memory->getValidator().validate(mCompilation, IOType::OUTPUT, index, type, offset,
                                         length)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // For some types of memory, e.g. MemoryRuntimeAHWB allocated from ANNMemory_createFromDesc, we
    // allow the client to specify offset == 0 && length == 0 indicating that the entire memory
    // region is used. We update the length here because the drivers are still expecting a real
    // length. For other memories that do not allow this semantic, it is checked in
    // MemoryValidatorBase::validate before reaching here.
    if (validate(memory->getMemory()).ok() && offset == 0 && length == 0) {
        length = memory->getSize();
    }
    // TODO validate the rest
    uint32_t poolIndex = mMemories.add(memory);
    // Each output may be provided at most once per execution.
    if (!mOutputs[index].unspecified()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory called when an output has "
                      "already been provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mOutputs[index]) =
            ModelArgumentInfo::createFromMemory(mModel->getOutputOperand(index), type, poolIndex,
                                                offset, length, mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}
| 339 | |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 340 | int ExecutionBuilder::setMeasureTiming(bool measure) { |
David Gross | d665cf1 | 2019-03-28 13:38:16 -0700 | [diff] [blame] | 341 | if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) { |
| 342 | LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called on " |
| 343 | << "an ANeuralNetworksExecution created from an ANeuralNetworksCompilation " |
| 344 | << "that was not created by ANeuralNetworksCompilation_createForDevices " |
| 345 | << "with numDevices = 1"; |
| 346 | return ANEURALNETWORKS_BAD_DATA; |
| 347 | } |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 348 | if (computationStarted()) { |
David Gross | d665cf1 | 2019-03-28 13:38:16 -0700 | [diff] [blame] | 349 | LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called after the " |
| 350 | "execution has started."; |
| 351 | return ANEURALNETWORKS_BAD_STATE; |
| 352 | } |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 353 | mMeasureTiming = measure; |
| 354 | return ANEURALNETWORKS_NO_ERROR; |
| 355 | } |
| 356 | |
// Retrieves a measured duration for a successfully completed execution.
// durationCode selects among launched/fenced x on-hardware/in-driver timings.
// On any unavailable or error case, *duration is set to UINT64_MAX and an
// error code is returned.
int ExecutionBuilder::getDuration(int32_t durationCode, uint64_t* duration) const {
    if (!completed()) {
        LOG(ERROR) << "ANeuralNetworksExecution_getDuration called before the "
                      "execution has finished.";
        *duration = UINT64_MAX;
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (completedWith() != Completion::NO_ERROR) {
        LOG(ERROR) << "ANeuralNetworksExecution_getDuration called on an execution "
                      "that has encountered an error.";
        *duration = UINT64_MAX;
        return ANEURALNETWORKS_BAD_STATE;
    }

    // Timing was never requested: no measurement to report.
    if (!mMeasureTiming) {
        *duration = UINT64_MAX;
        return ANEURALNETWORKS_BAD_STATE;
    }

    // Default to the timing recorded for the unfenced path; a fenced execution
    // instead supplies its (launched, fenced) timing pair via the callback.
    Timing timingLaunched = mTimingWithoutFencedExecutionCallback;
    Timing timingFenced = timingLaunched;
    if (mFencedExecutionCallback != nullptr) {
        auto result = mFencedExecutionCallback();
        if (!result.has_value()) {
            LOG(ERROR) << "Fenced execution callback failed: " << result.error().message;
            *duration = UINT64_MAX;
            return ANEURALNETWORKS_BAD_STATE;
        }
        std::tie(timingLaunched, timingFenced) = std::move(result).value();
    }
    // Pick the requested duration field; an unknown code is a programming error.
    const OptionalDuration selectedDuration = [durationCode, &timingLaunched,
                                               &timingFenced]() -> OptionalDuration {
        switch (durationCode) {
            case ANEURALNETWORKS_DURATION_ON_HARDWARE:
                return timingLaunched.timeOnDevice;
            case ANEURALNETWORKS_DURATION_IN_DRIVER:
                return timingLaunched.timeInDriver;
            case ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE:
                return timingFenced.timeOnDevice;
            case ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER:
                return timingFenced.timeInDriver;
            default:
                LOG(FATAL) << "unexpected";
                return std::nullopt;
        }
    }();
    if (selectedDuration.has_value()) {
        // Saturate to UINT64_MAX - 1 so the "no timing" sentinel (UINT64_MAX)
        // stays distinguishable from a real (huge) measurement.
        constexpr uint64_t kMaxTiming = std::numeric_limits<uint64_t>::max() - 1;
        using CommonType = std::common_type_t<Duration::rep, uint64_t>;
        const auto count = std::min<CommonType>(selectedDuration.value().count(), kMaxTiming);
        *duration = static_cast<uint64_t>(count);
    } else {
        constexpr uint64_t kNoTiming = std::numeric_limits<uint64_t>::max();
        *duration = kNoTiming;
    }

    VLOG(EXECUTION) << "getDuration(" << durationCode << "): " << *duration;
    return ANEURALNETWORKS_NO_ERROR;
}
| 416 | |
Michael Butler | bf25823 | 2019-12-16 18:32:45 -0800 | [diff] [blame] | 417 | int ExecutionBuilder::setTimeoutDuration(uint64_t duration) { |
| 418 | if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) { |
| 419 | LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called on an ANeuralNetworksExecution " |
| 420 | "created from an ANeuralNetworksCompilation that was not created by " |
| 421 | "ANeuralNetworksCompilation_createForDevices with numDevices = 1"; |
| 422 | return ANEURALNETWORKS_BAD_DATA; |
| 423 | } |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 424 | if (computationStarted()) { |
Michael Butler | bf25823 | 2019-12-16 18:32:45 -0800 | [diff] [blame] | 425 | LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called after the execution has started."; |
| 426 | return ANEURALNETWORKS_BAD_STATE; |
| 427 | } |
Michael Butler | 3433401 | 2020-02-10 15:45:28 -0800 | [diff] [blame] | 428 | if (duration > 0) { |
| 429 | mTimeoutDuration = duration; |
| 430 | } else { |
| 431 | mTimeoutDuration.reset(); |
| 432 | } |
Michael Butler | bf25823 | 2019-12-16 18:32:45 -0800 | [diff] [blame] | 433 | return ANEURALNETWORKS_NO_ERROR; |
| 434 | } |
| 435 | |
// Returns the execution timeout, or std::nullopt if none has been set.
std::optional<uint64_t> ExecutionBuilder::getTimeoutDuration() const {
    return mTimeoutDuration;
}
| 439 | |
// Returns the time point recorded when computation began. Must only be called
// after the computation has started (CHECK-enforced).
TimePoint ExecutionBuilder::getComputeStartTimePoint() const {
    CHECK(computationStarted()) << "getComputeStartTimePoint called before "
                                << "execution has started.";
    return mComputeStartTimePoint;
}
| 445 | |
Slava Shklyaev | 1b72d33 | 2020-02-11 16:14:25 +0000 | [diff] [blame] | 446 | int ExecutionBuilder::setLoopTimeout(uint64_t duration) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 447 | if (computationStarted()) { |
Slava Shklyaev | 38abefa | 2020-03-20 14:30:53 +0000 | [diff] [blame] | 448 | LOG(ERROR) << "ANeuralNetworksExecution_setLoopTimeout called after the " |
| 449 | "execution has started."; |
| 450 | return ANEURALNETWORKS_BAD_STATE; |
| 451 | } |
Slava Shklyaev | 1b72d33 | 2020-02-11 16:14:25 +0000 | [diff] [blame] | 452 | if (duration > operation_while::kTimeoutNsMaximum) { |
| 453 | LOG(WARNING) << "ANeuralNetworksExecution_setLoopTimeout input exceeds the maximum allowed " |
| 454 | << "duration: " << duration << " > " << operation_while::kTimeoutNsMaximum; |
| 455 | duration = operation_while::kTimeoutNsMaximum; |
| 456 | } |
Slava Shklyaev | b0a5413 | 2020-02-11 16:12:27 +0000 | [diff] [blame] | 457 | mLoopTimeoutDuration = duration; |
Slava Shklyaev | 1b72d33 | 2020-02-11 16:14:25 +0000 | [diff] [blame] | 458 | return ANEURALNETWORKS_NO_ERROR; |
| 459 | } |
| 460 | |
Xusong Wang | 811f94f | 2021-02-16 10:43:33 -0800 | [diff] [blame] | 461 | int ExecutionBuilder::enableInputAndOutputPadding(bool enable) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 462 | if (computationStarted()) { |
Xusong Wang | 811f94f | 2021-02-16 10:43:33 -0800 | [diff] [blame] | 463 | LOG(ERROR) << "ANeuralNetworksExecution_enableInputAndOutputPadding called after the " |
| 464 | "execution has started."; |
| 465 | return ANEURALNETWORKS_BAD_STATE; |
| 466 | } |
| 467 | if (mHasCalledSetInputOutput) { |
| 468 | LOG(ERROR) << "ANeuralNetworksExecution_enableInputAndOutputPadding called after an input " |
| 469 | "or output is set."; |
| 470 | return ANEURALNETWORKS_BAD_STATE; |
| 471 | } |
| 472 | mInputAndOutputPaddingEnabled = enable; |
| 473 | return ANEURALNETWORKS_NO_ERROR; |
| 474 | } |
| 475 | |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 476 | int ExecutionBuilder::setReusable(bool reusable) { |
| 477 | if (computationStarted()) { |
| 478 | LOG(ERROR) << "ANeuralNetworksExecution_setReusable called after the " |
| 479 | "execution has started."; |
| 480 | return ANEURALNETWORKS_BAD_STATE; |
| 481 | } |
| 482 | mReusable = reusable; |
| 483 | return ANEURALNETWORKS_NO_ERROR; |
| 484 | } |
| 485 | |
Miao Wang | e179786 | 2021-10-21 19:35:04 +0000 | [diff] [blame] | 486 | int ExecutionBuilder::addExtensionAttribute(const char* extensionName, |
| 487 | uint16_t attributeCodeWithinExtension, const void* data, |
| 488 | size_t length) { |
| 489 | if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) { |
| 490 | LOG(ERROR) << "ANeuralNetworksExecution_addExtensionAttribute called on an " |
| 491 | "ANeuralNetworksExecution created from an ANeuralNetworksCompilation that " |
| 492 | "was not created by ANeuralNetworksCompilation_createForDevices with " |
| 493 | "numDevices = 1"; |
| 494 | return ANEURALNETWORKS_BAD_DATA; |
| 495 | } |
| 496 | if (computationStarted()) { |
| 497 | LOG(ERROR) << "ANeuralNetworksExecution_addExtensionAttribute called after the execution " |
| 498 | "has started."; |
| 499 | return ANEURALNETWORKS_BAD_STATE; |
| 500 | } |
| 501 | int32_t attributeToken = 0; |
| 502 | if (!TypeManager::get()->getExtensionType(extensionName, attributeCodeWithinExtension, |
| 503 | &attributeToken)) { |
| 504 | return ANEURALNETWORKS_BAD_DATA; |
| 505 | } |
| 506 | if (std::find_if(mMetadata.begin(), mMetadata.end(), [attributeToken](const auto& entry) { |
| 507 | return attributeToken == entry.token; |
| 508 | }) != mMetadata.end()) { |
| 509 | LOG(ERROR) << "ANeuralNetworksCompilation_addExtensionAttribute called more than once for " |
| 510 | "the same attribute"; |
| 511 | return ANEURALNETWORKS_BAD_DATA; |
| 512 | } |
| 513 | const uint8_t* dataPtr = reinterpret_cast<const uint8_t*>(data); |
| 514 | mMetadata.push_back({attributeToken, std::vector<uint8_t>(dataPtr, dataPtr + length)}); |
| 515 | return ANEURALNETWORKS_NO_ERROR; |
| 516 | } |
| 517 | |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 518 | int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 519 | if (!completed()) { |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 520 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the " |
| 521 | "execution has finished."; |
| 522 | return ANEURALNETWORKS_BAD_STATE; |
| 523 | } |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 524 | if (completedWith() == Completion::OTHER_ERROR) { |
| 525 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called on an execution " |
| 526 | "that has encountered an error."; |
Miao Wang | e022724 | 2019-12-20 16:07:39 -0800 | [diff] [blame] | 527 | return ANEURALNETWORKS_BAD_STATE; |
| 528 | } |
| 529 | |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 530 | uint32_t count = static_cast<uint32_t>(mOutputs.size()); |
| 531 | if (index >= count) { |
| 532 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions bad index " << index |
| 533 | << " " << count; |
| 534 | return ANEURALNETWORKS_BAD_DATA; |
| 535 | } |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 536 | const auto& dims = mOutputs[index].dimensions(); |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 537 | if (dims.empty()) { |
| 538 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions can not query " |
| 539 | "dimensions of a scalar"; |
| 540 | return ANEURALNETWORKS_BAD_DATA; |
| 541 | } |
| 542 | std::copy(dims.begin(), dims.end(), dimensions); |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 543 | return mOutputs[index].isSufficient() ? ANEURALNETWORKS_NO_ERROR |
| 544 | : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE; |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 545 | } |
| 546 | |
| 547 | int ExecutionBuilder::getOutputOperandRank(uint32_t index, uint32_t* rank) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 548 | if (!completed()) { |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 549 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called before the " |
| 550 | "execution has finished."; |
| 551 | return ANEURALNETWORKS_BAD_STATE; |
| 552 | } |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 553 | if (completedWith() == Completion::OTHER_ERROR) { |
| 554 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called on an execution " |
| 555 | "that has encountered an error."; |
Miao Wang | e022724 | 2019-12-20 16:07:39 -0800 | [diff] [blame] | 556 | return ANEURALNETWORKS_BAD_STATE; |
| 557 | } |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 558 | uint32_t count = static_cast<uint32_t>(mOutputs.size()); |
| 559 | if (index >= count) { |
| 560 | LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank bad index " << index << " " |
| 561 | << count; |
| 562 | return ANEURALNETWORKS_BAD_DATA; |
| 563 | } |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 564 | *rank = static_cast<uint32_t>(mOutputs[index].dimensions().size()); |
| 565 | return mOutputs[index].isSufficient() ? ANEURALNETWORKS_NO_ERROR |
| 566 | : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE; |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 567 | } |
| 568 | |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 569 | bool ExecutionBuilder::checkAndSetComputationState(const char* name) { |
Xusong Wang | 9ef2153 | 2021-03-18 10:38:50 -0700 | [diff] [blame] | 570 | std::lock_guard<std::mutex> lock(mStateMutex); |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 571 | if (!mReusable && mState == State::COMPLETED) { |
| 572 | LOG(ERROR) << "ANeuralNetworksExecution_" << name |
| 573 | << " called on a non-reusable execution that has already completed"; |
| 574 | return false; |
| 575 | } |
| 576 | if (mState == State::COMPUTATION) { |
| 577 | LOG(ERROR) << "ANeuralNetworksExecution_" << name |
| 578 | << " called on an execution that has already started"; |
| 579 | return false; |
| 580 | } |
| 581 | mState = State::COMPUTATION; |
| 582 | return true; |
| 583 | } |
| 584 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 585 | // TODO(b/132321855): validate that we have full types for all inputs and outputs, |
| 586 | // that the graph is not cyclic, |
| 587 | static int validateRequest(const std::vector<ModelArgumentInfo>& inputs, |
| 588 | const std::vector<ModelArgumentInfo>& outputs) { |
| 589 | for (auto& p : inputs) { |
| 590 | if (p.state() == ModelArgumentInfo::UNSPECIFIED) { |
| 591 | LOG(ERROR) << "ANeuralNetworksExecution starts compute when not all inputs specified"; |
| 592 | return ANEURALNETWORKS_BAD_DATA; |
| 593 | } |
| 594 | } |
| 595 | for (auto& p : outputs) { |
| 596 | if (p.state() == ModelArgumentInfo::UNSPECIFIED) { |
| 597 | LOG(ERROR) << "ANeuralNetworksExecution starts compute when not all outputs specified"; |
| 598 | return ANEURALNETWORKS_BAD_DATA; |
| 599 | } |
| 600 | } |
| 601 | return ANEURALNETWORKS_NO_ERROR; |
| 602 | } |
| 603 | |
| 604 | int ExecutionBuilder::getValidationResultCode() { |
| 605 | if (!mValidationResultCode.has_value()) { |
| 606 | mValidationResultCode = validateRequest(mInputs, mOutputs); |
| 607 | } |
| 608 | return mValidationResultCode.value(); |
| 609 | } |
| 610 | |
| 611 | bool ExecutionBuilder::areOutputsFullySpecified() { |
| 612 | if (!mOutputsFullySpecified.has_value()) { |
| 613 | mOutputsFullySpecified = true; |
| 614 | for (uint32_t i = 0; i < mOutputs.size(); i++) { |
| 615 | if (mOutputs[i].state() != ModelArgumentInfo::HAS_NO_VALUE && |
| 616 | TypeManager::get()->isTensorType(mModel->getOutputOperand(i).type) && |
| 617 | tensorHasUnspecifiedDimensions(mModel->getOutputOperand(i).type, |
| 618 | mOutputs[i].initialDimensions())) { |
| 619 | mOutputsFullySpecified = false; |
| 620 | break; |
| 621 | } |
| 622 | } |
| 623 | } |
| 624 | return mOutputsFullySpecified.value(); |
| 625 | } |
| 626 | |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 627 | int ExecutionBuilder::prepareForCompute(const char* name, ExecutionMode mode) { |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 628 | if (!checkAndSetComputationState(name)) { |
| 629 | return ANEURALNETWORKS_BAD_STATE; |
| 630 | } |
| 631 | if (int n = getValidationResultCode(); n != ANEURALNETWORKS_NO_ERROR) { |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 632 | return finishComputation(n, {}, mode); |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 633 | } |
| 634 | return ANEURALNETWORKS_NO_ERROR; |
| 635 | } |
| 636 | |
David Gross | c4172ec | 2017-10-04 23:05:05 -0700 | [diff] [blame] | 637 | // Attempt synchronous execution of full model on CPU. |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 638 | // TODO: How should we handle timing in this case? |
David Gross | d665cf1 | 2019-03-28 13:38:16 -0700 | [diff] [blame] | 639 | // For Q this is irrelevant: We only support timing in conjunction |
| 640 | // with an explicit device list; and we do not support CPU fallback |
| 641 | // with an explicit device list. See CompilationBuilder::mExplicitDeviceList. |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 642 | static std::tuple<int, std::vector<OutputShape>, Timing> cpuFallbackFull( |
| 643 | ExecutionBuilder* executionBuilder) { |
Xusong Wang | 8f6d379 | 2019-08-12 16:04:40 -0700 | [diff] [blame] | 644 | CHECK(executionBuilder != nullptr); |
Mika Raento | 0bb84c7 | 2018-04-23 22:06:45 +0100 | [diff] [blame] | 645 | NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull"); |
Miao Wang | f759e29 | 2017-10-04 19:45:45 -0700 | [diff] [blame] | 646 | VLOG(EXECUTION) << "cpuFallbackFull"; |
Michael Butler | a3632b6 | 2019-08-18 20:30:25 -0700 | [diff] [blame] | 647 | |
| 648 | // Get fallback executor. |
David Gross | c4172ec | 2017-10-04 23:05:05 -0700 | [diff] [blame] | 649 | StepExecutor executor(executionBuilder, executionBuilder->getModel(), |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 650 | DeviceManager::getCpuDevice(), /*preparedModel=*/nullptr, |
| 651 | /*reusable=*/false); |
David Gross | c4172ec | 2017-10-04 23:05:05 -0700 | [diff] [blame] | 652 | executor.mapInputsAndOutputsTrivially(); |
Michael Butler | a3632b6 | 2019-08-18 20:30:25 -0700 | [diff] [blame] | 653 | |
| 654 | // Attempt fallback execution. |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 655 | return executor.computeOnCpuFallback(); |
David Gross | c4172ec | 2017-10-04 23:05:05 -0700 | [diff] [blame] | 656 | } |
| 657 | |
| 658 | // Attempt synchronous execution on CPU. |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 659 | // TODO: How should we handle timing in this case? |
David Gross | d665cf1 | 2019-03-28 13:38:16 -0700 | [diff] [blame] | 660 | // For Q this is irrelevant: We only support timing in conjunction |
| 661 | // with an explicit device list; and we do not support CPU fallback |
| 662 | // with an explicit device list. See CompilationBuilder::mExplicitDeviceList. |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 663 | static std::tuple<int, std::vector<OutputShape>, Timing, std::shared_ptr<StepExecutor>> |
| 664 | cpuFallbackPartial(const ExecutionPlan& plan, |
| 665 | std::shared_ptr<ExecutionPlan::Controller> controller) { |
Mika Raento | 0bb84c7 | 2018-04-23 22:06:45 +0100 | [diff] [blame] | 666 | NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial"); |
Miao Wang | f759e29 | 2017-10-04 19:45:45 -0700 | [diff] [blame] | 667 | VLOG(EXECUTION) << "cpuFallbackPartial"; |
Michael Butler | a3632b6 | 2019-08-18 20:30:25 -0700 | [diff] [blame] | 668 | |
| 669 | // Get fallback executor. |
David Gross | c4172ec | 2017-10-04 23:05:05 -0700 | [diff] [blame] | 670 | std::shared_ptr<StepExecutor> executor; |
David Gross | bb38a42 | 2020-09-22 14:51:37 -0700 | [diff] [blame] | 671 | int n1 = plan.fallback(controller, &executor, nullptr, nullptr); |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 672 | if (n1 != ANEURALNETWORKS_NO_ERROR) { |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 673 | return {n1, {}, {}, nullptr}; |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 674 | } |
Michael Butler | a3632b6 | 2019-08-18 20:30:25 -0700 | [diff] [blame] | 675 | CHECK(executor != nullptr); |
| 676 | |
| 677 | // Attempt fallback execution. |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 678 | auto [n2, outputShapes, timing] = executor->computeOnCpuFallback(); |
| 679 | return {n2, std::move(outputShapes), timing, executor}; |
David Gross | c4172ec | 2017-10-04 23:05:05 -0700 | [diff] [blame] | 680 | } |
| 681 | |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 682 | std::tuple<int, std::vector<OutputShape>, Timing> SimpleExecutionBuilder::computeInternal( |
| 683 | const OptionalTimePoint& deadline, BurstBuilder* burstBuilder) { |
| 684 | NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "SimpleExecutionBuilder::computeInternal"); |
| 685 | VLOG(EXECUTION) << "SimpleExecutionBuilder::computeInternal"; |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 686 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 687 | if (mExecutor == nullptr) { |
| 688 | mExecutor = mPlan->makeStepExecutor(mReusable, this); |
| 689 | } |
| 690 | |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 691 | auto burstController = burstBuilder ? burstBuilder->getControllerAt(0) : nullptr; |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 692 | auto [n, outputShapes, timing] = mExecutor->compute(deadline, burstController); |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 693 | |
| 694 | if (n == ANEURALNETWORKS_NO_ERROR) { |
| 695 | return {n, std::move(outputShapes), timing}; |
| 696 | } |
| 697 | |
| 698 | // ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE is not recoverable. |
| 699 | if (n == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) { |
| 700 | return {n, std::move(outputShapes), {}}; |
| 701 | } |
| 702 | |
| 703 | // If CPU fallback is not allowed and there was an error, end execution. |
| 704 | if (!mAllowCpuFallback) { |
| 705 | return {n, {}, {}}; |
| 706 | } |
| 707 | |
| 708 | // If CPU execution was already attempted, do not perform CPU fallback. |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 709 | if (mExecutor->isCpu()) { |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 710 | return {n, {}, {}}; |
| 711 | } |
| 712 | |
| 713 | // If the code has reached this point, a potentially recoverable error |
| 714 | // occurred during the execution. Do an execution fallback on the CPU. |
| 715 | return cpuFallbackFull(this); |
| 716 | } |
| 717 | |
// Multi-partition synchronous compute: iterate over the plan's steps,
// executing each on its assigned device. On a recoverable step failure
// (when mAllowCpuFallback is set), first try a partial (per-step) CPU
// fallback; if that also fails, break out of the loop and retry the entire
// model on the CPU via cpuFallbackFull(). A step reporting
// OUTPUT_INSUFFICIENT_SIZE for dynamic temporaries (but not for main model
// outputs) is retried via the plan's fallback path with updated shapes.
// NOTE(review): control flow is a two-level loop with a `goto nextStep`
// from the inner fallback loop back to the bottom of the outer loop; the
// exact statement order is load-bearing.
std::tuple<int, std::vector<OutputShape>, Timing> CompoundExecutionBuilder::computeInternal(
        const OptionalTimePoint& deadline, BurstBuilder* burstBuilder) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "CompoundExecutionBuilder::computeInternal");
    VLOG(EXECUTION) << "CompoundExecutionBuilder::computeInternal (from plan, iteratively)";

    auto controller = mPlan->makeController(this, burstBuilder);
    std::vector<OutputShape> outputShapes = getInitialOutputShapes();

    // On this iteration, do I need to repeat the previous step because it
    // reported insufficient size?
    bool doInsufficientSizeFallback = false;

    while (true) {
        VLOG(EXECUTION) << "looking for next StepExecutor";

        // Get the current step of the execution.
        std::shared_ptr<StepExecutor> executor;
        SharedBurst burstController;
        int n = doInsufficientSizeFallback
                        ? mPlan->fallback(controller, &executor, &burstController, &outputShapes)
                        : mPlan->next(controller, &executor, &burstController, &outputShapes);
        doInsufficientSizeFallback = false;
        if (n != ANEURALNETWORKS_NO_ERROR) {
            // During the interpreted execution of control flow, a loop timeout
            // might occur in ExecutionPlan::next().
            bool missedDeadline = n == ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT ||
                                  n == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT;
            // Deadline misses are never recovered via CPU fallback.
            if (mAllowCpuFallback && !missedDeadline) break;
            return {n, {}, {}};
        }

        // If the code reached the end of the plan without error, then return
        // with no error.
        if (executor == nullptr) {
            return {ANEURALNETWORKS_NO_ERROR, outputShapes, {}};
        }
        const bool executorIsCpu = executor->isCpu();

        // Attempt to execute a single step of the execution.
        auto [stepN, stepOutputShapes, _] = executor->compute(deadline, burstController);

        // Update global outputs and dynamic temporaries.
        StepExecutor::UpdateOutputShapes updateOutputShapes = {};
        if (!executor->updateOutputShapes(stepN, stepOutputShapes, &outputShapes,
                                          &updateOutputShapes)) {
            stepN = ANEURALNETWORKS_OP_FAILED;
        }

        // If execution was successful, continue to next step.
        if (stepN == ANEURALNETWORKS_NO_ERROR) {
            if (updateOutputShapes.zeroSizedInput) {
                // We'll need to do full model CPU fallback
                VLOG(EXECUTION) << "updateOutputShapes.zeroSizedInput";
                stepN = ANEURALNETWORKS_OP_FAILED;
            } else {
                CHECK(executor->areDynamicTemporariesAllocated());
                continue;
            }
        }

        if (stepN == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
            VLOG(EXECUTION) << "OUTPUT_INSUFFICIENT_SIZE: " << toString(updateOutputShapes);
            if (updateOutputShapes.mainOutputInsufficient ||
                !updateOutputShapes.updatedDynamicTemporary) {
                // Either:
                // - At least one main model output is not of sufficient size; or
                // - we didn't learn anything new about dynamic temporaries.
                // Neither of these is recoverable, so end execution.
                return {stepN, outputShapes, {}};
            }
            // Every main model output is of sufficient size. This implies that
            // at least one dynamic temporary is not of sufficient size. This
            // is recoverable.
            doInsufficientSizeFallback = true;
            continue;
        }

        // If CPU fallback is not allowed and there was an error, end execution.
        if (!mAllowCpuFallback) {
            return {stepN, {}, {}};
        }

        // If CPU execution was already attempted, perform a full CPU fallback.
        if (executorIsCpu) {
            break;
        }

        // If the code reaches this point, attempt a partial fallback to CPU.
        CHECK(mAllowCpuFallback);
        if (updateOutputShapes.zeroSizedInput) {
            // Do not attempt a partial fallback.
            break;
        }
        // Inner loop: retry the current step on the CPU, repeating while it
        // reports recoverable insufficient-size results for dynamic temporaries.
        while (true) {
            auto [fallbackN, fallbackOutputShapes, _, fallbackExecutor] =
                    cpuFallbackPartial(*mPlan, controller);

            // Update global outputs and dynamic temporaries.
            StepExecutor::UpdateOutputShapes fallbackUpdateOutputShapes = {};
            if (fallbackExecutor != nullptr &&
                !fallbackExecutor->updateOutputShapes(fallbackN, fallbackOutputShapes,
                                                      &outputShapes, &fallbackUpdateOutputShapes)) {
                fallbackN = ANEURALNETWORKS_OP_FAILED;
            }

            // If execution was successful, continue to next step.
            if (fallbackN == ANEURALNETWORKS_NO_ERROR) {
                if (fallbackUpdateOutputShapes.zeroSizedInput) {
                    // We'll need to do full model CPU fallback
                    VLOG(EXECUTION) << "fallbackUpdateOutputShapes.zeroSizedInput";
                    fallbackN = ANEURALNETWORKS_OP_FAILED;
                    break;
                }
                CHECK(fallbackExecutor->areDynamicTemporariesAllocated());
                goto nextStep;
            }

            if (fallbackN == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
                VLOG(EXECUTION) << "OUTPUT_INSUFFICIENT_SIZE: "
                                << toString(fallbackUpdateOutputShapes);
                if (fallbackUpdateOutputShapes.mainOutputInsufficient ||
                    !fallbackUpdateOutputShapes.updatedDynamicTemporary) {
                    // Either:
                    // - At least one main model output is not of sufficient size; or
                    // - we didn't learn anything new about dynamic temporaries.
                    // Neither of these is recoverable, so end execution.
                    return {fallbackN, outputShapes, {}};
                }
                // Every main model output is of sufficient size. This implies
                // that at least one dynamic temporary is not of sufficient
                // size. This is recoverable.
                continue;
            }

            // If the code reaches this point, then there was an error with the
            // fallback. In this case, attempt full fallback.
            break;
        }

        // If the code reaches this point, then there was an error with the
        // fallback. In this case, attempt full fallback.
        break;

    nextStep:
        // Bottom of the outer loop
        continue;
    }

    // If the code has reached this point, a potentially recoverable error
    // occurred during the step executions. Instead, do a full execution
    // fallback on the CPU.
    return cpuFallbackFull(this);
}
| 871 | |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 872 | static bool waitForSyncFences(const std::vector<int>& waitFor) { |
| 873 | for (int syncFd : waitFor) { |
| 874 | if (syncFd > 0) { |
| 875 | auto r = syncWait(syncFd, -1); |
| 876 | if (r != FenceState::SIGNALED) { |
| 877 | VLOG(EXECUTION) << "syncWait failed, fd: " << syncFd; |
| 878 | return false; |
| 879 | } |
| 880 | } |
| 881 | } |
| 882 | return true; |
| 883 | } |
| 884 | |
| 885 | std::tuple<int, int, ExecuteFencedInfoCallback> SimpleExecutionBuilder::computeFencedInternal( |
| 886 | const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence, |
| 887 | const OptionalTimePoint& deadline) { |
| 888 | NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "SimpleExecutionBuilder::computeFencedInternal"); |
| 889 | VLOG(EXECUTION) << "SimpleExecutionBuilder::computeFencedInternal"; |
| 890 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 891 | if (mExecutor == nullptr) { |
| 892 | mExecutor = mPlan->makeStepExecutor(mReusable, this); |
| 893 | } |
| 894 | |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 895 | auto [n, syncFd, callback] = |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 896 | mExecutor->computeFenced(waitFor, timeoutDurationAfterFence, deadline); |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 897 | |
| 898 | if (n == ANEURALNETWORKS_NO_ERROR) { |
| 899 | return {ANEURALNETWORKS_NO_ERROR, syncFd, callback}; |
| 900 | } |
| 901 | |
| 902 | // If CPU fallback is not allowed and there was an error, end execution. |
| 903 | if (!mAllowCpuFallback) { |
| 904 | return {n, -1, nullptr}; |
| 905 | } |
| 906 | |
| 907 | // If CPU execution was already attempted, return from the function with an error. |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 908 | if (mExecutor->isCpu()) { |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 909 | return {n, -1, nullptr}; |
| 910 | } |
| 911 | |
| 912 | // If the code has reached this point, a potentially recoverable error |
| 913 | // occurred during the step executions. Instead, do a full execution |
| 914 | // fallback on the CPU. |
| 915 | VLOG(EXECUTION) << "Performing full fallback on the CPU."; |
| 916 | if (!waitForSyncFences(waitFor)) { |
| 917 | return {ANEURALNETWORKS_OP_FAILED, -1, nullptr}; |
| 918 | } |
| 919 | auto [fallbackN, fallbackOutputShapes, fallbackTiming] = cpuFallbackFull(this); |
| 920 | reportTimingWithoutFencedExecutionCallback(fallbackTiming); |
| 921 | return {fallbackN, -1, nullptr}; |
| 922 | } |
| 923 | |
// In case of partitioned execution, a computeFencedInternal call will return the sync
// fence and the fenced compute callback returned from the last partition.
// Any failed partition will result in the whole execution falling back to CPU if
// mAllowCpuFallback is set to true.
std::tuple<int, int, ExecuteFencedInfoCallback> CompoundExecutionBuilder::computeFencedInternal(
        const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence,
        const OptionalTimePoint& deadline) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "CompoundExecutionBuilder::computeFencedInternal");
    VLOG(EXECUTION) << "CompoundExecutionBuilder::computeFencedInternal (from plan, iteratively)";

    // We should have detected this earlier in the call chain and fallen back to
    // non-fenced execution. This is an implementation limitation: In order to
    // support dynamic temporaries in this code, we'd need to implement
    // something like the following:
    // - If a partition has outputs of unknown size, compute that partition in a
    //   non fenced fashion, just as if it were scheduled on a driver that does
    //   not support fenced execution.
    // - Implement something similar to the code in CompoundExecutionBuilder::computeInternal()
    //   that handles a step execution that fails with
    //   ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE.
    CHECK(!mCompilation->hasDynamicTemporaries());

    // Initiate waitForFds, syncFence for the first step. Each step waits on the
    // previous step's sync fence (if any); the first step waits on the caller-supplied fds.
    std::vector<int> waitForFds = waitFor;
    base::unique_fd syncFence;
    ExecuteFencedInfoCallback executeFencedInfoCallback;

    std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this, nullptr);
    while (true) {
        VLOG(EXECUTION) << "looking for next StepExecutor";

        // Get the current step of the execution.
        std::shared_ptr<StepExecutor> executor;
        int n = mPlan->next(controller, &executor, nullptr, nullptr, syncFence.get());
        if (n != ANEURALNETWORKS_NO_ERROR) {
            // During the interpreted execution of control flow, a loop timeout
            // might occur in ExecutionPlan::next(). A missed deadline is not
            // recoverable, so we do not attempt CPU fallback for it.
            bool missedDeadline = n == ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT ||
                                  n == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT;
            if (mAllowCpuFallback && !missedDeadline) break;
            // Return -1 for the sync fence fd, and nullptr for the callback.
            return {n, -1, nullptr};
        }

        // If the code reached the end of the plan without error, then return
        // with no error. Ownership of the final sync fence fd is transferred
        // to the caller via release().
        if (executor == nullptr) {
            return {ANEURALNETWORKS_NO_ERROR, syncFence.release(), executeFencedInfoCallback};
        }

        // Attempt to compute a single step of the execution.
        auto [stepN, syncFd, callback] =
                executor->computeFenced(waitForFds, timeoutDurationAfterFence, deadline);

        // Update waitForFds, syncFence for the next step. The unique_fd takes
        // ownership of the fd returned by the step (closing the previous one).
        syncFence.reset(syncFd);
        executeFencedInfoCallback = callback;
        waitForFds.clear();
        if (syncFd >= 0) {
            waitForFds = {syncFd};
        }

        // If execution was successful, continue to next step.
        if (stepN == ANEURALNETWORKS_NO_ERROR) {
            continue;
        }
        // If CPU fallback is not allowed and there was an error, end execution.
        if (!mAllowCpuFallback) {
            return {stepN, -1, nullptr};
        }

        // If the code reaches this point, then there was an error with the
        // fallback. In this case, attempt full fallback.
        break;
    }

    // If the code has reached this point, a potentially recoverable error
    // occurred during the step executions. Instead, do a full execution
    // fallback on the CPU. The CPU path is synchronous, so we must first wait
    // on the caller's original fds and then report no fence (-1) and no callback.
    VLOG(EXECUTION) << "Performing full fallback on the CPU.";
    if (!waitForSyncFences(waitFor)) {
        return {ANEURALNETWORKS_OP_FAILED, -1, nullptr};
    }
    auto [fullN, fullOutputShapes, _] = cpuFallbackFull(this);
    return {fullN, -1, nullptr};
}
| 1010 | |
Miao Wang | 9cc4676 | 2020-01-21 14:59:54 -0800 | [diff] [blame] | 1011 | int ExecutionBuilder::computeFenced(const std::vector<int>& waitFor, |
| 1012 | uint64_t timeoutDurationAfterFence, int* syncFence) { |
| 1013 | CHECK(syncFence != nullptr); |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1014 | NN_RETURN_IF_ERROR( |
| 1015 | prepareForCompute("startComputeWithDependencies", ExecutionMode::ASYNC_WITH_DEPS)); |
Miao Wang | 9cc4676 | 2020-01-21 14:59:54 -0800 | [diff] [blame] | 1016 | if (timeoutDurationAfterFence > 0) { |
| 1017 | if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) { |
| 1018 | LOG(ERROR) |
| 1019 | << "ANeuralNetworksExecution_startComputeWithDependencies called with non-zero " |
| 1020 | "duration on an ANeuralNetworksExecution " |
| 1021 | "created from an ANeuralNetworksCompilation that was not created by " |
| 1022 | "ANeuralNetworksCompilation_createForDevices with numDevices = 1"; |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1023 | return finishComputation(ANEURALNETWORKS_BAD_DATA, {}, ExecutionMode::ASYNC_WITH_DEPS); |
Miao Wang | 9cc4676 | 2020-01-21 14:59:54 -0800 | [diff] [blame] | 1024 | } |
Miao Wang | 9cc4676 | 2020-01-21 14:59:54 -0800 | [diff] [blame] | 1025 | } |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1026 | if (!areOutputsFullySpecified()) { |
| 1027 | LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies" |
| 1028 | " not all outputs have fully specified dimensions"; |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1029 | return finishComputation(ANEURALNETWORKS_BAD_DATA, {}, ExecutionMode::ASYNC_WITH_DEPS); |
Miao Wang | 9cc4676 | 2020-01-21 14:59:54 -0800 | [diff] [blame] | 1030 | } |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1031 | |
| 1032 | // Unlike ExecutionBuilder::compute, we do not need to reset output dimensions here because |
| 1033 | // fenced executions do not support dynamic output shape. |
| 1034 | |
Przemysław Szczepaniak | 13241e7 | 2020-11-27 19:51:47 +0000 | [diff] [blame] | 1035 | mComputeStartTimePoint = Clock::now(); |
Miao Wang | e022724 | 2019-12-20 16:07:39 -0800 | [diff] [blame] | 1036 | VLOG(EXECUTION) << "ExecutionBuilder::computeFenced"; |
| 1037 | int result; |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1038 | const auto deadline = makeDeadline(mTimeoutDuration); |
Xusong Wang | b121012 | 2021-07-02 11:16:23 -0700 | [diff] [blame] | 1039 | std::tie(result, *syncFence, mFencedExecutionCallback) = |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1040 | computeFencedInternal(waitFor, timeoutDurationAfterFence, deadline); |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1041 | // If there is an error, call finishComputation to mark the computation as completed. |
| 1042 | // Otherwise, we will call finishComputation in SyncFenceEvent::wait(). |
| 1043 | if (result != ANEURALNETWORKS_NO_ERROR) { |
| 1044 | // TODO(miaowang): support dynamic output shape only with memory domain. |
| 1045 | // For now just return empty output shapes. |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1046 | result = finishComputation(result, {}, ExecutionMode::ASYNC_WITH_DEPS); |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1047 | } |
Miao Wang | e022724 | 2019-12-20 16:07:39 -0800 | [diff] [blame] | 1048 | return result; |
| 1049 | } |
| 1050 | |
Slava Shklyaev | a6d95b1 | 2020-11-27 17:29:10 +0000 | [diff] [blame] | 1051 | int ExecutionBuilder::compute(std::shared_ptr<ExecutionCallback>* synchronizationCallback, |
Michael Butler | 2f6a628 | 2019-01-24 02:36:37 -0800 | [diff] [blame] | 1052 | BurstBuilder* burstBuilder) { |
Michael Butler | a2a0e9a | 2019-01-29 11:20:30 -0800 | [diff] [blame] | 1053 | CHECK(synchronizationCallback == nullptr || burstBuilder == nullptr) |
| 1054 | << "synchronizationCallback and burstBuilder cannot simultaneously be used"; |
Michael Butler | 2f6a628 | 2019-01-24 02:36:37 -0800 | [diff] [blame] | 1055 | |
David Gross | a203d9a | 2018-11-15 21:10:05 -0800 | [diff] [blame] | 1056 | const bool synchronous = (synchronizationCallback == nullptr); |
David Gross | a203d9a | 2018-11-15 21:10:05 -0800 | [diff] [blame] | 1057 | if (!synchronous) { |
| 1058 | *synchronizationCallback = nullptr; |
| 1059 | } |
David Gross | 15ebba4 | 2017-09-13 19:33:14 -0700 | [diff] [blame] | 1060 | |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1061 | const char* name = burstBuilder ? "burstCompute" : synchronous ? "compute" : "startCompute"; |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1062 | const ExecutionMode mode = burstBuilder |
| 1063 | ? ExecutionMode::BURST |
| 1064 | : synchronous ? ExecutionMode::SYNC : ExecutionMode::ASYNC; |
| 1065 | NN_RETURN_IF_ERROR(prepareForCompute(name, mode)); |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1066 | |
| 1067 | // Validate input memory dimensions. We need to do the validation in every computation because |
| 1068 | // the memory dimensions may change between computations. |
Jean-Luc Brouillet | 4fb1e85 | 2017-08-20 18:16:36 -0700 | [diff] [blame] | 1069 | for (auto& p : mInputs) { |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1070 | if (p.state() == ModelArgumentInfo::MEMORY) { |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1071 | const RuntimeMemory* memory = mMemories[p.locationAndLength().poolIndex]; |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1072 | if (!memory->getValidator().validateInputDimensions(p.dimensions())) { |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1073 | return finishComputation(ANEURALNETWORKS_OP_FAILED, {}, mode); |
Xusong Wang | d39f919 | 2019-11-27 15:45:42 -0800 | [diff] [blame] | 1074 | } |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 1075 | } |
| 1076 | } |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1077 | |
| 1078 | // Reset output dimensions. |
| 1079 | if (!areOutputsFullySpecified()) { |
| 1080 | for (auto& output : mOutputs) { |
| 1081 | output.reset(); |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 1082 | } |
| 1083 | } |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 1084 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1085 | const auto deadline = makeDeadline(mTimeoutDuration); |
Przemysław Szczepaniak | 13241e7 | 2020-11-27 19:51:47 +0000 | [diff] [blame] | 1086 | mComputeStartTimePoint = Clock::now(); |
David Gross | a203d9a | 2018-11-15 21:10:05 -0800 | [diff] [blame] | 1087 | if (synchronous) { |
David Gross | 0a96339 | 2020-09-18 14:16:31 -0700 | [diff] [blame] | 1088 | if (burstBuilder) { |
| 1089 | VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API, burst)"; |
| 1090 | } else { |
| 1091 | VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API)"; |
| 1092 | } |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1093 | const auto [n, outputShapes, timing] = computeInternal(deadline, burstBuilder); |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 1094 | if (mMeasureTiming) { |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1095 | mTimingWithoutFencedExecutionCallback = timing; |
David Gross | 257ee7a | 2019-01-23 14:59:10 -0800 | [diff] [blame] | 1096 | } |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1097 | return finishComputation(n, outputShapes, mode); |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1098 | } else /* asynchronous */ { |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1099 | // TODO: For asynchronous execution, entire plan-based-path should run in an |
| 1100 | // asynchronous thread -- take the asynchronous thread logic out of |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1101 | // CpuExecution::compute() and use it to wrap the plan-based-path. |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1102 | |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1103 | // TODO: use a thread pool |
Michael Butler | 31e672b | 2019-08-17 17:40:29 -0700 | [diff] [blame] | 1104 | // TODO(mikie): this could have NNTRACE so we could measure the overhead |
| 1105 | // of spinning up a new thread. |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1106 | |
| 1107 | // Prepare the callback for asynchronous execution. |
Slava Shklyaev | a6d95b1 | 2020-11-27 17:29:10 +0000 | [diff] [blame] | 1108 | // std::shared_ptr<ExecutionCallback> object is returned when the |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1109 | // execution has been successfully launched, otherwise a |
| 1110 | // nullptr is returned. The executionCallback is |
| 1111 | // abstracted in the NN API as an "event". |
Slava Shklyaev | a6d95b1 | 2020-11-27 17:29:10 +0000 | [diff] [blame] | 1112 | auto executionCallback = std::make_shared<ExecutionCallback>(); |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1113 | executionCallback->setOnFinish( |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1114 | [this, mode](ErrorStatus error, const std::vector<OutputShape>& outputShapes) { |
| 1115 | return finishComputation(error, outputShapes, mode); |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1116 | }); |
| 1117 | const auto asyncStartCompute = [this, deadline, executionCallback] { |
| 1118 | const auto [n, outputShapes, timing] = computeInternal(deadline, nullptr); |
| 1119 | const auto status = convertResultCodeToErrorStatus(n); |
| 1120 | executionCallback->notify(status, outputShapes, timing); |
| 1121 | }; |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1122 | if (DeviceManager::get()->syncExecRuntime()) { |
| 1123 | VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API, non-threaded)"; |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1124 | asyncStartCompute(); |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1125 | } else { |
| 1126 | VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API)"; |
Xusong Wang | 5e6ae1b | 2021-02-08 21:40:31 -0800 | [diff] [blame] | 1127 | std::thread asyncExecution(asyncStartCompute); |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 1128 | executionCallback->bindThread(std::move(asyncExecution)); |
Xusong Wang | 14d3148 | 2018-10-25 18:49:54 -0700 | [diff] [blame] | 1129 | } |
| 1130 | *synchronizationCallback = executionCallback; |
| 1131 | return ANEURALNETWORKS_NO_ERROR; |
David Gross | a203d9a | 2018-11-15 21:10:05 -0800 | [diff] [blame] | 1132 | } |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 1133 | } |
| 1134 | |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 1135 | std::vector<OutputShape> ExecutionBuilder::getInitialOutputShapes() const { |
| 1136 | std::vector<OutputShape> outputShapes(mOutputs.size()); |
| 1137 | std::transform(mOutputs.begin(), mOutputs.end(), outputShapes.begin(), |
| 1138 | [](const auto& x) -> OutputShape { |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1139 | std::vector<uint32_t> dimensions; |
Michael Butler | 0899ce9 | 2020-05-26 14:40:05 -0700 | [diff] [blame] | 1140 | if (x.state() != ModelArgumentInfo::HAS_NO_VALUE) { |
| 1141 | dimensions = x.dimensions(); |
| 1142 | } |
| 1143 | return {.dimensions = std::move(dimensions), .isSufficient = true}; |
Michael Butler | 1d541a3 | 2019-08-19 12:05:45 -0700 | [diff] [blame] | 1144 | }); |
| 1145 | return outputShapes; |
Xusong Wang | 94b62fc | 2019-01-21 23:16:20 -0800 | [diff] [blame] | 1146 | } |
| 1147 | |
| 1148 | // Check if the dimensions "to" is updatable by dimensions "from", where "from" must |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 1149 | // have no lower a specification level. |
Xusong Wang | 94b62fc | 2019-01-21 23:16:20 -0800 | [diff] [blame] | 1150 | static bool isUpdatable(const std::vector<uint32_t>& to, const std::vector<uint32_t>& from) { |
| 1151 | if (to.size() == 0) return true; |
| 1152 | NN_RET_CHECK_EQ(to.size(), from.size()); |
| 1153 | for (uint32_t i = 0; i < to.size(); i++) { |
| 1154 | NN_RET_CHECK(to[i] == from[i] || to[i] == 0); |
| 1155 | } |
| 1156 | return true; |
| 1157 | } |
| 1158 | |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 1159 | static bool isZeroSizedTensor(int executionResultCode, const OutputShape& outputShape) { |
| 1160 | return (executionResultCode == ANEURALNETWORKS_NO_ERROR) && outputShape.isSufficient && |
| 1161 | outputShape.dimensions.size() && |
| 1162 | (std::find(outputShape.dimensions.begin(), outputShape.dimensions.end(), uint32_t(0)) != |
| 1163 | outputShape.dimensions.end()); |
| 1164 | } |
| 1165 | |
// Merges the output shapes reported by the driver into this execution's
// output arguments. Performs two passes: first validate every shape (rank
// match, only unspecified dimensions overwritten, no uint32 size overflow),
// then commit all of them -- so a validation failure leaves mOutputs untouched.
// Returns false (via NN_RET_CHECK, which also logs) on any validation failure.
bool ExecutionBuilder::updateOutputShapes(ErrorStatus status,
                                          const std::vector<OutputShape>& outputShapes) {
    NN_RET_CHECK(validateOutputShapesFromDriver(status, mModel, outputShapes));

    // An empty vector means the driver reported no shape information; nothing to do.
    if (outputShapes.size() == 0) {
        return true;
    }
    NN_RET_CHECK_EQ(outputShapes.size(), mOutputs.size());
    for (uint32_t i = 0; i < outputShapes.size(); i++) {
        // Check if only unspecified dimensions or rank are overwritten.
        NN_RET_CHECK(isUpdatable(mOutputs[i].dimensions(), outputShapes[i].dimensions));
        const OperandType operandType = mModel->getOutputOperand(i).type;
        // Reject shapes whose total byte size would overflow uint32_t.
        NN_RET_CHECK(!TypeManager::get()->sizeOfDataOverflowsUInt32(operandType,
                                                                    outputShapes[i].dimensions));
    }
    // Commit pass: all shapes validated above, so this cannot partially fail.
    for (uint32_t i = 0; i < outputShapes.size(); i++) {
        mOutputs[i].dimensions() = outputShapes[i].dimensions;
        mOutputs[i].isSufficient() = outputShapes[i].isSufficient;
    }
    return true;
}
| 1187 | |
Xusong Wang | d39f919 | 2019-11-27 15:45:42 -0800 | [diff] [blame] | 1188 | bool ExecutionBuilder::updateMemories() { |
| 1189 | for (const auto& output : mOutputs) { |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1190 | if (output.state() != ModelArgumentInfo::MEMORY) continue; |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1191 | const RuntimeMemory* memory = mMemories[output.locationAndLength().poolIndex]; |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1192 | NN_RET_CHECK(memory->getValidator().updateMetadata({.dimensions = output.dimensions()})); |
Xusong Wang | d39f919 | 2019-11-27 15:45:42 -0800 | [diff] [blame] | 1193 | } |
| 1194 | return true; |
| 1195 | } |
| 1196 | |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1197 | int ExecutionBuilder::finishComputation(int result, const std::vector<OutputShape>& outputShapes, |
| 1198 | ExecutionMode mode) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1199 | const auto status = convertResultCodeToErrorStatus(result); |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 1200 | if (!updateOutputShapes(status, outputShapes) || !updateMemories()) { |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1201 | result = ANEURALNETWORKS_OP_FAILED; |
Xusong Wang | 94b62fc | 2019-01-21 23:16:20 -0800 | [diff] [blame] | 1202 | } |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1203 | bool success = result == ANEURALNETWORKS_NO_ERROR; |
Xusong Wang | d39f919 | 2019-11-27 15:45:42 -0800 | [diff] [blame] | 1204 | for (const auto& output : mOutputs) { |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1205 | if (output.state() != ModelArgumentInfo::MEMORY) continue; |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1206 | const RuntimeMemory* memory = mMemories[output.locationAndLength().poolIndex]; |
Xusong Wang | d39f919 | 2019-11-27 15:45:42 -0800 | [diff] [blame] | 1207 | memory->getValidator().setInitialized(success); |
| 1208 | } |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1209 | switch (result) { |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 1210 | case ANEURALNETWORKS_NO_ERROR: |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1211 | mCompletion = Completion::NO_ERROR; |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 1212 | break; |
| 1213 | case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE: |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1214 | mCompletion = Completion::OUTPUT_INSUFFICIENT_SIZE; |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 1215 | break; |
| 1216 | default: |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1217 | mCompletion = Completion::OTHER_ERROR; |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 1218 | break; |
| 1219 | } |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1220 | { |
Xusong Wang | 9ef2153 | 2021-03-18 10:38:50 -0700 | [diff] [blame] | 1221 | std::lock_guard<std::mutex> lock(mStateMutex); |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1222 | CHECK(mState != State::PREPARATION) |
| 1223 | << "ExecutionBuilder::finishComputation is called in the preparation state"; |
| 1224 | CHECK(mState != State::COMPLETED) << "ExecutionBuilder::finishComputation is called twice"; |
| 1225 | mState = State::COMPLETED; |
| 1226 | } |
Przemysław Szczepaniak | b9f01ee | 2020-11-27 11:31:39 +0000 | [diff] [blame] | 1227 | telemetry::onExecutionFinish(this, mode, result); |
Xusong Wang | 2d704a5 | 2021-02-08 20:48:10 -0800 | [diff] [blame] | 1228 | return result; |
Xusong Wang | 9d3c7bf | 2018-10-31 08:37:25 -0700 | [diff] [blame] | 1229 | } |
| 1230 | |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 1231 | std::string toString(StepExecutor::UpdateOutputShapes updateOutputShapes) { |
| 1232 | return "{ .updatedDynamicTemporary = " + |
| 1233 | std::to_string(updateOutputShapes.updatedDynamicTemporary) + |
| 1234 | ", .mainOutputInsufficient = " + |
| 1235 | std::to_string(updateOutputShapes.mainOutputInsufficient) + "}"; |
| 1236 | } |
| 1237 | |
// Propagates the output shapes reported by a driver for one step execution
// ("from") into the full execution's output shapes ("to"), and -- for
// partitioned execution -- redeclares dynamic temporaries whose shapes or
// sizes have been learned. "update" reports what changed, so the caller can
// decide whether to retry the plan. Returns false (via NN_RET_CHECK, which
// also logs) on any inconsistency.
bool StepExecutor::updateOutputShapes(int executionResultCode, const std::vector<OutputShape>& from,
                                      std::vector<OutputShape>* to, UpdateOutputShapes* update) {
    CHECK(update != nullptr);
    *update = {.updatedDynamicTemporary = false,
               .mainOutputInsufficient = false,
               .zeroSizedInput = false};

    NN_RET_CHECK(validateOutputShapesFromDriver(executionResultCode, mModel, from));

    // An empty "from" means the driver reported no shape information.
    if (from.size() == 0) {
        return true;
    }

    if (VLOG_IS_ON(EXECUTION)) {
        for (const auto& shape : from) {
            VLOG(EXECUTION) << "updateOutputShapes: " << shape;
        }
    }

    if (mExecutionStep != nullptr) {
        // Partitioned execution: translate step-model output indices to
        // main-model output indices before updating "to".
        const auto& indexMapping = mExecutionStep->getOutputIndexStepModelToMainModel();
        NN_RET_CHECK_LE(indexMapping.size(), from.size());
        for (uint32_t i = 0, e = indexMapping.size(); i < e; i++) {
            const uint32_t toIndex = indexMapping[i];
            NN_RET_CHECK_GT(to->size(), toIndex);
            NN_RET_CHECK(isUpdatable(to->at(toIndex).dimensions, from[i].dimensions));
            (*to)[toIndex] = from[i];
            update->mainOutputInsufficient |= !(*to)[toIndex].isSufficient;
            // A zero-sized main-model output that also feeds a downstream step
            // means that step's input is now known to be zero-sized.
            if (mExecutionStep->getModelOutputsThatAreDownstreamInputs().count(toIndex) &&
                isZeroSizedTensor(executionResultCode, from[i])) {
                update->zeroSizedInput = true;
            }
        }

        if (!mDynamicTemporaries->empty()) {
            // Build a reverse map from step-model output operand index to the
            // source-model temporary operand index.
            // TODO(b/157236079): Instead of computing this here, precompute it in ExecutionStep?
            std::map<uint32_t, uint32_t> operandIndexStepModelOutputToSourceModelTemp;
            for (const auto& entry : mExecutionStep->getTempsAsStepModelOutputs()) {
                operandIndexStepModelOutputToSourceModelTemp.emplace(entry.second, entry.first);
            }

            const uint32_t sourceModelIndex = mExecutionStep->getSourceModelIndex();
            for (uint32_t i = 0, e = mModel->outputCount(); i < e; i++) {
                const uint32_t stepModelOperandIndex = mModel->getOutputOperandIndex(i);
                const auto it =
                        operandIndexStepModelOutputToSourceModelTemp.find(stepModelOperandIndex);
                if (it == operandIndexStepModelOutputToSourceModelTemp.end()) {
                    continue;
                }
                const auto sourceOperandIndex = SourceOperandIndex(sourceModelIndex, it->second);
                VLOG(EXECUTION) << "updateOutputShapes checking to see if output#" << i
                                << " sourceOperandIndex = (" << sourceOperandIndex.first << ", "
                                << sourceOperandIndex.second << ") is a dynamic temporary";
                // This is a temporary, but it might not be a dynamic temporary.
                const auto loc = mDynamicTemporaries->lookup(sourceOperandIndex, false);
                if (loc == std::nullopt) {
                    continue;
                }
                NN_RET_CHECK(isUpdatable(*loc->dimensions, from[i].dimensions));
                bool changedShape = false;
                const uint32_t actualSize = TypeManager::get()->getSizeOfData(
                        mModel->getOperand(stepModelOperandIndex).type, from[i].dimensions);
                if (actualSize > 0) {
                    // Size is now fully known: redeclare with the exact size.
                    changedShape = mDynamicTemporaries->redeclare(sourceOperandIndex,
                                                                  from[i].dimensions, actualSize);
                } else if (!from[i].isSufficient) {
                    // Size still unknown but current allocation was too small:
                    // grow geometrically, guarding against uint32 overflow.
                    NN_RET_CHECK(loc->paddedLength < UINT32_MAX / 2)
                            << "output#" << i << " paddedLength overflow";
                    changedShape = mDynamicTemporaries->redeclare(
                            sourceOperandIndex, from[i].dimensions, 2 * loc->paddedLength);
                } else {
                    // The combination of not-fully-specified dimensions
                    // and isSufficient means that we have no
                    // information about whether the size of the dynamic
                    // temporary is adequate.
                    VLOG(EXECUTION) << "updateOutputShapes skipping redeclaration for output#" << i;
                    if (executionResultCode == ANEURALNETWORKS_NO_ERROR) {
                        NN_RET_CHECK(isZeroSizedTensor(executionResultCode, from[i]));
                        // This is a zero-sized tensor, and by
                        // definition, any dynamic temporary is an input
                        // to an execution step.
                        update->zeroSizedInput = true;
                    }
                }
                if (changedShape) {
                    // TODO: find a better place for this comment.
                    //
                    // isUpdatable(a, b) imposes a partial ordering a <=
                    // b. Every fully specified dimensions vector is an
                    // upper bound of that ordering. Therefore, any
                    // change in dimensions moves towards an upper
                    // bound, and hence there are a finite number of
                    // such changes possible.
                    //
                    // actualSize can only be computed from dimensions
                    // that are an upper bound. Therefore, once
                    // actualSize is computed, it will not change.
                    //
                    // If dimensions are not fully specified, and
                    // estimated size changes, it increases. There is
                    // an upper bound on estimated size to avoid
                    // overflow.
                    //
                    // Therefore, if we retry only when dimensions or
                    // size change, and we stop retrying if we would
                    // otherwise overflow, we should only retry a finite
                    // number of times.
                    update->updatedDynamicTemporary = true;
                }
            }
            mDynamicTemporaries->vlogDump("finished updateOutputShapes");
        }
    } else {
        // Unpartitioned execution: indices map one-to-one.
        NN_RET_CHECK_EQ(from.size(), to->size());
        for (uint32_t i = 0, e = from.size(); i < e; i++) {
            NN_RET_CHECK(isUpdatable(to->at(i).dimensions, from[i].dimensions));
            (*to)[i] = from[i];
        }
    }
    return true;
}
| 1359 | |
// Constructs an executor for a single step of an execution.
//
// - executionBuilder: the overall execution this step belongs to.
// - model: the (sub)model this step will run.
// - device / preparedModel: where, and with which compiled artifact, to run.
// - reusable: if true, a single RuntimeExecution is lazily created and cached
//   for repeated computations (see getReusableExecution()).
// - step / dynamicTemporaries: both null for a full-model (single-step)
//   executor, both non-null for a partitioned step -- enforced by CHECK_EQ
//   below. Reusable executors do not support dynamic temporaries.
StepExecutor::StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
                           std::shared_ptr<Device> device,
                           std::shared_ptr<RuntimePreparedModel> preparedModel, bool reusable,
                           const ExecutionStep* step, DynamicTemporaries* dynamicTemporaries)
    : mExecutionBuilder(executionBuilder),
      mExecutionStep(step),
      mDynamicTemporaries(dynamicTemporaries),
      mModel(model),
      mDevice(device),
      mPreparedModel(preparedModel),
      mInputs(model->inputCount()),
      mOutputs(model->outputCount()),
      mReusable(reusable) {
    CHECK(mDevice != nullptr);
    // A partition step and its dynamic-temporaries bookkeeping come as a pair.
    CHECK_EQ(step == nullptr, dynamicTemporaries == nullptr);
    // Reusable executions are only supported without dynamic temporaries.
    CHECK(!(reusable && dynamicTemporaries != nullptr));
    VLOG(EXECUTION) << "StepExecutor::StepExecutor with " << mInputs.size() << " inputs and "
                    << mOutputs.size() << " outputs";
}
David Gross | 2b79e90 | 2017-10-01 15:26:33 -0700 | [diff] [blame] | 1379 | |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 1380 | bool StepExecutor::areDynamicTemporariesAllocated() const { |
| 1381 | return !mDynamicTemporaries || mDynamicTemporaries->allocated(mExecutionStep->getIndex()); |
| 1382 | } |
| 1383 | |
// Maps the full set of user-visible inputs, outputs, and backing memory
// pools from the ExecutionBuilder directly onto this step -- used when the
// whole model runs as a single step (no partitioning).
void StepExecutor::mapInputsAndOutputsTrivially() {
    mInputs = mExecutionBuilder->mInputs;
    mOutputs = mExecutionBuilder->mOutputs;
    mMemories = mExecutionBuilder->mMemories;
}
| 1389 | |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1390 | void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput, |
David Gross | bb38a42 | 2020-09-22 14:51:37 -0700 | [diff] [blame] | 1391 | ModelArgumentInfo* executorInputOrOutput, |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1392 | const Dimensions* builderDimensions) { |
David Gross | bb38a42 | 2020-09-22 14:51:37 -0700 | [diff] [blame] | 1393 | auto updateDimensions = [executorInputOrOutput, builderDimensions] { |
| 1394 | if (!builderDimensions) { |
| 1395 | return; |
| 1396 | } |
| 1397 | executorInputOrOutput->dimensions() = *builderDimensions; |
| 1398 | }; |
| 1399 | |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1400 | *executorInputOrOutput = builderInputOrOutput; |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1401 | switch (executorInputOrOutput->state()) { |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1402 | default: |
Slava Shklyaev | 20bd535 | 2019-12-13 16:46:14 +0000 | [diff] [blame] | 1403 | CHECK(false) << "unexpected ModelArgumentInfo::state"; |
Slava Shklyaev | 6e4a90e | 2018-11-01 16:59:26 +0000 | [diff] [blame] | 1404 | break; |
David Gross | 6ff8800 | 2018-06-01 11:01:12 -0700 | [diff] [blame] | 1405 | case ModelArgumentInfo::HAS_NO_VALUE: |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1406 | case ModelArgumentInfo::UNSPECIFIED: |
| 1407 | break; |
David Gross | bb38a42 | 2020-09-22 14:51:37 -0700 | [diff] [blame] | 1408 | case ModelArgumentInfo::POINTER: |
| 1409 | updateDimensions(); |
| 1410 | break; |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1411 | case ModelArgumentInfo::MEMORY: { |
David Gross | bb38a42 | 2020-09-22 14:51:37 -0700 | [diff] [blame] | 1412 | updateDimensions(); |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1413 | const uint32_t builderPoolIndex = builderInputOrOutput.locationAndLength().poolIndex; |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1414 | const RuntimeMemory* memory = mExecutionBuilder->mMemories[builderPoolIndex]; |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1415 | const uint32_t executorPoolIndex = mMemories.add(memory); |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1416 | executorInputOrOutput->locationAndLength().poolIndex = executorPoolIndex; |
David Gross | df068ab | 2017-10-01 20:48:10 -0700 | [diff] [blame] | 1417 | break; |
| 1418 | } |
| 1419 | } |
| 1420 | } |
| 1421 | |
Slava Shklyaev | 20bd535 | 2019-12-13 16:46:14 +0000 | [diff] [blame] | 1422 | int StepExecutor::setInputOrOutputFromMemory(const Operand& inputOrOutputOperand, |
Slava Shklyaev | 9f29f43 | 2020-08-13 13:16:03 +0100 | [diff] [blame] | 1423 | const RuntimeMemory* memory, uint32_t offset, |
Xusong Wang | 3e3915b | 2021-02-23 18:34:17 -0800 | [diff] [blame] | 1424 | uint32_t length, const Dimensions& dimensions, |
Slava Shklyaev | 20bd535 | 2019-12-13 16:46:14 +0000 | [diff] [blame] | 1425 | ModelArgumentInfo* inputOrOutputInfo) { |
David Gross | 1472013 | 2017-10-02 14:40:09 -0700 | [diff] [blame] | 1426 | // Should be similar to |
| 1427 | // ExecutionBuilder::setInputFromMemory() |
| 1428 | // ExecutionBuilder::setOutputFromMemory() |
| 1429 | |
| 1430 | uint32_t poolIndex = mMemories.add(memory); |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1431 | CHECK(inputOrOutputInfo->unspecified()); |
| 1432 | int n; |
| 1433 | std::tie(n, *inputOrOutputInfo) = |
| 1434 | ModelArgumentInfo::createFromMemory(inputOrOutputOperand, |
Xusong Wang | 3e3915b | 2021-02-23 18:34:17 -0800 | [diff] [blame] | 1435 | /*type=*/nullptr, poolIndex, offset, length); |
David Gross | 948ffa8 | 2020-08-14 15:30:49 -0700 | [diff] [blame] | 1436 | if (n == ANEURALNETWORKS_NO_ERROR && dimensions.size()) { |
| 1437 | CHECK(isUpdatable(inputOrOutputInfo->dimensions(), dimensions)); |
| 1438 | inputOrOutputInfo->dimensions() = dimensions; |
| 1439 | } |
David Gross | 3f1b454 | 2020-03-12 17:27:48 -0700 | [diff] [blame] | 1440 | return n; |
David Gross | 1472013 | 2017-10-02 14:40:09 -0700 | [diff] [blame] | 1441 | } |
| 1442 | |
// Returns a human-readable "(d0, d1, ...)" rendering of a dimensions vector,
// e.g. {2, 3} -> "(2, 3)" and {} -> "()".
//
// Takes the vector by const reference (the original passed by value,
// copying the whole vector on every log line); also drops the stray ';'
// that followed the function body.
static std::string toString(const std::vector<uint32_t>& dimensions) {
    std::string ret = "(";
    bool wroteOne = false;
    for (uint32_t dimension : dimensions) {
        if (wroteOne) {
            ret += ", ";
        } else {
            wroteOne = true;
        }
        ret += std::to_string(dimension);
    }
    ret += ")";
    return ret;
}
| 1457 | |
// Dumps one line per argument to the EXECUTION verbose log, labeled
// "<kind>[<index>] = ". For POINTER arguments the buffer address is shown
// only in debug builds (SHOW_IF_DEBUG); for MEMORY arguments the pool index
// and offset are shown; dimensions are appended for both.
static void logArguments(const char* kind, const std::vector<ModelArgumentInfo>& args) {
    for (unsigned i = 0; i < args.size(); i++) {
        const auto& arg = args[i];
        std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
        switch (arg.state()) {
            case ModelArgumentInfo::POINTER:
                VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer()) << ") dim"
                                << toString(arg.dimensions());
                break;
            case ModelArgumentInfo::MEMORY:
                VLOG(EXECUTION) << prefix << "MEMORY("
                                << "pool=" << arg.locationAndLength().poolIndex << ", "
                                << "off=" << arg.locationAndLength().offset << ") dim"
                                << toString(arg.dimensions());
                break;
            case ModelArgumentInfo::HAS_NO_VALUE:
                VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
                break;
            case ModelArgumentInfo::UNSPECIFIED:
                VLOG(EXECUTION) << prefix << "UNSPECIFIED";
                break;
            default:
                // Unknown state: log the raw enum value rather than crashing.
                VLOG(EXECUTION) << prefix << "state(" << arg.state() << ")";
                break;
        }
    }
}
| 1485 | |
Slava Shklyaev | e5c52dd | 2019-01-18 14:48:07 +0000 | [diff] [blame] | 1486 | bool StepExecutor::isCpu() const { |
Xusong Wang | 001be4b | 2019-07-02 13:53:25 -0700 | [diff] [blame] | 1487 | return mDevice == DeviceManager::getCpuDevice(); |
Slava Shklyaev | e5c52dd | 2019-01-18 14:48:07 +0000 | [diff] [blame] | 1488 | } |
| 1489 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1490 | std::pair<int, std::shared_ptr<RuntimeExecution>> StepExecutor::getReusableExecution() { |
| 1491 | CHECK(mReusable); |
| 1492 | if (mExecution == nullptr) { |
| 1493 | CHECK(mPreparedModel != nullptr); |
| 1494 | const MeasureTiming measure = measureTiming(mExecutionBuilder); |
| 1495 | const OptionalDuration loopTimeoutDuration = |
| 1496 | makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration()); |
| 1497 | auto [n, execution] = mPreparedModel->createReusableExecution( |
Miao Wang | e179786 | 2021-10-21 19:35:04 +0000 | [diff] [blame] | 1498 | mInputs, mOutputs, mMemories.getObjects(), measure, loopTimeoutDuration, |
| 1499 | mExecutionBuilder->getMetadata()); |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1500 | if (n != ANEURALNETWORKS_NO_ERROR) { |
| 1501 | return {n, nullptr}; |
| 1502 | } |
| 1503 | mExecution = std::move(execution); |
| 1504 | } |
| 1505 | return {ANEURALNETWORKS_NO_ERROR, mExecution}; |
Xusong Wang | b3f9c62 | 2020-02-20 12:37:51 -0800 | [diff] [blame] | 1506 | } |
| 1507 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1508 | std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::compute( |
| 1509 | const OptionalTimePoint& deadline, const SharedBurst& burstController) { |
David Gross | a1d08a9 | 2017-11-15 18:04:01 -0800 | [diff] [blame] | 1510 | if (VLOG_IS_ON(EXECUTION)) { |
| 1511 | logArguments("input", mInputs); |
| 1512 | logArguments("output", mOutputs); |
| 1513 | } |
David Gross | 2b79e90 | 2017-10-01 15:26:33 -0700 | [diff] [blame] | 1514 | |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1515 | int n; |
| 1516 | std::vector<OutputShape> outputShapes; |
| 1517 | Timing timing; |
| 1518 | if (mReusable) { |
| 1519 | auto [nCreate, execution] = getReusableExecution(); |
| 1520 | if (nCreate != ANEURALNETWORKS_NO_ERROR) { |
| 1521 | return {nCreate, {}, {}}; |
| 1522 | } |
| 1523 | std::tie(n, outputShapes, timing) = execution->compute(burstController, deadline); |
| 1524 | } else { |
| 1525 | CHECK(mPreparedModel != nullptr); |
| 1526 | const MeasureTiming measure = measureTiming(mExecutionBuilder); |
| 1527 | const OptionalDuration loopTimeoutDuration = |
| 1528 | makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration()); |
Miao Wang | e179786 | 2021-10-21 19:35:04 +0000 | [diff] [blame] | 1529 | std::tie(n, outputShapes, timing) = mPreparedModel->execute( |
| 1530 | mInputs, mOutputs, mMemories.getObjects(), burstController, measure, deadline, |
| 1531 | loopTimeoutDuration, mExecutionBuilder->getMetadata()); |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1532 | } |
David Gross | fdad204 | 2020-03-31 16:11:16 -0700 | [diff] [blame] | 1533 | mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing); |
Xusong Wang | 1d789c7 | 2021-02-11 15:04:07 -0800 | [diff] [blame] | 1534 | return {n, std::move(outputShapes), std::move(timing)}; |
Jean-Luc Brouillet | 873c008 | 2017-07-25 00:17:50 -0700 | [diff] [blame] | 1535 | } |
| 1536 | |
// Runs this step with fenced (sync-fence based) synchronization.
//
// waitFor: sync fence FDs the execution must wait on before starting.
// timeoutDurationAfterFence: timeout budget after the input fences signal;
//     0 means "no timeout".
// deadline: overall deadline for the execution, if any.
//
// Returns {result code, sync fence fd (or -1 if none), callback for
// querying fenced-execution info such as timing}.
std::tuple<int, int, ExecuteFencedInfoCallback> StepExecutor::computeFenced(
        const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence,
        const OptionalTimePoint& deadline) {
    if (VLOG_IS_ON(EXECUTION)) {
        logArguments("input", mInputs);
        logArguments("output", mOutputs);
    }

    // 0 is mapped to an absent duration rather than a zero-length timeout.
    OptionalDuration optionalTimeoutDurationAfterFence;
    if (timeoutDurationAfterFence > 0) {
        optionalTimeoutDurationAfterFence = makeTimeoutDuration(timeoutDurationAfterFence);
    }

    int n;
    int syncFenceFd;
    ExecuteFencedInfoCallback executeFencedInfoCallback;
    Timing timing;
    if (mReusable) {
        // Reusable path: create (or fetch the cached) reusable execution.
        auto [nCreate, execution] = getReusableExecution();
        if (nCreate != ANEURALNETWORKS_NO_ERROR) {
            return {nCreate, -1, nullptr};
        }
        std::tie(n, syncFenceFd, executeFencedInfoCallback, timing) =
                execution->computeFenced(waitFor, deadline, optionalTimeoutDurationAfterFence);
    } else {
        // One-shot path: fenced execution directly on the prepared model.
        CHECK(mPreparedModel != nullptr);
        const MeasureTiming measure = measureTiming(mExecutionBuilder);
        const OptionalDuration loopTimeoutDuration =
                makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
        std::tie(n, syncFenceFd, executeFencedInfoCallback, timing) = mPreparedModel->executeFenced(
                mInputs, mOutputs, mMemories.getObjects(), waitFor, measure, deadline,
                loopTimeoutDuration, optionalTimeoutDurationAfterFence,
                mExecutionBuilder->getMetadata());
    }
    // If the execution produced neither a fence nor a fenced-info callback,
    // it completed synchronously and the timing is already final: report it
    // now. Otherwise timing is obtained later through the callback.
    if (syncFenceFd < 0 && executeFencedInfoCallback == nullptr) {
        mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing);
    }
    return {n, syncFenceFd, executeFencedInfoCallback};
}
| 1576 | |
// For cpuFallback{Partial,Full}, recompile the model on CPU and then start compute.
//
// Returns {result code, output shapes, timing}. Device-specific memories
// (those exposing an IBuffer) cannot be used directly by the CPU path, so
// their contents are staged through freshly allocated BLOB-mode
// AHardwareBuffers: inputs are copied in before execution and outputs are
// copied back afterwards.
std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::computeOnCpuFallback() {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "StepExecutor::computeOnCpuFallback");
    VLOG(EXECUTION) << "Re-compile the model on CPU";
    const ModelFactory makeModel = [this] { return mModel->makeModel(); };
    // TODO: Propagate user preference and compilation priority to this point instead of using
    // default values of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER and
    // ANEURALNETWORKS_PRIORITY_MEDIUM
    const ExecutionPreference preference =
            static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
    const Priority priority = convertToCanonicalPriority(ANEURALNETWORKS_PRIORITY_DEFAULT);
    auto [n, preparedModel] = DeviceManager::getCpuDevice()->prepareModel(
            makeModel, preference, priority, {}, {}, {}, {}, {});
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return {n, {}, {}};
    }

    // Prepare device memories for CPU fallback.
    std::vector<const RuntimeMemory*> memories = mMemories.getObjects();
    std::vector<bool> isUsedAsInput(memories.size(), false);
    std::vector<bool> isUsedAsOutput(memories.size(), false);
    std::vector<std::unique_ptr<RuntimeMemory>> blobAhwbs;

    // Mark the input and output usages.
    for (auto& input : mInputs) {
        if (input.state() == ModelArgumentInfo::MEMORY) {
            const uint32_t poolIndex = input.locationAndLength().poolIndex;
            isUsedAsInput[poolIndex] = true;
        }
    }
    for (auto& output : mOutputs) {
        if (output.state() == ModelArgumentInfo::MEMORY) {
            const uint32_t poolIndex = output.locationAndLength().poolIndex;
            // Cannot allocate output buffers with unknown shapes.
            if (mMemories[poolIndex]->getValidator().createdWithUnknownShape()) {
                LOG(ERROR) << "Cannot fallback to CPU because at least one of the output operands "
                              "has unknown shape.";
                return {ANEURALNETWORKS_OP_FAILED, {}, {}};
            }
            isUsedAsOutput[poolIndex] = true;
        }
    }

    // Allocate BLOB mode AHardwareBuffers and read the data from input device memories.
    for (uint32_t i = 0; i < memories.size(); i++) {
        const RuntimeMemory* memory = mMemories[i];
        if (memory->getIBuffer() != nullptr) {
            const uint32_t size = memory->getValidator().getMetadata().logicalSize;
            auto [nAhwb, blobAhwb] = MemoryRuntimeAHWB::create(size);
            if (nAhwb != ANEURALNETWORKS_NO_ERROR) {
                return {nAhwb, {}, {}};
            }
            if (isUsedAsInput[i]) {
                n = copyIBufferToMemory(memory->getIBuffer(), blobAhwb->getMemory());
                if (n != ANEURALNETWORKS_NO_ERROR) {
                    return {n, {}, {}};
                }
            }
            // Substitute the staging buffer for the device memory in this
            // execution's pool list; blobAhwbs keeps it alive until return.
            memories[i] = blobAhwb.get();
            blobAhwbs.push_back(std::move(blobAhwb));
        }
    }

    const MeasureTiming measure = measureTiming(mExecutionBuilder);
    const OptionalDuration loopTimeoutDuration =
            makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
    // Execute on the CPU-prepared model: no burst controller, no deadline,
    // no metadata.
    auto [nExecute, outputShapes, timing] = preparedModel->execute(
            mInputs, mOutputs, memories, nullptr, measure, {}, loopTimeoutDuration, {});
    mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing);
    if (nExecute != ANEURALNETWORKS_NO_ERROR) {
        return {nExecute, std::move(outputShapes), timing};
    }

    // Write back to output device memories.
    for (uint32_t i = 0; i < memories.size(); i++) {
        const RuntimeMemory* memory = mMemories[i];
        if (memory->getIBuffer() != nullptr && isUsedAsOutput[i]) {
            n = copyMemoryToIBuffer(memories[i]->getMemory(), memory->getIBuffer(), {});
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return {n, {}, {}};
            }
        }
    }
    return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}
| 1662 | |
Jean-Luc Brouillet | 5d5150d | 2017-09-02 23:05:37 -0700 | [diff] [blame] | 1663 | } // namespace nn |
| 1664 | } // namespace android |