/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "ExecutionBuilder"

#include "ExecutionBuilder.h"

#include <ControlFlow.h>
#include <CpuExecutor.h>
#include <LegacyUtils.h>
#include <Tracing.h>
#include <android-base/logging.h>
#include <nnapi/IBurst.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Types.h>

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>

#include "BurstBuilder.h"
#include "CompilationBuilder.h"
#include "Manager.h"
#include "ModelArgumentInfo.h"
#include "ModelBuilder.h"
#include "Telemetry.h"
#include "TypeManager.h"

namespace android {
namespace nn {

// Partial validation of output shapes returned from driver, to ensure they
// conform to a very specific set of rules.
static bool validateOutputShapesFromDriver(ErrorStatus executionStatus, const ModelBuilder* model,
                                           const std::vector<OutputShape>& shapes) {
    // Enforces the following rules (some of which are from b/154054474):
    // - shapes vector is empty except in the case of NONE or OUTPUT_INSUFFICIENT_SIZE.
    //   If the vector is not empty, it must have as many entries as the step model has outputs.
    // - If NONE, then either shapes vector is empty, or every shape is
    //   marked isSufficient and, if a tensor, has known rank.
    // - If OUTPUT_INSUFFICIENT_SIZE, then the vector is not empty. At least one entry
    //   is marked !isSufficient.
    switch (executionStatus) {
        case ErrorStatus::NONE: {
            NN_RET_CHECK(shapes.size() == 0 || shapes.size() == model->outputCount())
                    << "With execution ErrorStatus " << executionStatus
                    << " output shapes vector must be empty or of length " << model->outputCount()
                    << " but has length " << shapes.size();
            NN_RET_CHECK(std::all_of(shapes.begin(), shapes.end(),
                                     [](const OutputShape& shape) { return shape.isSufficient; }))
                    << "With execution ErrorStatus " << executionStatus
                    << " at least one output shape is unexpectedly marked !isSufficient";

            const TypeManager* tm = TypeManager::get();
            for (uint32_t outputIndex = 0, outputCount = shapes.size(); outputIndex < outputCount;
                 ++outputIndex) {
                const Operand& outputOperand = model->getOutputOperand(outputIndex);
                NN_RET_CHECK(!tm->isTensorType(outputOperand.type) ||
                             (shapes[outputIndex].dimensions.size() != 0))
                        << "With execution ErrorStatus " << executionStatus << " output#"
                        << outputIndex << " shape unexpectedly has zero rank";
            }

            break;
        }
        case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE: {
            NN_RET_CHECK(shapes.size() == model->outputCount())
                    << "With execution ErrorStatus " << executionStatus
                    << " output shapes vector must be of length " << model->outputCount()
                    << " but has length " << shapes.size();
            NN_RET_CHECK(std::any_of(shapes.begin(), shapes.end(),
                                     [](const OutputShape& shape) { return !shape.isSufficient; }))
                    << "With execution ErrorStatus " << executionStatus
                    << " at least one output shape must have been marked !isSufficient";
            break;
        }
        default: {
            NN_RET_CHECK(shapes.size() == 0)
                    << "With execution ErrorStatus " << executionStatus
                    << " output shapes vector must be empty but has length " << shapes.size();
            break;
        }
    }
    return true;
}

static bool validateOutputShapesFromDriver(int executionResultCode, const ModelBuilder* model,
                                           const std::vector<OutputShape>& shapes) {
    return validateOutputShapesFromDriver(convertResultCodeToErrorStatus(executionResultCode),
                                          model, shapes);
}
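
// Illustrative sketch (exposition only, not part of the runtime) of how the
// rules above classify (status, shapes) pairs, for a hypothetical step model
// "model" with two outputs:
//
//   // NONE with an empty vector is valid: the driver chose not to report shapes.
//   validateOutputShapesFromDriver(ErrorStatus::NONE, model, {});
//   // OUTPUT_INSUFFICIENT_SIZE requires a full-length vector in which at
//   // least one entry is marked !isSufficient:
//   validateOutputShapesFromDriver(
//           ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, model,
//           {{.dimensions = {2, 3}, .isSufficient = true},
//            {.dimensions = {8}, .isSufficient = false}});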

static MeasureTiming measureTiming(const ExecutionBuilder* execution) {
    return execution->measureTiming() ? MeasureTiming::YES : MeasureTiming::NO;
}

static bool checkDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType,
                               const char* tag, bool allowUnspecified) {
    if (newType != nullptr) {
        const Extension::OperandTypeInformation* info = nullptr;
        if (isExtension(operand.type)) {
            NN_RET_CHECK(TypeManager::get()->getExtensionOperandTypeInfo(operand.type, &info));
        }
        if (validateOperandType(*newType, info, tag, allowUnspecified) !=
            ANEURALNETWORKS_NO_ERROR) {
            LOG(ERROR) << tag << ": Invalid newType";
            return false;
        }
        if (operand.dimensions.size() == 0) {
            return true;
        }
        if (operand.dimensions.size() != newType->dimensionCount) {
            LOG(ERROR) << tag << ": Setting with incompatible dimension count (existing = "
                       << operand.dimensions.size() << ", new = " << newType->dimensionCount << ")";
            return false;
        }
        for (uint32_t i = 0; i < newType->dimensionCount; i++) {
            if (operand.dimensions[i] != newType->dimensions[i] && operand.dimensions[i] != 0) {
                LOG(ERROR) << tag << ": Overriding a fully specified dimension is disallowed";
                return false;
            }
        }
    } else {
        if (!allowUnspecified && TypeManager::get()->isTensorType(operand.type) &&
            tensorHasUnspecifiedDimensions(operand)) {
            LOG(ERROR) << tag << ": Setting with operand type that is not fully specified";
            return false;
        }
    }
    return true;
}
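
// Illustrative sketch (exposition only) of the override rules enforced above,
// for a hypothetical operand declared in the model as a tensor of dimensions
// {2, 0}, i.e. with the second dimension unspecified:
//
//   uint32_t dims[2] = {2, 3};
//   ANeuralNetworksOperandType newType = {.type = ANEURALNETWORKS_TENSOR_FLOAT32,
//                                         .dimensionCount = 2,
//                                         .dimensions = dims};
//   // Accepted: only the unspecified (0) dimension is being filled in.
//   // A newType of {4, 3} would be rejected: it overrides the fully
//   // specified first dimension.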

ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation)
    : mCompilation(compilation),
      mModel(compilation->mModel),
      mPlan(&compilation->mPlan),
      mAllowCpuFallback(DeviceManager::partitioningAllowsFallback(compilation->mPartitioning)),
      mInputs(mModel->inputCount()),
      mOutputs(mModel->outputCount()) {
    VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder with " << mInputs.size()
                    << " inputs and " << mOutputs.size() << " outputs";
}

SimpleExecutionBuilder::SimpleExecutionBuilder(const CompilationBuilder* compilation)
    : ExecutionBuilder(compilation) {
    CHECK(mPlan->isSimple());
}

CompoundExecutionBuilder::CompoundExecutionBuilder(const CompilationBuilder* compilation)
    : ExecutionBuilder(compilation) {
    CHECK(mPlan->isCompound());
}

const ModelBuilder* ExecutionBuilder::getSourceModel(uint32_t index) const {
    return mPlan->getSourceModels().getModel(index);
}

int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
                               const void* buffer, size_t length) {
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mInputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!checkDimensionInfo(mModel->getInputOperand(index), type,
                            "ANeuralNetworksExecution_setInput", buffer == nullptr)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (length > 0xFFFFFFFF) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
        return ANEURALNETWORKS_BAD_DATA;
    }
    uint32_t l = static_cast<uint32_t>(length);
    if (!mInputs[index].unspecified()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput called when an input has already been "
                      "provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mInputs[index]) = ModelArgumentInfo::createFromPointer(
            mModel->getInputOperand(index), type, const_cast<void*>(buffer), l,
            mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}
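
// Illustrative client-side sketch (exposition only; "compilation" and the
// input buffer are hypothetical) of the NDK call path that lands in setInput():
//
//   ANeuralNetworksExecution* execution = nullptr;
//   ANeuralNetworksExecution_create(compilation, &execution);
//   float input[4] = {1.f, 2.f, 3.f, 4.f};
//   // A null type keeps the operand type declared in the model.
//   ANeuralNetworksExecution_setInput(execution, /*index=*/0, /*type=*/nullptr,
//                                     input, sizeof(input));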

int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                         const RuntimeMemory* memory, size_t offset,
                                         size_t length) {
    // Should be similar to StepExecutor::setInputOrOutputFromMemory()

    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mInputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!checkDimensionInfo(mModel->getInputOperand(index), type,
                            "ANeuralNetworksExecution_setInputFromMemory", false)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!memory->getValidator().validate(mCompilation, IOType::INPUT, index, type, offset,
                                         length)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // For some types of memory, e.g. MemoryRuntimeAHWB allocated from ANNMemory_createFromDesc, we
    // allow the client to specify offset == 0 && length == 0 indicating that the entire memory
    // region is used. We update the length here because the drivers are still expecting a real
    // length. For other memories that do not allow this semantic, it is checked in
    // MemoryValidatorBase::validate before reaching here.
    if (validate(memory->getMemory()).ok() && offset == 0 && length == 0) {
        length = memory->getSize();
    }
    // TODO validate the rest
    uint32_t poolIndex = mMemories.add(memory);
    if (!mInputs[index].unspecified()) {
        LOG(ERROR)
                << "ANeuralNetworksExecution_setInputFromMemory called when an input has already "
                   "been provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mInputs[index]) =
            ModelArgumentInfo::createFromMemory(mModel->getInputOperand(index), type, poolIndex,
                                                offset, length, mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}
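
// Illustrative client-side sketch (exposition only; "memory" stands for a
// hypothetical ANeuralNetworksMemory* created by
// ANeuralNetworksMemory_createFromDesc): per the comment above, passing
// offset == 0 and length == 0 binds the entire memory region.
//
//   ANeuralNetworksExecution_setInputFromMemory(execution, /*index=*/0,
//                                               /*type=*/nullptr, memory,
//                                               /*offset=*/0, /*length=*/0);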

int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type,
                                void* buffer, size_t length) {
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutput called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
                            "ANeuralNetworksExecution_setOutput", true)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (length > 0xFFFFFFFF) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutput output exceeds max length " << length;
        return ANEURALNETWORKS_BAD_DATA;
    }
    uint32_t l = static_cast<uint32_t>(length);
    if (!mOutputs[index].unspecified()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutput called when an output has already been "
                      "provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mOutputs[index]) = ModelArgumentInfo::createFromPointer(
            mModel->getOutputOperand(index), type, buffer, l, mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}

int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                          const RuntimeMemory* memory, size_t offset,
                                          size_t length) {
    // Should be similar to StepExecutor::setInputOrOutputFromMemory()

    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
                            "ANeuralNetworksExecution_setOutputFromMemory", true)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!memory->getValidator().validate(mCompilation, IOType::OUTPUT, index, type, offset,
                                         length)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // For some types of memory, e.g. MemoryRuntimeAHWB allocated from ANNMemory_createFromDesc, we
    // allow the client to specify offset == 0 && length == 0 indicating that the entire memory
    // region is used. We update the length here because the drivers are still expecting a real
    // length. For other memories that do not allow this semantic, it is checked in
    // MemoryValidatorBase::validate before reaching here.
    if (validate(memory->getMemory()).ok() && offset == 0 && length == 0) {
        length = memory->getSize();
    }
    // TODO validate the rest
    uint32_t poolIndex = mMemories.add(memory);
    if (!mOutputs[index].unspecified()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory called when an output has "
                      "already been provided";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int n;
    std::tie(n, mOutputs[index]) =
            ModelArgumentInfo::createFromMemory(mModel->getOutputOperand(index), type, poolIndex,
                                                offset, length, mInputAndOutputPaddingEnabled);
    mHasCalledSetInputOutput = true;
    return n;
}

int ExecutionBuilder::setMeasureTiming(bool measure) {
    if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
        LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called on "
                   << "an ANeuralNetworksExecution created from an ANeuralNetworksCompilation "
                   << "that was not created by ANeuralNetworksCompilation_createForDevices "
                   << "with numDevices = 1";
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    mMeasureTiming = measure;
    return ANEURALNETWORKS_NO_ERROR;
}

int ExecutionBuilder::getDuration(int32_t durationCode, uint64_t* duration) const {
    if (!completed()) {
        LOG(ERROR) << "ANeuralNetworksExecution_getDuration called before the "
                      "execution has finished.";
        *duration = UINT64_MAX;
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (completedWith() != Completion::NO_ERROR) {
        LOG(ERROR) << "ANeuralNetworksExecution_getDuration called on an execution "
                      "that has encountered an error.";
        *duration = UINT64_MAX;
        return ANEURALNETWORKS_BAD_STATE;
    }

    if (!mMeasureTiming) {
        *duration = UINT64_MAX;
        return ANEURALNETWORKS_BAD_STATE;
    }

    Timing timingLaunched = mTimingWithoutFencedExecutionCallback;
    Timing timingFenced = timingLaunched;
    if (mFencedExecutionCallback != nullptr) {
        auto result = mFencedExecutionCallback();
        if (!result.has_value()) {
            LOG(ERROR) << "Fenced execution callback failed: " << result.error().message;
            *duration = UINT64_MAX;
            return ANEURALNETWORKS_BAD_STATE;
        }
        std::tie(timingLaunched, timingFenced) = std::move(result).value();
    }
    const OptionalDuration selectedDuration = [durationCode, &timingLaunched,
                                               &timingFenced]() -> OptionalDuration {
        switch (durationCode) {
            case ANEURALNETWORKS_DURATION_ON_HARDWARE:
                return timingLaunched.timeOnDevice;
            case ANEURALNETWORKS_DURATION_IN_DRIVER:
                return timingLaunched.timeInDriver;
            case ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE:
                return timingFenced.timeOnDevice;
            case ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER:
                return timingFenced.timeInDriver;
            default:
                LOG(FATAL) << "unexpected";
                return std::nullopt;
        }
    }();
    if (selectedDuration.has_value()) {
        constexpr uint64_t kMaxTiming = std::numeric_limits<uint64_t>::max() - 1;
        using CommonType = std::common_type_t<Duration::rep, uint64_t>;
        const auto count = std::min<CommonType>(selectedDuration.value().count(), kMaxTiming);
        *duration = static_cast<uint64_t>(count);
    } else {
        constexpr uint64_t kNoTiming = std::numeric_limits<uint64_t>::max();
        *duration = kNoTiming;
    }

    VLOG(EXECUTION) << "getDuration(" << durationCode << "): " << *duration;
    return ANEURALNETWORKS_NO_ERROR;
}
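
// Illustrative client-side sketch (exposition only): timing measurement
// requires a compilation created by ANeuralNetworksCompilation_createForDevices
// with exactly one device, and the duration may only be queried after the
// execution has completed.
//
//   ANeuralNetworksExecution_setMeasureTiming(execution, true);
//   ANeuralNetworksExecution_compute(execution);
//   uint64_t nanos = 0;
//   ANeuralNetworksExecution_getDuration(
//           execution, ANEURALNETWORKS_DURATION_ON_HARDWARE, &nanos);
//   // UINT64_MAX means no timing information is available.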

int ExecutionBuilder::setTimeoutDuration(uint64_t duration) {
    if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
        LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called on an ANeuralNetworksExecution "
                      "created from an ANeuralNetworksCompilation that was not created by "
                      "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setTimeout called after the execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (duration > 0) {
        mTimeoutDuration = duration;
    } else {
        mTimeoutDuration.reset();
    }
    return ANEURALNETWORKS_NO_ERROR;
}
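
// Illustrative client-side sketch (exposition only): a nonzero duration arms
// the execution deadline, and passing 0 clears a previously set timeout.
//
//   // Abandon the execution if it has not completed within ~100 ms.
//   ANeuralNetworksExecution_setTimeout(execution, 100'000'000 /* ns */);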

std::optional<uint64_t> ExecutionBuilder::getTimeoutDuration() const {
    return mTimeoutDuration;
}

TimePoint ExecutionBuilder::getComputeStartTimePoint() const {
    CHECK(computationStarted()) << "getComputeStartTimePoint called before "
                                << "execution has started.";
    return mComputeStartTimePoint;
}

int ExecutionBuilder::setLoopTimeout(uint64_t duration) {
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setLoopTimeout called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (duration > operation_while::kTimeoutNsMaximum) {
        LOG(WARNING) << "ANeuralNetworksExecution_setLoopTimeout input exceeds the maximum allowed "
                     << "duration: " << duration << " > " << operation_while::kTimeoutNsMaximum;
        duration = operation_while::kTimeoutNsMaximum;
    }
    mLoopTimeoutDuration = duration;
    return ANEURALNETWORKS_NO_ERROR;
}
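
// Illustrative client-side sketch (exposition only): the loop timeout bounds
// interpreted WHILE loops in control flow models; values above
// operation_while::kTimeoutNsMaximum are silently clamped, as shown above.
//
//   ANeuralNetworksExecution_setLoopTimeout(
//           execution, ANeuralNetworks_getDefaultLoopTimeout());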

int ExecutionBuilder::enableInputAndOutputPadding(bool enable) {
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_enableInputAndOutputPadding called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (mHasCalledSetInputOutput) {
        LOG(ERROR) << "ANeuralNetworksExecution_enableInputAndOutputPadding called after an input "
                      "or output is set.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    mInputAndOutputPaddingEnabled = enable;
    return ANEURALNETWORKS_NO_ERROR;
}
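
// Illustrative client-side sketch (exposition only): padding must be enabled
// before any input or output is bound, and the preferred padded sizes can be
// queried from the compilation.
//
//   ANeuralNetworksExecution_enableInputAndOutputPadding(execution, true);
//   uint32_t padding = 0;
//   ANeuralNetworksCompilation_getPreferredMemoryPaddingForInput(
//           compilation, /*index=*/0, &padding);
//   // Buffers passed to setInput/setOutput may now be longer than the
//   // operand's exact byte size.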

int ExecutionBuilder::setReusable(bool reusable) {
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_setReusable called after the "
                      "execution has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    mReusable = reusable;
    return ANEURALNETWORKS_NO_ERROR;
}
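
// Illustrative client-side sketch (exposition only): a reusable execution may
// be computed multiple times against the same bound inputs and outputs.
//
//   ANeuralNetworksExecution_setReusable(execution, true);
//   ANeuralNetworksExecution_compute(execution);  // first computation
//   ANeuralNetworksExecution_compute(execution);  // reuses the same execution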

int ExecutionBuilder::addExtensionAttribute(const char* extensionName,
                                            uint16_t attributeCodeWithinExtension, const void* data,
                                            size_t length) {
    if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
        LOG(ERROR) << "ANeuralNetworksExecution_addExtensionAttribute called on an "
                      "ANeuralNetworksExecution created from an ANeuralNetworksCompilation that "
                      "was not created by ANeuralNetworksCompilation_createForDevices with "
                      "numDevices = 1";
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (computationStarted()) {
        LOG(ERROR) << "ANeuralNetworksExecution_addExtensionAttribute called after the execution "
                      "has started.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    int32_t attributeToken = 0;
    if (!TypeManager::get()->getExtensionType(extensionName, attributeCodeWithinExtension,
                                              &attributeToken)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (std::find_if(mMetadata.begin(), mMetadata.end(), [attributeToken](const auto& entry) {
            return attributeToken == entry.token;
        }) != mMetadata.end()) {
        LOG(ERROR) << "ANeuralNetworksExecution_addExtensionAttribute called more than once for "
                      "the same attribute";
        return ANEURALNETWORKS_BAD_DATA;
    }
    const uint8_t* dataPtr = reinterpret_cast<const uint8_t*>(data);
    mMetadata.push_back({attributeToken, std::vector<uint8_t>(dataPtr, dataPtr + length)});
    return ANEURALNETWORKS_NO_ERROR;
}

int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) {
    if (!completed()) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the "
                      "execution has finished.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (completedWith() == Completion::OTHER_ERROR) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called on an execution "
                      "that has encountered an error.";
        return ANEURALNETWORKS_BAD_STATE;
    }

    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions bad index " << index
                   << " " << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    const auto& dims = mOutputs[index].dimensions();
    if (dims.empty()) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions can not query "
                      "dimensions of a scalar";
        return ANEURALNETWORKS_BAD_DATA;
    }
    std::copy(dims.begin(), dims.end(), dimensions);
    return mOutputs[index].isSufficient() ? ANEURALNETWORKS_NO_ERROR
                                          : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
}

int ExecutionBuilder::getOutputOperandRank(uint32_t index, uint32_t* rank) {
    if (!completed()) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called before the "
                      "execution has finished.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (completedWith() == Completion::OTHER_ERROR) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called on an execution "
                      "that has encountered an error.";
        return ANEURALNETWORKS_BAD_STATE;
    }
    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    *rank = static_cast<uint32_t>(mOutputs[index].dimensions().size());
    return mOutputs[index].isSufficient() ? ANEURALNETWORKS_NO_ERROR
                                          : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
}
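
// Illustrative client-side sketch (exposition only) of recovering from
// ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE on a hypothetical single-output
// model by querying the actual shape and retrying with a larger buffer:
//
//   int n = ANeuralNetworksExecution_compute(execution);
//   if (n == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
//       uint32_t rank = 0;
//       ANeuralNetworksExecution_getOutputOperandRank(execution, 0, &rank);
//       std::vector<uint32_t> dims(rank);
//       ANeuralNetworksExecution_getOutputOperandDimensions(execution, 0,
//                                                           dims.data());
//       // Reallocate the output buffer to match dims and run a fresh execution.
//   }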

bool ExecutionBuilder::checkAndSetComputationState(const char* name) {
    std::lock_guard<std::mutex> lock(mStateMutex);
    if (!mReusable && mState == State::COMPLETED) {
        LOG(ERROR) << "ANeuralNetworksExecution_" << name
                   << " called on a non-reusable execution that has already completed";
        return false;
    }
    if (mState == State::COMPUTATION) {
        LOG(ERROR) << "ANeuralNetworksExecution_" << name
                   << " called on an execution that has already started";
        return false;
    }
    mState = State::COMPUTATION;
    return true;
}

// TODO(b/132321855): validate that we have full types for all inputs and outputs,
// and that the graph is not cyclic.
static int validateRequest(const std::vector<ModelArgumentInfo>& inputs,
                           const std::vector<ModelArgumentInfo>& outputs) {
    for (auto& p : inputs) {
        if (p.state() == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution starts compute when not all inputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }
    for (auto& p : outputs) {
        if (p.state() == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution starts compute when not all outputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }
    return ANEURALNETWORKS_NO_ERROR;
}

int ExecutionBuilder::getValidationResultCode() {
    if (!mValidationResultCode.has_value()) {
        mValidationResultCode = validateRequest(mInputs, mOutputs);
    }
    return mValidationResultCode.value();
}

bool ExecutionBuilder::areOutputsFullySpecified() {
    if (!mOutputsFullySpecified.has_value()) {
        mOutputsFullySpecified = true;
        for (uint32_t i = 0; i < mOutputs.size(); i++) {
            if (mOutputs[i].state() != ModelArgumentInfo::HAS_NO_VALUE &&
                TypeManager::get()->isTensorType(mModel->getOutputOperand(i).type) &&
                tensorHasUnspecifiedDimensions(mModel->getOutputOperand(i).type,
                                               mOutputs[i].initialDimensions())) {
                mOutputsFullySpecified = false;
                break;
            }
        }
    }
    return mOutputsFullySpecified.value();
}

int ExecutionBuilder::prepareForCompute(const char* name, ExecutionMode mode) {
    if (!checkAndSetComputationState(name)) {
        return ANEURALNETWORKS_BAD_STATE;
    }
    if (int n = getValidationResultCode(); n != ANEURALNETWORKS_NO_ERROR) {
        return finishComputation(n, {}, mode);
    }
    return ANEURALNETWORKS_NO_ERROR;
}

// Attempt synchronous execution of full model on CPU.
// TODO: How should we handle timing in this case?
// For Q this is irrelevant: We only support timing in conjunction
// with an explicit device list; and we do not support CPU fallback
// with an explicit device list. See CompilationBuilder::mExplicitDeviceList.
static std::tuple<int, std::vector<OutputShape>, Timing> cpuFallbackFull(
        ExecutionBuilder* executionBuilder) {
    CHECK(executionBuilder != nullptr);
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull");
    VLOG(EXECUTION) << "cpuFallbackFull";

    // Get fallback executor.
    StepExecutor executor(executionBuilder, executionBuilder->getModel(),
                          DeviceManager::getCpuDevice(), /*preparedModel=*/nullptr,
                          /*reusable=*/false);
    executor.mapInputsAndOutputsTrivially();

    // Attempt fallback execution.
    return executor.computeOnCpuFallback();
}

// Attempt synchronous execution on CPU.
// TODO: How should we handle timing in this case?
// For Q this is irrelevant: We only support timing in conjunction
// with an explicit device list; and we do not support CPU fallback
// with an explicit device list. See CompilationBuilder::mExplicitDeviceList.
static std::tuple<int, std::vector<OutputShape>, Timing, std::shared_ptr<StepExecutor>>
cpuFallbackPartial(const ExecutionPlan& plan,
                   std::shared_ptr<ExecutionPlan::Controller> controller) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
    VLOG(EXECUTION) << "cpuFallbackPartial";

    // Get fallback executor.
    std::shared_ptr<StepExecutor> executor;
    int n1 = plan.fallback(controller, &executor, nullptr, nullptr);
    if (n1 != ANEURALNETWORKS_NO_ERROR) {
        return {n1, {}, {}, nullptr};
    }
    CHECK(executor != nullptr);

    // Attempt fallback execution.
    auto [n2, outputShapes, timing] = executor->computeOnCpuFallback();
    return {n2, std::move(outputShapes), timing, executor};
}

std::tuple<int, std::vector<OutputShape>, Timing> SimpleExecutionBuilder::computeInternal(
        const OptionalTimePoint& deadline, BurstBuilder* burstBuilder) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "SimpleExecutionBuilder::computeInternal");
    VLOG(EXECUTION) << "SimpleExecutionBuilder::computeInternal";

    if (mExecutor == nullptr) {
        mExecutor = mPlan->makeStepExecutor(mReusable, this);
    }

    auto burstController = burstBuilder ? burstBuilder->getControllerAt(0) : nullptr;
    auto [n, outputShapes, timing] = mExecutor->compute(deadline, burstController);

    if (n == ANEURALNETWORKS_NO_ERROR) {
        return {n, std::move(outputShapes), timing};
    }

    // ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE is not recoverable.
    if (n == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
        return {n, std::move(outputShapes), {}};
    }

    // If CPU fallback is not allowed and there was an error, end execution.
    if (!mAllowCpuFallback) {
        return {n, {}, {}};
    }

    // If CPU execution was already attempted, do not perform CPU fallback.
    if (mExecutor->isCpu()) {
        return {n, {}, {}};
    }

    // If the code has reached this point, a potentially recoverable error
    // occurred during the execution. Do an execution fallback on the CPU.
    return cpuFallbackFull(this);
}

std::tuple<int, std::vector<OutputShape>, Timing> CompoundExecutionBuilder::computeInternal(
        const OptionalTimePoint& deadline, BurstBuilder* burstBuilder) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "CompoundExecutionBuilder::computeInternal");
    VLOG(EXECUTION) << "CompoundExecutionBuilder::computeInternal (from plan, iteratively)";

    auto controller = mPlan->makeController(this, burstBuilder);
    std::vector<OutputShape> outputShapes = getInitialOutputShapes();

    // On this iteration, do I need to repeat the previous step because it
    // reported insufficient size?
    bool doInsufficientSizeFallback = false;

    while (true) {
        VLOG(EXECUTION) << "looking for next StepExecutor";

        // Get the current step of the execution.
        std::shared_ptr<StepExecutor> executor;
        SharedBurst burstController;
        int n = doInsufficientSizeFallback
                        ? mPlan->fallback(controller, &executor, &burstController, &outputShapes)
                        : mPlan->next(controller, &executor, &burstController, &outputShapes);
        doInsufficientSizeFallback = false;
        if (n != ANEURALNETWORKS_NO_ERROR) {
            // During the interpreted execution of control flow, a loop timeout
            // might occur in ExecutionPlan::next().
            bool missedDeadline = n == ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT ||
                                  n == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT;
            if (mAllowCpuFallback && !missedDeadline) break;
            return {n, {}, {}};
        }

        // If the code reached the end of the plan without error, then return
        // with no error.
        if (executor == nullptr) {
            return {ANEURALNETWORKS_NO_ERROR, outputShapes, {}};
        }
        const bool executorIsCpu = executor->isCpu();

        // Attempt to execute a single step of the execution.
        auto [stepN, stepOutputShapes, _] = executor->compute(deadline, burstController);

        // Update global outputs and dynamic temporaries.
        StepExecutor::UpdateOutputShapes updateOutputShapes = {};
        if (!executor->updateOutputShapes(stepN, stepOutputShapes, &outputShapes,
                                          &updateOutputShapes)) {
            stepN = ANEURALNETWORKS_OP_FAILED;
        }

        // If execution was successful, continue to next step.
        if (stepN == ANEURALNETWORKS_NO_ERROR) {
            if (updateOutputShapes.zeroSizedInput) {
                // We'll need to do full model CPU fallback
                VLOG(EXECUTION) << "updateOutputShapes.zeroSizedInput";
                stepN = ANEURALNETWORKS_OP_FAILED;
            } else {
                CHECK(executor->areDynamicTemporariesAllocated());
                continue;
            }
        }

        if (stepN == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
            VLOG(EXECUTION) << "OUTPUT_INSUFFICIENT_SIZE: " << toString(updateOutputShapes);
            if (updateOutputShapes.mainOutputInsufficient ||
                !updateOutputShapes.updatedDynamicTemporary) {
                // Either:
                // - At least one main model output is not of sufficient size; or
                // - we didn't learn anything new about dynamic temporaries.
                // Neither of these is recoverable, so end execution.
                return {stepN, outputShapes, {}};
            }
            // Every main model output is of sufficient size. This implies that
            // at least one dynamic temporary is not of sufficient size. This
            // is recoverable.
            doInsufficientSizeFallback = true;
            continue;
        }

        // If CPU fallback is not allowed and there was an error, end execution.
        if (!mAllowCpuFallback) {
            return {stepN, {}, {}};
        }

        // If CPU execution was already attempted, perform a full CPU fallback.
        if (executorIsCpu) {
            break;
        }

        // If the code reaches this point, attempt a partial fallback to CPU.
        CHECK(mAllowCpuFallback);
        if (updateOutputShapes.zeroSizedInput) {
            // Do not attempt a partial fallback.
            break;
        }
        while (true) {
            auto [fallbackN, fallbackOutputShapes, _, fallbackExecutor] =
                    cpuFallbackPartial(*mPlan, controller);

            // Update global outputs and dynamic temporaries.
            StepExecutor::UpdateOutputShapes fallbackUpdateOutputShapes = {};
            if (fallbackExecutor != nullptr &&
                !fallbackExecutor->updateOutputShapes(fallbackN, fallbackOutputShapes,
                                                      &outputShapes, &fallbackUpdateOutputShapes)) {
                fallbackN = ANEURALNETWORKS_OP_FAILED;
            }

            // If execution was successful, continue to next step.
            if (fallbackN == ANEURALNETWORKS_NO_ERROR) {
                if (fallbackUpdateOutputShapes.zeroSizedInput) {
                    // We'll need to do full model CPU fallback
                    VLOG(EXECUTION) << "fallbackUpdateOutputShapes.zeroSizedInput";
                    fallbackN = ANEURALNETWORKS_OP_FAILED;
                    break;
                }
                CHECK(fallbackExecutor->areDynamicTemporariesAllocated());
                goto nextStep;
            }

            if (fallbackN == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
                VLOG(EXECUTION) << "OUTPUT_INSUFFICIENT_SIZE: "
                                << toString(fallbackUpdateOutputShapes);
                if (fallbackUpdateOutputShapes.mainOutputInsufficient ||
                    !fallbackUpdateOutputShapes.updatedDynamicTemporary) {
                    // Either:
                    // - At least one main model output is not of sufficient size; or
                    // - we didn't learn anything new about dynamic temporaries.
                    // Neither of these is recoverable, so end execution.
                    return {fallbackN, outputShapes, {}};
                }
                // Every main model output is of sufficient size. This implies
                // that at least one dynamic temporary is not of sufficient
                // size. This is recoverable.
                continue;
            }

            // If the code reaches this point, then there was an error with the
            // fallback. In this case, attempt full fallback.
            break;
        }

        // If the code reaches this point, then there was an error with the
        // fallback. In this case, attempt full fallback.
        break;

    nextStep:
        // Bottom of the outer loop
        continue;
    }

    // If the code has reached this point, a potentially recoverable error
    // occurred during the step executions. Instead, do a full execution
    // fallback on the CPU.
    return cpuFallbackFull(this);
}

static bool waitForSyncFences(const std::vector<int>& waitFor) {
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                VLOG(EXECUTION) << "syncWait failed, fd: " << syncFd;
                return false;
            }
        }
    }
    return true;
}
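
// Illustrative client-side sketch (exposition only; "fenceFd" is a
// hypothetical sync fence file descriptor) of how a dependency fence is
// typically produced before it reaches waitForSyncFences():
//
//   ANeuralNetworksEvent* dependency = nullptr;
//   ANeuralNetworksEvent_createFromSyncFenceFd(fenceFd, &dependency);
//   // ... pass the event to
//   // ANeuralNetworksExecution_startComputeWithDependencies() ...
//   ANeuralNetworksEvent_free(dependency);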
884
885std::tuple<int, int, ExecuteFencedInfoCallback> SimpleExecutionBuilder::computeFencedInternal(
886 const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence,
887 const OptionalTimePoint& deadline) {
888 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "SimpleExecutionBuilder::computeFencedInternal");
889 VLOG(EXECUTION) << "SimpleExecutionBuilder::computeFencedInternal";
890
Xusong Wang1d789c72021-02-11 15:04:07 -0800891 if (mExecutor == nullptr) {
892 mExecutor = mPlan->makeStepExecutor(mReusable, this);
893 }
894
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800895 auto [n, syncFd, callback] =
Xusong Wang1d789c72021-02-11 15:04:07 -0800896 mExecutor->computeFenced(waitFor, timeoutDurationAfterFence, deadline);
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800897
898 if (n == ANEURALNETWORKS_NO_ERROR) {
899 return {ANEURALNETWORKS_NO_ERROR, syncFd, callback};
900 }
901
902 // If CPU fallback is not allowed and there was an error, end execution.
903 if (!mAllowCpuFallback) {
904 return {n, -1, nullptr};
905 }
906
907 // If CPU execution was already attempted, return from the function with an error.
Xusong Wang1d789c72021-02-11 15:04:07 -0800908 if (mExecutor->isCpu()) {
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800909 return {n, -1, nullptr};
910 }
911
912 // If the code has reached this point, a potentially recoverable error
913 // occurred during the step executions. Instead, do a full execution
914 // fallback on the CPU.
915 VLOG(EXECUTION) << "Performing full fallback on the CPU.";
916 if (!waitForSyncFences(waitFor)) {
917 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr};
918 }
919 auto [fallbackN, fallbackOutputShapes, fallbackTiming] = cpuFallbackFull(this);
920 reportTimingWithoutFencedExecutionCallback(fallbackTiming);
921 return {fallbackN, -1, nullptr};
922}
923
924// In case of partitioned execution, computeFencedInternal call will return the sync
Miao Wange0227242019-12-20 16:07:39 -0800925// fence and the fenced compute callback returned from the last partition.
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800926// Any failed partition will result in whole execution fallback to CPU if
927// mAllowCpuFallback is set to true.
928std::tuple<int, int, ExecuteFencedInfoCallback> CompoundExecutionBuilder::computeFencedInternal(
929 const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence,
930 const OptionalTimePoint& deadline) {
931 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "CompoundExecutionBuilder::computeFencedInternal");
932 VLOG(EXECUTION) << "CompoundExecutionBuilder::computeFencedInternal (from plan, iteratively)";
933
David Gross948ffa82020-08-14 15:30:49 -0700934 // We should have detected this earlier in the call chain and fallen back to
935 // non-fenced execution. This is an implementation limitation: In order to
936 // support dynamic temporarires in this code, we'd need to implement
937 // something like the following:
Xusong Wang1d789c72021-02-11 15:04:07 -0800938 // - If a partition has outputs of unknown size, compute that partition in a
David Gross948ffa82020-08-14 15:30:49 -0700939 // non fenced fashion, just as if it were scheduled on a driver that does
940 // not support fenced execution.
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800941 // - Implement something similar to the code in CompoundExecutionBuilder::computeInternal()
David Gross948ffa82020-08-14 15:30:49 -0700942 // that handles a step execution that fails with
943 // ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE.
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800944 CHECK(!mCompilation->hasDynamicTemporaries());
Miao Wange0227242019-12-20 16:07:39 -0800945
Miao Wang9cc46762020-01-21 14:59:54 -0800946 // Initiate waitForFds, syncFence for the first step.
947 std::vector<int> waitForFds = waitFor;
Xusong Wang2ad9cac2021-07-12 17:17:50 -0700948 base::unique_fd syncFence;
Slava Shklyaev20b9bd12020-11-11 17:01:11 +0000949 ExecuteFencedInfoCallback executeFencedInfoCallback;
Miao Wange0227242019-12-20 16:07:39 -0800950
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800951 std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this, nullptr);
Miao Wange0227242019-12-20 16:07:39 -0800952 while (true) {
953 VLOG(EXECUTION) << "looking for next StepExecutor";
954
955 // Get the current step of the execution.
956 std::shared_ptr<StepExecutor> executor;
Xusong Wang2ad9cac2021-07-12 17:17:50 -0700957 int n = mPlan->next(controller, &executor, nullptr, nullptr, syncFence.get());
Miao Wange0227242019-12-20 16:07:39 -0800958 if (n != ANEURALNETWORKS_NO_ERROR) {
Miao Wang03d30a62020-03-05 15:30:29 -0800959 // During the interpreted execution of control flow, a loop timeout
960 // might occur in ExecutionPlan::next().
961 bool missedDeadline = n == ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT ||
962 n == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT;
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800963 if (mAllowCpuFallback && !missedDeadline) break;
Miao Wang9cc46762020-01-21 14:59:54 -0800964 // Return -1 for the sync fence fd, and nullptr for the callback.
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800965 return {n, -1, nullptr};
Miao Wange0227242019-12-20 16:07:39 -0800966 }
967
968 // If the code reached the end of the plan without error, then return
969 // with no error.
970 if (executor == nullptr) {
Xusong Wang2ad9cac2021-07-12 17:17:50 -0700971 return {ANEURALNETWORKS_NO_ERROR, syncFence.release(), executeFencedInfoCallback};
Miao Wange0227242019-12-20 16:07:39 -0800972 }
Miao Wange0227242019-12-20 16:07:39 -0800973
Xusong Wang1d789c72021-02-11 15:04:07 -0800974 // Attempt to compute a single step of the execution.
Miao Wang9cc46762020-01-21 14:59:54 -0800975 auto [stepN, syncFd, callback] =
Michael Butlerb6016f62020-02-25 11:39:05 -0800976 executor->computeFenced(waitForFds, timeoutDurationAfterFence, deadline);
Miao Wange0227242019-12-20 16:07:39 -0800977
Miao Wang9cc46762020-01-21 14:59:54 -0800978 // Update waitForFds, syncFence for the next step.
Xusong Wang2ad9cac2021-07-12 17:17:50 -0700979 syncFence.reset(syncFd);
Slava Shklyaev20b9bd12020-11-11 17:01:11 +0000980 executeFencedInfoCallback = callback;
Miao Wang9cc46762020-01-21 14:59:54 -0800981 waitForFds.clear();
Xusong Wang2ad9cac2021-07-12 17:17:50 -0700982 if (syncFd >= 0) {
Miao Wang9cc46762020-01-21 14:59:54 -0800983 waitForFds = {syncFd};
984 }
Miao Wange0227242019-12-20 16:07:39 -0800985
986 // If execution was successful, continue to next step.
987 if (stepN == ANEURALNETWORKS_NO_ERROR) {
988 continue;
989 }
David Gross948ffa82020-08-14 15:30:49 -0700990 // If CPU fallback is not allowed and there was an error, end execution.
Xusong Wang5e6ae1b2021-02-08 21:40:31 -0800991 if (!mAllowCpuFallback) {
992 return {stepN, -1, nullptr};
Miao Wange0227242019-12-20 16:07:39 -0800993 }
994
Miao Wange0227242019-12-20 16:07:39 -0800995 // If the code reaches this point, then there was an error with the
996 // fallback. In this case, attempt full fallback.
997 break;
998 }
999
1000 // If the code has reached this point, a potentially recoverable error
1001 // occurred during the step executions. Instead, do a full execution
1002 // fallback on the CPU.
1003 VLOG(EXECUTION) << "Performing full fallback on the CPU.";
Xusong Wang5e6ae1b2021-02-08 21:40:31 -08001004 if (!waitForSyncFences(waitFor)) {
1005 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr};
Miao Wange0227242019-12-20 16:07:39 -08001006 }
Xusong Wang5e6ae1b2021-02-08 21:40:31 -08001007 auto [fullN, fullOutputShapes, _] = cpuFallbackFull(this);
Xusong Wang2ad9cac2021-07-12 17:17:50 -07001008 return {fullN, -1, nullptr};
Miao Wange0227242019-12-20 16:07:39 -08001009}
1010
Miao Wang9cc46762020-01-21 14:59:54 -08001011int ExecutionBuilder::computeFenced(const std::vector<int>& waitFor,
1012 uint64_t timeoutDurationAfterFence, int* syncFence) {
1013 CHECK(syncFence != nullptr);
Przemysław Szczepaniakb9f01ee2020-11-27 11:31:39 +00001014 NN_RETURN_IF_ERROR(
1015 prepareForCompute("startComputeWithDependencies", ExecutionMode::ASYNC_WITH_DEPS));
Miao Wang9cc46762020-01-21 14:59:54 -08001016 if (timeoutDurationAfterFence > 0) {
1017 if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
1018 LOG(ERROR)
1019 << "ANeuralNetworksExecution_startComputeWithDependencies called with non-zero "
1020 "duration on an ANeuralNetworksExecution "
1021 "created from an ANeuralNetworksCompilation that was not created by "
1022 "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
Przemysław Szczepaniakb9f01ee2020-11-27 11:31:39 +00001023 return finishComputation(ANEURALNETWORKS_BAD_DATA, {}, ExecutionMode::ASYNC_WITH_DEPS);
Miao Wang9cc46762020-01-21 14:59:54 -08001024 }
Miao Wang9cc46762020-01-21 14:59:54 -08001025 }
    if (!areOutputsFullySpecified()) {
        LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies:"
                      " not all outputs have fully specified dimensions";
        return finishComputation(ANEURALNETWORKS_BAD_DATA, {}, ExecutionMode::ASYNC_WITH_DEPS);
    }

    // Unlike ExecutionBuilder::compute, we do not need to reset output dimensions here because
    // fenced executions do not support dynamic output shape.

    mComputeStartTimePoint = Clock::now();
    VLOG(EXECUTION) << "ExecutionBuilder::computeFenced";
    int result;
    const auto deadline = makeDeadline(mTimeoutDuration);
    std::tie(result, *syncFence, mFencedExecutionCallback) =
            computeFencedInternal(waitFor, timeoutDurationAfterFence, deadline);
    // If there is an error, call finishComputation to mark the computation as completed.
    // Otherwise, we will call finishComputation in SyncFenceEvent::wait().
    if (result != ANEURALNETWORKS_NO_ERROR) {
        // TODO(miaowang): support dynamic output shape only with memory domain.
        // For now just return empty output shapes.
        result = finishComputation(result, {}, ExecutionMode::ASYNC_WITH_DEPS);
    }
    return result;
}
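
// Illustrative only, not part of this file's logic: a minimal sketch of how a
// client might drive the fenced path above through the public NNAPI, assuming
// "execution" is a valid ANeuralNetworksExecution whose compilation was created
// by ANeuralNetworksCompilation_createForDevices with numDevices = 1:
//
//     ANeuralNetworksEvent* event = nullptr;
//     int status = ANeuralNetworksExecution_startComputeWithDependencies(
//             execution, /*dependencies=*/nullptr, /*numOfDependencies=*/0,
//             /*duration=*/0, &event);
//     if (status == ANEURALNETWORKS_NO_ERROR) {
//         // On success, finishComputation runs inside SyncFenceEvent::wait().
//         ANeuralNetworksEvent_wait(event);
//     }
//     ANeuralNetworksEvent_free(event);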

int ExecutionBuilder::compute(std::shared_ptr<ExecutionCallback>* synchronizationCallback,
                              BurstBuilder* burstBuilder) {
    CHECK(synchronizationCallback == nullptr || burstBuilder == nullptr)
            << "synchronizationCallback and burstBuilder cannot simultaneously be used";

    const bool synchronous = (synchronizationCallback == nullptr);
    if (!synchronous) {
        *synchronizationCallback = nullptr;
    }

    const char* name = burstBuilder ? "burstCompute" : synchronous ? "compute" : "startCompute";
    const ExecutionMode mode = burstBuilder
                                       ? ExecutionMode::BURST
                                       : synchronous ? ExecutionMode::SYNC : ExecutionMode::ASYNC;
    NN_RETURN_IF_ERROR(prepareForCompute(name, mode));

    // Validate input memory dimensions. We need to do the validation in every computation because
    // the memory dimensions may change between computations.
    for (auto& p : mInputs) {
        if (p.state() == ModelArgumentInfo::MEMORY) {
            const RuntimeMemory* memory = mMemories[p.locationAndLength().poolIndex];
            if (!memory->getValidator().validateInputDimensions(p.dimensions())) {
                return finishComputation(ANEURALNETWORKS_OP_FAILED, {}, mode);
            }
        }
    }

    // Reset output dimensions.
    if (!areOutputsFullySpecified()) {
        for (auto& output : mOutputs) {
            output.reset();
        }
    }

    const auto deadline = makeDeadline(mTimeoutDuration);
    mComputeStartTimePoint = Clock::now();
    if (synchronous) {
        if (burstBuilder) {
            VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API, burst)";
        } else {
            VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API)";
        }
        const auto [n, outputShapes, timing] = computeInternal(deadline, burstBuilder);
        if (mMeasureTiming) {
            mTimingWithoutFencedExecutionCallback = timing;
        }
        return finishComputation(n, outputShapes, mode);
    } else /* asynchronous */ {
        // TODO: For asynchronous execution, entire plan-based-path should run in an
        // asynchronous thread -- take the asynchronous thread logic out of
        // CpuExecution::compute() and use it to wrap the plan-based-path.

        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        // of spinning up a new thread.

        // Prepare the callback for asynchronous execution.
        // A std::shared_ptr<ExecutionCallback> object is returned through
        // synchronizationCallback when the execution has been successfully
        // launched; otherwise, nullptr is returned. The executionCallback is
        // abstracted in the NN API as an "event".
        auto executionCallback = std::make_shared<ExecutionCallback>();
        executionCallback->setOnFinish(
                [this, mode](ErrorStatus error, const std::vector<OutputShape>& outputShapes) {
                    return finishComputation(error, outputShapes, mode);
                });
        const auto asyncStartCompute = [this, deadline, executionCallback] {
            const auto [n, outputShapes, timing] = computeInternal(deadline, nullptr);
            const auto status = convertResultCodeToErrorStatus(n);
            executionCallback->notify(status, outputShapes, timing);
        };
        if (DeviceManager::get()->syncExecRuntime()) {
            VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API, non-threaded)";
            asyncStartCompute();
        } else {
            VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API)";
            std::thread asyncExecution(asyncStartCompute);
            executionCallback->bindThread(std::move(asyncExecution));
        }
        *synchronizationCallback = executionCallback;
        return ANEURALNETWORKS_NO_ERROR;
    }
}
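
// Illustrative only: the public entry points that funnel into
// ExecutionBuilder::compute above, assuming "execution" and "compilation" are
// valid client objects:
//
//     // Synchronous:
//     ANeuralNetworksExecution_compute(execution);
//
//     // Asynchronous ("event" wraps the ExecutionCallback):
//     ANeuralNetworksEvent* event = nullptr;
//     ANeuralNetworksExecution_startCompute(execution, &event);
//     ANeuralNetworksEvent_wait(event);
//     ANeuralNetworksEvent_free(event);
//
//     // Burst:
//     ANeuralNetworksBurst* burst = nullptr;
//     ANeuralNetworksBurst_create(compilation, &burst);
//     ANeuralNetworksExecution_burstCompute(execution, burst);
//     ANeuralNetworksBurst_free(burst);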

std::vector<OutputShape> ExecutionBuilder::getInitialOutputShapes() const {
    std::vector<OutputShape> outputShapes(mOutputs.size());
    std::transform(mOutputs.begin(), mOutputs.end(), outputShapes.begin(),
                   [](const auto& x) -> OutputShape {
                       std::vector<uint32_t> dimensions;
                       if (x.state() != ModelArgumentInfo::HAS_NO_VALUE) {
                           dimensions = x.dimensions();
                       }
                       return {.dimensions = std::move(dimensions), .isSufficient = true};
                   });
    return outputShapes;
}

// Check if the dimensions "to" can be updated to the dimensions "from", where
// "from" must be at least as fully specified as "to".
static bool isUpdatable(const std::vector<uint32_t>& to, const std::vector<uint32_t>& from) {
    if (to.size() == 0) return true;
    NN_RET_CHECK_EQ(to.size(), from.size());
    for (uint32_t i = 0; i < to.size(); i++) {
        NN_RET_CHECK(to[i] == from[i] || to[i] == 0);
    }
    return true;
}
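
// For example (illustrative): isUpdatable({2, 0, 3}, {2, 5, 3}) is true because
// the unspecified middle dimension may be filled in, isUpdatable({}, {2, 5, 3})
// is true because an unspecified rank may be filled in, but
// isUpdatable({2, 4, 3}, {2, 5, 3}) is false because a specified dimension may
// not be overwritten.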

static bool isZeroSizedTensor(int executionResultCode, const OutputShape& outputShape) {
    return (executionResultCode == ANEURALNETWORKS_NO_ERROR) && outputShape.isSufficient &&
           outputShape.dimensions.size() &&
           (std::find(outputShape.dimensions.begin(), outputShape.dimensions.end(), uint32_t(0)) !=
            outputShape.dimensions.end());
}
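
// For example (illustrative): a successful execution that reports
// {.dimensions = {2, 0, 3}, .isSufficient = true} describes a zero-sized
// tensor, because one of its fully specified dimensions is 0.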

bool ExecutionBuilder::updateOutputShapes(ErrorStatus status,
                                          const std::vector<OutputShape>& outputShapes) {
    NN_RET_CHECK(validateOutputShapesFromDriver(status, mModel, outputShapes));

    if (outputShapes.size() == 0) {
        return true;
    }
    NN_RET_CHECK_EQ(outputShapes.size(), mOutputs.size());
    for (uint32_t i = 0; i < outputShapes.size(); i++) {
        // Check if only unspecified dimensions or rank are overwritten.
        NN_RET_CHECK(isUpdatable(mOutputs[i].dimensions(), outputShapes[i].dimensions));
        const OperandType operandType = mModel->getOutputOperand(i).type;
        NN_RET_CHECK(!TypeManager::get()->sizeOfDataOverflowsUInt32(operandType,
                                                                    outputShapes[i].dimensions));
    }
    for (uint32_t i = 0; i < outputShapes.size(); i++) {
        mOutputs[i].dimensions() = outputShapes[i].dimensions;
        mOutputs[i].isSufficient() = outputShapes[i].isSufficient;
    }
    return true;
}

bool ExecutionBuilder::updateMemories() {
    for (const auto& output : mOutputs) {
        if (output.state() != ModelArgumentInfo::MEMORY) continue;
        const RuntimeMemory* memory = mMemories[output.locationAndLength().poolIndex];
        NN_RET_CHECK(memory->getValidator().updateMetadata({.dimensions = output.dimensions()}));
    }
    return true;
}

int ExecutionBuilder::finishComputation(int result, const std::vector<OutputShape>& outputShapes,
                                        ExecutionMode mode) {
    const auto status = convertResultCodeToErrorStatus(result);
    if (!updateOutputShapes(status, outputShapes) || !updateMemories()) {
        result = ANEURALNETWORKS_OP_FAILED;
    }
    bool success = result == ANEURALNETWORKS_NO_ERROR;
    for (const auto& output : mOutputs) {
        if (output.state() != ModelArgumentInfo::MEMORY) continue;
        const RuntimeMemory* memory = mMemories[output.locationAndLength().poolIndex];
        memory->getValidator().setInitialized(success);
    }
    switch (result) {
        case ANEURALNETWORKS_NO_ERROR:
            mCompletion = Completion::NO_ERROR;
            break;
        case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
            mCompletion = Completion::OUTPUT_INSUFFICIENT_SIZE;
            break;
        default:
            mCompletion = Completion::OTHER_ERROR;
            break;
    }
    {
        std::lock_guard<std::mutex> lock(mStateMutex);
        CHECK(mState != State::PREPARATION)
                << "ExecutionBuilder::finishComputation is called in the preparation state";
        CHECK(mState != State::COMPLETED) << "ExecutionBuilder::finishComputation is called twice";
        mState = State::COMPLETED;
    }
    telemetry::onExecutionFinish(this, mode, result);
    return result;
}

std::string toString(StepExecutor::UpdateOutputShapes updateOutputShapes) {
    return "{ .updatedDynamicTemporary = " +
           std::to_string(updateOutputShapes.updatedDynamicTemporary) +
           ", .mainOutputInsufficient = " +
           std::to_string(updateOutputShapes.mainOutputInsufficient) + "}";
}

bool StepExecutor::updateOutputShapes(int executionResultCode, const std::vector<OutputShape>& from,
                                      std::vector<OutputShape>* to, UpdateOutputShapes* update) {
    CHECK(update != nullptr);
    *update = {.updatedDynamicTemporary = false,
               .mainOutputInsufficient = false,
               .zeroSizedInput = false};

    NN_RET_CHECK(validateOutputShapesFromDriver(executionResultCode, mModel, from));

    if (from.size() == 0) {
        return true;
    }

    if (VLOG_IS_ON(EXECUTION)) {
        for (const auto& shape : from) {
            VLOG(EXECUTION) << "updateOutputShapes: " << shape;
        }
    }

    if (mExecutionStep != nullptr) {
        const auto& indexMapping = mExecutionStep->getOutputIndexStepModelToMainModel();
        NN_RET_CHECK_LE(indexMapping.size(), from.size());
        for (uint32_t i = 0, e = indexMapping.size(); i < e; i++) {
            const uint32_t toIndex = indexMapping[i];
            NN_RET_CHECK_GT(to->size(), toIndex);
            NN_RET_CHECK(isUpdatable(to->at(toIndex).dimensions, from[i].dimensions));
            (*to)[toIndex] = from[i];
            update->mainOutputInsufficient |= !(*to)[toIndex].isSufficient;
            if (mExecutionStep->getModelOutputsThatAreDownstreamInputs().count(toIndex) &&
                isZeroSizedTensor(executionResultCode, from[i])) {
                update->zeroSizedInput = true;
            }
        }

        if (!mDynamicTemporaries->empty()) {
            // TODO(b/157236079): Instead of computing this here, precompute it in ExecutionStep?
            std::map<uint32_t, uint32_t> operandIndexStepModelOutputToSourceModelTemp;
            for (const auto& entry : mExecutionStep->getTempsAsStepModelOutputs()) {
                operandIndexStepModelOutputToSourceModelTemp.emplace(entry.second, entry.first);
            }

            const uint32_t sourceModelIndex = mExecutionStep->getSourceModelIndex();
            for (uint32_t i = 0, e = mModel->outputCount(); i < e; i++) {
                const uint32_t stepModelOperandIndex = mModel->getOutputOperandIndex(i);
                const auto it =
                        operandIndexStepModelOutputToSourceModelTemp.find(stepModelOperandIndex);
                if (it == operandIndexStepModelOutputToSourceModelTemp.end()) {
                    continue;
                }
                const auto sourceOperandIndex = SourceOperandIndex(sourceModelIndex, it->second);
                VLOG(EXECUTION) << "updateOutputShapes checking to see if output#" << i
                                << " sourceOperandIndex = (" << sourceOperandIndex.first << ", "
                                << sourceOperandIndex.second << ") is a dynamic temporary";
                // This is a temporary, but it might not be a dynamic temporary.
                const auto loc = mDynamicTemporaries->lookup(sourceOperandIndex, false);
                if (loc == std::nullopt) {
                    continue;
                }
                NN_RET_CHECK(isUpdatable(*loc->dimensions, from[i].dimensions));
                bool changedShape = false;
                const uint32_t actualSize = TypeManager::get()->getSizeOfData(
                        mModel->getOperand(stepModelOperandIndex).type, from[i].dimensions);
                if (actualSize > 0) {
                    changedShape = mDynamicTemporaries->redeclare(sourceOperandIndex,
                                                                  from[i].dimensions, actualSize);
                } else if (!from[i].isSufficient) {
                    NN_RET_CHECK(loc->paddedLength < UINT32_MAX / 2)
                            << "output#" << i << " paddedLength overflow";
                    changedShape = mDynamicTemporaries->redeclare(
                            sourceOperandIndex, from[i].dimensions, 2 * loc->paddedLength);
                } else {
                    // The combination of not-fully-specified dimensions
                    // and isSufficient means that we have no
                    // information about whether the size of the dynamic
                    // temporary is adequate.
                    VLOG(EXECUTION) << "updateOutputShapes skipping redeclaration for output#" << i;
                    if (executionResultCode == ANEURALNETWORKS_NO_ERROR) {
                        NN_RET_CHECK(isZeroSizedTensor(executionResultCode, from[i]));
                        // This is a zero-sized tensor, and by
                        // definition, any dynamic temporary is an input
                        // to an execution step.
                        update->zeroSizedInput = true;
                    }
                }
                if (changedShape) {
                    // TODO: find a better place for this comment.
                    //
                    // isUpdatable(a, b) imposes a partial ordering a <=
                    // b. Every fully specified dimensions vector is an
                    // upper bound of that ordering. Therefore, any
                    // change in dimensions moves towards an upper
                    // bound, and hence there are a finite number of
                    // such changes possible.
                    //
                    // actualSize can only be computed from dimensions
                    // that are an upper bound. Therefore, once
                    // actualSize is computed, it will not change.
                    //
                    // If dimensions are not fully specified, and
                    // estimated size changes, it increases. There is
                    // an upper bound on estimated size to avoid
                    // overflow.
                    //
                    // Therefore, if we retry only when dimensions or
                    // size change, and we stop retrying if we would
                    // otherwise overflow, we should only retry a finite
                    // number of times.
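                    //
                    // For example (illustrative): a dynamic temporary whose
                    // dimensions are reported as (4, 0) with insufficient size
                    // can only have its padded length doubled on a retry; once
                    // a later attempt reports (4, 5), actualSize becomes known
                    // and fixed, so only finitely many retries are possible.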
                    update->updatedDynamicTemporary = true;
                }
            }
            mDynamicTemporaries->vlogDump("finished updateOutputShapes");
        }
    } else {
        NN_RET_CHECK_EQ(from.size(), to->size());
        for (uint32_t i = 0, e = from.size(); i < e; i++) {
            NN_RET_CHECK(isUpdatable(to->at(i).dimensions, from[i].dimensions));
            (*to)[i] = from[i];
        }
    }
    return true;
}

StepExecutor::StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
                           std::shared_ptr<Device> device,
                           std::shared_ptr<RuntimePreparedModel> preparedModel, bool reusable,
                           const ExecutionStep* step, DynamicTemporaries* dynamicTemporaries)
    : mExecutionBuilder(executionBuilder),
      mExecutionStep(step),
      mDynamicTemporaries(dynamicTemporaries),
      mModel(model),
      mDevice(device),
      mPreparedModel(preparedModel),
      mInputs(model->inputCount()),
      mOutputs(model->outputCount()),
      mReusable(reusable) {
    CHECK(mDevice != nullptr);
    CHECK_EQ(step == nullptr, dynamicTemporaries == nullptr);
    CHECK(!(reusable && dynamicTemporaries != nullptr));
    VLOG(EXECUTION) << "StepExecutor::StepExecutor with " << mInputs.size() << " inputs and "
                    << mOutputs.size() << " outputs";
}

bool StepExecutor::areDynamicTemporariesAllocated() const {
    return !mDynamicTemporaries || mDynamicTemporaries->allocated(mExecutionStep->getIndex());
}

void StepExecutor::mapInputsAndOutputsTrivially() {
    mInputs = mExecutionBuilder->mInputs;
    mOutputs = mExecutionBuilder->mOutputs;
    mMemories = mExecutionBuilder->mMemories;
}

void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                                    ModelArgumentInfo* executorInputOrOutput,
                                    const Dimensions* builderDimensions) {
    auto updateDimensions = [executorInputOrOutput, builderDimensions] {
        if (!builderDimensions) {
            return;
        }
        executorInputOrOutput->dimensions() = *builderDimensions;
    };

    *executorInputOrOutput = builderInputOrOutput;
    switch (executorInputOrOutput->state()) {
        default:
            CHECK(false) << "unexpected ModelArgumentInfo::state";
            break;
        case ModelArgumentInfo::HAS_NO_VALUE:
        case ModelArgumentInfo::UNSPECIFIED:
            break;
        case ModelArgumentInfo::POINTER:
            updateDimensions();
            break;
        case ModelArgumentInfo::MEMORY: {
            updateDimensions();
            const uint32_t builderPoolIndex = builderInputOrOutput.locationAndLength().poolIndex;
            const RuntimeMemory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
            const uint32_t executorPoolIndex = mMemories.add(memory);
            executorInputOrOutput->locationAndLength().poolIndex = executorPoolIndex;
            break;
        }
    }
}

int StepExecutor::setInputOrOutputFromMemory(const Operand& inputOrOutputOperand,
                                             const RuntimeMemory* memory, uint32_t offset,
                                             uint32_t length, const Dimensions& dimensions,
                                             ModelArgumentInfo* inputOrOutputInfo) {
    // Should be similar to
    //     ExecutionBuilder::setInputFromMemory()
    //     ExecutionBuilder::setOutputFromMemory()

    uint32_t poolIndex = mMemories.add(memory);
    CHECK(inputOrOutputInfo->unspecified());
    int n;
    std::tie(n, *inputOrOutputInfo) =
            ModelArgumentInfo::createFromMemory(inputOrOutputOperand,
                                                /*type=*/nullptr, poolIndex, offset, length);
    if (n == ANEURALNETWORKS_NO_ERROR && dimensions.size()) {
        CHECK(isUpdatable(inputOrOutputInfo->dimensions(), dimensions));
        inputOrOutputInfo->dimensions() = dimensions;
    }
    return n;
}

static std::string toString(std::vector<uint32_t> dimensions) {
    std::string ret = "(";
    bool wroteOne = false;
    for (uint32_t dimension : dimensions) {
        if (wroteOne) {
            ret += ", ";
        } else {
            wroteOne = true;
        }
        ret += std::to_string(dimension);
    }
    ret += ")";
    return ret;
}

static void logArguments(const char* kind, const std::vector<ModelArgumentInfo>& args) {
    for (unsigned i = 0; i < args.size(); i++) {
        const auto& arg = args[i];
        std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
        switch (arg.state()) {
            case ModelArgumentInfo::POINTER:
                VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer()) << ") dim"
                                << toString(arg.dimensions());
                break;
            case ModelArgumentInfo::MEMORY:
                VLOG(EXECUTION) << prefix << "MEMORY("
                                << "pool=" << arg.locationAndLength().poolIndex << ", "
                                << "off=" << arg.locationAndLength().offset << ") dim"
                                << toString(arg.dimensions());
                break;
            case ModelArgumentInfo::HAS_NO_VALUE:
                VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
                break;
            case ModelArgumentInfo::UNSPECIFIED:
                VLOG(EXECUTION) << prefix << "UNSPECIFIED";
                break;
            default:
                VLOG(EXECUTION) << prefix << "state(" << arg.state() << ")";
                break;
        }
    }
}

bool StepExecutor::isCpu() const {
    return mDevice == DeviceManager::getCpuDevice();
}

std::pair<int, std::shared_ptr<RuntimeExecution>> StepExecutor::getReusableExecution() {
    CHECK(mReusable);
    if (mExecution == nullptr) {
        CHECK(mPreparedModel != nullptr);
        const MeasureTiming measure = measureTiming(mExecutionBuilder);
        const OptionalDuration loopTimeoutDuration =
                makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
        auto [n, execution] = mPreparedModel->createReusableExecution(
                mInputs, mOutputs, mMemories.getObjects(), measure, loopTimeoutDuration,
                mExecutionBuilder->getMetadata());
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return {n, nullptr};
        }
        mExecution = std::move(execution);
    }
    return {ANEURALNETWORKS_NO_ERROR, mExecution};
}

std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::compute(
        const OptionalTimePoint& deadline, const SharedBurst& burstController) {
    if (VLOG_IS_ON(EXECUTION)) {
        logArguments("input", mInputs);
        logArguments("output", mOutputs);
    }

    int n;
    std::vector<OutputShape> outputShapes;
    Timing timing;
    if (mReusable) {
        auto [nCreate, execution] = getReusableExecution();
        if (nCreate != ANEURALNETWORKS_NO_ERROR) {
            return {nCreate, {}, {}};
        }
        std::tie(n, outputShapes, timing) = execution->compute(burstController, deadline);
    } else {
        CHECK(mPreparedModel != nullptr);
        const MeasureTiming measure = measureTiming(mExecutionBuilder);
        const OptionalDuration loopTimeoutDuration =
                makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
        std::tie(n, outputShapes, timing) = mPreparedModel->execute(
                mInputs, mOutputs, mMemories.getObjects(), burstController, measure, deadline,
                loopTimeoutDuration, mExecutionBuilder->getMetadata());
    }
    mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing);
    return {n, std::move(outputShapes), std::move(timing)};
}

std::tuple<int, int, ExecuteFencedInfoCallback> StepExecutor::computeFenced(
        const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence,
        const OptionalTimePoint& deadline) {
    if (VLOG_IS_ON(EXECUTION)) {
        logArguments("input", mInputs);
        logArguments("output", mOutputs);
    }

    OptionalDuration optionalTimeoutDurationAfterFence;
    if (timeoutDurationAfterFence > 0) {
        optionalTimeoutDurationAfterFence = makeTimeoutDuration(timeoutDurationAfterFence);
    }

    int n;
    int syncFenceFd;
    ExecuteFencedInfoCallback executeFencedInfoCallback;
    Timing timing;
    if (mReusable) {
        auto [nCreate, execution] = getReusableExecution();
        if (nCreate != ANEURALNETWORKS_NO_ERROR) {
            return {nCreate, -1, nullptr};
        }
        std::tie(n, syncFenceFd, executeFencedInfoCallback, timing) =
                execution->computeFenced(waitFor, deadline, optionalTimeoutDurationAfterFence);
    } else {
        CHECK(mPreparedModel != nullptr);
        const MeasureTiming measure = measureTiming(mExecutionBuilder);
        const OptionalDuration loopTimeoutDuration =
                makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
        std::tie(n, syncFenceFd, executeFencedInfoCallback, timing) = mPreparedModel->executeFenced(
                mInputs, mOutputs, mMemories.getObjects(), waitFor, measure, deadline,
                loopTimeoutDuration, optionalTimeoutDurationAfterFence,
                mExecutionBuilder->getMetadata());
    }
    if (syncFenceFd < 0 && executeFencedInfoCallback == nullptr) {
        mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing);
    }
    return {n, syncFenceFd, executeFencedInfoCallback};
}

// For cpuFallback{Partial,Full}, recompile the model on CPU and then start compute.
std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::computeOnCpuFallback() {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "StepExecutor::computeOnCpuFallback");
    VLOG(EXECUTION) << "Re-compile the model on CPU";
    const ModelFactory makeModel = [this] { return mModel->makeModel(); };
    // TODO: Propagate user preference and compilation priority to this point instead of using
    // default values of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER and
    // ANEURALNETWORKS_PRIORITY_MEDIUM
    const ExecutionPreference preference =
            static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
    const Priority priority = convertToCanonicalPriority(ANEURALNETWORKS_PRIORITY_DEFAULT);
    auto [n, preparedModel] = DeviceManager::getCpuDevice()->prepareModel(
            makeModel, preference, priority, {}, {}, {}, {}, {});
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return {n, {}, {}};
    }

    // Prepare device memories for CPU fallback.
    std::vector<const RuntimeMemory*> memories = mMemories.getObjects();
    std::vector<bool> isUsedAsInput(memories.size(), false);
    std::vector<bool> isUsedAsOutput(memories.size(), false);
    std::vector<std::unique_ptr<RuntimeMemory>> blobAhwbs;

    // Mark the input and output usages.
    for (auto& input : mInputs) {
        if (input.state() == ModelArgumentInfo::MEMORY) {
            const uint32_t poolIndex = input.locationAndLength().poolIndex;
            isUsedAsInput[poolIndex] = true;
        }
    }
    for (auto& output : mOutputs) {
        if (output.state() == ModelArgumentInfo::MEMORY) {
            const uint32_t poolIndex = output.locationAndLength().poolIndex;
            // Cannot allocate output buffers with unknown shapes.
            if (mMemories[poolIndex]->getValidator().createdWithUnknownShape()) {
                LOG(ERROR) << "Cannot fall back to CPU because at least one of the output "
                              "operands has unknown shape.";
                return {ANEURALNETWORKS_OP_FAILED, {}, {}};
            }
            isUsedAsOutput[poolIndex] = true;
        }
    }

    // Allocate BLOB mode AHardwareBuffers and read the data from input device memories.
    for (uint32_t i = 0; i < memories.size(); i++) {
        const RuntimeMemory* memory = mMemories[i];
        if (memory->getIBuffer() != nullptr) {
            const uint32_t size = memory->getValidator().getMetadata().logicalSize;
            auto [nAhwb, blobAhwb] = MemoryRuntimeAHWB::create(size);
            if (nAhwb != ANEURALNETWORKS_NO_ERROR) {
                return {nAhwb, {}, {}};
            }
            if (isUsedAsInput[i]) {
                n = copyIBufferToMemory(memory->getIBuffer(), blobAhwb->getMemory());
                if (n != ANEURALNETWORKS_NO_ERROR) {
                    return {n, {}, {}};
                }
            }
            memories[i] = blobAhwb.get();
            blobAhwbs.push_back(std::move(blobAhwb));
        }
    }

    const MeasureTiming measure = measureTiming(mExecutionBuilder);
    const OptionalDuration loopTimeoutDuration =
            makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
    auto [nExecute, outputShapes, timing] = preparedModel->execute(
            mInputs, mOutputs, memories, nullptr, measure, {}, loopTimeoutDuration, {});
    mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing);
    if (nExecute != ANEURALNETWORKS_NO_ERROR) {
        return {nExecute, std::move(outputShapes), timing};
    }

    // Write back to output device memories.
    for (uint32_t i = 0; i < memories.size(); i++) {
        const RuntimeMemory* memory = mMemories[i];
        if (memory->getIBuffer() != nullptr && isUsedAsOutput[i]) {
            n = copyMemoryToIBuffer(memories[i]->getMemory(), memory->getIBuffer(), {});
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return {n, {}, {}};
            }
        }
    }
    return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}

}  // namespace nn
}  // namespace android