blob: e8531bedc5bb453710daef8d850053de71b65f8d [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "ExecutionBuilder"
#include "ExecutionBuilder.h"
#include "CompilationBuilder.h"
#include "CpuExecutor.h"
#include "HalInterfaces.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include <mutex>
#include <thread>
#include <vector>
namespace android {
namespace nn {
int ModelArgumentInfo::setFromPointer(const Operand& operand,
const ANeuralNetworksOperandType* type, void* data,
uint32_t length) {
int n = updateDimensionInfo(operand, type);
if (n != ANEURALNETWORKS_NO_ERROR) {
return n;
}
state = ModelArgumentInfo::POINTER;
locationAndDimension.location = {.poolIndex = 0, .offset = 0, .length = length};
buffer = data;
return ANEURALNETWORKS_NO_ERROR;
}
int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
uint32_t poolIndex, uint32_t offset, uint32_t length) {
int n = updateDimensionInfo(operand, type);
if (n != ANEURALNETWORKS_NO_ERROR) {
return n;
}
state = ModelArgumentInfo::MEMORY;
locationAndDimension.location = {.poolIndex = poolIndex, .offset = offset, .length = length};
buffer = nullptr;
return ANEURALNETWORKS_NO_ERROR;
}
int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
const ANeuralNetworksOperandType* newType) {
if (newType == nullptr) {
locationAndDimension.dimensions = hidl_vec<uint32_t>();
} else {
uint32_t count = newType->dimensionCount;
if (static_cast<OperandType>(newType->type) != operand.type ||
count != operand.dimensions.size()) {
LOG(ERROR) << "ANeuralNetworksExecution_setInput/Output incompatible types";
return ANEURALNETWORKS_BAD_DATA;
}
for (uint32_t i = 0; i < count; i++) {
locationAndDimension.dimensions[i] = newType->dimensions[i];
}
}
return ANEURALNETWORKS_NO_ERROR;
}
// Builds an execution for the model held by |compilation|.
//
// One ModelArgumentInfo slot is created per model input and per model output,
// the memory list is seeded from the model's memories, and every slot starts
// out in the UNSPECIFIED state.
ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
        mModel(compilation->mModel),
        mInputs(mModel->inputCount()),
        mOutputs(mModel->outputCount()),
        mMemories(mModel->getMemories()) {
    LOG(DEBUG) << "ExecutionBuilder::ExecutionBuilder";

    const auto markAllUnspecified = [](std::vector<ModelArgumentInfo>& arguments) {
        for (ModelArgumentInfo& argument : arguments) {
            argument.state = ModelArgumentInfo::UNSPECIFIED;
        }
    };
    markAllUnspecified(mInputs);
    markAllUnspecified(mOutputs);
}
// Binds input |index| to a caller-owned buffer of |length| bytes.
//
// Returns ANEURALNETWORKS_BAD_DATA when |index| is not a valid input slot;
// otherwise forwards the result of ModelArgumentInfo::setFromPointer.
int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
                               const void* buffer, uint32_t length) {
    const uint32_t inputCount = static_cast<uint32_t>(mInputs.size());
    if (index < inputCount) {
        // ModelArgumentInfo stores a mutable pointer for inputs and outputs alike.
        void* data = const_cast<void*>(buffer);
        return mInputs[index].setFromPointer(mModel->getInputOperand(index), type, data, length);
    }
    LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << inputCount;
    return ANEURALNETWORKS_BAD_DATA;
}
// Binds input |index| to the region [offset, offset + length) of |memory|.
//
// Returns ANEURALNETWORKS_BAD_DATA when |index| is not a valid input slot or
// when |memory| rejects the requested region. Otherwise |memory| is added to
// this execution's memory list and the result of
// ModelArgumentInfo::setFromMemory is forwarded.
int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                         const Memory* memory, uint32_t offset, uint32_t length) {
    const uint32_t inputCount = static_cast<uint32_t>(mInputs.size());
    const bool indexInRange = index < inputCount;
    if (!indexInRange) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
                   << inputCount;
        return ANEURALNETWORKS_BAD_DATA;
    }

    const bool regionFits = memory->validateSize(offset, length);
    if (!regionFits) {
        return ANEURALNETWORKS_BAD_DATA;
    }

    const uint32_t poolIndex = mMemories.add(memory);
    ModelArgumentInfo& input = mInputs[index];
    return input.setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset, length);
}
// Binds output |index| to a caller-owned buffer of |length| bytes.
//
// Returns ANEURALNETWORKS_BAD_DATA when |index| is not a valid output slot;
// otherwise forwards the result of ModelArgumentInfo::setFromPointer.
int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                                uint32_t length) {
    const uint32_t outputCount = static_cast<uint32_t>(mOutputs.size());
    if (index < outputCount) {
        ModelArgumentInfo& output = mOutputs[index];
        return output.setFromPointer(mModel->getOutputOperand(index), type, buffer, length);
    }
    LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << outputCount;
    return ANEURALNETWORKS_BAD_DATA;
}
// Binds output |index| to the region [offset, offset + length) of |memory|.
//
// Returns ANEURALNETWORKS_BAD_DATA when |index| is not a valid output slot or
// when |memory| rejects the requested region. Otherwise |memory| is added to
// this execution's memory list and the result of
// ModelArgumentInfo::setFromMemory is forwarded.
int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                          const Memory* memory, uint32_t offset, uint32_t length) {
    const uint32_t outputCount = static_cast<uint32_t>(mOutputs.size());
    const bool indexInRange = index < outputCount;
    if (!indexInRange) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
                   << outputCount;
        return ANEURALNETWORKS_BAD_DATA;
    }

    const bool regionFits = memory->validateSize(offset, length);
    if (!regionFits) {
        return ANEURALNETWORKS_BAD_DATA;
    }

    const uint32_t poolIndex = mMemories.add(memory);
    ModelArgumentInfo& output = mOutputs[index];
    return output.setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset, length);
}
int ExecutionBuilder::startCompute() {
// TODO validate that we have full types for all inputs and outputs,
// that the graph is not cyclic,
/*
TODO: For non-optional inputs, also verify that buffers are not null.
for (auto& p : mInputs) {
if (p.state == ModelArgumentInfo::UNSPECIFIED) {
LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified";
return ANEURALNETWORKS_BAD_DATA;
}
}
*/
for (auto& p : mOutputs) {
if (p.state == ModelArgumentInfo::UNSPECIFIED) {
LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified";
return ANEURALNETWORKS_BAD_DATA;
}
}
LOG(DEBUG) << "ExecutionBuilder::startCompute";
std::shared_ptr<Device> device = DeviceManager::get()->getAvailableDriver();
Model model;
mModel->setHidlModel(&model);
return device == nullptr ? startComputeOnCpu(model)
: startComputeOnDevice(device->getInterface(), model);
}
int ExecutionBuilder::wait() {
if (mEvent == nullptr) {
LOG(ERROR) << "ANeuralNetworksExecution_wait without execution in flight";
return ANEURALNETWORKS_BAD_STATE;
}
mEvent->wait();
return ANEURALNETWORKS_NO_ERROR; // TODO shouldn't we look at wait()'s return value?
}
// Figures out how to place each of the input or outputs in a buffer. This just does the layout,
// it does not copy data. Aligns each input a bit.
int ExecutionBuilder::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
Memory* memory) {
uint32_t nextPoolIndex = mMemories.size();
int64_t total = 0;
for (auto& info : *args) {
if (info.state == ModelArgumentInfo::POINTER) {
DataLocation& loc = info.locationAndDimension.location;
// TODO Good enough alignment?
total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
loc.poolIndex = nextPoolIndex;
loc.offset = static_cast<uint32_t>(total);
total += loc.length;
}
};
if (total > 0xFFFFFFFF) {
LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds "
"2^32.";
return ANEURALNETWORKS_BAD_DATA;
}
hidl_memory hidlMemory;
if (total > 0) {
memory->create(total); // TODO check error
mMemories.add(memory);
}
return ANEURALNETWORKS_NO_ERROR;
}
static void copyLocationAndDimension(const std::vector<ModelArgumentInfo>& argumentInfos,
hidl_vec<RequestArgument>* ioInfos) {
size_t count = argumentInfos.size();
ioInfos->resize(count);
for (size_t i = 0; i < count; i++) {
(*ioInfos)[i] = argumentInfos[i].locationAndDimension;
}
}
// Runs the execution on |driver|.
//
// Steps, in order:
//   1. Ask the driver to prepare |model| and wait for the preparation event.
//   2. Lay out all pointer-backed inputs and outputs into the
//      mInputPointerArguments / mOutputPointerArguments pools.
//   3. Copy pointer-backed input data into the input pool.
//   4. Build the Request (argument locations plus every pool in mMemories)
//      and call execute() on the prepared model.
//   5. Wait for the execution event, then copy pointer-backed outputs from the
//      output pool back into the caller's buffers.
//
// Returns ANEURALNETWORKS_OP_FAILED if preparation or execution fails, the
// error from the layout or getPointer() calls if those fail, and
// ANEURALNETWORKS_NO_ERROR on success. mEvent is only recorded on success.
int ExecutionBuilder::startComputeOnDevice(sp<IDevice> driver, const Model& model) {
LOG(DEBUG) << "ExecutionBuilder::startComputeOnDevice";
// TODO Dangerous! In async, the model will outlive it here. Safe for now
sp<Event> preparationEvent = new Event();
ErrorStatus prepareStatus = ErrorStatus::GENERAL_FAILURE;
sp<IPreparedModel> preparedModel;
// The callback captures the locals above by reference and stores the driver's
// answer in them.
// NOTE(review): the value returned by prepareModel() itself is not inspected;
// only the values delivered through the callback are checked below.
driver->prepareModel(model, preparationEvent,
[&](ErrorStatus status, const sp<IPreparedModel>& prepared) {
prepareStatus = status;
preparedModel = prepared;
});
// Immediately synchronize with event for now
// TODO: change to asynchronous later
Event::Status eventStatus = preparationEvent->wait();
if (prepareStatus != ErrorStatus::NONE || preparedModel == nullptr ||
eventStatus != Event::Status::SUCCESS) {
return ANEURALNETWORKS_OP_FAILED;
}
// Layout the input and output data
int n = allocatePointerArgumentsToPool(&mInputs, &mInputPointerArguments);
if (n != ANEURALNETWORKS_NO_ERROR) {
return n;
}
n = allocatePointerArgumentsToPool(&mOutputs, &mOutputPointerArguments);
if (n != ANEURALNETWORKS_NO_ERROR) {
return n;
}
// Copy the input data that was specified via a pointer.
// mInputPointerArguments.update();
for (auto& info : mInputs) {
if (info.state == ModelArgumentInfo::POINTER) {
DataLocation& loc = info.locationAndDimension.location;
uint8_t* data = nullptr;
// NOTE(review): this `n` shadows the function-level `n`, and the pool
// pointer is fetched again for every pointer-backed input.
int n = mInputPointerArguments.getPointer(&data);
if (n != ANEURALNETWORKS_NO_ERROR) {
return n;
}
// loc.offset was assigned by allocatePointerArgumentsToPool() above.
memcpy(data + loc.offset, info.buffer, loc.length);
}
}
// TODO: Add mInputPointerArguments.commit() and .update() at all the right places
Request request;
copyLocationAndDimension(mInputs, &request.inputs);
copyLocationAndDimension(mOutputs, &request.outputs);
// Hand every memory known to this execution to the driver, in the same order
// as mMemories so that the pool indices stored in the arguments line up.
uint32_t count = mMemories.size();
request.pools.resize(count);
for (uint32_t i = 0; i < count; i++) {
request.pools[i] = mMemories[i]->getHidlMemory();
}
// Prepare the event for asynchronous execution. The sp<Event>
// object is recorded if the execution has been successfully
// launched. The sp is used for ref-counting purposes. Without
// it, the HIDL service could attempt to communicate with a dead
// event object.
//
// TODO: Explain the "dead event" problem further, either here or
// in the design document.
sp<Event> eventSp = new Event();
LOG(DEBUG) << "Before preparedModel->execute() " << toString(request);
// Execute.
// TODO: What happens to the Event if the service dies abnormally
// -- won't that keep the Event live forever, because the service
// never has the opportunity to bump the reference count down? Or
// maybe the HIDL infrastructure handles this magically? At worst,
// it seems like this is a small memory leak, if the Event stays
// alive forever.
if (preparedModel->execute(request, eventSp) != ErrorStatus::NONE) {
LOG(DEBUG) << "**Execute failed**";
return ANEURALNETWORKS_OP_FAILED;
}
// TODO: Remove this synchronization point when the block of code below is
// removed.
Event::Status status = eventSp->wait();
if (status != Event::Status::SUCCESS) {
LOG(DEBUG) << "**Execute async failed**";
return ANEURALNETWORKS_OP_FAILED;
}
// Copy the output data from shared memory to the output buffers.
// TODO: Move this block of code somewhere else. It should not be in the
// startCompute function.
// TODO: outputMemory->update(); outputMemory->commit()
for (auto& info : mOutputs) {
if (info.state == ModelArgumentInfo::POINTER) {
DataLocation& loc = info.locationAndDimension.location;
uint8_t* data = nullptr;
// NOTE(review): same shadowing and repeated pointer lookup as in the input
// copy loop above.
int n = mOutputPointerArguments.getPointer(&data);
if (n != ANEURALNETWORKS_NO_ERROR) {
return n;
}
memcpy(info.buffer, data + loc.offset, loc.length);
}
}
LOG(DEBUG) << "ExecutionBuilder::startComputeOnDevice completed";
// Record the event only now that everything succeeded, so wait() has
// something to wait on.
mEvent = eventSp;
return ANEURALNETWORKS_NO_ERROR;
}
static void asyncStartComputeOnCpu(const Model& model, const Request& request,
const std::vector<RunTimePoolInfo>& runTimePoolInfos,
const sp<IEvent>& event) {
CpuExecutor executor;
int err = executor.run(model, request, runTimePoolInfos);
ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ?
ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
event->notify(status);
}
int ExecutionBuilder::startComputeOnCpu(const Model& model) {
// TODO: use a thread pool
// Prepare the event for asynchronous execution. The sp<Event> object is
// recorded if the execution has been successfully launched.
sp<Event> eventSp = new Event();
std::vector<RunTimePoolInfo> runTimePoolInfos;
uint32_t count = mMemories.size();
runTimePoolInfos.resize(count);
for (uint32_t i = 0; i < count; i++) {
const Memory* mem = mMemories[i];
runTimePoolInfos[i].set(mem->getHidlMemory());
}
// Create as many pools as there are input / output.
auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
for (ModelArgumentInfo& argumentInfo : argumentInfos) {
if (argumentInfo.state == ModelArgumentInfo::POINTER) {
RunTimePoolInfo runTimeInfo = {
.buffer = static_cast<uint8_t*>(argumentInfo.buffer)};
argumentInfo.locationAndDimension.location.poolIndex =
static_cast<uint32_t>(runTimePoolInfos.size());
argumentInfo.locationAndDimension.location.offset = 0;
runTimePoolInfos.push_back(runTimeInfo);
}
}
};
fixPointerArguments(mInputs);
fixPointerArguments(mOutputs);
Request request;
copyLocationAndDimension(mInputs, &request.inputs);
copyLocationAndDimension(mOutputs, &request.outputs);
// TODO: should model be moved with a std::cref?
std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
std::move(runTimePoolInfos), eventSp);
eventSp->bind_thread(std::move(thread));
mEvent = eventSp;
return ANEURALNETWORKS_NO_ERROR;
}
} // namespace nn
} // namespace android