Support the CPU fallback path with device memories.
CPU fallback with device memories works in four steps (sketched below):
1. Allocate BLOB mode AHWBs for the input and output device memories.
2. Copy the input device memories out into the AHWBs via IBuffer::copyTo.
3. Compute on the CPU against the AHWBs.
4. Copy the results back to the output device memories via IBuffer::copyFrom.
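
As a rough illustration, the flow is equivalent to the following
self-contained sketch. DeviceBuffer, BlobAhwb, and computeOnCpu are
simplified stand-ins invented for this example; the real runtime uses
hal::IBuffer, MemoryRuntimeAHWB, and StepExecutor::computeWithMemories
from the patch below.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Stand-in for a driver-managed device memory (hal::IBuffer).
    struct DeviceBuffer {
        std::vector<uint8_t> storage;  // pretend this lives on the device
        void copyTo(std::vector<uint8_t>* dst) const { *dst = storage; }
        void copyFrom(const std::vector<uint8_t>& src) { storage = src; }
    };

    // Stand-in for a BLOB mode AHardwareBuffer.
    struct BlobAhwb {
        explicit BlobAhwb(size_t size) : bytes(size) {}
        std::vector<uint8_t> bytes;
    };

    // Stand-in for CPU execution: adds one to each input byte.
    void computeOnCpu(const BlobAhwb& in, BlobAhwb* out) {
        for (size_t i = 0; i < in.bytes.size(); ++i) {
            out->bytes[i] = in.bytes[i] + 1;
        }
    }

    int main() {
        DeviceBuffer input, output;
        input.storage = {1, 2, 3};
        output.storage.resize(3);

        BlobAhwb inBlob(3), outBlob(3);  // 1. allocate BLOB mode buffers
        input.copyTo(&inBlob.bytes);     // 2. copy inputs out of device memory
        computeOnCpu(inBlob, &outBlob);  // 3. compute on CPU
        output.copyFrom(outBlob.bytes);  // 4. copy results back to device memory

        std::printf("%d %d %d\n", output.storage[0], output.storage[1],
                    output.storage[2]);  // prints "2 3 4"
        return 0;
    }

The patch implements this same flow inside the runtime, substituting the
blob AHWBs for the device memories in the pools passed to
computeWithMemories.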
Bug: 152208838
Test: NNT_static with a broken driver
Change-Id: I9fed3134a7c56c893ff94e892cc25d230a1bd972
Merged-In: I9fed3134a7c56c893ff94e892cc25d230a1bd972
(cherry picked from commit 7b352ec1d710de550df2719a98083b04ccadfe1a)
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index 27e8212..61e320f 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -1071,6 +1071,12 @@
std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::compute(
const std::optional<Deadline>& deadline,
const std::shared_ptr<ExecutionBurstController>& burstController) {
+ return computeWithMemories(deadline, mMemories.getObjects(), burstController);
+}
+
+std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::computeWithMemories(
+ const std::optional<Deadline>& deadline, const std::vector<const Memory*>& memories,
+ const std::shared_ptr<ExecutionBurstController>& burstController) {
CHECK(mPreparedModel != nullptr);
if (VLOG_IS_ON(EXECUTION)) {
@@ -1081,9 +1087,8 @@
const MeasureTiming measure = measureTiming(mExecutionBuilder);
const OptionalTimeoutDuration loopTimeoutDuration =
makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
- const auto [n, outputShapes, timing] =
- mPreparedModel->execute(mInputs, mOutputs, mMemories.getObjects(), burstController,
- measure, deadline, loopTimeoutDuration);
+ const auto [n, outputShapes, timing] = mPreparedModel->execute(
+ mInputs, mOutputs, memories, burstController, measure, deadline, loopTimeoutDuration);
mExecutionBuilder->reportTimingWithoutFencedExecutionCallback(timing);
return {n, std::move(outputShapes), timing};
@@ -1128,13 +1133,74 @@
const ExecutionPreference preference =
static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
const Priority priority = convertToHalPriority(ANEURALNETWORKS_PRIORITY_DEFAULT);
- const auto [n, preparedModel] =
- mDevice->prepareModel(makeModel, preference, priority, {}, {}, {});
- mPreparedModel = preparedModel;
+ auto [n, preparedModel] = mDevice->prepareModel(makeModel, preference, priority, {}, {}, {});
+ mPreparedModel = std::move(preparedModel);
if (n != ANEURALNETWORKS_NO_ERROR) {
return {n, {}, kNoTiming};
}
- return compute({}, /*burstController=*/nullptr);
+
+ // Prepare device memories for CPU fallback.
+ std::vector<const Memory*> memories = mMemories.getObjects();
+ std::vector<bool> isUsedAsInput(memories.size(), false);
+ std::vector<bool> isUsedAsOutput(memories.size(), false);
+ std::vector<std::unique_ptr<Memory>> blobAhwbs;
+
+ // Mark the input and output usages.
+ for (auto& input : mInputs) {
+ if (input.state() == ModelArgumentInfo::MEMORY) {
+ const uint32_t poolIndex = input.locationAndLength().poolIndex;
+ isUsedAsInput[poolIndex] = true;
+ }
+ }
+ for (auto& output : mOutputs) {
+ if (output.state() == ModelArgumentInfo::MEMORY) {
+ const uint32_t poolIndex = output.locationAndLength().poolIndex;
+ // Cannot allocate output buffers with unknown shapes.
+ if (mMemories[poolIndex]->getValidator().createdWithUnknownShape()) {
+ LOG(ERROR) << "Cannot fall back to CPU because at least one of the output operands "
+ "has unknown shape.";
+ return {ANEURALNETWORKS_OP_FAILED, {}, kNoTiming};
+ }
+ isUsedAsOutput[poolIndex] = true;
+ }
+ }
+
+ // Allocate BLOB mode AHardwareBuffers and read the data from input device memories.
+ for (uint32_t i = 0; i < memories.size(); i++) {
+ const Memory* memory = mMemories[i];
+ if (memory->getIBuffer() != nullptr) {
+ const uint32_t size = memory->getValidator().getMetadata().logicalSize;
+ auto [nAhwb, blobAhwb] = MemoryRuntimeAHWB::create(size);
+ if (nAhwb != ANEURALNETWORKS_NO_ERROR) {
+ return {nAhwb, {}, kNoTiming};
+ }
+ if (isUsedAsInput[i]) {
+ n = copyIBufferToHidlMemory(memory->getIBuffer(), blobAhwb->getHidlMemory());
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ return {n, {}, kNoTiming};
+ }
+ }
+ memories[i] = blobAhwb.get();
+ blobAhwbs.push_back(std::move(blobAhwb));
+ }
+ }
+
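+ // Fall back to CPU execution, with the blob AHWBs substituted for the device memories.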
+ auto [nCompute, outputShapes, timing] = computeWithMemories({}, memories);
+ if (nCompute != ANEURALNETWORKS_NO_ERROR) {
+ return {nCompute, std::move(outputShapes), timing};
+ }
+
+ // Write back to output device memories.
+ for (uint32_t i = 0; i < memories.size(); i++) {
+ const Memory* memory = mMemories[i];
+ if (memory->getIBuffer() != nullptr && isUsedAsOutput[i]) {
+ n = copyHidlMemoryToIBuffer(memories[i]->getHidlMemory(), memory->getIBuffer(), {});
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ return {n, {}, kNoTiming};
+ }
+ }
+ }
+ return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}
} // namespace nn
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index f32e8c1..f61df4c 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -287,6 +287,10 @@
int setInputOrOutputFromMemory(const hal::Operand& inputOrOutputOperand, const Memory* memory,
uint32_t offset, ModelArgumentInfo* inputOrOutputInfo);
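+ // Like compute(), but executes against the provided memory pools instead of mMemories.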
+ std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> computeWithMemories(
+ const std::optional<Deadline>& deadline, const std::vector<const Memory*>& memories,
+ const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);
+
// describes the full (possibly multiple-"step") execution
ExecutionBuilder* mExecutionBuilder;
diff --git a/runtime/Memory.cpp b/runtime/Memory.cpp
index 7ff3651..09e597e 100644
--- a/runtime/Memory.cpp
+++ b/runtime/Memory.cpp
@@ -134,7 +134,6 @@
}
Metadata getMetadata() const override {
- CHECK(mInitialized);
return {.logicalSize = TypeManager::get()->getSizeOfData(kOperand.type, mUpdatedDimensions),
.dimensions = mUpdatedDimensions,
.operand = kOperand};
@@ -158,6 +157,10 @@
return true;
}
+ bool createdWithUnknownShape() const override {
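+ // getSizeOfData() returns 0 when the initial dimensions or rank are unknown.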
+ return TypeManager::get()->getSizeOfData(kOperand.type, kInitialDimensions) == 0;
+ }
+
void setInitialized(bool initialized) override { mInitialized = initialized; }
bool isInitialized() const override { return mInitialized; }
@@ -243,7 +246,7 @@
return ANEURALNETWORKS_NO_ERROR;
}
-static int copyIBufferToHidlMemory(const sp<IBuffer>& src, const hidl_memory& dst) {
+int copyIBufferToHidlMemory(const sp<IBuffer>& src, const hidl_memory& dst) {
const auto ret = src->copyTo(dst);
if (!ret.isOk()) {
LOG(ERROR) << "ANeuralNetworksMemory_copy failure: " << ret.description();
@@ -252,8 +255,8 @@
return convertErrorStatusToResultCode(static_cast<ErrorStatus>(ret));
}
-static int copyHidlMemoryToIBuffer(const hidl_memory& src, const sp<IBuffer>& dst,
- const std::vector<uint32_t>& dimensions) {
+int copyHidlMemoryToIBuffer(const hidl_memory& src, const sp<IBuffer>& dst,
+ const std::vector<uint32_t>& dimensions) {
const auto ret = dst->copyFrom(src, dimensions);
if (!ret.isOk()) {
LOG(ERROR) << "ANeuralNetworksMemory_copy failure: " << ret.description();
diff --git a/runtime/Memory.h b/runtime/Memory.h
index dcedffa..56bf81d 100644
--- a/runtime/Memory.h
+++ b/runtime/Memory.h
@@ -151,10 +151,18 @@
// Try update the memory metadata with the provided metadata. Return false if incompatible.
virtual bool updateMetadata(const Metadata& metadata) = 0;
+ // Whether the memory was created with unknown dimensions or rank.
+ virtual bool createdWithUnknownShape() const { return false; }
+
virtual void setInitialized(bool) {}
virtual bool isInitialized() const { return true; }
};
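+// Copy between a driver-managed device memory (IBuffer) and a shared
+// memory region. Exposed for use by the CPU fallback path.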
+int copyIBufferToHidlMemory(const sp<hal::IBuffer>& src, const hal::hidl_memory& dst);
+
+int copyHidlMemoryToIBuffer(const hal::hidl_memory& src, const sp<hal::IBuffer>& dst,
+ const std::vector<uint32_t>& dimensions);
+
// Represents a memory region.
class Memory {
// Disallow copy and assign to prevent slicing