Add AHardwareBuffer support for NNAPI

   - The main purpose of AHardwareBuffer support is model inputs &
   outputs.
   - ANeuralNetworks_setOperandValueFromMemory only accepts BLOB mode
   AHardwareBuffer.
   - ANeuralNetworks_set[Input|Output]FromMemory can accept non-BLOB mode
   AHardwareBuffer. For a non-BLOB mode buffer, the entire buffer will
   be used.
   - There is no guarantee that arbitrary AHardwareBuffer can be used by
   arbitrary devices. The execution must fail if the driver cannot
   consume the buffer.
   - No CPU fallback for AHardwareBuffer except BLOB mode.

Test: mm
Test: NeuralNetworksTest_static
Change-Id: I9d9ac95aea1e56b583c06e4f9c9afdaab1403152
Merged-In: I9d9ac95aea1e56b583c06e4f9c9afdaab1403152
(cherry picked from commit 5777040ee6f93f4060f7efb6cffaf5bff37f99d8)
diff --git a/common/Android.bp b/common/Android.bp
index db40854..260840c 100644
--- a/common/Android.bp
+++ b/common/Android.bp
@@ -62,12 +62,16 @@
     shared_libs: [
         "libhidltransport",
         "libhidlmemory",
+        "libnativewindow",
         "[email protected]",
         "[email protected]",
         "[email protected]",
         "[email protected]",
         "[email protected]",
     ],
+    whole_static_libs: [
+        "libarect",
+    ],
     cflags: [
         "-Werror",
         "-Wall",
@@ -133,6 +137,7 @@
         "libhidlbase",
         "libhidltransport",
         "libhidlmemory",
+        "libnativewindow",
         "libtextclassifier_hash",
         "liblog",
         "libutils",
@@ -151,6 +156,7 @@
         "tensorflow_headers",
     ],
     whole_static_libs: [
+        "libarect",
         "libtflite_kernel_utils",
         "philox_random",
     ],
@@ -174,6 +180,7 @@
     name: "NeuralNetworksTest_operations",
     shared_libs: [
         "libhidlmemory",
+        "libnativewindow",
         "libneuralnetworks",
         "[email protected]",
         "[email protected]",
@@ -207,6 +214,7 @@
     name: "NeuralNetworksTest_utils",
     shared_libs: [
         "libhidlmemory",
+        "libnativewindow",
         "libneuralnetworks",
         "[email protected]",
         "[email protected]",
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index af7e4bc..f97ae93 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -209,8 +209,20 @@
         size_t size = hidlMemory.size();
         int fd = hidlMemory.handle()->data[0];
         int prot = hidlMemory.handle()->data[1];
-        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
-                                        hidlMemory.handle()->data[3]);
+        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2], hidlMemory.handle()->data[3]);
+        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
+        if (buffer == MAP_FAILED) {
+            LOG(ERROR) << "RunTimePoolInfo::set(): Can't mmap the file descriptor.";
+            if (fail) *fail = true;
+            return;
+        }
+    } else if (memType == "hardware_buffer_blob") {
+        // CpuExecutor uses BLOB mode hardware_buffer the same way as mmap_fd.
+        size_t size = hidlMemory.size();
+        int fd = hidlMemory.handle()->data[0];
+        // TODO: only map as READ & WRITE when needed.
+        int prot = PROT_READ | PROT_WRITE;
+        size_t offset = 0;
         buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
         if (buffer == MAP_FAILED) {
             LOG(ERROR) << "RunTimePoolInfo::set(): Can't mmap the file descriptor.";
@@ -224,8 +236,8 @@
     }
 
     mHidlMemory = hidlMemory;
-    mBuffer     = buffer;
-    mMemory     = memory;
+    mBuffer = buffer;
+    mMemory = memory;
 }
 
 RunTimePoolInfo::RunTimePoolInfo(uint8_t* buffer) {
@@ -246,10 +258,10 @@
     return *this;
 }
 
-void RunTimePoolInfo::moveFrom(RunTimePoolInfo &&other) {
+void RunTimePoolInfo::moveFrom(RunTimePoolInfo&& other) {
     mHidlMemory = std::move(other.mHidlMemory);
-    mBuffer     = std::move(other.mBuffer);
-    mMemory     = std::move(other.mMemory);
+    mBuffer = std::move(other.mBuffer);
+    mMemory = std::move(other.mMemory);
 }
 
 void RunTimePoolInfo::release() {
@@ -260,7 +272,7 @@
     auto memType = mHidlMemory.name();
     if (memType == "ashmem") {
         // nothing to do
-    } else if (memType == "mmap_fd") {
+    } else if (memType == "mmap_fd" || memType == "hardware_buffer_blob") {
         size_t size = mHidlMemory.size();
         if (munmap(mBuffer, size)) {
             LOG(ERROR) << "RunTimePoolInfo::release(): Can't munmap";
@@ -272,8 +284,8 @@
     }
 
     mHidlMemory = hidl_memory();
-    mMemory     = nullptr;
-    mBuffer     = nullptr;
+    mMemory = nullptr;
+    mBuffer = nullptr;
 }
 
 // Making sure the output data are correctly updated after execution.
@@ -427,8 +439,7 @@
                      const std::vector<RunTimePoolInfo>& modelPoolInfos,
                      const std::vector<RunTimePoolInfo>& requestPoolInfos) {
     NNTRACE_CPU(NNTRACE_PHASE_EXECUTION, "run");
-    VLOG(CPUEXE) << "CpuExecutor::run() with request("
-                 << SHOW_IF_DEBUG(toString(request)) << ")";
+    VLOG(CPUEXE) << "CpuExecutor::run() with request(" << SHOW_IF_DEBUG(toString(request)) << ")";
 
     // b/109953668, disable OpenMP
 #ifdef NNAPI_OPENMP
@@ -436,7 +447,7 @@
 #endif  // NNAPI_OPENMP
 
     mModel = &model;
-    mRequest = &request; // TODO check if mRequest is needed
+    mRequest = &request;  // TODO check if mRequest is needed
     initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
     // The model has serialized the operation in execution order.
     for (const auto& operation : model.operations) {
@@ -505,8 +516,9 @@
 
     // Adjust the runtime info for the arguments passed to the model,
     // modifying the buffer location, and possibly the dimensions.
-    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
-                                  const hidl_vec<RequestArgument>& arguments) {
+    auto updateForArguments = [this, &requestPoolInfos](
+                                      const std::vector<uint32_t>& indexes,
+                                      const hidl_vec<RequestArgument>& arguments) {
         nnAssert(indexes.size() == arguments.size());
         for (size_t i = 0; i < indexes.size(); i++) {
             const uint32_t operandIndex = indexes[i];
@@ -570,18 +582,17 @@
     auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                 size_t requiredOuts) -> bool {
         auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
-                          const char* type) -> bool {
+                                         const char* type) -> bool {
             size_t actualCount = indexes.size();
             if (actualCount != requiredCount) {
-                LOG(ERROR) << getOperationName(operation.type)
-                           << ": Invalid number of " << type << " operands. Got " << actualCount
-                           << " of " << requiredCount;
+                LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type
+                           << " operands. Got " << actualCount << " of " << requiredCount;
                 return false;
             }
             for (size_t i = 0; i < actualCount; i++) {
                 if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
-                    LOG(ERROR) << getOperationName(operation.type) << " " << type
-                               << " operand " << i << " is required but missing.";
+                    LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand "
+                               << i << " is required but missing.";
                     return false;
                 }
             }
@@ -723,9 +734,9 @@
                 !allParametersPresent(inCount, 1)) {
                 return ANEURALNETWORKS_BAD_DATA;
             }
-            const RunTimeOperandInfo& input  = mOperands[ins[0]];
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
             const RunTimeOperandInfo& filter = mOperands[ins[1]];
-            const RunTimeOperandInfo& bias   = mOperands[ins[2]];
+            const RunTimeOperandInfo& bias = mOperands[ins[2]];
 
             int32_t padding_left, padding_right;
             int32_t padding_top, padding_bottom;
@@ -752,14 +763,14 @@
                 }
                 useImplicitPadding = true;
             } else if (inCount >= 11 && mOperands[ins[8]].type == OperandType::INT32) {
-                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
-                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
-                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
-                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
+                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
                 depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
+                activation = getScalarData<int32_t>(mOperands[ins[10]]);
                 if (inCount >= 12) {
                     data_layout = getScalarData<bool>(mOperands[ins[11]]);
                 }
@@ -787,9 +798,9 @@
             if (useImplicitPadding) {
                 Shape inputShape = input_tmp.shape();
                 Shape filterShape = filter.shape();
-                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_width = getSizeOfDimension(inputShape, 2);
                 int32_t input_height = getSizeOfDimension(inputShape, 1);
-                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
+                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                 int32_t filter_height = getSizeOfDimension(filterShape, 1);
                 calculateExplicitPadding(input_width, stride_width, dilation_width_factor,
                                          filter_width, padding_implicit, &padding_left,
@@ -859,9 +870,9 @@
                 !allParametersPresent(inCount, 1)) {
                 return ANEURALNETWORKS_BAD_DATA;
             }
-            const RunTimeOperandInfo& input  = mOperands[ins[0]];
+            const RunTimeOperandInfo& input = mOperands[ins[0]];
             const RunTimeOperandInfo& filter = mOperands[ins[1]];
-            const RunTimeOperandInfo& bias   = mOperands[ins[2]];
+            const RunTimeOperandInfo& bias = mOperands[ins[2]];
 
             int32_t padding_left, padding_right;
             int32_t padding_top, padding_bottom;
@@ -886,13 +897,13 @@
                 }
                 useImplicitPadding = true;
             } else if (inCount >= 10 && mOperands[ins[7]].type == OperandType::INT32) {
-                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
-                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
-                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
-                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation = getScalarData<int32_t>(mOperands[ins[9]]);
                 if (inCount >= 11) {
                     data_layout = getScalarData<bool>(mOperands[ins[10]]);
                 }
@@ -920,9 +931,9 @@
             if (useImplicitPadding) {
                 Shape inputShape = input_tmp.shape();
                 Shape filterShape = filter.shape();
-                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_width = getSizeOfDimension(inputShape, 2);
                 int32_t input_height = getSizeOfDimension(inputShape, 1);
-                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
+                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                 int32_t filter_height = getSizeOfDimension(filterShape, 1);
                 calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                          &padding_left, &padding_right);
@@ -1000,25 +1011,25 @@
             bool data_layout = false;
 
             if (inCount >= 10) {
-                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
-                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
-                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
-                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
-                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
-                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+                padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+                padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+                padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+                filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+                filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation = getScalarData<int32_t>(mOperands[ins[9]]);
                 if (inCount == 11) {
                     data_layout = getScalarData<bool>(mOperands[ins[10]]);
                 }
             } else {
                 padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
-                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
-                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+                filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+                filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation = getScalarData<int32_t>(mOperands[ins[6]]);
                 if (inCount == 8) {
                     data_layout = getScalarData<bool>(mOperands[ins[7]]);
                 }
@@ -1039,14 +1050,12 @@
 
             if (inCount <= 8) {
                 Shape inputShape = input_tmp.shape();
-                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_width = getSizeOfDimension(inputShape, 2);
                 int32_t input_height = getSizeOfDimension(inputShape, 1);
-                calculateExplicitPadding(input_width, stride_width,
-                                         filter_width, padding_implicit,
+                calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                          &padding_left, &padding_right);
-                calculateExplicitPadding(input_height, stride_height,
-                                         filter_height, padding_implicit,
-                                         &padding_top, &padding_bottom);
+                calculateExplicitPadding(input_height, stride_height, filter_height,
+                                         padding_implicit, &padding_top, &padding_bottom);
             }
 
             if (!genericPoolingPrepare(input_tmp.shape(), padding_left, padding_right, padding_top,
@@ -1101,25 +1110,25 @@
             bool data_layout = false;
 
             if (inCount >= 10) {
-                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
-                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
-                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
-                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
-                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
-                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+                padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+                padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+                padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+                filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+                filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation = getScalarData<int32_t>(mOperands[ins[9]]);
                 if (inCount == 11) {
                     data_layout = getScalarData<bool>(mOperands[ins[10]]);
                 }
             } else {
                 padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
-                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
-                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+                filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+                filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation = getScalarData<int32_t>(mOperands[ins[6]]);
                 if (inCount == 8) {
                     data_layout = getScalarData<bool>(mOperands[ins[7]]);
                 }
@@ -1140,14 +1149,12 @@
 
             if (inCount <= 8) {
                 Shape inputShape = input_tmp.shape();
-                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_width = getSizeOfDimension(inputShape, 2);
                 int32_t input_height = getSizeOfDimension(inputShape, 1);
-                calculateExplicitPadding(input_width, stride_width,
-                                         filter_width, padding_implicit,
+                calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                          &padding_left, &padding_right);
-                calculateExplicitPadding(input_height, stride_height,
-                                         filter_height, padding_implicit,
-                                         &padding_top, &padding_bottom);
+                calculateExplicitPadding(input_height, stride_height, filter_height,
+                                         padding_implicit, &padding_top, &padding_bottom);
             }
 
             if (!genericPoolingPrepare(input_tmp.shape(), padding_left, padding_right, padding_top,
@@ -1196,25 +1203,25 @@
             bool data_layout = false;
 
             if (inCount >= 10) {
-                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
-                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
-                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
-                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
-                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
-                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
+                padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+                padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+                padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+                padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+                filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+                filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+                activation = getScalarData<int32_t>(mOperands[ins[9]]);
                 if (inCount == 11) {
                     data_layout = getScalarData<bool>(mOperands[ins[10]]);
                 }
             } else {
                 padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
-                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
-                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
-                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
-                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
-                activation       = getScalarData<int32_t>(mOperands[ins[6]]);
+                stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+                stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+                filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+                filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+                activation = getScalarData<int32_t>(mOperands[ins[6]]);
                 if (inCount == 8) {
                     data_layout = getScalarData<bool>(mOperands[ins[7]]);
                 }
@@ -1235,14 +1242,12 @@
 
             if (inCount <= 8) {
                 Shape inputShape = input_tmp.shape();
-                int32_t input_width  = getSizeOfDimension(inputShape, 2);
+                int32_t input_width = getSizeOfDimension(inputShape, 2);
                 int32_t input_height = getSizeOfDimension(inputShape, 1);
-                calculateExplicitPadding(input_width, stride_width,
-                                         filter_width, padding_implicit,
+                calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                          &padding_left, &padding_right);
-                calculateExplicitPadding(input_height, stride_height,
-                                         filter_height, padding_implicit,
-                                         &padding_top, &padding_bottom);
+                calculateExplicitPadding(input_height, stride_height, filter_height,
+                                         padding_implicit, &padding_top, &padding_bottom);
             }
 
             if (!genericPoolingPrepare(input_tmp.shape(), padding_left, padding_right, padding_top,
@@ -1438,9 +1443,9 @@
             if (!allParametersPresent(4, 1)) {
                 return ANEURALNETWORKS_BAD_DATA;
             }
-            RunTimeOperandInfo& input   = mOperands[ins[0]];
+            RunTimeOperandInfo& input = mOperands[ins[0]];
             RunTimeOperandInfo& weights = mOperands[ins[1]];
-            RunTimeOperandInfo& bias    = mOperands[ins[2]];
+            RunTimeOperandInfo& bias = mOperands[ins[2]];
 
             int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
 
@@ -1761,12 +1766,9 @@
             }
         } break;
         case OperationType::EMBEDDING_LOOKUP: {
-            const RunTimeOperandInfo &values =
-                mOperands[ins[EmbeddingLookup::kValueTensor]];
-            const RunTimeOperandInfo &lookups =
-                mOperands[ins[EmbeddingLookup::kLookupTensor]];
-            RunTimeOperandInfo &output =
-                mOperands[outs[EmbeddingLookup::kOutputTensor]];
+            const RunTimeOperandInfo& values = mOperands[ins[EmbeddingLookup::kValueTensor]];
+            const RunTimeOperandInfo& lookups = mOperands[ins[EmbeddingLookup::kLookupTensor]];
+            RunTimeOperandInfo& output = mOperands[outs[EmbeddingLookup::kOutputTensor]];
 
             Shape outputShape;
             EmbeddingLookup lookup(operation, mOperands);
@@ -1775,17 +1777,12 @@
                       setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lookup.Eval();
         } break;
         case OperationType::HASHTABLE_LOOKUP: {
-            const RunTimeOperandInfo &lookups =
-                mOperands[ins[HashtableLookup::kLookupTensor]];
-            const RunTimeOperandInfo &keys =
-                mOperands[ins[HashtableLookup::kKeyTensor]];
-            const RunTimeOperandInfo &values =
-                mOperands[ins[HashtableLookup::kValueTensor]];
+            const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]];
+            const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]];
+            const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]];
 
-            RunTimeOperandInfo &output =
-                mOperands[outs[HashtableLookup::kOutputTensor]];
-            RunTimeOperandInfo &hits =
-                mOperands[outs[HashtableLookup::kHitsTensor]];
+            RunTimeOperandInfo& output = mOperands[outs[HashtableLookup::kOutputTensor]];
+            RunTimeOperandInfo& hits = mOperands[outs[HashtableLookup::kHitsTensor]];
 
             Shape outputShape, hitShape;
             HashtableLookup lookup(operation, mOperands);
@@ -1850,10 +1847,8 @@
                       multinomial.Eval();
         } break;
         case OperationType::RNN: {
-            RunTimeOperandInfo &hiddenStateOut =
-                mOperands[outs[RNN::kHiddenStateOutTensor]];
-            RunTimeOperandInfo &output =
-                mOperands[outs[RNN::kOutputTensor]];
+            RunTimeOperandInfo& hiddenStateOut = mOperands[outs[RNN::kHiddenStateOutTensor]];
+            RunTimeOperandInfo& output = mOperands[outs[RNN::kOutputTensor]];
 
             Shape hiddenStateShape, outputShape;
             RNN rnn_cell(operation, mOperands);
@@ -1863,10 +1858,8 @@
                       setInfoAndAllocateIfNeeded(&output, outputShape, &result) && rnn_cell.Eval();
         } break;
         case OperationType::SVDF: {
-            RunTimeOperandInfo &stateOut =
-                mOperands[outs[SVDF::kStateOutTensor]];
-            RunTimeOperandInfo &output =
-                mOperands[outs[SVDF::kOutputTensor]];
+            RunTimeOperandInfo& stateOut = mOperands[outs[SVDF::kStateOutTensor]];
+            RunTimeOperandInfo& output = mOperands[outs[SVDF::kOutputTensor]];
 
             Shape stateShape, outputShape;
             SVDF svdf(operation, mOperands);
@@ -2749,5 +2742,5 @@
 }
 #endif  // NNAPI_OPENMP
 
-} // namespace nn
-} // namespace android
+}  // namespace nn
+}  // namespace android
diff --git a/common/ValidateHal.cpp b/common/ValidateHal.cpp
index 3b5792e..ce0e798 100644
--- a/common/ValidateHal.cpp
+++ b/common/ValidateHal.cpp
@@ -393,7 +393,8 @@
 static bool validatePools(const hidl_vec<hidl_memory>& pools) {
     for (const hidl_memory& memory : pools) {
         const auto& name = memory.name();
-        if (name != "ashmem" && name != "mmap_fd") {
+        if (name != "ashmem" && name != "mmap_fd" && name != "hardware_buffer_blob" &&
+            name != "hardware_buffer") {
             LOG(ERROR) << "Unsupported memory type " << name;
             return false;
         }
diff --git a/driver/sample/Android.bp b/driver/sample/Android.bp
index 2e901d6..9967988 100644
--- a/driver/sample/Android.bp
+++ b/driver/sample/Android.bp
@@ -35,6 +35,7 @@
         "libhidlbase",
         "libhidlmemory",
         "libhidltransport",
+        "libnativewindow",
         "libtextclassifier_hash",
         "liblog",
         "libutils",
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 119ef8e..4ab0bc3 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -57,6 +57,7 @@
                 "libhidltransport",
                 "libhidlmemory",
                 "liblog",
+                "libnativewindow",
                 "libutils",
                 "[email protected]",
                 "[email protected]",
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index c163f5b..8b04620 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -81,15 +81,14 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand,
-                                              uint32_t poolIndex, uint32_t offset) {
+int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex,
+                                              uint32_t offset) {
     int n = updateDimensionInfo(operand, nullptr);
     if (n != ANEURALNETWORKS_NO_ERROR) {
         return n;
     }
     state = ModelArgumentInfo::MEMORY;
-    locationAndLength =
-            {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)};
+    locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)};
     buffer = nullptr;
     return ANEURALNETWORKS_NO_ERROR;
 }
@@ -126,12 +125,12 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
-        mModel(compilation->mModel),
-        mPlan(&compilation->mPlan),
-        mPartitioning(compilation->mPartitioning),
-        mInputs(mModel->inputCount()),
-        mOutputs(mModel->outputCount()) {
+ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation)
+    : mModel(compilation->mModel),
+      mPlan(&compilation->mPlan),
+      mPartitioning(compilation->mPartitioning),
+      mInputs(mModel->inputCount()),
+      mOutputs(mModel->outputCount()) {
     VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
 }
 
@@ -167,7 +166,12 @@
                    << count;
         return ANEURALNETWORKS_BAD_DATA;
     }
-    if (!memory->validateSize(offset, length)) {
+    // Both offset & length must be zero for Non-BLOB format AHardwareBuffer.
+    if (memory->getHidlMemory().name() == "hardware_buffer" && (offset != 0 || length != 0)) {
+        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory has non-zero offset or length"
+                   << " for Non-BLOB format AHardwareBuffer.";
+        return ANEURALNETWORKS_BAD_DATA;
+    } else if (!memory->validateSize(offset, length)) {
         return ANEURALNETWORKS_BAD_DATA;
     }
     // TODO validate the rest
@@ -176,8 +180,8 @@
                                         length);
 }
 
-int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
-                                size_t length) {
+int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type,
+                                void* buffer, size_t length) {
     uint32_t count = static_cast<uint32_t>(mOutputs.size());
     if (index >= count) {
         LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
@@ -207,7 +211,12 @@
                    << count;
         return ANEURALNETWORKS_BAD_DATA;
     }
-    if (!memory->validateSize(offset, length)) {
+    // Both offset & length must be zero for Non-BLOB format AHardwareBuffer.
+    if (memory->getHidlMemory().name() == "hardware_buffer" && (offset != 0 || length != 0)) {
+        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory has non-zero offset or length"
+                   << " for Non-BLOB format AHardwareBuffer.";
+        return ANEURALNETWORKS_BAD_DATA;
+    } else if (!memory->validateSize(offset, length)) {
         return ANEURALNETWORKS_BAD_DATA;
     }
     // TODO validate the rest
@@ -281,8 +290,7 @@
 // (2) If unsuccessful, attempt to execute the full model on CPU,
 //     ensure that executionCallback->notify() is called, and return
 //     false.
-static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder,
-                               const ExecutionPlan* plan,
+static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder, const ExecutionPlan* plan,
                                std::shared_ptr<ExecutionPlan::Controller> controller,
                                const sp<ExecutionCallback>& executionCallback) {
     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
@@ -465,15 +473,16 @@
 }
 
 static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
-                                     hidl_vec<RequestArgument>* ioInfos) {
+                                    hidl_vec<RequestArgument>* ioInfos) {
     size_t count = argumentInfos.size();
     ioInfos->resize(count);
     for (size_t i = 0; i < count; i++) {
         const auto& info = argumentInfos[i];
-        (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
-                          .location = info.locationAndLength,
-                          .dimensions = info.dimensions,
-                        };
+        (*ioInfos)[i] = {
+                .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
+                .location = info.locationAndLength,
+                .dimensions = info.dimensions,
+        };
     }
 }
 
@@ -506,12 +515,10 @@
         case ModelArgumentInfo::UNSPECIFIED:
             break;
         case ModelArgumentInfo::MEMORY: {
-            const uint32_t builderPoolIndex =
-                    builderInputOrOutput.locationAndLength.poolIndex;
+            const uint32_t builderPoolIndex = builderInputOrOutput.locationAndLength.poolIndex;
             const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
             const uint32_t executorPoolIndex = mMemories.add(memory);
-            executorInputOrOutput->locationAndLength.poolIndex =
-                    executorPoolIndex;
+            executorInputOrOutput->locationAndLength.poolIndex = executorPoolIndex;
             break;
         }
     }
@@ -528,7 +535,7 @@
     return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset);
 }
 
-static void logArguments(const char* kind, const std::vector<ModelArgumentInfo> &args) {
+static void logArguments(const char* kind, const std::vector<ModelArgumentInfo>& args) {
     for (unsigned i = 0; i < args.size(); i++) {
         const auto& arg = args[i];
         std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
@@ -538,10 +545,8 @@
                 break;
             case ModelArgumentInfo::MEMORY:
                 VLOG(EXECUTION) << prefix << "MEMORY("
-                                << "pool=" << arg.locationAndLength.poolIndex
-                                << ", "
-                                << "off=" << arg.locationAndLength.offset
-                                << ")";
+                                << "pool=" << arg.locationAndLength.poolIndex << ", "
+                                << "off=" << arg.locationAndLength.offset << ")";
                 break;
             case ModelArgumentInfo::HAS_NO_VALUE:
                 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
@@ -590,7 +595,7 @@
         // remove this entire block of code since it is a stale path that is only
         // encountered on an #if-removed code.
         ExecutionPreference preference =
-            static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
+                static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
         ErrorStatus prepareLaunchStatus =
                 mDevice->getInterface()->prepareModel(model, preference, preparedModelCallback);
         if (prepareLaunchStatus != ErrorStatus::NONE) {
@@ -698,9 +703,8 @@
     Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
     if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
         VLOG(EXECUTION) << "**Execution failed**";
-        return callbackStatus.isOk()
-                ? convertErrorStatusToResultCode(callbackStatus)
-                : ANEURALNETWORKS_OP_FAILED;
+        return callbackStatus.isOk() ? convertErrorStatusToResultCode(callbackStatus)
+                                     : ANEURALNETWORKS_OP_FAILED;
     }
 
     // Copy the output data from shared memory to the output buffers.
@@ -770,7 +774,7 @@
         for (ModelArgumentInfo& argumentInfo : argumentInfos) {
             if (argumentInfo.state == ModelArgumentInfo::POINTER) {
                 argumentInfo.locationAndLength.poolIndex =
-                            static_cast<uint32_t>(requestPoolInfos.size());
+                        static_cast<uint32_t>(requestPoolInfos.size());
                 argumentInfo.locationAndLength.offset = 0;
                 requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer));
             }
diff --git a/runtime/Memory.h b/runtime/Memory.h
index 15b56e5..5be5007 100644
--- a/runtime/Memory.h
+++ b/runtime/Memory.h
@@ -23,6 +23,7 @@
 #include <cutils/native_handle.h>
 #include <sys/mman.h>
 #include <unordered_map>
+#include "vndk/hardware_buffer.h"
 
 namespace android {
 namespace nn {
@@ -31,7 +32,7 @@
 
 // Represents a memory region.
 class Memory {
-public:
+   public:
     Memory() {}
     virtual ~Memory() {}
 
@@ -47,13 +48,19 @@
     hardware::hidl_memory getHidlMemory() const { return mHidlMemory; }
 
     // Returns a pointer to the underlying memory of this memory object.
+    // The function will fail if the memory is not CPU accessible and nullptr
+    // will be returned.
     virtual int getPointer(uint8_t** buffer) const {
         *buffer = static_cast<uint8_t*>(static_cast<void*>(mMemory->getPointer()));
+        if (*buffer == nullptr) {
+            return ANEURALNETWORKS_BAD_DATA;
+        }
         return ANEURALNETWORKS_NO_ERROR;
     }
 
     virtual bool validateSize(uint32_t offset, uint32_t length) const;
-protected:
+
+   protected:
     // The hidl_memory handle for this shared memory.  We will pass this value when
     // communicating with the drivers.
     hardware::hidl_memory mHidlMemory;
@@ -61,9 +68,9 @@
 };
 
 class MemoryFd : public Memory {
-public:
+   public:
     MemoryFd() {}
-    ~MemoryFd();
+    ~MemoryFd() override;
 
     // Disallow copy semantics to ensure the runtime object can only be freed
     // once. Copy semantics could be enabled if some sort of reference counting
@@ -78,24 +85,82 @@
 
     int getPointer(uint8_t** buffer) const override;
 
-private:
+   private:
     native_handle_t* mHandle = nullptr;
     mutable uint8_t* mMapping = nullptr;
 };
 
+// TODO(miaowang): move function definitions to Memory.cpp
+class MemoryAHWB : public Memory {
+   public:
+    MemoryAHWB() {}
+    ~MemoryAHWB() override{};
+
+    // Disallow copy semantics to ensure the runtime object can only be freed
+    // once. Copy semantics could be enabled if some sort of reference counting
+    // or deep-copy system for runtime objects is added later.
+    MemoryAHWB(const MemoryAHWB&) = delete;
+    MemoryAHWB& operator=(const MemoryAHWB&) = delete;
+
+    // Keep track of the provided AHardwareBuffer handle.
+    int set(const AHardwareBuffer* ahwb) {
+        AHardwareBuffer_describe(ahwb, &mBufferDesc);
+        const native_handle_t* handle = AHardwareBuffer_getNativeHandle(ahwb);
+        mHardwareBuffer = ahwb;
+        if (mBufferDesc.format == AHARDWAREBUFFER_FORMAT_BLOB) {
+            mHidlMemory = hidl_memory("hardware_buffer_blob", handle, mBufferDesc.width);
+        } else {
+            // memory size is not used.
+            mHidlMemory = hidl_memory("hardware_buffer", handle, 0);
+        }
+        return ANEURALNETWORKS_NO_ERROR;
+    };
+
+    int getPointer(uint8_t** buffer) const override {
+        *buffer = nullptr;
+        return ANEURALNETWORKS_BAD_DATA;
+    };
+
+    // validateSize should only be called for blob mode AHardwareBuffer.
+    // Calling it on non-blob mode AHardwareBuffer will result in an error.
+    // TODO(miaowang): consider separate blob and non-blob into different classes.
+    bool validateSize(uint32_t offset, uint32_t length) const override {
+        if (mHardwareBuffer == nullptr) {
+            LOG(ERROR) << "MemoryAHWB has not been initialized.";
+            return false;
+        }
+        // validateSize should only be called on BLOB mode buffer.
+        if (mBufferDesc.format == AHARDWAREBUFFER_FORMAT_BLOB) {
+            if (offset > mBufferDesc.width || length > mBufferDesc.width - offset) {
+                LOG(ERROR) << "Request size larger than the memory size.";
+                return false;
+            } else {
+                return true;
+            }
+        } else {
+            LOG(ERROR) << "Invalid AHARDWAREBUFFER_FORMAT, must be AHARDWAREBUFFER_FORMAT_BLOB.";
+            return false;
+        }
+    }
+
+   private:
+    const AHardwareBuffer* mHardwareBuffer = nullptr;
+    AHardwareBuffer_Desc mBufferDesc;
+};
+
 // A utility class to accumulate mulitple Memory objects and assign each
 // a distinct index number, starting with 0.
 //
 // The user of this class is responsible for avoiding concurrent calls
 // to this class from multiple threads.
 class MemoryTracker {
-private:
+   private:
     // The vector of Memory pointers we are building.
     std::vector<const Memory*> mMemories;
     // A faster way to see if we already have a memory than doing find().
     std::unordered_map<const Memory*, uint32_t> mKnown;
 
-public:
+   public:
     // Adds the memory, if it does not already exists.  Returns its index.
     // The memories should survive the tracker.
     uint32_t add(const Memory* memory);
@@ -105,7 +170,7 @@
     const Memory* operator[](size_t i) const { return mMemories[i]; }
     // Iteration
     decltype(mMemories.begin()) begin() { return mMemories.begin(); }
-    decltype(mMemories.end())   end()   { return mMemories.end(); }
+    decltype(mMemories.end()) end() { return mMemories.end(); }
 };
 
 }  // namespace nn
diff --git a/runtime/ModelBuilder.cpp b/runtime/ModelBuilder.cpp
index 64852e3..d685ad2 100644
--- a/runtime/ModelBuilder.cpp
+++ b/runtime/ModelBuilder.cpp
@@ -94,9 +94,7 @@
         }
         operand.lifetime = OperandLifeTime::NO_VALUE;
         // The location is unused and is set to zeros.
-        operand.location = {.poolIndex = 0,
-                            .offset = 0,
-                            .length = 0};
+        operand.location = {.poolIndex = 0, .offset = 0, .length = 0};
     } else {
         if (length > 0xFFFFFFFF) {
             LOG(ERROR) << "ANeuralNetworksModel_setOperandValue value length of " << length
@@ -116,7 +114,7 @@
             mSmallOperandValues.resize(existingSize + extraBytes + valueLength);
             operand.lifetime = OperandLifeTime::CONSTANT_COPY;
             operand.location = {
-                .poolIndex = 0, .offset = existingSize + extraBytes, .length = valueLength};
+                    .poolIndex = 0, .offset = existingSize + extraBytes, .length = valueLength};
             memcpy(&mSmallOperandValues[operand.location.offset], buffer, valueLength);
             VLOG(MODEL) << "Copied small value to offset " << operand.location.offset;
         } else {
@@ -125,7 +123,8 @@
             // The values for poolIndex and offset will be set when the model is finished.
             typedef decltype(operand.location.poolIndex) PoolIndexType;
             typedef decltype(operand.location.offset) OffsetType;
-            operand.location = {.poolIndex = ~PoolIndexType(0), .offset = ~OffsetType(0),
+            operand.location = {.poolIndex = ~PoolIndexType(0),
+                                .offset = ~OffsetType(0),
                                 .length = valueLength};
             // We keep track of the buffers. We'll allocate the shared memory only
             // once we know the total size, to avoid needless copies.
@@ -175,7 +174,7 @@
         // Calculate the size of the shared memory needed for all the large values.
         // Also sets the offset for each value within the memory.
         size_t poolSize = 0;
-        for (LargeValue& l: mLargeOperandValues) {
+        for (LargeValue& l : mLargeOperandValues) {
             Operand& operand = mOperands[l.operandIndex];
             nnAssert(operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE);
             poolSize += alignBytesNeeded(poolSize, operand.location.length);
@@ -198,7 +197,7 @@
                     << poolIndex;
 
         // Copy the values to this memory.
-        for (LargeValue& l: mLargeOperandValues) {
+        for (LargeValue& l : mLargeOperandValues) {
             Operand& operand = mOperands[l.operandIndex];
             operand.location.poolIndex = poolIndex;
             memcpy(memoryPointer + operand.location.offset, l.buffer, operand.location.length);
@@ -209,7 +208,8 @@
 
 int ModelBuilder::setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                                             size_t length) {
-    VLOG(MODEL) << __func__ << " for operand " << index << " offset " << offset << " size " << length;
+    VLOG(MODEL) << __func__ << " for operand " << index << " offset " << offset << " size "
+                << length;
     if (badState("setOperandValueFromMemory")) {
         return ANEURALNETWORKS_BAD_STATE;
     }
@@ -221,6 +221,12 @@
     }
     Operand& operand = mOperands[index];
     uint32_t neededLength = sizeOfData(operand.type, operand.dimensions);
+    // Only BLOB format AHardwareBuffer can be used for constant data.
+    if (memory->getHidlMemory().name() == "hardware_buffer") {
+        LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory passed an AHardwareBuffer"
+                   << " that is not in AHARDWAREBUFFER_FORMAT_BLOB format";
+        return ANEURALNETWORKS_UNMAPPABLE;
+    }
     if (neededLength != length) {
         LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory setting " << length
                    << " bytes when needing " << neededLength;
@@ -231,7 +237,7 @@
     }
     operand.lifetime = OperandLifeTime::CONSTANT_REFERENCE;
     operand.location = {
-                .poolIndex = mMemories.add(memory), .offset = offset, .length = neededLength};
+            .poolIndex = mMemories.add(memory), .offset = offset, .length = neededLength};
     return ANEURALNETWORKS_NO_ERROR;
 }
 
@@ -272,7 +278,7 @@
 }
 
 int ModelBuilder::identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs,
-                                      uint32_t outputCount, const uint32_t* outputs) {
+                                           uint32_t outputCount, const uint32_t* outputs) {
     if (badState("identifyInputsAndOutputs")) {
         return ANEURALNETWORKS_BAD_STATE;
     }
@@ -296,7 +302,8 @@
         for (uint32_t i = 0; i < indexCount; i++) {
             const uint32_t operandIndex = indexList[i];
             if (operandIndex >= mOperands.size()) {
-                LOG(ERROR) << "ANeuralNetworksModel_identifyInputsAndOutputs Can't set input or output "
+                LOG(ERROR) << "ANeuralNetworksModel_identifyInputsAndOutputs Can't set input or "
+                              "output "
                               "to be "
                            << operandIndex << " as this exceeds the numbe of operands "
                            << mOperands.size();
@@ -404,7 +411,7 @@
                 lifetime == OperandLifeTime::MODEL_OUTPUT) {
                 count++;
                 operandToOperations.insert(
-                            std::pair<uint32_t, uint32_t>(operandIndex, operationIndex));
+                        std::pair<uint32_t, uint32_t>(operandIndex, operationIndex));
             }
         }
         if (count == 0) {
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index e0c91d0..c053fad 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -32,6 +32,8 @@
 #include "Tracing.h"
 #include "Utils.h"
 
+#include "vndk/hardware_buffer.h"
+
 #include <cstddef>
 #include <memory>
 #include <vector>
@@ -546,6 +548,22 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
+int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer* ahwb,
+                                                    ANeuralNetworksMemory** memory) {
+    NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksMemory_createFromAHardwareBuffer");
+    *memory = nullptr;
+    std::unique_ptr<MemoryAHWB> m = std::make_unique<MemoryAHWB>();
+    if (m == nullptr) {
+        return ANEURALNETWORKS_OUT_OF_MEMORY;
+    }
+    int n = m->set(ahwb);
+    if (n != ANEURALNETWORKS_NO_ERROR) {
+        return n;
+    }
+    *memory = reinterpret_cast<ANeuralNetworksMemory*>(m.release());
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
 void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) {
     NNTRACE_RT(NNTRACE_PHASE_TERMINATION, "ANeuralNetworksMemory_free");
     // No validation.  Free of nullptr is valid.
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 1c34973..fe03bef 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -42,6 +42,7 @@
  *   - DO NOT CHANGE THE LAYOUT OR SIZE OF STRUCTURES
  */
 
+#include <android/hardware_buffer.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <sys/cdefs.h>
@@ -4212,7 +4213,8 @@
 
     /**
      * Failure caused by not being able to map a file into memory.
-     * This may be caused by a file descriptor not being mappable.
+     * This may be caused by a file descriptor not being mappable, or an AHardwareBuffer
+     * not supported by the device.
      * Mitigate by reading its content into memory.
      */
     ANEURALNETWORKS_UNMAPPABLE = 7,
@@ -4249,9 +4251,11 @@
  * By using shared memory, a program can efficiently communicate to the
  * runtime and drivers the tensors that define a model. See
  * {@link ANeuralNetworksModel_setOperandValueFromMemory}. An application
- * should typically create one shared memory object that contains every tensor
+ * should typically create one shared memory object that contains every constant tensor
  * needed to define a model. {@link ANeuralNetworksMemory_createFromFd} can be
  * used to create shared memory from a file handle.
+ * {@link ANeuralNetworksMemory_createFromAHardwareBuffer} can be used to
+ * create shared memory from an AHardwareBuffer handle.
  *
  * Memory objects can also be used to specify the input and output arguments of
  * an execution. See {@link ANeuralNetworksExecution_setInputFromMemory}
@@ -4715,6 +4719,8 @@
  * @param execution The execution to be scheduled and executed.
  *
  * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ *         ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory cannot
+ *         be properly mapped.
  */
 int ANeuralNetworksExecution_compute(ANeuralNetworksExecution* execution);
 
@@ -4818,6 +4824,41 @@
 int ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution* execution,
                                           ANeuralNetworksBurst* burst) __INTRODUCED_IN(29);
 
+/**
+ * Creates a shared memory object from an AHardwareBuffer handle.
+ *
+ * If the shared memory is backed by an AHardwareBuffer of AHARDWAREBUFFER_FORMAT_BLOB
+ * format, it can be used the same way as shared memory created from a file handle. See
+ * {@link ANeuralNetworksMemory} for a description on how to use this shared memory.
+ *
+ * If the shared memory is backed by an AHardwareBuffer of a format other than
+ * AHARDWAREBUFFER_FORMAT_BLOB, it can only be used for Model inputs and outputs.
+ * When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} with the shared memory, both
+ * offset and length must be set to zero and the entire memory region will be
+ * associated with the specified input or output operand. There is no guarantee
+ * that an arbitrary AHardwareBuffer_Format and AHardwareBuffer_UsageFlags combination
+ * can be used by arbitrary devices. The execution will fail if selected set of devices
+ * cannot consume the buffer.
+ *
+ * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared memory
+ * backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB is
+ * disallowed.
+ *
+ * TODO(miaowang): add documentation about intended usage with introspection API.
+ *
+ * Available since API level 29.
+ *
+ * @param ahwb The AHardwareBuffer handle.
+ * @param memory The memory object to be created.
+ *               Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
+ *
+ * @see AHardwareBuffer
+ */
+int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer* ahwb,
+                                                    ANeuralNetworksMemory** memory);
 #endif  // __ANDROID_API__ >= __ANDROID_API_Q__
 
 #if __ANDROID_API__ >= 27
@@ -5051,10 +5092,15 @@
  * To indicate that an optional operand should be considered missing,
  * use {@link ANeuralNetworksModel_setOperandValue} instead, passing nullptr for buffer.
  *
+ * It is disallowed to set an operand value with shared memory backed by an AHardwareBuffer
+ * of a format other than AHARDWAREBUFFER_FORMAT_BLOB.
+ *
  * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
  * called will return an error.
  *
  * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
+ * AHardwareBuffer usage.
  *
  * Available since API level 27.
  *
@@ -5330,6 +5376,8 @@
  * and 0 for length.
  *
  * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
+ * AHardwareBuffer usage.
  *
  * Available since API level 27.
  *
@@ -5409,6 +5457,8 @@
  * <p>The provided memory must outlive the execution.</p>
  *
  * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
+ * AHardwareBuffer usage.
  *
  * Available since API level 27.
  *
@@ -5478,6 +5528,8 @@
  * Available since API level 27.
  *
  * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ *         ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory cannot
+ *         be properly mapped.
  */
 int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) __INTRODUCED_IN(27);
 
diff --git a/runtime/include/NeuralNetworksWrapper.h b/runtime/include/NeuralNetworksWrapper.h
index 102a141..0d6d815 100644
--- a/runtime/include/NeuralNetworksWrapper.h
+++ b/runtime/include/NeuralNetworksWrapper.h
@@ -114,6 +114,11 @@
                  ANEURALNETWORKS_NO_ERROR;
     }
 
+    Memory(AHardwareBuffer* buffer) {
+        mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
+                 ANEURALNETWORKS_NO_ERROR;
+    }
+
     ~Memory() { ANeuralNetworksMemory_free(mMemory); }
 
     // Disallow copy semantics to ensure the runtime object can only be freed
diff --git a/runtime/libneuralnetworks.map.txt b/runtime/libneuralnetworks.map.txt
index 7cf2fb1..873342f 100644
--- a/runtime/libneuralnetworks.map.txt
+++ b/runtime/libneuralnetworks.map.txt
@@ -26,6 +26,7 @@
     ANeuralNetworksDevice_getType; # introduced=Q
     ANeuralNetworksDevice_getVersion; # introduced=Q
     ANeuralNetworksDevice_getFeatureLevel; # introduced=Q
+    ANeuralNetworksMemory_createFromAHardwareBuffer; # introduced=Q
     ANeuralNetworksMemory_createFromFd;
     ANeuralNetworksMemory_free;
     ANeuralNetworksModel_create;
diff --git a/runtime/test/Android.bp b/runtime/test/Android.bp
index aecb2d5..765dace 100644
--- a/runtime/test/Android.bp
+++ b/runtime/test/Android.bp
@@ -28,6 +28,7 @@
         "libhidltransport",
         "libhidlmemory",
         "liblog",
+        "libnativewindow",
         "libtextclassifier_hash",
         "libutils",
         "[email protected]",
diff --git a/runtime/test/TestIntrospectionControl.cpp b/runtime/test/TestIntrospectionControl.cpp
index 760f92b..0714fb8 100644
--- a/runtime/test/TestIntrospectionControl.cpp
+++ b/runtime/test/TestIntrospectionControl.cpp
@@ -185,7 +185,7 @@
     // This is needed before we have the CPU fallback path being treated as a Device.
     // TODO(miaowang): remove once b/72506261 is fixed.
     if (DeviceManager::get()->getUseCpuOnly()) {
-        return;
+        GTEST_SKIP();
     }
 
     createSimpleAddModel(&mModel);
@@ -257,7 +257,7 @@
     // This is needed before we have the CPU fallback path being treated as a Device.
     // TODO(miaowang): remove once b/72506261 is fixed.
     if (DeviceManager::get()->getUseCpuOnly()) {
-        return;
+        GTEST_SKIP();
     }
 
     createAddMulModel(&mModel, false);
@@ -283,7 +283,7 @@
     // This is needed before we have the CPU fallback path being treated as a Device.
     // TODO(miaowang): remove once b/72506261 is fixed.
     if (DeviceManager::get()->getUseCpuOnly()) {
-        return;
+        GTEST_SKIP();
     }
 
     createAddMulModel(&mModel, true);
@@ -309,7 +309,7 @@
     // This is needed before we have the CPU fallback path being treated as a Device.
     // TODO(miaowang): remove once b/72506261 is fixed.
     if (DeviceManager::get()->getUseCpuOnly()) {
-        return;
+        GTEST_SKIP();
     }
 
     createAddMulModel(&mModel, false);
diff --git a/runtime/test/TestMemory.cpp b/runtime/test/TestMemory.cpp
index 47cdf24..fd342dd 100644
--- a/runtime/test/TestMemory.cpp
+++ b/runtime/test/TestMemory.cpp
@@ -91,4 +91,61 @@
     unlink(path);
 }
 
+TEST_F(MemoryTest, TestAHardwareBuffer) {
+    const uint32_t offsetForMatrix2 = 20;
+    const uint32_t offsetForMatrix3 = 200;
+
+    AHardwareBuffer_Desc desc{
+            .width = offsetForMatrix3 + sizeof(matrix3),
+            .height = 1,
+            .layers = 1,
+            .format = AHARDWAREBUFFER_FORMAT_BLOB,
+            .usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
+    };
+    AHardwareBuffer* buffer = nullptr;
+    ASSERT_EQ(AHardwareBuffer_allocate(&desc, &buffer), 0);
+
+    void* bufferPtr = nullptr;
+    ASSERT_EQ(AHardwareBuffer_lock(buffer, desc.usage, -1, NULL, &bufferPtr), 0);
+    memcpy((uint8_t*)bufferPtr + offsetForMatrix2, matrix2, sizeof(matrix2));
+    memcpy((uint8_t*)bufferPtr + offsetForMatrix3, matrix3, sizeof(matrix3));
+    ASSERT_EQ(AHardwareBuffer_unlock(buffer, nullptr), 0);
+
+    WrapperMemory weights(buffer);
+    ASSERT_TRUE(weights.isValid());
+
+    WrapperModel model;
+    WrapperOperandType matrixType(WrapperType::TENSOR_FLOAT32, {3, 4});
+    WrapperOperandType scalarType(WrapperType::INT32, {});
+    int32_t activation(0);
+    auto a = model.addOperand(&matrixType);
+    auto b = model.addOperand(&matrixType);
+    auto c = model.addOperand(&matrixType);
+    auto d = model.addOperand(&matrixType);
+    auto e = model.addOperand(&matrixType);
+    auto f = model.addOperand(&scalarType);
+
+    model.setOperandValueFromMemory(e, &weights, offsetForMatrix2, sizeof(Matrix3x4));
+    model.setOperandValueFromMemory(a, &weights, offsetForMatrix3, sizeof(Matrix3x4));
+    model.setOperandValue(f, &activation, sizeof(activation));
+    model.addOperation(ANEURALNETWORKS_ADD, {a, c, f}, {b});
+    model.addOperation(ANEURALNETWORKS_ADD, {b, e, f}, {d});
+    model.identifyInputsAndOutputs({c}, {d});
+    ASSERT_TRUE(model.isValid());
+    model.finish();
+
+    // Test the three node model.
+    Matrix3x4 actual;
+    memset(&actual, 0, sizeof(actual));
+    WrapperCompilation compilation2(&model);
+    ASSERT_EQ(compilation2.finish(), WrapperResult::NO_ERROR);
+    WrapperExecution execution2(&compilation2);
+    ASSERT_EQ(execution2.setInput(0, matrix1, sizeof(Matrix3x4)), WrapperResult::NO_ERROR);
+    ASSERT_EQ(execution2.setOutput(0, actual, sizeof(Matrix3x4)), WrapperResult::NO_ERROR);
+    ASSERT_EQ(execution2.compute(), WrapperResult::NO_ERROR);
+    ASSERT_EQ(CompareMatrices(expected3, actual), 0);
+
+    AHardwareBuffer_release(buffer);
+    buffer = nullptr;
+}
 }  // end namespace
diff --git a/runtime/test/TestNeuralNetworksWrapper.h b/runtime/test/TestNeuralNetworksWrapper.h
index 73c8dbe..cc6a331 100644
--- a/runtime/test/TestNeuralNetworksWrapper.h
+++ b/runtime/test/TestNeuralNetworksWrapper.h
@@ -115,6 +115,10 @@
         mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) ==
                  ANEURALNETWORKS_NO_ERROR;
     }
+    Memory(AHardwareBuffer* buffer) {
+        mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
+                 ANEURALNETWORKS_NO_ERROR;
+    }
 
     ~Memory() { ANeuralNetworksMemory_free(mMemory); }
 
diff --git a/runtime/test/TestValidation.cpp b/runtime/test/TestValidation.cpp
index 085c0eb..8558508 100644
--- a/runtime/test/TestValidation.cpp
+++ b/runtime/test/TestValidation.cpp
@@ -287,6 +287,72 @@
               ANEURALNETWORKS_BAD_STATE);
 }
 
+TEST_F(ValidationTestModel, SetOperandValueFromAHardwareBuffer) {
+    uint32_t dimensions[]{1};
+    ANeuralNetworksOperandType quant8Type{.type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
+                                          .dimensionCount = 1,
+                                          .dimensions = dimensions,
+                                          .scale = 1.0,
+                                          .zeroPoint = 0};
+    EXPECT_EQ(ANeuralNetworksModel_addOperand(mModel, &quant8Type), ANEURALNETWORKS_NO_ERROR);
+
+    AHardwareBuffer_Desc desc{
+            .width = 16,
+            .height = 16,
+            .layers = 1,
+            .format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM,
+            .usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
+    };
+
+    AHardwareBuffer* buffer = nullptr;
+    ASSERT_EQ(AHardwareBuffer_allocate(&desc, &buffer), 0);
+
+    ANeuralNetworksMemory* memory;
+    EXPECT_EQ(ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &memory),
+              ANEURALNETWORKS_NO_ERROR);
+
+    // This should fail, since non-BLOB AHardwareBuffer is not allowed.
+    EXPECT_EQ(ANeuralNetworksModel_setOperandValueFromMemory(mModel, 0, memory, 0, sizeof(uint8_t)),
+              ANEURALNETWORKS_UNMAPPABLE);
+    ANeuralNetworksMemory_free(memory);
+    AHardwareBuffer_release(buffer);
+}
+
+TEST_F(ValidationTestModel, SetOperandValueFromAHardwareBufferBlob) {
+    uint32_t dimensions[]{1};
+    ANeuralNetworksOperandType floatType{
+            .type = ANEURALNETWORKS_TENSOR_FLOAT32, .dimensionCount = 1, .dimensions = dimensions};
+    EXPECT_EQ(ANeuralNetworksModel_addOperand(mModel, &floatType), ANEURALNETWORKS_NO_ERROR);
+
+    const size_t memorySize = 20;
+    AHardwareBuffer_Desc desc{
+            .width = memorySize,
+            .height = 1,
+            .layers = 1,
+            .format = AHARDWAREBUFFER_FORMAT_BLOB,
+            .usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
+    };
+
+    AHardwareBuffer* buffer = nullptr;
+    ASSERT_EQ(AHardwareBuffer_allocate(&desc, &buffer), 0);
+
+    ANeuralNetworksMemory* memory;
+    EXPECT_EQ(ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &memory),
+              ANEURALNETWORKS_NO_ERROR);
+
+    // This should fail, since offset is larger than memorySize.
+    EXPECT_EQ(ANeuralNetworksModel_setOperandValueFromMemory(mModel, 0, memory, memorySize + 1,
+                                                             sizeof(float)),
+              ANEURALNETWORKS_BAD_DATA);
+
+    // This should fail, since requested size is larger than the memory.
+    EXPECT_EQ(ANeuralNetworksModel_setOperandValueFromMemory(mModel, 0, memory, memorySize - 3,
+                                                             sizeof(float)),
+              ANEURALNETWORKS_BAD_DATA);
+    ANeuralNetworksMemory_free(memory);
+    AHardwareBuffer_release(buffer);
+}
+
 TEST_F(ValidationTestModel, AddOEMOperand) {
     ANeuralNetworksOperandType OEMScalarType{
             .type = ANEURALNETWORKS_OEM_SCALAR, .dimensionCount = 0, .dimensions = nullptr};
@@ -547,55 +613,46 @@
 }
 
 TEST_F(ValidationTestExecution, SetInput) {
-    ANeuralNetworksExecution* execution;
-    EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution), ANEURALNETWORKS_NO_ERROR);
-
     char buffer[20];
     EXPECT_EQ(ANeuralNetworksExecution_setInput(nullptr, 0, nullptr, buffer, sizeof(float)),
               ANEURALNETWORKS_UNEXPECTED_NULL);
-    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, 0, nullptr, nullptr, sizeof(float)),
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, nullptr, sizeof(float)),
               ANEURALNETWORKS_UNEXPECTED_NULL);
 
     // This should fail, since memory is not the size of a float32.
-    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, 0, nullptr, buffer, 20),
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 0, nullptr, buffer, 20),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, as this operand does not exist.
-    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, 999, nullptr, buffer, sizeof(float)),
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, 999, nullptr, buffer, sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, as this operand does not exist.
-    EXPECT_EQ(ANeuralNetworksExecution_setInput(execution, -1, nullptr, buffer, sizeof(float)),
+    EXPECT_EQ(ANeuralNetworksExecution_setInput(mExecution, -1, nullptr, buffer, sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 }
 
 TEST_F(ValidationTestExecution, SetOutput) {
-    ANeuralNetworksExecution* execution;
-    EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution), ANEURALNETWORKS_NO_ERROR);
-
     char buffer[20];
     EXPECT_EQ(ANeuralNetworksExecution_setOutput(nullptr, 0, nullptr, buffer, sizeof(float)),
               ANEURALNETWORKS_UNEXPECTED_NULL);
-    EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution, 0, nullptr, nullptr, sizeof(float)),
+    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, nullptr, sizeof(float)),
               ANEURALNETWORKS_UNEXPECTED_NULL);
 
     // This should fail, since memory is not the size of a float32.
-    EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution, 0, nullptr, buffer, 20),
+    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 0, nullptr, buffer, 20),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, as this operand does not exist.
-    EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution, 999, nullptr, buffer, sizeof(float)),
+    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, 999, nullptr, buffer, sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, as this operand does not exist.
-    EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution, -1, nullptr, buffer, sizeof(float)),
+    EXPECT_EQ(ANeuralNetworksExecution_setOutput(mExecution, -1, nullptr, buffer, sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 }
 
 TEST_F(ValidationTestExecution, SetInputFromMemory) {
-    ANeuralNetworksExecution* execution;
-    EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution), ANEURALNETWORKS_NO_ERROR);
-
     const size_t memorySize = 20;
     int memoryFd = ASharedMemory_create("nnMemory", memorySize);
     ASSERT_GT(memoryFd, 0);
@@ -608,36 +665,71 @@
     EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(nullptr, 0, nullptr, memory, 0,
                                                           sizeof(float)),
               ANEURALNETWORKS_UNEXPECTED_NULL);
-    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(execution, 0, nullptr, nullptr, 0,
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, nullptr, 0,
                                                           sizeof(float)),
               ANEURALNETWORKS_UNEXPECTED_NULL);
 
     // This should fail, since the operand does not exist.
-    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(execution, 999, nullptr, memory, 0,
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 999, nullptr, memory, 0,
                                                           sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, since the operand does not exist.
-    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(execution, -1, nullptr, memory, 0,
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, -1, nullptr, memory, 0,
                                                           sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, since memory is not the size of a float32.
-    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(execution, 0, nullptr, memory, 0,
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, memory, 0,
                                                           memorySize),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, since offset is larger than memorySize.
-    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(execution, 0, nullptr, memory,
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, memory,
                                                           memorySize + 1, sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 
     // This should fail, since requested size is larger than the memory.
-    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(execution, 0, nullptr, memory,
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, memory,
                                                           memorySize - 3, sizeof(float)),
               ANEURALNETWORKS_BAD_DATA);
 }
 
+TEST_F(ValidationTestExecution, SetInputFromAHardwareBufferBlob) {
+    const size_t memorySize = 20;
+
+    AHardwareBuffer_Desc desc{
+            .width = memorySize,
+            .height = 1,
+            .layers = 1,
+            .format = AHARDWAREBUFFER_FORMAT_BLOB,
+            .usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
+    };
+
+    AHardwareBuffer* buffer = nullptr;
+    ASSERT_EQ(AHardwareBuffer_allocate(&desc, &buffer), 0);
+
+    ANeuralNetworksMemory* memory;
+    EXPECT_EQ(ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &memory),
+              ANEURALNETWORKS_NO_ERROR);
+
+    // This should fail, since memory is not the size of a float32.
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, memory, 0,
+                                                          memorySize),
+              ANEURALNETWORKS_BAD_DATA);
+
+    // This should fail, since offset is larger than memorySize.
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, memory,
+                                                          memorySize + 1, sizeof(float)),
+              ANEURALNETWORKS_BAD_DATA);
+    // This should fail, since requested size is larger than the memory.
+    EXPECT_EQ(ANeuralNetworksExecution_setInputFromMemory(mExecution, 0, nullptr, memory,
+                                                          memorySize - 3, sizeof(float)),
+              ANEURALNETWORKS_BAD_DATA);
+    ANeuralNetworksMemory_free(memory);
+    AHardwareBuffer_release(buffer);
+}
+
 TEST_F(ValidationTestExecution, SetOutputFromMemory) {
     ANeuralNetworksExecution* execution;
     EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution), ANEURALNETWORKS_NO_ERROR);
@@ -684,6 +776,42 @@
               ANEURALNETWORKS_BAD_DATA);
 }
 
+TEST_F(ValidationTestExecution, SetOutputFromAHardwareBufferBlob) {
+    const size_t memorySize = 20;
+
+    AHardwareBuffer_Desc desc{
+            .width = memorySize,
+            .height = 1,
+            .layers = 1,
+            .format = AHARDWAREBUFFER_FORMAT_BLOB,
+            .usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
+    };
+
+    AHardwareBuffer* buffer = nullptr;
+    ASSERT_EQ(AHardwareBuffer_allocate(&desc, &buffer), 0);
+
+    ANeuralNetworksMemory* memory;
+    EXPECT_EQ(ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &memory),
+              ANEURALNETWORKS_NO_ERROR);
+
+    // This should fail, since memory is not the size of a float32.
+    EXPECT_EQ(ANeuralNetworksExecution_setOutputFromMemory(mExecution, 0, nullptr, memory, 0,
+                                                           memorySize),
+              ANEURALNETWORKS_BAD_DATA);
+
+    // This should fail, since offset is larger than memorySize.
+    EXPECT_EQ(ANeuralNetworksExecution_setOutputFromMemory(mExecution, 0, nullptr, memory,
+                                                           memorySize + 1, sizeof(float)),
+              ANEURALNETWORKS_BAD_DATA);
+
+    // This should fail, since requested size is larger than the memory.
+    EXPECT_EQ(ANeuralNetworksExecution_setOutputFromMemory(mExecution, 0, nullptr, memory,
+                                                           memorySize - 3, sizeof(float)),
+              ANEURALNETWORKS_BAD_DATA);
+    ANeuralNetworksMemory_free(memory);
+    AHardwareBuffer_release(buffer);
+}
+
 TEST_F(ValidationTestExecution, Compute) {
     EXPECT_EQ(ANeuralNetworksExecution_compute(nullptr), ANEURALNETWORKS_UNEXPECTED_NULL);
 }