Cleanup NNAPI runtime Memory objects

Prior to this CL, runtime Memory* objects were default constructed and
set to a value later. Rebinding the value led to multiple bugs in the
past and made the Memory* objects prone to data races. This CL addresses
these issues by determining all values in the Memory* objects' factory
methods, and making the Memory* objects immutable after construction.

This CL also untangles some of the inheritance hierarchy. Prior to this
CL, all MemoryFd and MemoryAHWB inherited from Memory, which was
effectively ashmem-based memory. This CL separates MemoryAshmem into its
own, unique class type, and has a common base Memory class. This
reorganization also uncovered that getPointer was only used in the
runtime on ashmem-based memory, and removes the unused getPointer
methods.

Finally, this CL improves documentation of NeuralNetworks.h in two ways:
1) Fixes the typo "ANeuralNetworksMemory_createFromAHardwarBuffer"
2) Documents the missing lifetime constraints of
   ANeuralNetworksMemory_createFromAHardwareBuffer

Fixes: 138852228
Fixes: 69632863
Fixes: 69633035
Fixes: 129572123
Fixes: 132323765
Fixes: 139213289
Test: mma
Test: atest NeuralNetworksTest_static
Test: atest CtsNNAPITestCases
Change-Id: I49a2356d6b8fc38e501d8e37ba8a8893f3e91395
Merged-In: I49a2356d6b8fc38e501d8e37ba8a8893f3e91395
(cherry picked from commit 008caeab69787b8978e874f8fb3811e2400f5d55)
diff --git a/runtime/ExecutionPlan.cpp b/runtime/ExecutionPlan.cpp
index 0d90203..afce316 100644
--- a/runtime/ExecutionPlan.cpp
+++ b/runtime/ExecutionPlan.cpp
@@ -634,7 +634,9 @@
       mSubModelInputsAndOutputs(subModelInputsAndOutputs),
       mNextStepIndex(0) {
     if (totalSizeOfTemporaries) {
-        if (mTemporaries.create(totalSizeOfTemporaries) != ANEURALNETWORKS_NO_ERROR) {
+        int n;
+        std::tie(n, mTemporaries) = MemoryAshmem::create(totalSizeOfTemporaries);
+        if (n != ANEURALNETWORKS_NO_ERROR) {
             LOG(ERROR) << "ExecutionPlan::Controller failed to allocate temporaries";
             mNextStepIndex = kBadStepIndex;
         }
@@ -831,7 +833,7 @@
                 const uint32_t offsetOfTemporary =
                         controller->mSubModelInputsAndOutputs->at(fromModelOperandIndex);
                 int n = (*executor)->setOutputFromTemporaryMemory(firstSubModelOutputIndex + idx,
-                                                                  &controller->mTemporaries,
+                                                                  controller->mTemporaries.get(),
                                                                   offsetOfTemporary);
                 if (n != ANEURALNETWORKS_NO_ERROR) {
                     controller->mNextStepIndex = Controller::kBadStepIndex;
@@ -851,7 +853,7 @@
                 const uint32_t offsetOfTemporary =
                         controller->mSubModelInputsAndOutputs->at(fromModelOperandIndex);
                 int n = (*executor)->setInputFromTemporaryMemory(firstSubModelInputIndex + idx,
-                                                                 &controller->mTemporaries,
+                                                                 controller->mTemporaries.get(),
                                                                  offsetOfTemporary);
                 if (n != ANEURALNETWORKS_NO_ERROR) {
                     controller->mNextStepIndex = Controller::kBadStepIndex;
diff --git a/runtime/ExecutionPlan.h b/runtime/ExecutionPlan.h
index 3b4dac2..cd3c018 100644
--- a/runtime/ExecutionPlan.h
+++ b/runtime/ExecutionPlan.h
@@ -205,7 +205,7 @@
         const BurstBuilder* mBurstBuilder;
         std::shared_ptr<const SubModelInputsAndOutputsType>
                 mSubModelInputsAndOutputs;  // may be nullptr
-        Memory mTemporaries;
+        std::unique_ptr<MemoryAshmem> mTemporaries;
         size_t mNextStepIndex;
     };
 
diff --git a/runtime/Manager.cpp b/runtime/Manager.cpp
index 5b39113..0e6c9a5 100644
--- a/runtime/Manager.cpp
+++ b/runtime/Manager.cpp
@@ -319,14 +319,15 @@
     }
 }
 
-// Figures out how to place each of the input or outputs in a buffer. This just does the layout,
-// it does not copy data.  Aligns each input a bit.
-static int allocatePointerArgumentsToPool(MemoryTracker* memories,
-                                          std::vector<ModelArgumentInfo>* args, Memory* memory) {
+// Figures out how to place each of the input or outputs in a buffer. This just
+// does the layout and memory allocation, it does not copy data.  Aligns each
+// input a bit.
+static std::pair<int, std::unique_ptr<MemoryAshmem>> allocatePointerArgumentsToPool(
+        MemoryTracker* memories, std::vector<ModelArgumentInfo>* args) {
     CHECK(memories != nullptr);
     CHECK(args != nullptr);
-    CHECK(memory != nullptr);
-    uint32_t nextPoolIndex = memories->size();
+
+    const uint32_t nextPoolIndex = memories->size();
     int64_t total = 0;
     for (auto& info : *args) {
         if (info.state == ModelArgumentInfo::POINTER) {
@@ -339,15 +340,19 @@
         }
     };
     if (total > 0xFFFFFFFF) {
-        LOG(ERROR) << "allocatePointerArgumentsToPool: ANeuralNetworksExecution: "
-                      "Size of all inputs or outputs exceeds 2^32.";
-        return ANEURALNETWORKS_BAD_DATA;
+        LOG(ERROR) << "allocatePointerArgumentsToPool: ANeuralNetworksExecution: Size of all "
+                      "inputs or outputs exceeds 2^32.";
+        return {ANEURALNETWORKS_BAD_DATA, nullptr};
     }
-    if (total > 0) {
-        memory->create(total);  // TODO check error
-        memories->add(memory);
+    if (total <= 0) {
+        return {ANEURALNETWORKS_NO_ERROR, nullptr};
     }
-    return ANEURALNETWORKS_NO_ERROR;
+    auto [n, memory] = MemoryAshmem::create(total);
+    if (n != ANEURALNETWORKS_NO_ERROR) {
+        return {n, nullptr};
+    }
+    memories->add(memory.get());
+    return {ANEURALNETWORKS_NO_ERROR, std::move(memory)};
 }
 
 // Start compute on an actual HIDL driver.
@@ -370,23 +375,25 @@
     *synchronizationCallback = nullptr;
 
     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
+
     // We separate the input & output pools so accelerators only need to copy
     // the contents of the input pools. We could also use it to set protection
     // on read only memory but that's not currently done.
-    Memory inputPointerArguments;
-    Memory outputPointerArguments;
 
     // Layout the input and output data
-    NN_RETURN_IF_ERROR(allocatePointerArgumentsToPool(memories, inputs, &inputPointerArguments));
-    NN_RETURN_IF_ERROR(allocatePointerArgumentsToPool(memories, outputs, &outputPointerArguments));
+    const auto [n1, inputPointerArguments] = allocatePointerArgumentsToPool(memories, inputs);
+    NN_RETURN_IF_ERROR(n1);
+    const auto [n2, outputPointerArguments] = allocatePointerArgumentsToPool(memories, outputs);
+    NN_RETURN_IF_ERROR(n2);
 
     // Copy the input data that was specified via a pointer.
-    for (auto& info : *inputs) {
-        if (info.state == ModelArgumentInfo::POINTER) {
-            DataLocation& loc = info.locationAndLength;
-            uint8_t* data = nullptr;
-            NN_RETURN_IF_ERROR(inputPointerArguments.getPointer(&data));
-            memcpy(data + loc.offset, info.buffer, loc.length);
+    if (inputPointerArguments != nullptr) {
+        for (const auto& info : *inputs) {
+            if (info.state == ModelArgumentInfo::POINTER) {
+                const DataLocation& loc = info.locationAndLength;
+                uint8_t* const data = inputPointerArguments->getPointer();
+                memcpy(data + loc.offset, info.buffer, loc.length);
+            }
         }
     }
 
@@ -486,12 +493,13 @@
     // TODO: Move this block of code somewhere else. It should not be in the
     // startCompute function.
     NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "DriverPreparedModel::execute");
-    for (auto& info : *outputs) {
-        if (info.state == ModelArgumentInfo::POINTER) {
-            DataLocation& loc = info.locationAndLength;
-            uint8_t* data = nullptr;
-            NN_RETURN_IF_ERROR(outputPointerArguments.getPointer(&data));
-            memcpy(info.buffer, data + loc.offset, loc.length);
+    if (outputPointerArguments != nullptr) {
+        for (const auto& info : *outputs) {
+            if (info.state == ModelArgumentInfo::POINTER) {
+                const DataLocation& loc = info.locationAndLength;
+                const uint8_t* const data = outputPointerArguments->getPointer();
+                memcpy(info.buffer, data + loc.offset, loc.length);
+            }
         }
     }
     VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
diff --git a/runtime/Memory.cpp b/runtime/Memory.cpp
index 20fabfa..6e972c6 100644
--- a/runtime/Memory.cpp
+++ b/runtime/Memory.cpp
@@ -18,6 +18,8 @@
 
 #include "Memory.h"
 
+#include <memory>
+#include <utility>
 #include "ExecutionBurstController.h"
 #include "MemoryUtils.h"
 #include "Utils.h"
@@ -27,6 +29,8 @@
 
 using namespace hal;
 
+Memory::Memory(hidl_memory memory) : kHidlMemory(std::move(memory)) {}
+
 Memory::~Memory() {
     for (const auto [ptr, weakBurst] : mUsedBy) {
         if (const std::shared_ptr<ExecutionBurstController> burst = weakBurst.lock()) {
@@ -35,23 +39,12 @@
     }
 }
 
-int Memory::create(uint32_t size) {
-    mHidlMemory = allocateSharedMemory(size);
-    mMemory = mapMemory(mHidlMemory);
-    if (mMemory == nullptr) {
-        LOG(ERROR) << "Memory::create failed";
-        return ANEURALNETWORKS_OUT_OF_MEMORY;
-    }
-    return ANEURALNETWORKS_NO_ERROR;
-}
-
 bool Memory::validateSize(uint32_t offset, uint32_t length) const {
-    if (offset + length > mHidlMemory.size()) {
+    if (offset + length > kHidlMemory.size()) {
         LOG(ERROR) << "Request size larger than the memory size.";
         return false;
-    } else {
-        return true;
     }
+    return true;
 }
 
 intptr_t Memory::getKey() const {
@@ -63,83 +56,98 @@
     mUsedBy.emplace(burst.get(), burst);
 }
 
-MemoryFd::~MemoryFd() {
-    // Unmap the memory.
-    if (mMapping) {
-        munmap(mMapping, mHidlMemory.size());
+std::pair<int, std::unique_ptr<MemoryAshmem>> MemoryAshmem::create(uint32_t size) {
+    hidl_memory hidlMemory = allocateSharedMemory(size);
+    sp<IMemory> mapped = mapMemory(hidlMemory);
+    if (mapped == nullptr || mapped->getPointer() == nullptr) {
+        LOG(ERROR) << "Memory::create failed";
+        return {ANEURALNETWORKS_OUT_OF_MEMORY, nullptr};
     }
-    // Delete the native_handle.
-    if (mHandle) {
-        int fd = mHandle->data[0];
-        if (fd != -1) {
-            close(fd);
-        }
-        native_handle_delete(mHandle);
-    }
+    return {ANEURALNETWORKS_NO_ERROR,
+            std::make_unique<MemoryAshmem>(std::move(mapped), std::move(hidlMemory))};
 }
 
-int MemoryFd::set(size_t size, int prot, int fd, size_t offset) {
+uint8_t* MemoryAshmem::getPointer() const {
+    return static_cast<uint8_t*>(static_cast<void*>(kMappedMemory->getPointer()));
+}
+
+MemoryAshmem::MemoryAshmem(sp<IMemory> mapped, hidl_memory memory)
+    : Memory(std::move(memory)), kMappedMemory(std::move(mapped)) {}
+
+std::pair<int, std::unique_ptr<MemoryFd>> MemoryFd::create(size_t size, int prot, int fd,
+                                                           size_t offset) {
     if (size == 0 || fd < 0) {
         LOG(ERROR) << "Invalid size or fd";
-        return ANEURALNETWORKS_BAD_DATA;
+        return {ANEURALNETWORKS_BAD_DATA, nullptr};
     }
+
+    // Duplicate the file descriptor so MemoryFd owns its own version.
     int dupfd = dup(fd);
     if (dupfd == -1) {
         LOG(ERROR) << "Failed to dup the fd";
-        return ANEURALNETWORKS_UNEXPECTED_NULL;
+        // TODO(b/120417090): is ANEURALNETWORKS_UNEXPECTED_NULL the correct
+        // error to return here?
+        return {ANEURALNETWORKS_UNEXPECTED_NULL, nullptr};
     }
 
-    if (mMapping) {
-        if (munmap(mMapping, mHidlMemory.size()) != 0) {
-            LOG(ERROR) << "Failed to remove the existing mapping";
-            // This is not actually fatal.
-        }
-        mMapping = nullptr;
-    }
-    if (mHandle) {
-        native_handle_delete(mHandle);
-    }
-    mHandle = native_handle_create(1, 3);
-    if (mHandle == nullptr) {
+    // Create a temporary native handle to own the dupfd.
+    native_handle_t* nativeHandle = native_handle_create(1, 3);
+    if (nativeHandle == nullptr) {
         LOG(ERROR) << "Failed to create native_handle";
-        return ANEURALNETWORKS_UNEXPECTED_NULL;
+        // TODO(b/120417090): is ANEURALNETWORKS_UNEXPECTED_NULL the correct
+        // error to return here?
+        return {ANEURALNETWORKS_UNEXPECTED_NULL, nullptr};
     }
-    mHandle->data[0] = dupfd;
-    mHandle->data[1] = prot;
-    mHandle->data[2] = (int32_t)(uint32_t)(offset & 0xffffffff);
-#if defined(__LP64__)
-    mHandle->data[3] = (int32_t)(uint32_t)(offset >> 32);
-#else
-    mHandle->data[3] = 0;
-#endif
-    mHidlMemory = hidl_memory("mmap_fd", mHandle, size);
-    return ANEURALNETWORKS_NO_ERROR;
+    nativeHandle->data[0] = dupfd;
+    nativeHandle->data[1] = prot;
+    const uint64_t bits = static_cast<uint64_t>(offset);
+    nativeHandle->data[2] = (int32_t)(uint32_t)(bits & 0xffffffff);
+    nativeHandle->data[3] = (int32_t)(uint32_t)(bits >> 32);
+
+    // Create a hidl_handle which owns the native handle and fd so that we don't
+    // have to manually clean either the native handle or the fd.
+    hardware::hidl_handle hidlHandle;
+    hidlHandle.setTo(nativeHandle, /*shouldOwn=*/true);
+
+    // Push the hidl_handle into a hidl_memory object. The hidl_memory object is
+    // responsible for cleaning the hidl_handle, the native handle, and the fd.
+    hidl_memory hidlMemory = hidl_memory("mmap_fd", std::move(hidlHandle), size);
+
+    return {ANEURALNETWORKS_NO_ERROR, std::make_unique<MemoryFd>(std::move(hidlMemory))};
 }
 
-int MemoryFd::getPointer(uint8_t** buffer) const {
-    if (mMapping) {
-        *buffer = mMapping;
-        return ANEURALNETWORKS_NO_ERROR;
-    }
+MemoryFd::MemoryFd(hidl_memory memory) : Memory(std::move(memory)) {}
 
-    if (mHandle == nullptr) {
-        LOG(ERROR) << "Memory not initialized";
-        return ANEURALNETWORKS_UNEXPECTED_NULL;
-    }
-
-    int fd = mHandle->data[0];
-    int prot = mHandle->data[1];
-    size_t offset = getSizeFromInts(mHandle->data[2], mHandle->data[3]);
-    void* data = mmap(nullptr, mHidlMemory.size(), prot, MAP_SHARED, fd, offset);
-    if (data == MAP_FAILED) {
-        LOG(ERROR) << "MemoryFd::getPointer(): Can't mmap the file descriptor.";
-        return ANEURALNETWORKS_UNMAPPABLE;
+std::pair<int, std::unique_ptr<MemoryAHWB>> MemoryAHWB::create(const AHardwareBuffer& ahwb) {
+    AHardwareBuffer_Desc bufferDesc;
+    AHardwareBuffer_describe(&ahwb, &bufferDesc);
+    const native_handle_t* handle = AHardwareBuffer_getNativeHandle(&ahwb);
+    hidl_memory hidlMemory;
+    if (bufferDesc.format == AHARDWAREBUFFER_FORMAT_BLOB) {
+        hidlMemory = hidl_memory("hardware_buffer_blob", handle, bufferDesc.width);
     } else {
-        mMapping = *buffer = static_cast<uint8_t*>(data);
-        return ANEURALNETWORKS_NO_ERROR;
+        // memory size is not used.
+        hidlMemory = hidl_memory("hardware_buffer", handle, 0);
     }
+
+    std::unique_ptr<MemoryAHWB> memory =
+            std::make_unique<MemoryAHWB>(bufferDesc, std::move(hidlMemory));
+    return {ANEURALNETWORKS_NO_ERROR, std::move(memory)};
+};
+
+bool MemoryAHWB::validateSize(uint32_t offset, uint32_t length) const {
+    // validateSize should only be called on BLOB mode buffer.
+    if (!kBlobMode) {
+        LOG(ERROR) << "Invalid AHARDWAREBUFFER_FORMAT, must be AHARDWAREBUFFER_FORMAT_BLOB.";
+        return false;
+    }
+    // Use normal validation.
+    return Memory::validateSize(offset, length);
 }
 
+MemoryAHWB::MemoryAHWB(const AHardwareBuffer_Desc& desc, hidl_memory memory)
+    : Memory(std::move(memory)), kBlobMode(desc.format == AHARDWAREBUFFER_FORMAT_BLOB) {}
+
 uint32_t MemoryTracker::add(const Memory* memory) {
     VLOG(MODEL) << __func__ << "(" << SHOW_IF_DEBUG(memory) << ")";
     // See if we already have this memory. If so,
diff --git a/runtime/Memory.h b/runtime/Memory.h
index 3c31c2e..e726da3 100644
--- a/runtime/Memory.h
+++ b/runtime/Memory.h
@@ -21,11 +21,15 @@
 #include "NeuralNetworks.h"
 #include "Utils.h"
 
+#include <android-base/macros.h>
 #include <cutils/native_handle.h>
 #include <sys/mman.h>
 #include <vndk/hardware_buffer.h>
+#include <memory>
 #include <mutex>
 #include <unordered_map>
+#include <utility>
+#include <vector>
 
 namespace android {
 namespace nn {
@@ -35,31 +39,15 @@
 
 // Represents a memory region.
 class Memory {
+    // Disallow copy and assign to prevent slicing
+    DISALLOW_COPY_AND_ASSIGN(Memory);
+
    public:
-    Memory() {}
+    // Custom destructor to notify any ExecutionBurstControllers currently using
+    // this memory that it is being freed.
     virtual ~Memory();
 
-    // Disallow copy semantics to ensure the runtime object can only be freed
-    // once. Copy semantics could be enabled if some sort of reference counting
-    // or deep-copy system for runtime objects is added later.
-    Memory(const Memory&) = delete;
-    Memory& operator=(const Memory&) = delete;
-
-    // Creates a shared memory object of the size specified in bytes.
-    int create(uint32_t size);
-
-    hal::hidl_memory getHidlMemory() const { return mHidlMemory; }
-
-    // Returns a pointer to the underlying memory of this memory object.
-    // The function will fail if the memory is not CPU accessible and nullptr
-    // will be returned.
-    virtual int getPointer(uint8_t** buffer) const {
-        *buffer = static_cast<uint8_t*>(static_cast<void*>(mMemory->getPointer()));
-        if (*buffer == nullptr) {
-            return ANEURALNETWORKS_BAD_DATA;
-        }
-        return ANEURALNETWORKS_NO_ERROR;
-    }
+    const hal::hidl_memory& getHidlMemory() const { return kHidlMemory; }
 
     virtual bool validateSize(uint32_t offset, uint32_t length) const;
 
@@ -72,11 +60,13 @@
     void usedBy(const std::shared_ptr<ExecutionBurstController>& burst) const;
 
    protected:
+    Memory(hal::hidl_memory memory);
+
     // The hidl_memory handle for this shared memory.  We will pass this value when
     // communicating with the drivers.
-    hal::hidl_memory mHidlMemory;
-    sp<hal::IMemory> mMemory;
+    const hal::hidl_memory kHidlMemory;
 
+   private:
     mutable std::mutex mMutex;
     // mUsedBy is essentially a set of burst objects which use this Memory
     // object. However, std::weak_ptr does not have comparison operations nor a
@@ -89,85 +79,63 @@
             mUsedBy;
 };
 
-class MemoryFd : public Memory {
+class MemoryAshmem : public Memory {
    public:
-    MemoryFd() {}
-    ~MemoryFd() override;
+    // Creates a memory object containing a new android shared memory ("ashmem")
+    // object of the size specified in bytes. Because this ashmem region can be
+    // shared with and accessed by one or more driver processes, MemoryAshmem
+    // has shared ownership over the ashmem region.
+    //
+    // On success, returns ANEURALNETWORKS_NO_ERROR and a memory object.
+    // On error, returns the appropriate NNAPI error code and nullptr.
+    static std::pair<int, std::unique_ptr<MemoryAshmem>> create(uint32_t size);
 
-    // Disallow copy semantics to ensure the runtime object can only be freed
-    // once. Copy semantics could be enabled if some sort of reference counting
-    // or deep-copy system for runtime objects is added later.
-    MemoryFd(const MemoryFd&) = delete;
-    MemoryFd& operator=(const MemoryFd&) = delete;
+    // Get a pointer to the ashmem region of memory. The returned pointer is
+    // valid for the lifetime of the MemoryAshmem object. This call always
+    // returns non-null because it was validated during MemoryAshmem::create.
+    uint8_t* getPointer() const;
 
-    // Create the native_handle based on input size, prot, and fd.
-    // Existing native_handle will be deleted, and mHidlMemory will wrap
-    // the newly created native_handle.
-    int set(size_t size, int prot, int fd, size_t offset);
-
-    int getPointer(uint8_t** buffer) const override;
+    // prefer using MemoryAshmem::create
+    MemoryAshmem(sp<hal::IMemory> mapped, hal::hidl_memory memory);
 
    private:
-    native_handle_t* mHandle = nullptr;
-    mutable uint8_t* mMapping = nullptr;
+    const sp<hal::IMemory> kMappedMemory;
 };
 
-// TODO(miaowang): move function definitions to Memory.cpp
+class MemoryFd : public Memory {
+   public:
+    // Create a memory object based on input size, prot, and fd that can be sent
+    // across HIDL. This function duplicates the provided fd, and owns the
+    // duplicate.
+    //
+    // On success, returns ANEURALNETWORKS_NO_ERROR and a memory object.
+    // On error, returns the appropriate NNAPI error code and nullptr.
+    static std::pair<int, std::unique_ptr<MemoryFd>> create(size_t size, int prot, int fd,
+                                                            size_t offset);
+
+    // prefer using MemoryFd::create
+    MemoryFd(hal::hidl_memory memory);
+};
+
 class MemoryAHWB : public Memory {
    public:
-    MemoryAHWB() {}
-    ~MemoryAHWB() override{};
-
-    // Disallow copy semantics to ensure the runtime object can only be freed
-    // once. Copy semantics could be enabled if some sort of reference counting
-    // or deep-copy system for runtime objects is added later.
-    MemoryAHWB(const MemoryAHWB&) = delete;
-    MemoryAHWB& operator=(const MemoryAHWB&) = delete;
-
-    // Keep track of the provided AHardwareBuffer handle.
-    int set(const AHardwareBuffer* ahwb) {
-        AHardwareBuffer_describe(ahwb, &mBufferDesc);
-        const native_handle_t* handle = AHardwareBuffer_getNativeHandle(ahwb);
-        mHardwareBuffer = ahwb;
-        if (mBufferDesc.format == AHARDWAREBUFFER_FORMAT_BLOB) {
-            mHidlMemory = hal::hidl_memory("hardware_buffer_blob", handle, mBufferDesc.width);
-        } else {
-            // memory size is not used.
-            mHidlMemory = hal::hidl_memory("hardware_buffer", handle, 0);
-        }
-        return ANEURALNETWORKS_NO_ERROR;
-    };
-
-    int getPointer(uint8_t** buffer) const override {
-        *buffer = nullptr;
-        return ANEURALNETWORKS_BAD_DATA;
-    };
+    // Create a memory object to keep track of (but not take ownership of) the
+    // provided AHardwareBuffer handle.
+    //
+    // On success, returns ANEURALNETWORKS_NO_ERROR and a memory object.
+    // On error, returns the appropriate NNAPI error code and nullptr.
+    static std::pair<int, std::unique_ptr<MemoryAHWB>> create(const AHardwareBuffer& ahwb);
 
     // validateSize should only be called for blob mode AHardwareBuffer.
     // Calling it on non-blob mode AHardwareBuffer will result in an error.
     // TODO(miaowang): consider separate blob and non-blob into different classes.
-    bool validateSize(uint32_t offset, uint32_t length) const override {
-        if (mHardwareBuffer == nullptr) {
-            LOG(ERROR) << "MemoryAHWB has not been initialized.";
-            return false;
-        }
-        // validateSize should only be called on BLOB mode buffer.
-        if (mBufferDesc.format == AHARDWAREBUFFER_FORMAT_BLOB) {
-            if (offset + length > mBufferDesc.width) {
-                LOG(ERROR) << "Request size larger than the memory size.";
-                return false;
-            } else {
-                return true;
-            }
-        } else {
-            LOG(ERROR) << "Invalid AHARDWAREBUFFER_FORMAT, must be AHARDWAREBUFFER_FORMAT_BLOB.";
-            return false;
-        }
-    }
+    bool validateSize(uint32_t offset, uint32_t length) const override;
+
+    // prefer using MemoryAHWB::create
+    MemoryAHWB(const AHardwareBuffer_Desc& desc, hal::hidl_memory memory);
 
    private:
-    const AHardwareBuffer* mHardwareBuffer = nullptr;
-    AHardwareBuffer_Desc mBufferDesc;
+    const bool kBlobMode;
 };
 
 // A utility class to accumulate mulitple Memory objects and assign each
diff --git a/runtime/ModelBuilder.cpp b/runtime/ModelBuilder.cpp
index 60d96c3..efa79a2 100644
--- a/runtime/ModelBuilder.cpp
+++ b/runtime/ModelBuilder.cpp
@@ -26,6 +26,7 @@
 #include "ValidateHal.h"
 
 #include <map>
+#include <memory>
 #include <utility>
 
 namespace android {
@@ -253,17 +254,12 @@
             poolSize += operand.location.length;
         }
 
-        // Allocated the shared memory.
-        int n = mLargeValueMemory.create(poolSize);
-        if (n != ANEURALNETWORKS_NO_ERROR) {
-            return n;
-        }
-        uint8_t* memoryPointer = nullptr;
-        n = mLargeValueMemory.getPointer(&memoryPointer);
-        if (n != ANEURALNETWORKS_NO_ERROR) {
-            return n;
-        }
-        uint32_t poolIndex = mMemories.add(&mLargeValueMemory);
+        // Allocate the shared memory.
+        int n;
+        std::tie(n, mLargeValueMemory) = MemoryAshmem::create(poolSize);
+        NN_RETURN_IF_ERROR(n);
+        uint8_t* memoryPointer = mLargeValueMemory->getPointer();
+        uint32_t poolIndex = mMemories.add(mLargeValueMemory.get());
         VLOG(MODEL) << "Allocated large value pool of size " << poolSize << " at index "
                     << poolIndex;
 
diff --git a/runtime/ModelBuilder.h b/runtime/ModelBuilder.h
index 4bfcd04..0e604e1 100644
--- a/runtime/ModelBuilder.h
+++ b/runtime/ModelBuilder.h
@@ -20,6 +20,7 @@
 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H
 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H
 
+#include <memory>
 #include "HalInterfaces.h"
 #include "Memory.h"
 #include "NeuralNetworks.h"
@@ -160,7 +161,7 @@
     // Operand index and buffer pointer for all the large operand values of this model.
     std::vector<LargeValue> mLargeOperandValues;
     // The shared memory region that will contain the large values.
-    Memory mLargeValueMemory;
+    std::unique_ptr<MemoryAshmem> mLargeValueMemory;
 
     // Once the model has been finished, we should not allow further
     // modifications to the model.
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 987c7f8..4cbb161 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -823,12 +823,10 @@
 int ANeuralNetworksMemory_createFromFd(size_t size, int prot, int fd, size_t offset,
                                        ANeuralNetworksMemory** memory) {
     NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksMemory_createFromFd");
-    *memory = nullptr;
-    std::unique_ptr<MemoryFd> m = std::make_unique<MemoryFd>();
-    if (m == nullptr) {
-        return ANEURALNETWORKS_OUT_OF_MEMORY;
-    }
-    int n = m->set(size, prot, fd, offset);
+    *memory = nullptr;  // WARNING: b/138965390
+    int n = ANEURALNETWORKS_NO_ERROR;
+    std::unique_ptr<MemoryFd> m;
+    std::tie(n, m) = MemoryFd::create(size, prot, fd, offset);
     if (n != ANEURALNETWORKS_NO_ERROR) {
         return n;
     }
@@ -839,12 +837,10 @@
 int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer* ahwb,
                                                     ANeuralNetworksMemory** memory) {
     NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksMemory_createFromAHardwareBuffer");
-    *memory = nullptr;
-    std::unique_ptr<MemoryAHWB> m = std::make_unique<MemoryAHWB>();
-    if (m == nullptr) {
-        return ANEURALNETWORKS_OUT_OF_MEMORY;
-    }
-    int n = m->set(ahwb);
+    *memory = nullptr;  // WARNING: b/138965390
+    int n = ANEURALNETWORKS_NO_ERROR;
+    std::unique_ptr<MemoryAHWB> m;
+    std::tie(n, m) = MemoryAHWB::create(*ahwb);
     if (n != ANEURALNETWORKS_NO_ERROR) {
         return n;
     }
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 3a46e2d..b4badd7 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -5649,13 +5649,15 @@
  * offset and length must be set to zero and the entire memory region will be
  * associated with the specified input or output operand. There is no guarantee
  * that an arbitrary AHardwareBuffer_Format and AHardwareBuffer_UsageFlags combination
- * can be used by arbitrary devices. The execution will fail if selected set of devices
- * cannot consume the buffer.
+ * can be used by arbitrary devices. The execution will fail if the selected set of
+ * devices cannot consume the buffer.
  *
  * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared memory
  * backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB is
  * disallowed.
  *
+ * The provided AHardwareBuffer must outlive the ANeuralNetworksMemory object.
+ *
  * Available since API level 29.
  *
  * @param ahwb The AHardwareBuffer handle.
@@ -5971,7 +5973,7 @@
  * called will return an error.
  *
  * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
  * AHardwareBuffer usage.
  *
  * Available since API level 27.
@@ -6256,7 +6258,7 @@
  * buffer and 0 for length.
  *
  * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
  * AHardwareBuffer usage.
  *
  * Available since API level 27.
@@ -6349,7 +6351,7 @@
  * <p>The provided memory must outlive the execution.</p>
  *
  * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
  * AHardwareBuffer usage.
  *
  * Available since API level 27.
diff --git a/runtime/test/TestMemoryInternal.cpp b/runtime/test/TestMemoryInternal.cpp
index 8bba825..5657912 100644
--- a/runtime/test/TestMemoryInternal.cpp
+++ b/runtime/test/TestMemoryInternal.cpp
@@ -178,54 +178,6 @@
     close(outputFd);
 }
 
-// Regression test for http://b/69621433 "MemoryFd leaks shared memory regions".
-TEST_F(MemoryLeakTest, GetPointer) {
-    static const size_t size = 1;
-
-    int fd = ASharedMemory_create(nullptr, size);
-    ASSERT_GE(fd, 0);
-
-    uint8_t* buf = (uint8_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-    ASSERT_NE(buf, nullptr);
-    *buf = 0;
-
-    {
-        // Scope "mem" in such a way that any shared memory regions it
-        // owns will be released before we check the value of *buf: We
-        // want to verify that the explicit mmap() above is not
-        // perturbed by any mmap()/munmap() that results from methods
-        // invoked on "mem".
-
-        WrapperMemory mem(size, PROT_READ | PROT_WRITE, fd, 0);
-        ASSERT_TRUE(mem.isValid());
-
-        auto internalMem = reinterpret_cast<::android::nn::Memory*>(mem.get());
-        uint8_t* dummy;
-        ASSERT_EQ(internalMem->getPointer(&dummy), ANEURALNETWORKS_NO_ERROR);
-        (*dummy)++;
-    }
-
-    ASSERT_EQ(*buf, (uint8_t)1);
-    ASSERT_EQ(munmap(buf, size), 0);
-
-    close(fd);
-}
-
-// Regression test for http://b/69621433 "MemoryFd leaks shared memory regions".
-TEST_F(MemoryLeakTest, Instantiate) {
-    static const size_t size = 1;
-    int fd = ASharedMemory_create(nullptr, size);
-    ASSERT_GE(fd, 0);
-    WrapperMemory mem(size, PROT_READ | PROT_WRITE, fd, 0);
-    ASSERT_TRUE(mem.isValid());
-
-    auto internalMem = reinterpret_cast<::android::nn::Memory*>(mem.get());
-    uint8_t* dummy;
-    ASSERT_EQ(internalMem->getPointer(&dummy), ANEURALNETWORKS_NO_ERROR);
-
-    close(fd);
-}
-
 #ifndef NNTEST_ONLY_PUBLIC_API
 // Regression test for http://b/73663843, conv_2d trying to allocate too much memory.
 TEST_F(MemoryLeakTest, convTooLarge) {
diff --git a/runtime/test/TestNeuralNetworksWrapper.h b/runtime/test/TestNeuralNetworksWrapper.h
index e378170..8bc8a9e 100644
--- a/runtime/test/TestNeuralNetworksWrapper.h
+++ b/runtime/test/TestNeuralNetworksWrapper.h
@@ -38,13 +38,167 @@
 using wrapper::ExtensionModel;
 using wrapper::ExtensionOperandParams;
 using wrapper::ExtensionOperandType;
-using wrapper::Memory;
-using wrapper::Model;
 using wrapper::OperandType;
 using wrapper::Result;
 using wrapper::SymmPerChannelQuantParams;
 using wrapper::Type;
 
+class Memory {
+   public:
+    // Takes ownership of a ANeuralNetworksMemory
+    Memory(ANeuralNetworksMemory* memory) : mMemory(memory) {}
+
+    Memory(size_t size, int protect, int fd, size_t offset) {
+        mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) ==
+                 ANEURALNETWORKS_NO_ERROR;
+    }
+
+    Memory(AHardwareBuffer* buffer) {
+        mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
+                 ANEURALNETWORKS_NO_ERROR;
+    }
+
+    ~Memory() { ANeuralNetworksMemory_free(mMemory); }
+
+    // Disallow copy semantics to ensure the runtime object can only be freed
+    // once. Copy semantics could be enabled if some sort of reference counting
+    // or deep-copy system for runtime objects is added later.
+    Memory(const Memory&) = delete;
+    Memory& operator=(const Memory&) = delete;
+
+    // Move semantics to remove access to the runtime object from the wrapper
+    // object that is being moved. This ensures the runtime object will be
+    // freed only once.
+    Memory(Memory&& other) { *this = std::move(other); }
+    Memory& operator=(Memory&& other) {
+        if (this != &other) {
+            ANeuralNetworksMemory_free(mMemory);
+            mMemory = other.mMemory;
+            mValid = other.mValid;
+            other.mMemory = nullptr;
+            other.mValid = false;
+        }
+        return *this;
+    }
+
+    ANeuralNetworksMemory* get() const { return mMemory; }
+    bool isValid() const { return mValid; }
+
+   private:
+    ANeuralNetworksMemory* mMemory = nullptr;
+    bool mValid = true;
+};
+
+class Model {
+   public:
+    Model() {
+        // TODO handle the value returned by this call
+        ANeuralNetworksModel_create(&mModel);
+    }
+    ~Model() { ANeuralNetworksModel_free(mModel); }
+
+    // Disallow copy semantics to ensure the runtime object can only be freed
+    // once. Copy semantics could be enabled if some sort of reference counting
+    // or deep-copy system for runtime objects is added later.
+    Model(const Model&) = delete;
+    Model& operator=(const Model&) = delete;
+
+    // Move semantics to remove access to the runtime object from the wrapper
+    // object that is being moved. This ensures the runtime object will be
+    // freed only once.
+    Model(Model&& other) { *this = std::move(other); }
+    Model& operator=(Model&& other) {
+        if (this != &other) {
+            ANeuralNetworksModel_free(mModel);
+            mModel = other.mModel;
+            mNextOperandId = other.mNextOperandId;
+            mValid = other.mValid;
+            other.mModel = nullptr;
+            other.mNextOperandId = 0;
+            other.mValid = false;
+        }
+        return *this;
+    }
+
+    Result finish() {
+        if (mValid) {
+            auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel));
+            if (result != Result::NO_ERROR) {
+                mValid = false;
+            }
+            return result;
+        } else {
+            return Result::BAD_STATE;
+        }
+    }
+
+    uint32_t addOperand(const OperandType* type) {
+        if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) !=
+            ANEURALNETWORKS_NO_ERROR) {
+            mValid = false;
+        }
+        if (type->channelQuant) {
+            if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
+                        mModel, mNextOperandId, &type->channelQuant.value().params) !=
+                ANEURALNETWORKS_NO_ERROR) {
+                mValid = false;
+            }
+        }
+        return mNextOperandId++;
+    }
+
+    void setOperandValue(uint32_t index, const void* buffer, size_t length) {
+        if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) !=
+            ANEURALNETWORKS_NO_ERROR) {
+            mValid = false;
+        }
+    }
+
+    void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
+                                   size_t length) {
+        if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset,
+                                                           length) != ANEURALNETWORKS_NO_ERROR) {
+            mValid = false;
+        }
+    }
+
+    void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
+                      const std::vector<uint32_t>& outputs) {
+        if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()),
+                                              inputs.data(), static_cast<uint32_t>(outputs.size()),
+                                              outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
+            mValid = false;
+        }
+    }
+    void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs,
+                                  const std::vector<uint32_t>& outputs) {
+        if (ANeuralNetworksModel_identifyInputsAndOutputs(
+                    mModel, static_cast<uint32_t>(inputs.size()), inputs.data(),
+                    static_cast<uint32_t>(outputs.size()),
+                    outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
+            mValid = false;
+        }
+    }
+
+    void relaxComputationFloat32toFloat16(bool isRelax) {
+        if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) ==
+            ANEURALNETWORKS_NO_ERROR) {
+            mRelaxed = isRelax;
+        }
+    }
+
+    ANeuralNetworksModel* getHandle() const { return mModel; }
+    bool isValid() const { return mValid; }
+    bool isRelaxed() const { return mRelaxed; }
+
+   protected:
+    ANeuralNetworksModel* mModel = nullptr;
+    // We keep track of the operand ID as a convenience to the caller.
+    uint32_t mNextOperandId = 0;
+    bool mValid = true;
+    bool mRelaxed = false;
+};
+
 class Compilation {
    public:
     Compilation(const Model* model) {
diff --git a/runtime/test/TestPartitioningRandom.cpp b/runtime/test/TestPartitioningRandom.cpp
index b7326e5..e591f16 100644
--- a/runtime/test/TestPartitioningRandom.cpp
+++ b/runtime/test/TestPartitioningRandom.cpp
@@ -30,6 +30,7 @@
 #include <cassert>
 #include <cstdio>
 #include <iterator>
+#include <memory>
 #include <random>
 #include <set>
 #include <tuple>
@@ -39,7 +40,6 @@
 #include <unistd.h>
 
 #include <android-base/logging.h>
-#include <android/sharedmem.h>
 #include <gtest/gtest.h>
 
 // Uncomment the following line to generate some debugging output that
@@ -98,7 +98,6 @@
 using HalVersion = nn::HalVersion;
 using HidlModel = V1_2::Model;
 using HidlToken = hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
-using MemoryBuilder = nn::Memory;
 using ModelBuilder = nn::ModelBuilder;
 using Result = nn::test_wrapper::Result;
 using SampleDriver = nn::sample_driver::SampleDriver;
@@ -252,7 +251,6 @@
 class TestMemories {
    public:
     TestMemories() = default;
-    ~TestMemories();
 
     TestMemories(const TestMemories&) = delete;
     TestMemories& operator=(const TestMemories&) = delete;
@@ -282,15 +280,12 @@
         CHECK(mLayoutDone);
         CHECK(regionIndex < regionCount());
         const auto& regionDescriptor = mRegions[regionIndex];
-        const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)];
+        const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
         uint32_t offset = std::get<1>(regionDescriptor);
         uint32_t length = std::get<2>(regionDescriptor);
 
-        uint8_t* buffer;
-        if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) !=
-            ANEURALNETWORKS_NO_ERROR) {
-            CHECK(0);
-        }
+        uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
+        CHECK(buffer != nullptr);
 
         if (pMemory) *pMemory = memory;
         if (pOffset) *pOffset = offset;
@@ -309,8 +304,7 @@
     std::vector<uint32_t> mMemorySizes;
 
     // Index is the memory index.
-    std::vector<WrapperMemory> mMemorys;
-    std::vector<int> mFDs;
+    std::vector<WrapperMemory> mMemories;
 
     // Index is the region index; tuple represents memory index,
     // region offset within memory, region length.
@@ -323,20 +317,16 @@
 void TestMemories::layout() {
     CHECK(!mLayoutDone);
     for (uint32_t memorySize : mMemorySizes) {
-        const int fd = ASharedMemory_create(nullptr, memorySize);
-        CHECK(fd >= 0);
-        mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0);
-        mFDs.push_back(fd);
+        auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
+        CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
+        CHECK(ashmem != nullptr);
+
+        ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
+        mMemories.emplace_back(memory);
     }
     mLayoutDone = true;
 }
 
-TestMemories::~TestMemories() {
-    for (int fd : mFDs) {
-        close(fd);
-    }
-}
-
 class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
    public:
     RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}