Use dedicated allocs for buffer device address capture/replay
When using Vulkan buffer device addresses for capture/replay,
the VkDeviceMemory backing the buffers needs to have been allocated
with device address support. This isn't compatible with our current
model of suballocating host-visible memory from shared blocks, at least not
without significantly more work emulating buffer device addresses
ourselves in the guest.
Bug: 189960236
Test: dEQP-VK.binding_model.buffer_device_address.capture_replay_stress* all pass
Change-Id: Iaafdb579c4fdd06b90db4a27df0d5d71ecb01e8a
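
For context, a minimal sketch of the kind of guest allocation that must
take the new dedicated path (core Vulkan 1.2 structs; the allocation
size, memory type index, and captured address values below are
illustrative):

    VkMemoryAllocateFlagsInfo flagsInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
        nullptr,
        VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
            VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
        0 /* deviceMask */,
    };
    // At replay time, also request the address recorded during capture.
    VkMemoryOpaqueCaptureAddressAllocateInfo captureAddrInfo = {
        VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,
        &flagsInfo,
        capturedOpaqueAddress,
    };
    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        &captureAddrInfo,
        65536 /* allocationSize */,
        hostVisibleMemoryTypeIndex,
    };
    VkDeviceMemory memory;
    VkResult res = vkAllocateMemory(device, &allocInfo, nullptr, &memory);

Both extension structs have to reach the host-side allocation intact;
the sub-allocation path used to strip them (it reset pNext to nullptr).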
diff --git a/system/vulkan_enc/ResourceTracker.cpp b/system/vulkan_enc/ResourceTracker.cpp
index 25c3065..565e139 100644
--- a/system/vulkan_enc/ResourceTracker.cpp
+++ b/system/vulkan_enc/ResourceTracker.cpp
@@ -546,6 +546,7 @@
}
if (memInfo.directMapped) {
+ ALOGE("%s: warning: direct mapped memory never goes to unregister!\n", __func__);
subFreeHostMemory(&memInfo.subAlloc);
}
@@ -2944,16 +2945,37 @@
HostMemBlockIndex res = 0;
bool found = false;
+ VkMemoryAllocateFlagsInfo allocFlagsInfo;
+ VkMemoryOpaqueCaptureAddressAllocateInfo opaqueCaptureAddressAllocInfo;
+
+ // Add buffer device address capture structs
+ const VkMemoryAllocateFlagsInfo* allocFlagsInfoPtr =
+ vk_find_struct<VkMemoryAllocateFlagsInfo>(pAllocateInfo);
+ const VkMemoryOpaqueCaptureAddressAllocateInfo* opaqueCaptureAddressAllocInfoPtr =
+ vk_find_struct<VkMemoryOpaqueCaptureAddressAllocateInfo>(pAllocateInfo);
+
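+ // Either flag means the host-side VkDeviceMemory must itself be
+ // allocated with device address support, so treat such requests as
+ // dedicated instead of sub-allocating them from a shared block.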
+ bool isDeviceAddressMemoryAllocation =
+ allocFlagsInfoPtr && ((allocFlagsInfoPtr->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT) ||
+ (allocFlagsInfoPtr->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT));
+ bool isDedicated = isDeviceAddressMemoryAllocation;
+
while (!found) {
- for (HostMemBlockIndex i = 0; i < blocks.size(); ++i) {
- if (blocks[i].initialized &&
- blocks[i].initResult == VK_SUCCESS &&
- canSubAlloc(
- blocks[i].subAlloc,
- pAllocateInfo->allocationSize)) {
- res = i;
- found = true;
- return res;
+ // A dedicated allocation never reuses an existing block; always create a new one.
+ if (isDedicated) {
+ found = true;
+ } else {
+ for (HostMemBlockIndex i = 0; i < blocks.size(); ++i) {
+ if (blocks[i].initialized &&
+ blocks[i].initResult == VK_SUCCESS &&
+ !blocks[i].isDedicated &&
+ blocks[i].isDeviceAddressMemoryAllocation == isDeviceAddressMemoryAllocation &&
+ canSubAlloc(
+ blocks[i].subAlloc,
+ pAllocateInfo->allocationSize)) {
+ res = i;
+ found = true;
+ return res;
+ }
}
}
@@ -2961,13 +2983,21 @@
auto& hostMemAlloc = blocks.back();
+ hostMemAlloc.isDedicated = isDedicated;
+
// Uninitialized block; allocate on host.
static constexpr VkDeviceSize oneMb = 1048576;
+ // This needs to be a power of 2 that is at least the min alignment needed in HostVisibleMemoryVirtualization.cpp.
+ static constexpr VkDeviceSize biggestPage = 65536;
static constexpr VkDeviceSize kDefaultHostMemBlockSize =
16 * oneMb; // 16 mb
VkDeviceSize roundedUpAllocSize =
oneMb * ((pAllocateInfo->allocationSize + oneMb - 1) / oneMb);
+ // If dedicated, use a smaller "page rounded alloc size".
+ VkDeviceSize pageRoundedAllocSize =
+ biggestPage * ((pAllocateInfo->allocationSize + biggestPage - 1) / biggestPage);
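+ // e.g. a 70000-byte dedicated request rounds up to 2 * 65536 = 131072 bytes,
+ // rather than to the 1 MB granularity used when sizing shared blocks.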
+
VkDeviceSize virtualHeapSize = VIRTUAL_HOST_VISIBLE_HEAP_SIZE;
VkDeviceSize blockSizeNeeded =
@@ -2975,12 +3005,32 @@
std::min(virtualHeapSize,
kDefaultHostMemBlockSize));
- VkMemoryAllocateInfo allocInfoForHost = *pAllocateInfo;
+ VkMemoryAllocateInfo allocInfoForHost = vk_make_orphan_copy(*pAllocateInfo);
+ vk_struct_chain_iterator structChainIter = vk_make_chain_iterator(&allocInfoForHost);
allocInfoForHost.allocationSize = blockSizeNeeded;
+ if (isDedicated) {
+ allocInfoForHost.allocationSize = pageRoundedAllocSize;
+ }
+
// TODO: Support dedicated/external host visible allocation
- allocInfoForHost.pNext = nullptr;
+
+ // Support device address capture/replay allocations
+ if (isDeviceAddressMemoryAllocation) {
+ hostMemAlloc.isDeviceAddressMemoryAllocation = true;
+ if (allocFlagsInfoPtr) {
+ ALOGV("%s: has alloc flags\n", __func__);
+ allocFlagsInfo = *allocFlagsInfoPtr;
+ vk_append_struct(&structChainIter, &allocFlagsInfo);
+ }
+
+ if (opaqueCaptureAddressAllocInfoPtr) {
+ ALOGV("%s: has opaque capture address\n", __func__);
+ opaqueCaptureAddressAllocInfo = *opaqueCaptureAddressAllocInfoPtr;
+ vk_append_struct(&structChainIter, &opaqueCaptureAddressAllocInfo);
+ }
+ }
mLock.unlock();
VkResult host_res =
@@ -3010,13 +3060,14 @@
uint64_t directMappedAddr = 0;
-
VkResult directMapResult = VK_SUCCESS;
if (mFeatureInfo->hasDirectMem) {
mLock.unlock();
directMapResult =
enc->vkMapMemoryIntoAddressSpaceGOOGLE(
device, hostMemAlloc.memory, &directMappedAddr, true /* do lock */);
+ ALOGV("%s: direct mapped addr 0x%llx\n", __func__,
+ (unsigned long long)directMappedAddr);
mLock.lock();
} else if (mFeatureInfo->hasVirtioGpuNext) {
#if !defined(HOST_BUILD) && defined(VK_USE_PLATFORM_ANDROID_KHR)
@@ -3081,6 +3132,7 @@
}
if (directMapResult != VK_SUCCESS) {
+ ALOGE("%s: error: directMapResult != VK_SUCCESS\n", __func__);
hostMemAlloc.initialized = true;
hostMemAlloc.initResult = directMapResult;
mLock.unlock();
@@ -3092,6 +3144,7 @@
hostMemInfo.mappedPtr =
(uint8_t*)(uintptr_t)directMappedAddr;
hostMemInfo.virtualHostVisibleBacking = true;
+ ALOGV("%s: Set mapped ptr to %p\n", __func__, hostMemInfo.mappedPtr);
VkResult hostMemAllocRes =
finishHostMemAllocInit(
@@ -3107,6 +3160,11 @@
if (hostMemAllocRes != VK_SUCCESS) {
return INVALID_HOST_MEM_BLOCK;
}
+
+ if (isDedicated) {
+ ALOGV("%s: New dedicated block at %zu\n", __func__, blocks.size() - 1);
+ return blocks.size() - 1;
+ }
}
// unreachable, but we need to make Werror happy
@@ -3171,6 +3229,27 @@
VkMemoryAllocateInfo finalAllocInfo = vk_make_orphan_copy(*pAllocateInfo);
vk_struct_chain_iterator structChainIter = vk_make_chain_iterator(&finalAllocInfo);
+ VkMemoryAllocateFlagsInfo allocFlagsInfo;
+ VkMemoryOpaqueCaptureAddressAllocateInfo opaqueCaptureAddressAllocInfo;
+
+ // Add buffer device address capture structs
+ const VkMemoryAllocateFlagsInfo* allocFlagsInfoPtr =
+ vk_find_struct<VkMemoryAllocateFlagsInfo>(pAllocateInfo);
+ const VkMemoryOpaqueCaptureAddressAllocateInfo* opaqueCaptureAddressAllocInfoPtr =
+ vk_find_struct<VkMemoryOpaqueCaptureAddressAllocateInfo>(pAllocateInfo);
+
+ if (allocFlagsInfoPtr) {
+ ALOGV("%s: has alloc flags\n", __func__);
+ allocFlagsInfo = *allocFlagsInfoPtr;
+ vk_append_struct(&structChainIter, &allocFlagsInfo);
+ }
+
+ if (opaqueCaptureAddressAllocInfoPtr) {
+ ALOGV("%s: has opaque capture address\n", __func__);
+ opaqueCaptureAddressAllocInfo = *opaqueCaptureAddressAllocInfoPtr;
+ vk_append_struct(&structChainIter, &opaqueCaptureAddressAllocInfo);
+ }
+
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo;
VkImportColorBufferGOOGLE importCbInfo = {
VK_STRUCTURE_TYPE_IMPORT_COLOR_BUFFER_GOOGLE, 0,
@@ -3865,7 +3944,60 @@
return;
}
- subFreeHostMemory(&info.subAlloc);
+ VkDeviceMemory baseMemory = info.subAlloc.baseMemory;
+ uint32_t memoryTypeIndex = info.subAlloc.memoryTypeIndex;
+ bool isDeviceAddressMemoryAllocation = info.subAlloc.isDeviceAddressMemoryAllocation;
+ // If this was a device address memory allocation,
+ // free it right away.
+ // TODO: Retest with eagerly freeing other kinds of host visible
+ // allocs as well
+ if (subFreeHostMemory(&info.subAlloc) && isDeviceAddressMemoryAllocation) {
+ ALOGV("%s: Last free for this device-address block, "
+ "free on host and clear block contents\n", __func__);
+ ALOGV("%s: baseMem 0x%llx this mem 0x%llx\n", __func__,
+ (unsigned long long)baseMemory,
+ (unsigned long long)memory);
+ VkEncoder* enc = (VkEncoder*)context;
+ bool freeMemorySyncSupported =
+ mFeatureInfo->hasVulkanFreeMemorySync;
+
+ auto it = info_VkDevice.find(device);
+ if (it == info_VkDevice.end()) {
+ ALOGE("%s: Last free: could not find device\n", __func__);
+ return;
+ }
+
+ auto& deviceInfo = it->second;
+
+ auto& hostMemBlocksForTypeIndex =
+ deviceInfo.hostMemBlocks[memoryTypeIndex];
+
+ size_t indexToRemove = 0;
+ bool found = false;
+ for (const auto& allocInfo : hostMemBlocksForTypeIndex) {
+ if (baseMemory == allocInfo.memory) {
+ found = true;
+ break;
+ }
+ ++indexToRemove;
+ }
+
+ if (!found) {
+ ALOGE("%s: Last free: could not find original block\n", __func__);
+ return;
+ }
+
+ ALOGV("%s: Destroying host mem alloc block at index %zu\n", __func__, indexToRemove);
+
+ destroyHostMemAlloc(
+ freeMemorySyncSupported,
+ enc, device,
+ hostMemBlocksForTypeIndex.data() + indexToRemove);
+
+ ALOGV("%s: Destroying host mem alloc block at index %zu (done)\n", __func__, indexToRemove);
+
+ hostMemBlocksForTypeIndex.erase(hostMemBlocksForTypeIndex.begin() + indexToRemove);
+ }
}
VkResult on_vkMapMemory(
@@ -3878,19 +4010,33 @@
VkMemoryMapFlags,
void** ppData) {
- if (host_result != VK_SUCCESS) return host_result;
+ if (host_result != VK_SUCCESS) {
+ ALOGE("%s: Host failed to map\n", __func__);
+ return host_result;
+ }
AutoLock lock(mLock);
auto it = info_VkDeviceMemory.find(memory);
- if (it == info_VkDeviceMemory.end()) return VK_ERROR_MEMORY_MAP_FAILED;
+ if (it == info_VkDeviceMemory.end()) {
+ ALOGE("%s: Could not find this device memory\n", __func__);
+ return VK_ERROR_MEMORY_MAP_FAILED;
+ }
auto& info = it->second;
- if (!info.mappedPtr) return VK_ERROR_MEMORY_MAP_FAILED;
+ if (!info.mappedPtr) {
+ ALOGE("%s: mappedPtr null\n", __func__);
+ return VK_ERROR_MEMORY_MAP_FAILED;
+ }
if (size != VK_WHOLE_SIZE &&
(info.mappedPtr + offset + size > info.mappedPtr + info.allocationSize)) {
+ ALOGE("%s: size is too big. alloc size 0x%llx while we wanted offset 0x%llx size 0x%llx total 0x%llx\n", __func__,
+ (unsigned long long)info.allocationSize,
+ (unsigned long long)offset,
+ (unsigned long long)size,
+ (unsigned long long)offset);
return VK_ERROR_MEMORY_MAP_FAILED;
}
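
For reference, the opaque address that replay passes in
VkMemoryOpaqueCaptureAddressAllocateInfo comes from the capture run,
roughly as in this sketch (core Vulkan 1.2; illustrative only):

    VkDeviceMemoryOpaqueCaptureAddressInfo addrInfo = {
        VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,
        nullptr,
        memory, // allocated with VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT
    };
    uint64_t capturedOpaqueAddress =
        vkGetDeviceMemoryOpaqueCaptureAddress(device, &addrInfo);

For the host to be able to hand back the same address later, the
backing VkDeviceMemory must be a dedicated, device-address-capable
allocation, which is what this change arranges.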