Prevent race between QSRI and VkImage destruction
... by tracking pending sync fds created during QSRI and waiting
for all pending sync fds during image destruction.
The race is that the host may process a `vkDestroyImage()` from
the VK encoder command stream before processing an earlier
submitted `VIRTIO_GPU_NATIVE_SYNC_VULKAN_QSRI_EXPORT` from the
virtio gpu command stream.
Bug: b/237003428
Test: `cvd start --gpu_mode=gfxstream --enable_gpu_enable=true`
and repeatedly open and close apps quickly.
Change-Id: Iaa5c3a480958ce05f57362ce98d8dde9e048e4ed
diff --git a/system/vulkan_enc/ResourceTracker.cpp b/system/vulkan_enc/ResourceTracker.cpp
index bf700c4..9c51adb 100644
--- a/system/vulkan_enc/ResourceTracker.cpp
+++ b/system/vulkan_enc/ResourceTracker.cpp
@@ -366,6 +366,7 @@
#ifdef VK_USE_PLATFORM_ANDROID_KHR
bool hasExternalFormat = false;
unsigned androidFormat = 0;
+ std::vector<int> pendingQsriSyncFds;
#endif
#ifdef VK_USE_PLATFORM_FUCHSIA
bool isSysmemBackedMemory = false;
@@ -5864,6 +5865,30 @@
void on_vkDestroyImage(
void* context,
VkDevice device, VkImage image, const VkAllocationCallbacks *pAllocator) {
+
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+ AutoLock<RecursiveLock> lock(mLock);
+
+ // Wait for any pending QSRIs to prevent a race between the Gfxstream host
+ // potentially processing the below `vkDestroyImage()` from the VK encoder
+ // command stream before processing a previously submitted
+ // `VIRTIO_GPU_NATIVE_SYNC_VULKAN_QSRI_EXPORT` from the virtio-gpu command
+ // stream which relies on the image existing.
+ auto imageInfoIt = info_VkImage.find(image);
+ if (imageInfoIt != info_VkImage.end()) {
+ auto& imageInfo = imageInfoIt->second;
+ for (int syncFd : imageInfo.pendingQsriSyncFds) {
+ int syncWaitRet = sync_wait(syncFd, 3000);
+ if (syncWaitRet < 0) {
+ ALOGE("%s: Failed to wait for pending QSRI sync: sterror: %s errno: %d",
+ __func__, strerror(errno), errno);
+ }
+ close(syncFd);
+ }
+ imageInfo.pendingQsriSyncFds.clear();
+ }
+#endif
+
VkEncoder* enc = (VkEncoder*)context;
enc->vkDestroyImage(device, image, pAllocator, true /* do lock */);
}
@@ -7775,7 +7800,7 @@
return res;
}
- int exportSyncFdForQSRI(VkImage image) {
+ int exportSyncFdForQSRILocked(VkImage image) {
int fd = -1;
#if defined(VK_USE_PLATFORM_ANDROID_KHR) || defined(__linux__)
ALOGV("%s: call for image %p hos timage handle 0x%llx\n", __func__, (void*)image,
@@ -7823,6 +7848,40 @@
}
ALOGV("%s: got fd: %d\n", __func__, fd);
#endif
+
+#if defined(VK_USE_PLATFORM_ANDROID_KHR)
+ auto imageInfoIt = info_VkImage.find(image);
+ if (imageInfoIt != info_VkImage.end()) {
+ auto& imageInfo = imageInfoIt->second;
+
+ // Remove any pending QSRI sync fds that are already signaled.
+ auto syncFdIt = imageInfo.pendingQsriSyncFds.begin();
+ while (syncFdIt != imageInfo.pendingQsriSyncFds.end()) {
+ int syncFd = *syncFdIt;
+ int syncWaitRet = sync_wait(syncFd, /*timeout msecs*/0);
+ if (syncWaitRet == 0) {
+ // Sync fd is signaled.
+ syncFdIt = imageInfo.pendingQsriSyncFds.erase(syncFdIt);
+ close(syncFd);
+ } else {
+ if (errno != ETIME) {
+ ALOGE("%s: Failed to wait for pending QSRI sync: sterror: %s errno: %d",
+ __func__, strerror(errno), errno);
+ }
+ break;
+ }
+ }
+
+ int syncFdDup = dup(fd);
+ if (syncFdDup < 0) {
+ ALOGE("%s: Failed to dup() QSRI sync fd : sterror: %s errno: %d",
+ __func__, strerror(errno), errno);
+ } else {
+ imageInfo.pendingQsriSyncFds.push_back(syncFdDup);
+ }
+ }
+#endif
+
return fd;
}
@@ -7858,9 +7917,9 @@
if (pNativeFenceFd) {
*pNativeFenceFd =
- exportSyncFdForQSRI(image);
+ exportSyncFdForQSRILocked(image);
} else {
- int syncFd = exportSyncFdForQSRI(image);
+ int syncFd = exportSyncFdForQSRILocked(image);
if (syncFd >= 0) close(syncFd);
}
#endif