Revert "Revert "gfxstream: stream-renderer: add waitSyncResource..."

Revert submission 3125001

BUG=346348303
TEST=atbd

Change-Id: Iff294455e95093e80cbcb66f36d0a280c48299b3
diff --git a/host/ColorBuffer.cpp b/host/ColorBuffer.cpp
index a65ed83..1664680 100644
--- a/host/ColorBuffer.cpp
+++ b/host/ColorBuffer.cpp
@@ -443,6 +443,18 @@
     }
 }
 
+int ColorBuffer::waitSync() {
+    if (mColorBufferGl) {
+        return -1;
+    }
+
+    if (!mColorBufferVk) {
+        return -1;
+    }
+
+    return mColorBufferVk->waitSync();
+}
+
 #if GFXSTREAM_ENABLE_HOST_GLES
 bool ColorBuffer::glOpBlitFromCurrentReadBuffer() {
     if (!mColorBufferGl) {
diff --git a/host/ColorBuffer.h b/host/ColorBuffer.h
index 2a29a1e..43a3330 100644
--- a/host/ColorBuffer.h
+++ b/host/ColorBuffer.h
@@ -93,6 +93,8 @@
     bool invalidateForVk();
     bool importNativeResource(void* nativeResource, uint32_t type, bool preserveContent);
 
+    int waitSync();
+
 #if GFXSTREAM_ENABLE_HOST_GLES
     GLuint glOpGetTexture();
     bool glOpBlitFromCurrentReadBuffer();
diff --git a/host/FrameBuffer.cpp b/host/FrameBuffer.cpp
index 1af6919..3014704 100644
--- a/host/FrameBuffer.cpp
+++ b/host/FrameBuffer.cpp
@@ -2970,6 +2970,17 @@
     return colorBuffer->invalidateForVk();
 }
 
+int FrameBuffer::waitSyncColorBuffer(HandleType colorBufferHandle) {
+    AutoLock mutex(m_lock);
+
+    ColorBufferPtr colorBuffer = findColorBuffer(colorBufferHandle);
+    if (!colorBuffer) {
+        return -1;
+    }
+
+    return colorBuffer->waitSync();
+}
+
 #if GFXSTREAM_ENABLE_HOST_GLES
 HandleType FrameBuffer::getEmulatedEglWindowSurfaceColorBufferHandle(HandleType p_surface) {
     AutoLock mutex(m_lock);
diff --git a/host/FrameBuffer.h b/host/FrameBuffer.h
index ace590f..9e7f58a 100644
--- a/host/FrameBuffer.h
+++ b/host/FrameBuffer.h
@@ -497,6 +497,8 @@
                                      size_t bytesSize);
     bool invalidateColorBufferForVk(HandleType colorBufferHandle);
 
+    int waitSyncColorBuffer(HandleType colorBufferHandle);
+
 #if GFXSTREAM_ENABLE_HOST_GLES
     // Retrieves the color buffer handle associated with |p_surface|.
     // Returns 0 if there is no such handle.
diff --git a/host/RendererImpl.cpp b/host/RendererImpl.cpp
index a8fe16c..6387a51 100644
--- a/host/RendererImpl.cpp
+++ b/host/RendererImpl.cpp
@@ -672,6 +672,8 @@
             return FrameBuffer::getFB()->platformDestroySharedEglContext(context);
         },
 #endif
+    .wait_sync_color_buffer =
+        [](uint32_t handle) { return FrameBuffer::getFB()->waitSyncColorBuffer(handle); },
 };
 
 struct AndroidVirtioGpuOps* RendererImpl::getVirtioGpuOps() {
diff --git a/host/include/gfxstream/virtio-gpu-gfxstream-renderer.h b/host/include/gfxstream/virtio-gpu-gfxstream-renderer.h
index 9e8d280..9a97be8 100644
--- a/host/include/gfxstream/virtio-gpu-gfxstream-renderer.h
+++ b/host/include/gfxstream/virtio-gpu-gfxstream-renderer.h
@@ -238,6 +238,8 @@
 VG_EXPORT int stream_renderer_vulkan_info(uint32_t res_handle,
                                           struct stream_renderer_vulkan_info* vulkan_info);
 
+VG_EXPORT int stream_renderer_wait_sync_resource(uint32_t res_handle);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/host/virtio-gpu-gfxstream-renderer.cpp b/host/virtio-gpu-gfxstream-renderer.cpp
index 9bd4aa5..cf058d4 100644
--- a/host/virtio-gpu-gfxstream-renderer.cpp
+++ b/host/virtio-gpu-gfxstream-renderer.cpp
@@ -1797,6 +1797,21 @@
         return success ? 0 : -1;
     }
 
+    int waitSyncResource(uint32_t res_handle) {
+        auto it = mResources.find(res_handle);
+        if (it == mResources.end()) {
+            stream_renderer_error("waitSyncResource could not find resource: %d", res_handle);
+            return -EINVAL;
+        }
+        auto& entry = it->second;
+        if (ResType::COLOR_BUFFER != entry.type) {
+            stream_renderer_error("waitSyncResource is undefined for non-ColorBuffer resource.");
+            return -EINVAL;
+        }
+
+        return mVirtioGpuOps->wait_sync_color_buffer(res_handle);
+    }
+
     int resourceMapInfo(uint32_t res_handle, uint32_t* map_info) {
         auto it = mResources.find(res_handle);
         if (it == mResources.end()) return -EINVAL;
@@ -2101,6 +2116,10 @@
     return sRenderer()->platformDestroySharedEglContext(context);
 }
 
+VG_EXPORT int stream_renderer_wait_sync_resource(uint32_t res_handle) {
+    return sRenderer()->waitSyncResource(res_handle);
+}
+
 VG_EXPORT int stream_renderer_resource_map_info(uint32_t res_handle, uint32_t* map_info) {
     return sRenderer()->resourceMapInfo(res_handle, map_info);
 }
diff --git a/host/vulkan/ColorBufferVk.cpp b/host/vulkan/ColorBufferVk.cpp
index 40fac75..9461510 100644
--- a/host/vulkan/ColorBufferVk.cpp
+++ b/host/vulkan/ColorBufferVk.cpp
@@ -78,5 +78,7 @@
     return importExtMemoryHandleToVkColorBuffer(mHandle, type, extMemoryHandle);
 }
 
+int ColorBufferVk::waitSync() { return waitSyncVkColorBuffer(mHandle); }
+
 }  // namespace vk
 }  // namespace gfxstream
diff --git a/host/vulkan/ColorBufferVk.h b/host/vulkan/ColorBufferVk.h
index 264628c..81e1e42 100644
--- a/host/vulkan/ColorBufferVk.h
+++ b/host/vulkan/ColorBufferVk.h
@@ -42,6 +42,8 @@
 
     void onSave(android::base::Stream* stream);
 
+    int waitSync();
+
    private:
     ColorBufferVk(uint32_t handle);
 
diff --git a/host/vulkan/DeviceOpTracker.cpp b/host/vulkan/DeviceOpTracker.cpp
index c73eb0d..bc36554 100644
--- a/host/vulkan/DeviceOpTracker.cpp
+++ b/host/vulkan/DeviceOpTracker.cpp
@@ -67,27 +67,29 @@
     }
 }
 
-void DeviceOpTracker::PollAndProcessGarbage() {
-    {
-        std::lock_guard<std::mutex> lock(mPollFunctionsMutex);
+void DeviceOpTracker::Poll() {
+    std::lock_guard<std::mutex> lock(mPollFunctionsMutex);
 
-        // Assuming that polling functions are added to the queue in the roughly the order
-        // they are used, encountering an unsignaled/pending polling functions likely means
-        // that all polling functions after are also still pending. This might not necessarily
-        // always be the case but it is a simple heuristic to try to minimize the amount of
-        // work performed here as it is expected that this function will be called while
-        // processing other guest vulkan functions.
-        auto firstPendingIt = std::find_if(mPollFunctions.begin(), mPollFunctions.end(),
-                                           [](const OpPollingFunction& pollingFunc) {
-                                               DeviceOpStatus status = pollingFunc();
-                                               return status == DeviceOpStatus::kPending;
-                                           });
-        mPollFunctions.erase(mPollFunctions.begin(), firstPendingIt);
+    // Assuming that polling functions are added to the queue in roughly the order
+    // they are used, encountering an unsignaled/pending polling function likely means
+    // that all polling functions after it are also still pending. This might not necessarily
+    // always be the case but it is a simple heuristic to try to minimize the amount of
+    // work performed here as it is expected that this function will be called while
+    // processing other guest vulkan functions.
+    auto firstPendingIt = std::find_if(mPollFunctions.begin(), mPollFunctions.end(),
+                                       [](const OpPollingFunction& pollingFunc) {
+                                           DeviceOpStatus status = pollingFunc();
+                                           return status == DeviceOpStatus::kPending;
+                                       });
+    mPollFunctions.erase(mPollFunctions.begin(), firstPendingIt);
 
-        if (mPollFunctions.size() > kSizeLoggingThreshold) {
-            WARN("VkDevice:%p has %d pending waitables.", mDevice, mPollFunctions.size());
-        }
+    if (mPollFunctions.size() > kSizeLoggingThreshold) {
+        WARN("VkDevice:%p has %d pending waitables.", mDevice, mPollFunctions.size());
     }
+}
+
+void DeviceOpTracker::PollAndProcessGarbage() {
+    Poll();
 
     const auto now = std::chrono::system_clock::now();
     const auto old = now - kTimeThreshold;
diff --git a/host/vulkan/DeviceOpTracker.h b/host/vulkan/DeviceOpTracker.h
index 3d1a032..acd7782 100644
--- a/host/vulkan/DeviceOpTracker.h
+++ b/host/vulkan/DeviceOpTracker.h
@@ -29,6 +29,9 @@
 namespace gfxstream {
 namespace vk {
 
+class DeviceOpTracker;
+using DeviceOpTrackerPtr = std::shared_ptr<DeviceOpTracker>;
+
 using DeviceOpWaitable = std::shared_future<void>;
 
 inline bool IsDone(const DeviceOpWaitable& waitable) {
@@ -56,8 +59,11 @@
     // semaphore can be destroyed once the waitable has finished.
     void AddPendingGarbage(DeviceOpWaitable waitable, VkSemaphore semaphore);
 
-    // Checks for completion of previously submitted waitables and destroys dependent
-    // objects.
+    // Checks for completion of previously submitted waitables and sets their state accordingly.
+    // This function is thread-safe.
+    void Poll();
+
+    // Calls Poll(), and also destroys dependent objects accordingly
     void PollAndProcessGarbage();
 
     void OnDestroyDevice();
diff --git a/host/vulkan/VkCommonOperations.cpp b/host/vulkan/VkCommonOperations.cpp
index e8c8b90..612aaa1 100644
--- a/host/vulkan/VkCommonOperations.cpp
+++ b/host/vulkan/VkCommonOperations.cpp
@@ -3391,6 +3391,36 @@
     return infoPtr->currentLayout;
 }
 
+void setColorBufferLatestUse(uint32_t colorBufferHandle, DeviceOpWaitable waitable,
+                             DeviceOpTrackerPtr tracker) {
+    AutoLock lock(sVkEmulationLock);
+    auto infoPtr = android::base::find(sVkEmulation->colorBuffers, colorBufferHandle);
+    if (!infoPtr) {
+        VK_COMMON_ERROR("Invalid ColorBuffer handle %d.", static_cast<int>(colorBufferHandle));
+        return;
+    }
+
+    infoPtr->latestUse = waitable;
+    infoPtr->latestUseTracker = tracker;
+}
+
+int waitSyncVkColorBuffer(uint32_t colorBufferHandle) {
+    AutoLock lock(sVkEmulationLock);
+    auto infoPtr = android::base::find(sVkEmulation->colorBuffers, colorBufferHandle);
+    if (!infoPtr) {
+        VK_COMMON_ERROR("Invalid ColorBuffer handle %d.", static_cast<int>(colorBufferHandle));
+        return -1;
+    }
+
+    if (infoPtr->latestUse && infoPtr->latestUseTracker) {
+        while (!IsDone(*infoPtr->latestUse)) {
+            infoPtr->latestUseTracker->Poll();
+        }
+    }
+
+    return 0;
+}
+
 // Allocate a ready to use VkCommandBuffer for queue transfer. The caller needs
 // to signal the returned VkFence when the VkCommandBuffer completes.
 static std::tuple<VkCommandBuffer, VkFence> allocateQueueTransferCommandBuffer_locked() {
diff --git a/host/vulkan/VkCommonOperations.h b/host/vulkan/VkCommonOperations.h
index 36d2f7f..25e228c 100644
--- a/host/vulkan/VkCommonOperations.h
+++ b/host/vulkan/VkCommonOperations.h
@@ -26,6 +26,7 @@
 #include "BorrowedImageVk.h"
 #include "CompositorVk.h"
 #include "DebugUtilsHelper.h"
+#include "DeviceOpTracker.h"
 #include "DisplayVk.h"
 #include "FrameworkFormats.h"
 #include "aemu/base/ManagedDescriptor.hpp"
@@ -334,6 +335,9 @@
         VulkanMode vulkanMode = VulkanMode::Default;
 
         MTLTextureRef mtlTexture = nullptr;
+
+        std::optional<DeviceOpWaitable> latestUse;
+        DeviceOpTrackerPtr latestUseTracker = nullptr;
     };
 
     struct BufferInfo {
@@ -541,6 +545,11 @@
 
 VkImageLayout getColorBufferCurrentLayout(uint32_t colorBufferHandle);
 
+void setColorBufferLatestUse(uint32_t colorBufferHandle, DeviceOpWaitable waitable,
+                             DeviceOpTrackerPtr tracker);
+
+int waitSyncVkColorBuffer(uint32_t colorBufferHandle);
+
 void releaseColorBufferForGuestUse(uint32_t colorBufferHandle);
 
 std::unique_ptr<BorrowedImageInfoVk> borrowColorBufferForComposition(uint32_t colorBufferHandle,
diff --git a/host/vulkan/VkDecoderGlobalState.cpp b/host/vulkan/VkDecoderGlobalState.cpp
index 856d62d..20125f1 100644
--- a/host/vulkan/VkDecoderGlobalState.cpp
+++ b/host/vulkan/VkDecoderGlobalState.cpp
@@ -1807,7 +1807,7 @@
         deviceInfo.externalFencePool =
             std::make_unique<ExternalFencePool<VulkanDispatch>>(dispatch, *pDevice);
 
-        deviceInfo.deviceOpTracker.emplace(*pDevice, dispatch);
+        deviceInfo.deviceOpTracker = std::make_shared<DeviceOpTracker>(*pDevice, dispatch);
 
         if (mLogging) {
             fprintf(stderr, "%s: init vulkan dispatch from device (end)\n", __func__);
@@ -3977,6 +3977,57 @@
         convertQueueFamilyForeignToExternal(&barrier->dstQueueFamilyIndex);
     }
 
+    inline VkImage getIMBImage(const VkImageMemoryBarrier& imb) { return imb.image; }
+    inline VkImage getIMBImage(const VkImageMemoryBarrier2& imb) { return imb.image; }
+
+    inline VkImageLayout getIMBNewLayout(const VkImageMemoryBarrier& imb) { return imb.newLayout; }
+    inline VkImageLayout getIMBNewLayout(const VkImageMemoryBarrier2& imb) { return imb.newLayout; }
+
+    inline uint32_t getIMBSrcQueueFamilyIndex(const VkImageMemoryBarrier& imb) {
+        return imb.srcQueueFamilyIndex;
+    }
+    inline uint32_t getIMBSrcQueueFamilyIndex(const VkImageMemoryBarrier2& imb) {
+        return imb.srcQueueFamilyIndex;
+    }
+    inline uint32_t getIMBDstQueueFamilyIndex(const VkImageMemoryBarrier& imb) {
+        return imb.dstQueueFamilyIndex;
+    }
+    inline uint32_t getIMBDstQueueFamilyIndex(const VkImageMemoryBarrier2& imb) {
+        return imb.dstQueueFamilyIndex;
+    }
+
+    template <typename VkImageMemoryBarrierType>
+    void processImageMemoryBarrier(VkCommandBuffer commandBuffer, uint32_t imageMemoryBarrierCount,
+                                   const VkImageMemoryBarrierType* pImageMemoryBarriers) {
+        std::lock_guard<std::recursive_mutex> lock(mLock);
+        CommandBufferInfo* cmdBufferInfo = android::base::find(mCmdBufferInfo, commandBuffer);
+        if (!cmdBufferInfo) return;
+
+        // TODO: update image layout in ImageInfo
+        for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
+            auto* imageInfo = android::base::find(mImageInfo, getIMBImage(pImageMemoryBarriers[i]));
+            if (!imageInfo) {
+                continue;
+            }
+            cmdBufferInfo->imageLayouts[getIMBImage(pImageMemoryBarriers[i])] =
+                getIMBNewLayout(pImageMemoryBarriers[i]);
+            if (!imageInfo->boundColorBuffer.has_value()) {
+                continue;
+            }
+            HandleType cb = imageInfo->boundColorBuffer.value();
+            if (getIMBSrcQueueFamilyIndex(pImageMemoryBarriers[i]) == VK_QUEUE_FAMILY_EXTERNAL) {
+                cmdBufferInfo->acquiredColorBuffers.insert(cb);
+            }
+            if (getIMBDstQueueFamilyIndex(pImageMemoryBarriers[i]) == VK_QUEUE_FAMILY_EXTERNAL) {
+                cmdBufferInfo->releasedColorBuffers.insert(cb);
+            }
+            cmdBufferInfo->cbLayouts[cb] = getIMBNewLayout(pImageMemoryBarriers[i]);
+            // Insert unconditionally into this set, regardless of whether or not
+            // there is a queue family ownership transfer.
+            cmdBufferInfo->imageBarrierColorBuffers.insert(cb);
+        }
+    }
+
     void on_vkCmdPipelineBarrier(android::base::BumpPool* pool, VkCommandBuffer boxed_commandBuffer,
                                  VkPipelineStageFlags srcStageMask,
                                  VkPipelineStageFlags dstStageMask,
@@ -4013,26 +4064,7 @@
         DeviceInfo* deviceInfo = android::base::find(mDeviceInfo, cmdBufferInfo->device);
         if (!deviceInfo) return;
 
-        // TODO: update image layout in ImageInfo
-        for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
-            const VkImageMemoryBarrier& barrier = pImageMemoryBarriers[i];
-            auto* imageInfo = android::base::find(mImageInfo, barrier.image);
-            if (!imageInfo) {
-                continue;
-            }
-            cmdBufferInfo->imageLayouts[barrier.image] = barrier.newLayout;
-            if (!imageInfo->boundColorBuffer.has_value()) {
-                continue;
-            }
-            HandleType cb = imageInfo->boundColorBuffer.value();
-            if (barrier.srcQueueFamilyIndex == VK_QUEUE_FAMILY_EXTERNAL) {
-                cmdBufferInfo->acquiredColorBuffers.insert(cb);
-            }
-            if (barrier.dstQueueFamilyIndex == VK_QUEUE_FAMILY_EXTERNAL) {
-                cmdBufferInfo->releasedColorBuffers.insert(cb);
-            }
-            cmdBufferInfo->cbLayouts[cb] = barrier.newLayout;
-        }
+        processImageMemoryBarrier(commandBuffer, imageMemoryBarrierCount, pImageMemoryBarriers);
 
         if (!deviceInfo->emulateTextureEtc2 && !deviceInfo->emulateTextureAstc) {
             vk->vkCmdPipelineBarrier(commandBuffer, srcStageMask, dstStageMask, dependencyFlags,
@@ -4091,6 +4123,29 @@
                                   const VkDependencyInfo* pDependencyInfo) {
         auto commandBuffer = unbox_VkCommandBuffer(boxed_commandBuffer);
         auto vk = dispatch_VkCommandBuffer(boxed_commandBuffer);
+
+        for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; ++i) {
+            convertQueueFamilyForeignToExternal_VkBufferMemoryBarrier(
+                ((VkBufferMemoryBarrier*)pDependencyInfo->pBufferMemoryBarriers) + i);
+        }
+
+        for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; ++i) {
+            convertQueueFamilyForeignToExternal_VkImageMemoryBarrier(
+                ((VkImageMemoryBarrier*)pDependencyInfo->pImageMemoryBarriers) + i);
+        }
+
+        std::lock_guard<std::recursive_mutex> lock(mLock);
+        CommandBufferInfo* cmdBufferInfo = android::base::find(mCmdBufferInfo, commandBuffer);
+        if (!cmdBufferInfo) return;
+
+        DeviceInfo* deviceInfo = android::base::find(mDeviceInfo, cmdBufferInfo->device);
+        if (!deviceInfo) return;
+
+        processImageMemoryBarrier(commandBuffer, pDependencyInfo->imageMemoryBarrierCount,
+                                  pDependencyInfo->pImageMemoryBarriers);
+
+        // TODO: If this is a decompressed image, handle decompression before calling
+        // vkCmdPipelineBarrier2, i.e. match the on_vkCmdPipelineBarrier implementation.
         vk->vkCmdPipelineBarrier2(commandBuffer, pDependencyInfo);
     }
 
@@ -4551,7 +4606,7 @@
         }
 #endif
 
-        if (importCbInfoPtr && !mGuestUsesAngle) {
+        if (importCbInfoPtr) {
             memoryInfo.boundColorBuffer = importCbInfoPtr->colorBuffer;
         }
 
@@ -5285,6 +5340,27 @@
             queueCompletedWaitable = builder.OnQueueSubmittedWithFence(usedFence);
         }
 
+        {
+            std::lock_guard<std::recursive_mutex> lock(mLock);
+            std::unordered_set<HandleType> imageBarrierColorBuffers;
+            for (int i = 0; i < submitCount; i++) {
+                for (int j = 0; j < getCommandBufferCount(pSubmits[i]); j++) {
+                    VkCommandBuffer cmdBuffer = getCommandBuffer(pSubmits[i], j);
+                    CommandBufferInfo* cmdBufferInfo =
+                        android::base::find(mCmdBufferInfo, cmdBuffer);
+                    if (cmdBufferInfo) {
+                        imageBarrierColorBuffers.merge(cmdBufferInfo->imageBarrierColorBuffers);
+                    }
+                }
+            }
+            auto* deviceInfo = android::base::find(mDeviceInfo, device);
+            if (!deviceInfo) return VK_ERROR_INITIALIZATION_FAILED;
+            for (const auto& colorBuffer : imageBarrierColorBuffers) {
+                setColorBufferLatestUse(colorBuffer, queueCompletedWaitable,
+                                        deviceInfo->deviceOpTracker);
+            }
+        }
+
         AutoLock qlock(*ql);
         auto result = dispatchVkQueueSubmit(vk, queue, submitCount, pSubmits, usedFence);
 
@@ -7386,6 +7462,7 @@
         std::unordered_set<HandleType> releasedColorBuffers;
         std::unordered_map<HandleType, VkImageLayout> cbLayouts;
         std::unordered_map<VkImage, VkImageLayout> imageLayouts;
+        std::unordered_set<HandleType> imageBarrierColorBuffers;
 
         void reset() {
             preprocessFuncs.clear();
diff --git a/host/vulkan/VkDecoderInternalStructs.h b/host/vulkan/VkDecoderInternalStructs.h
index dc352de..1b51a5a 100644
--- a/host/vulkan/VkDecoderInternalStructs.h
+++ b/host/vulkan/VkDecoderInternalStructs.h
@@ -200,7 +200,7 @@
     std::unique_ptr<ExternalFencePool<VulkanDispatch>> externalFencePool = nullptr;
     std::set<VkFormat> imageFormats = {};  // image formats used on this device
     std::unique_ptr<GpuDecompressionPipelineManager> decompPipelines = nullptr;
-    std::optional<DeviceOpTracker> deviceOpTracker;
+    DeviceOpTrackerPtr deviceOpTracker = nullptr;
 
     // True if this is a compressed image that needs to be decompressed on the GPU (with our
     // compute shader)