/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_command_buffer.h"
#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_query_pool.h"
#include "vn_queue.h"

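/* Returns the index of the first memory type allowed by mem_type_bits that
 * has all bits of required_mem_flags set, or UINT32_MAX if none qualifies.
 */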
static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits)
   {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}

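/* Creates a feedback buffer of the given size backed by host-coherent
 * memory: the VkBuffer is created, bound and persistently mapped here, and
 * partially created objects are cleaned up on failure.
 */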
VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_fb_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkResult result;

   struct vn_feedback_buffer *fb_buf =
      vk_zalloc(alloc, sizeof(*fb_buf), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fb_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Use concurrent sharing mode to avoid explicit queue family ownership
    * transfers when the device was created with queues from multiple queue
    * families.
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      /* Feedback for fences and timeline semaphores will write to this
       * buffer as a DST when signaling. Timeline semaphore feedback will
       * also read from this buffer as a SRC to retrieve the counter value to
       * signal.
       */
      .usage =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current Venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &fb_buf->buf_handle);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buffer;

   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &fb_buf->mem_handle);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = fb_buf->buf_handle,
      .memory = fb_buf->mem_handle,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, fb_buf->mem_handle, 0, VK_WHOLE_SIZE, 0,
                         &fb_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_fb_buf = fb_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);

out_free_feedback_buffer:
   vk_free(alloc, fb_buf);

   return result;
}

void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *fb_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, fb_buf->mem_handle);
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
   vk_free(alloc, fb_buf);
}

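/* A feedback slot must satisfy both the buffer's own alignment requirement
 * and the physical device's wa_min_fb_align, which is presumably a
 * workaround for renderer-side alignment constraints.
 */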
static inline uint32_t
vn_get_feedback_buffer_alignment(struct vn_device *dev,
                                 struct vn_feedback_buffer *fb_buf)
{
   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   return align(buf->requirements.memory.memoryRequirements.alignment,
                dev->physical_device->wa_min_fb_align);
}

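/* Grows the pool by creating a new feedback buffer of pool->size and making
 * it the active (first) buffer: suballocation restarts at offset 0, and any
 * unused tail space in older buffers is abandoned while their live slots
 * remain valid.
 */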
static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *fb_buf = NULL;
   VkResult result;

   result =
      vn_feedback_buffer_create(pool->dev, pool->size, pool->alloc, &fb_buf);
   if (result != VK_SUCCESS)
      return result;

   pool->used = 0;
   pool->alignment = vn_get_feedback_buffer_alignment(pool->dev, fb_buf);

   list_add(&fb_buf->head, &pool->fb_bufs);

   return VK_SUCCESS;
}

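/* Note that no feedback buffer is created up front: pool->used starts equal
 * to pool->size so that the first allocation triggers the initial grow.
 */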
VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->dev = dev;
   pool->alloc = alloc;
   pool->size = size;
   pool->used = size;
   pool->alignment = 1;
   list_inithead(&pool->fb_bufs);
   list_inithead(&pool->free_slots);

   return VK_SUCCESS;
}

void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, fb_buf,
                            &pool->fb_bufs, head)
      vn_feedback_buffer_destroy(pool->dev, fb_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}

static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   /* The initial values of pool->used and pool->alignment force the first
    * allocation to trigger a pool grow, which then initializes both
    * properly.
    */
   if (unlikely(align(size, pool->alignment) > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(align(size, pool->alignment) <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += align(size, pool->alignment);

   return list_first_entry(&pool->fb_bufs, struct vn_feedback_buffer, head);
}

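/* Returns an 8-byte feedback slot, recycling one from the free list when
 * available. The backing memory stays persistently mapped at slot->data.
 */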
struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *fb_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   fb_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!fb_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buf_handle = fb_buf->buf_handle;
   slot->data = fb_buf->data + offset;

   return slot;
}

void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

static inline bool
mask_is_32bit(uint64_t x)
{
   return (x & 0xffffffff00000000) == 0;
}

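/* Lowers a synchronization2 dependency containing exactly one buffer memory
 * barrier to a legacy VkBufferMemoryBarrier plus explicit src/dst stage
 * masks. The asserts encode the only barrier shapes the feedback paths here
 * emit.
 */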
static void
vn_build_buffer_memory_barrier(const VkDependencyInfo *dep_info,
                               VkBufferMemoryBarrier *barrier1,
                               VkPipelineStageFlags *src_stage_mask,
                               VkPipelineStageFlags *dst_stage_mask)
{
   assert(dep_info->pNext == NULL);
   assert(dep_info->memoryBarrierCount == 0);
   assert(dep_info->bufferMemoryBarrierCount == 1);
   assert(dep_info->imageMemoryBarrierCount == 0);

   const VkBufferMemoryBarrier2 *barrier2 =
      &dep_info->pBufferMemoryBarriers[0];
   assert(barrier2->pNext == NULL);
   assert(mask_is_32bit(barrier2->srcStageMask));
   assert(mask_is_32bit(barrier2->srcAccessMask));
   assert(mask_is_32bit(barrier2->dstStageMask));
   assert(mask_is_32bit(barrier2->dstAccessMask));

   *barrier1 = (VkBufferMemoryBarrier){
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = barrier2->srcAccessMask,
      .dstAccessMask = barrier2->dstAccessMask,
      .srcQueueFamilyIndex = barrier2->srcQueueFamilyIndex,
      .dstQueueFamilyIndex = barrier2->dstQueueFamilyIndex,
      .buffer = barrier2->buffer,
      .offset = barrier2->offset,
      .size = barrier2->size,
   };

   *src_stage_mask = barrier2->srcStageMask;
   *dst_stage_mask = barrier2->dstStageMask;
}

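/* Records the dependency natively when synchronization2 is supported, and
 * via the lowered synchronization1 barrier otherwise.
 */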
static void
vn_cmd_buffer_memory_barrier(VkCommandBuffer cmd_handle,
                             const VkDependencyInfo *dep_info,
                             bool sync2)
{
   if (sync2) {
      vn_CmdPipelineBarrier2(cmd_handle, dep_info);
   } else {
      VkBufferMemoryBarrier barrier1;
      VkPipelineStageFlags src_stage_mask;
      VkPipelineStageFlags dst_stage_mask;

      vn_build_buffer_memory_barrier(dep_info, &barrier1, &src_stage_mask,
                                     &dst_stage_mask);
      vn_CmdPipelineBarrier(cmd_handle, src_stage_mask, dst_stage_mask,
                            dep_info->dependencyFlags, 0, NULL, 1, &barrier1,
                            0, NULL);
   }
}

void
vn_event_feedback_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags2 src_stage_mask,
                             VkResult status,
                             bool sync2)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing an
    * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already waited on by
    * vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay in the
    * feedback signal is acceptable given the nature of VkEvent, and the
    * event feedback cmds' lifecycle is guarded by the intercepted command
    * buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkDependencyInfo dep_before = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = src_stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                               VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask =
                  VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_before, sync2);

   vn_CmdFillBuffer(cmd_handle, slot->buf_handle, slot->offset, 4, status);

   const VkDependencyInfo dep_after = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_HOST_BIT,
               .dstAccessMask =
                  VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_after, sync2);
}

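/* Makes a feedback transfer write visible to host reads of the mapped,
 * host-coherent slot memory.
 */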
static inline void
vn_feedback_cmd_record_flush_barrier(VkCommandBuffer cmd_handle,
                                     VkBuffer buffer,
                                     VkDeviceSize offset,
                                     VkDeviceSize size)
{
   const VkBufferMemoryBarrier buf_flush_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buffer,
      .offset = offset,
      .size = size,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_flush_barrier, 0, NULL);
}

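/* Records a feedback cmd that updates dst_slot on the device timeline: a
 * copy of the counter value from src_slot for timeline semaphores, or a fill
 * with VK_SUCCESS for fences.
 */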
static VkResult
vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
                       struct vn_feedback_slot *dst_slot,
                       struct vn_feedback_slot *src_slot)
{
   STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
   STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
   STATIC_ASSERT(sizeof(*src_slot->counter) == 8);

   /* The slot size is 8 bytes for a timeline semaphore and 4 bytes for a
    * fence. src_slot is non-null only for timeline semaphores.
    */
   const VkDeviceSize buf_size = src_slot ? 8 : 4;

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to the signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for the feedback update */
      .dstAccessMask = 0,
   };

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = dst_slot->buf_handle,
      .offset = dst_slot->offset,
      .size = buf_size,
   };

   /* Host writes to src_slot should implicitly be made visible upon the
    * vkQueueSubmit call.
    */
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);

   /* If passed a src_slot, timeline semaphore feedback records a cmd to copy
    * the counter value from the src slot to the dst slot. If src_slot is
    * NULL, fence feedback records a cmd to fill the dst slot with
    * VK_SUCCESS.
    */
   if (src_slot) {
      assert(src_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
      assert(dst_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);

      const VkBufferCopy buffer_copy = {
         .srcOffset = src_slot->offset,
         .dstOffset = dst_slot->offset,
         .size = buf_size,
      };
      vn_CmdCopyBuffer(cmd_handle, src_slot->buf_handle,
                       dst_slot->buf_handle, 1, &buffer_copy);
   } else {
      assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);

      vn_CmdFillBuffer(cmd_handle, dst_slot->buf_handle, dst_slot->offset,
                       buf_size, VK_SUCCESS);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, dst_slot->buf_handle,
                                        dst_slot->offset, buf_size);

   return vn_EndCommandBuffer(cmd_handle);
}

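/* Allocates one feedback cmd per queue family, each copying from a shared
 * src slot holding the counter value to signal, so that a recorded cmd
 * exists for whichever queue family the signal operation lands on.
 */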
struct vn_semaphore_feedback_cmd *
vn_semaphore_feedback_cmd_alloc(struct vn_device *dev,
                                struct vn_feedback_slot *dst_slot)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   struct vn_semaphore_feedback_cmd *sfb_cmd;
   VkCommandBuffer *cmd_handles;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &sfb_cmd, __typeof__(*sfb_cmd), 1);
   vk_multialloc_add(&ma, &cmd_handles, __typeof__(*cmd_handles),
                     dev->queue_family_count);
   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return NULL;

   struct vn_feedback_slot *src_slot =
      vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_SEMAPHORE);
   if (!src_slot) {
      vk_free(alloc, sfb_cmd);
      return NULL;
   }

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkDevice dev_handle = vn_device_to_handle(dev);
      VkResult result =
         vn_feedback_cmd_alloc(dev_handle, &dev->fb_cmd_pools[i], dst_slot,
                               src_slot, &cmd_handles[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_feedback_cmd_free(dev_handle, &dev->fb_cmd_pools[j],
                                 cmd_handles[j]);
         }

         vn_feedback_pool_free(&dev->feedback_pool, src_slot);
         vk_free(alloc, sfb_cmd);
         return NULL;
      }
   }

   sfb_cmd->cmd_handles = cmd_handles;
   sfb_cmd->src_slot = src_slot;
   return sfb_cmd;
}

void
vn_semaphore_feedback_cmd_free(struct vn_device *dev,
                               struct vn_semaphore_feedback_cmd *sfb_cmd)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_feedback_cmd_free(vn_device_to_handle(dev), &dev->fb_cmd_pools[i],
                           sfb_cmd->cmd_handles[i]);
   }

   vn_feedback_pool_free(&dev->feedback_pool, sfb_cmd->src_slot);
   vk_free(alloc, sfb_cmd);
}

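/* Records one query feedback cmd: either a copy of the query results (with
 * availability) into the query feedback buffer, or a fill zeroing the
 * corresponding slots when the queries are reset.
 */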
static void
vn_query_feedback_cmd_record_internal(VkCommandBuffer cmd_handle,
                                      VkQueryPool pool_handle,
                                      uint32_t query,
                                      uint32_t count,
                                      bool copy)
{
   struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
   assert(pool->fb_buf);

   /* Results are always 64-bit and include the availability bit (also
    * 64-bit).
    */
   const VkDeviceSize slot_size = (pool->result_array_size * 8) + 8;
   const VkDeviceSize offset = slot_size * query;
   const VkDeviceSize buf_size = slot_size * count;

   /* The first synchronization scope of vkCmdCopyQueryPoolResults does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against the feedback buffer fill cmd injected in vkCmdResetQueryPool.
    *
    * The second synchronization scope of vkCmdResetQueryPool does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against prior cmds referencing the queries.
    *
    * For srcAccessMask, VK_ACCESS_TRANSFER_WRITE_BIT is sufficient since the
    * GPU cache invalidation for the feedback buffer fill in
    * vkCmdResetQueryPool is done implicitly via queue submission.
    */
   const VkPipelineStageFlags src_stage_mask =
      copy ? VK_PIPELINE_STAGE_TRANSFER_BIT
           : VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = pool->fb_buf->buf_handle,
      .offset = offset,
      .size = buf_size,
   };
   vn_CmdPipelineBarrier(cmd_handle, src_stage_mask,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);

   if (copy) {
      /* Per spec: "The first synchronization scope includes all commands
       * which reference the queries in queryPool indicated by query that
       * occur earlier in submission order. If flags does not include
       * VK_QUERY_RESULT_WAIT_BIT, vkCmdEndQueryIndexedEXT,
       * vkCmdWriteTimestamp2, vkCmdEndQuery, and vkCmdWriteTimestamp are
       * excluded from this scope."
       *
       * Set VK_QUERY_RESULT_WAIT_BIT to ensure ordering after vkCmdEndQuery
       * or vkCmdWriteTimestamp makes the query available.
       *
       * Set VK_QUERY_RESULT_64_BIT since we can later convert the results to
       * 32-bit if the app requested that.
       *
       * Per spec: "vkCmdCopyQueryPoolResults is considered to be a transfer
       * operation, and its writes to buffer memory must be synchronized
       * using VK_PIPELINE_STAGE_TRANSFER_BIT and
       * VK_ACCESS_TRANSFER_WRITE_BIT before using the results."
       *
       * So we can reuse the flush barrier after this copy cmd.
       */
      vn_CmdCopyQueryPoolResults(cmd_handle, pool_handle, query, count,
                                 pool->fb_buf->buf_handle, offset, slot_size,
                                 VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
                                    VK_QUERY_RESULT_64_BIT |
                                    VK_QUERY_RESULT_WAIT_BIT);
   } else {
      vn_CmdFillBuffer(cmd_handle, pool->fb_buf->buf_handle, offset,
                       buf_size, 0);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, pool->fb_buf->buf_handle,
                                        offset, buf_size);
}

static VkResult
vn_query_feedback_cmd_record(VkDevice dev_handle,
                             struct list_head *query_records,
                             struct vn_query_feedback_cmd *qfb_cmd)
{
   assert(!list_is_empty(query_records));

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   };
   VkResult result = vn_BeginCommandBuffer(qfb_cmd->cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry_safe(struct vn_cmd_query_record, record,
                            query_records, head) {
      vn_query_feedback_cmd_record_internal(
         qfb_cmd->cmd_handle, vn_query_pool_to_handle(record->query_pool),
         record->query, record->query_count, record->copy);
   }

   return vn_EndCommandBuffer(qfb_cmd->cmd_handle);
}

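/* Reuses a cached query feedback cmd from the pool's free list, or allocates
 * a fresh one, and records the given query records into it.
 */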
VkResult
vn_query_feedback_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *fb_cmd_pool,
                            struct list_head *query_records,
                            struct vn_query_feedback_cmd **out_qfb_cmd)
{
   struct vn_query_feedback_cmd *qfb_cmd;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);

   if (list_is_empty(&fb_cmd_pool->free_qfb_cmds)) {
      struct vn_command_pool *cmd_pool =
         vn_command_pool_from_handle(fb_cmd_pool->pool_handle);

      qfb_cmd = vk_alloc(&cmd_pool->allocator, sizeof(*qfb_cmd),
                         VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!qfb_cmd) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto out_unlock;
      }

      const VkCommandBufferAllocateInfo info = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .commandPool = fb_cmd_pool->pool_handle,
         .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
         .commandBufferCount = 1,
      };
      VkCommandBuffer qfb_cmd_handle;
      result = vn_AllocateCommandBuffers(dev_handle, &info, &qfb_cmd_handle);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_pool->allocator, qfb_cmd);
         goto out_unlock;
      }

      qfb_cmd->fb_cmd_pool = fb_cmd_pool;
      qfb_cmd->cmd_handle = qfb_cmd_handle;
   } else {
      qfb_cmd = list_first_entry(&fb_cmd_pool->free_qfb_cmds,
                                 struct vn_query_feedback_cmd, head);
      list_del(&qfb_cmd->head);
      vn_ResetCommandBuffer(qfb_cmd->cmd_handle, 0);
   }

   result = vn_query_feedback_cmd_record(dev_handle, query_records, qfb_cmd);
   if (result != VK_SUCCESS) {
      list_add(&qfb_cmd->head, &fb_cmd_pool->free_qfb_cmds);
      goto out_unlock;
   }

   *out_qfb_cmd = qfb_cmd;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_query_feedback_cmd_free(struct vn_query_feedback_cmd *qfb_cmd)
{
   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);
   list_add(&qfb_cmd->head, &qfb_cmd->fb_cmd_pool->free_qfb_cmds);
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);
}

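/* Allocates a cmd buffer from the per-queue-family feedback cmd pool and
 * records the fence or semaphore feedback into it.
 */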
VkResult
vn_feedback_cmd_alloc(VkDevice dev_handle,
                      struct vn_feedback_cmd_pool *fb_cmd_pool,
                      struct vn_feedback_slot *dst_slot,
                      struct vn_feedback_slot *src_slot,
                      VkCommandBuffer *out_cmd_handle)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, cmd_pool_handle, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_feedback_cmd_free(VkDevice dev_handle,
                     struct vn_feedback_cmd_pool *fb_cmd_pool,
                     VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&fb_cmd_pool->mutex);
   vn_FreeCommandBuffers(dev_handle, fb_cmd_pool->pool_handle, 1,
                         &cmd_handle);
   simple_mtx_unlock(&fb_cmd_pool->mutex);
}

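/* Creates one feedback cmd pool per queue family (indices taken from
 * dev->queue_families). Skipped entirely when fence, semaphore and query
 * feedback are all disabled via VN_PERF options.
 */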
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *fb_cmd_pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   if (VN_PERF(NO_FENCE_FEEDBACK) && VN_PERF(NO_SEMAPHORE_FEEDBACK) &&
       VN_PERF(NO_QUERY_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   fb_cmd_pools =
      vk_zalloc(alloc, sizeof(*fb_cmd_pools) * dev->queue_family_count,
                VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!fb_cmd_pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc,
                                    &fb_cmd_pools[i].pool_handle);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, fb_cmd_pools[j].pool_handle,
                                  alloc);
            simple_mtx_destroy(&fb_cmd_pools[j].mutex);
         }

         vk_free(alloc, fb_cmd_pools);
         return result;
      }

      simple_mtx_init(&fb_cmd_pools[i].mutex, mtx_plain);
      list_inithead(&fb_cmd_pools[i].free_qfb_cmds);
   }

   dev->fb_cmd_pools = fb_cmd_pools;

   return VK_SUCCESS;
}

void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->fb_cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      list_for_each_entry_safe(struct vn_query_feedback_cmd, feedback_cmd,
                               &dev->fb_cmd_pools[i].free_qfb_cmds, head)
         vk_free(alloc, feedback_cmd);

      vn_DestroyCommandPool(dev_handle, dev->fb_cmd_pools[i].pool_handle,
                            alloc);
      simple_mtx_destroy(&dev->fb_cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->fb_cmd_pools);
}