| /* |
| * Copyright 2019 Google LLC |
| * SPDX-License-Identifier: MIT |
| * |
| * based in part on anv and radv which are: |
| * Copyright © 2015 Intel Corporation |
| * Copyright © 2016 Red Hat. |
| * Copyright © 2016 Bas Nieuwenhuizen |
| */ |
| |
| #include "vn_query_pool.h" |
| |
| #include "venus-protocol/vn_protocol_driver_query_pool.h" |
| |
| #include "vn_device.h" |
| #include "vn_feedback.h" |
| #include "vn_physical_device.h" |
| |
| /* query pool commands */ |
| |
| VkResult |
| vn_CreateQueryPool(VkDevice device, |
| const VkQueryPoolCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkQueryPool *pQueryPool) |
| { |
| struct vn_device *dev = vn_device_from_handle(device); |
| const VkAllocationCallbacks *alloc = |
| pAllocator ? pAllocator : &dev->base.base.alloc; |
| |
| struct vn_query_pool *pool = |
| vk_zalloc(alloc, sizeof(*pool), VN_DEFAULT_ALIGN, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!pool) |
| return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| vn_object_base_init(&pool->base, VK_OBJECT_TYPE_QUERY_POOL, &dev->base); |
| |
| pool->allocator = *alloc; |
| pool->query_count = pCreateInfo->queryCount; |
| |
| simple_mtx_init(&pool->mutex, mtx_plain); |
| |
| switch (pCreateInfo->queryType) { |
| case VK_QUERY_TYPE_OCCLUSION: |
| /* |
| * Occlusion queries write one integer value - the number of samples |
| * passed. |
| */ |
| pool->result_array_size = 1; |
| break; |
| case VK_QUERY_TYPE_PIPELINE_STATISTICS: |
| /* |
| * Pipeline statistics queries write one integer value for each bit that |
| * is enabled in the pipelineStatistics when the pool is created, and |
| * the statistics values are written in bit order starting from the |
| * least significant bit. |
| */ |
| pool->result_array_size = |
| util_bitcount(pCreateInfo->pipelineStatistics); |
| break; |
| case VK_QUERY_TYPE_TIMESTAMP: |
| /* Timestamp queries write one integer value. */ |
| pool->result_array_size = 1; |
| break; |
| case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: |
| /* |
| * Transform feedback queries write two integers; the first integer is |
| * the number of primitives successfully written to the corresponding |
| * transform feedback buffer and the second is the number of primitives |
| * output to the vertex stream, regardless of whether they were |
| * successfully captured or not. |
| */ |
| pool->result_array_size = 2; |
| break; |
| case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: |
| /* |
| * Primitives generated queries write one integer value; the number of |
| * primitives output to the vertex stream, regardless of whether |
| * transform feedback is active or not, or whether they were |
| * successfully captured by transform feedback or not. This is identical |
| * to the second integer of the transform feedback queries if transform |
| * feedback is active. |
| */ |
| pool->result_array_size = 1; |
| break; |
| default: |
| unreachable("bad query type"); |
| break; |
| } |
| |
| /* Venus has to handle overflow behavior with query feedback to keep |
| * consistency between vkCmdCopyQueryPoolResults and vkGetQueryPoolResults. |
| * The default query feedback behavior is to wrap on overflow. However, per |
| * spec: |
| * |
| * If an unsigned integer query’s value overflows the result type, the |
| * value may either wrap or saturate. |
| * |
    * We detect the renderer-side driver so that we can match its
    * implementation-specific behavior.
| */ |
| switch (dev->physical_device->renderer_driver_id) { |
| case VK_DRIVER_ID_ARM_PROPRIETARY: |
| case VK_DRIVER_ID_MESA_LLVMPIPE: |
| case VK_DRIVER_ID_MESA_TURNIP: |
| pool->saturate_on_overflow = true; |
| break; |
| default: |
| break; |
| }; |
| |
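   /* Creation is asynchronous: the command is encoded on the primary ring
    * without waiting for the renderer's reply, and the client-side handle
    * is returned right away.
    */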
| VkQueryPool pool_handle = vn_query_pool_to_handle(pool); |
| vn_async_vkCreateQueryPool(dev->primary_ring, device, pCreateInfo, NULL, |
| &pool_handle); |
| |
| *pQueryPool = pool_handle; |
| |
| return VK_SUCCESS; |
| } |
| |
| void |
| vn_DestroyQueryPool(VkDevice device, |
| VkQueryPool queryPool, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| struct vn_device *dev = vn_device_from_handle(device); |
| struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool); |
| const VkAllocationCallbacks *alloc; |
| |
| if (!pool) |
| return; |
| |
| alloc = pAllocator ? pAllocator : &pool->allocator; |
| |
| if (pool->fb_buf) |
| vn_feedback_buffer_destroy(dev, pool->fb_buf, alloc); |
| |
| simple_mtx_destroy(&pool->mutex); |
| |
| vn_async_vkDestroyQueryPool(dev->primary_ring, device, queryPool, NULL); |
| |
| vn_object_base_fini(&pool->base); |
| vk_free(alloc, pool); |
| } |
| |
| void |
| vn_ResetQueryPool(VkDevice device, |
| VkQueryPool queryPool, |
| uint32_t firstQuery, |
| uint32_t queryCount) |
| { |
| struct vn_device *dev = vn_device_from_handle(device); |
| struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool); |
| |
| vn_async_vkResetQueryPool(dev->primary_ring, device, queryPool, firstQuery, |
| queryCount); |
| if (pool->fb_buf) { |
      /* Feedback results are always 64-bit and include a 64-bit
       * availability value per query.
       */
| const uint32_t slot_size = (pool->result_array_size * 8) + 8; |
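      /* e.g. a transform feedback query (result_array_size == 2) uses a
       * 2 * 8 + 8 == 24 byte slot */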
| const uint32_t offset = slot_size * firstQuery; |
| memset(pool->fb_buf->data + offset, 0, slot_size * queryCount); |
| } |
| } |
| |
| static VkResult |
| vn_get_query_pool_feedback(struct vn_query_pool *pool, |
| uint32_t firstQuery, |
| uint32_t queryCount, |
| void *pData, |
| VkDeviceSize stride, |
| VkQueryResultFlags flags) |
| { |
| VkResult result = VK_SUCCESS; |
   /* Feedback results are always 64-bit and include a 64-bit availability
    * value per query.
    */
| const uint32_t slot_array_size = pool->result_array_size + 1; |
| uint64_t *src = pool->fb_buf->data; |
| src += slot_array_size * firstQuery; |
| |
| uint32_t dst_index = 0; |
| uint32_t src_index = 0; |
| if (flags & VK_QUERY_RESULT_64_BIT) { |
| uint64_t *dst = pData; |
| uint32_t index_stride = stride / sizeof(uint64_t); |
| for (uint32_t i = 0; i < queryCount; i++) { |
         /* Copy the result if it's available */
| const uint64_t avail = src[src_index + pool->result_array_size]; |
| if (avail) { |
| memcpy(&dst[dst_index], &src[src_index], |
| pool->result_array_size * sizeof(uint64_t)); |
| } else { |
| result = VK_NOT_READY; |
            /* Valid to return a result of 0 if the partial bit is set */
| if (flags & VK_QUERY_RESULT_PARTIAL_BIT) { |
| memset(&dst[dst_index], 0, |
| pool->result_array_size * sizeof(uint64_t)); |
| } |
| } |
| /* Set the availability bit if requested */ |
| if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) |
| dst[dst_index + pool->result_array_size] = avail; |
| |
| dst_index += index_stride; |
| src_index += slot_array_size; |
| } |
| } else { |
| uint32_t *dst = pData; |
| uint32_t index_stride = stride / sizeof(uint32_t); |
| for (uint32_t i = 0; i < queryCount; i++) { |
         /* Copy the result if it's available, converting down to uint32_t */
| const uint32_t avail = |
| (uint32_t)src[src_index + pool->result_array_size]; |
| if (avail) { |
| for (uint32_t j = 0; j < pool->result_array_size; j++) { |
| const uint64_t src_val = src[src_index + j]; |
| dst[dst_index + j] = |
| src_val > UINT32_MAX && pool->saturate_on_overflow |
| ? UINT32_MAX |
| : (uint32_t)src_val; |
| } |
| } else { |
| result = VK_NOT_READY; |
            /* Valid to return a result of 0 if the partial bit is set */
| if (flags & VK_QUERY_RESULT_PARTIAL_BIT) { |
| for (uint32_t j = 0; j < pool->result_array_size; j++) |
| dst[dst_index + j] = 0; |
| } |
| } |
| /* Set the availability bit if requested */ |
| if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) |
| dst[dst_index + pool->result_array_size] = avail; |
| |
| dst_index += index_stride; |
| src_index += slot_array_size; |
| } |
| } |
| return result; |
| } |
| |
| static void |
| vn_query_feedback_wait_ready(struct vn_device *dev, |
| struct vn_query_pool *pool, |
| uint32_t first_query, |
| uint32_t query_count) |
| { |
| VN_TRACE_FUNC(); |
| |
   /* Feedback results are always 64-bit and include a 64-bit availability
    * value per query.
    */
| const uint32_t step = pool->result_array_size + 1; |
| const uint64_t *avail = (uint64_t *)pool->fb_buf->data + |
| first_query * step + pool->result_array_size; |
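   /* avail points at the availability word of first_query; advancing the
    * index by step moves to the next query's availability word.
    */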
| |
| struct vn_relax_state relax_state = |
| vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY); |
| for (uint32_t i = 0, j = 0; i < query_count; i++, j += step) { |
| while (!avail[j]) { |
| vn_relax(&relax_state); |
| } |
| } |
| vn_relax_fini(&relax_state); |
| } |
| |
| VkResult |
| vn_GetQueryPoolResults(VkDevice device, |
| VkQueryPool queryPool, |
| uint32_t firstQuery, |
| uint32_t queryCount, |
| size_t dataSize, |
| void *pData, |
| VkDeviceSize stride, |
| VkQueryResultFlags flags) |
| { |
| struct vn_device *dev = vn_device_from_handle(device); |
| struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool); |
| const VkAllocationCallbacks *alloc = &pool->allocator; |
| VkResult result; |
| |
| const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4; |
| const size_t result_size = pool->result_array_size * result_width; |
| const bool result_always_written = |
| flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT); |
| |
   /* Get results from the feedback buffer when one exists. True partial
    * results are not possible via feedback: with
    * VK_QUERY_RESULT_PARTIAL_BIT, unavailable queries report zero, which
    * the spec permits.
    */
| if (pool->fb_buf) { |
      /* If the wait bit is set, poll until the queries become ready */
| if (flags & VK_QUERY_RESULT_WAIT_BIT) |
| vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount); |
| |
| result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData, |
| stride, flags); |
| return vn_result(dev->instance, result); |
| } |
| |
| VkQueryResultFlags packed_flags = flags; |
| size_t packed_stride = result_size; |
| if (!result_always_written) |
| packed_flags |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT; |
| if (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) |
| packed_stride += result_width; |
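
   /* e.g. a hypothetical occlusion query fetch without
    * VK_QUERY_RESULT_64_BIT and without WAIT/PARTIAL: result_width == 4,
    * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is forced on to detect
    * VK_NOT_READY, and packed_stride == 4 + 4 == 8 bytes per query.
    */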
| |
| const size_t packed_size = packed_stride * queryCount; |
| void *packed_data; |
| if (result_always_written && packed_stride == stride) { |
| packed_data = pData; |
| } else { |
| packed_data = vk_alloc(alloc, packed_size, VN_DEFAULT_ALIGN, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| if (!packed_data) |
| return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| result = vn_call_vkGetQueryPoolResults( |
| dev->primary_ring, device, queryPool, firstQuery, queryCount, |
| packed_size, packed_data, packed_stride, packed_flags); |
| |
| if (packed_data == pData) |
| return vn_result(dev->instance, result); |
| |
| const size_t copy_size = |
| result_size + |
| (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ? result_width : 0); |
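   /* copy_size includes the trailing availability word only when the app
    * itself requested it; packed_stride may be larger because
    * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT may have been forced on above
    * for VK_NOT_READY detection.
    */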
| const void *src = packed_data; |
| void *dst = pData; |
| if (result == VK_SUCCESS) { |
| for (uint32_t i = 0; i < queryCount; i++) { |
| memcpy(dst, src, copy_size); |
| src += packed_stride; |
| dst += stride; |
| } |
| } else if (result == VK_NOT_READY) { |
| assert(!result_always_written && |
| (packed_flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); |
| if (flags & VK_QUERY_RESULT_64_BIT) { |
| for (uint32_t i = 0; i < queryCount; i++) { |
| const bool avail = *(const uint64_t *)(src + result_size); |
| if (avail) |
| memcpy(dst, src, copy_size); |
| else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) |
| *(uint64_t *)(dst + result_size) = 0; |
| |
| src += packed_stride; |
| dst += stride; |
| } |
| } else { |
| for (uint32_t i = 0; i < queryCount; i++) { |
| const bool avail = *(const uint32_t *)(src + result_size); |
| if (avail) |
| memcpy(dst, src, copy_size); |
| else if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) |
| *(uint32_t *)(dst + result_size) = 0; |
| |
| src += packed_stride; |
| dst += stride; |
| } |
| } |
| } |
| |
| vk_free(alloc, packed_data); |
| return vn_result(dev->instance, result); |
| } |
| |
| VkResult |
| vn_query_feedback_buffer_init_once(struct vn_device *dev, |
| struct vn_query_pool *pool) |
| { |
| VkResult result = VK_SUCCESS; |
| |
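   /* Make initialization idempotent: the first caller allocates the
    * feedback buffer under the mutex; racing callers find pool->fb_buf
    * already set and return VK_SUCCESS without reallocating.
    */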
| simple_mtx_lock(&pool->mutex); |
| if (pool->fb_buf) |
| goto out_unlock; |
| |
| const uint32_t fb_buf_size = |
| (pool->result_array_size + 1) * sizeof(uint64_t) * pool->query_count; |
| struct vn_feedback_buffer *fb_buf; |
| result = |
| vn_feedback_buffer_create(dev, fb_buf_size, &pool->allocator, &fb_buf); |
| if (result == VK_SUCCESS) |
| pool->fb_buf = fb_buf; |
| |
| out_unlock: |
| simple_mtx_unlock(&pool->mutex); |
| return result; |
| } |