| /* |
| * Copyright © 2020 Valve Corporation |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "radv_cmd_buffer.h" |
| #include "radv_cs.h" |
| #include "radv_entrypoints.h" |
| #include "radv_pipeline_rt.h" |
| #include "radv_queue.h" |
| #include "radv_shader.h" |
| #include "radv_spm.h" |
| #include "radv_sqtt.h" |
| #include "vk_common_entrypoints.h" |
| #include "vk_semaphore.h" |
| |
| #include "ac_rgp.h" |
| #include "ac_sqtt.h" |
| |
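/* Re-emit the SPI_SHADER_PGM_LO registers of all active graphics shaders so
 * they point at the relocated copies (see radv_sqtt_reloc_graphics_shaders).
 * Stages that still use the two-dword PGM_LO/PGM_HI layout also need the
 * upper VA bits written through the MEM_BASE field.
 */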
| void |
| radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| const enum amd_gfx_level gfx_level = pdev->info.gfx_level; |
| struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc; |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint64_t va; |
| |
| radv_cs_add_buffer(device->ws, cs, reloc->bo); |
| |
| /* VS */ |
| if (pipeline->base.shaders[MESA_SHADER_VERTEX]) { |
| struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX]; |
| |
| va = reloc->va[MESA_SHADER_VERTEX]; |
| if (vs->info.vs.as_ls) { |
| radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8); |
| } else if (vs->info.vs.as_es) { |
| radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2); |
| radeon_emit(cs, va >> 8); |
| radeon_emit(cs, S_00B324_MEM_BASE(va >> 40)); |
| } else if (vs->info.is_ngg) { |
| radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8); |
| } else { |
| radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2); |
| radeon_emit(cs, va >> 8); |
| radeon_emit(cs, S_00B124_MEM_BASE(va >> 40)); |
| } |
| } |
| |
| /* TCS */ |
| if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) { |
| const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]; |
| |
| va = reloc->va[MESA_SHADER_TESS_CTRL]; |
| |
| if (gfx_level >= GFX9) { |
| radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8); |
| } else { |
| radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2); |
| radeon_emit(cs, va >> 8); |
| radeon_emit(cs, S_00B424_MEM_BASE(va >> 40)); |
| } |
| } |
| |
| /* TES */ |
| if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) { |
| struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]; |
| |
| va = reloc->va[MESA_SHADER_TESS_EVAL]; |
| if (tes->info.is_ngg) { |
| radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8); |
| } else if (tes->info.tes.as_es) { |
| radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2); |
| radeon_emit(cs, va >> 8); |
| radeon_emit(cs, S_00B324_MEM_BASE(va >> 40)); |
| } else { |
| radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2); |
| radeon_emit(cs, va >> 8); |
| radeon_emit(cs, S_00B124_MEM_BASE(va >> 40)); |
| } |
| } |
| |
| /* GS */ |
| if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) { |
| struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY]; |
| |
| va = reloc->va[MESA_SHADER_GEOMETRY]; |
      if (gs->info.is_ngg || gfx_level >= GFX9) {
         radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
      }
| } |
| |
| /* FS */ |
| if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) { |
| const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; |
| |
| va = reloc->va[MESA_SHADER_FRAGMENT]; |
| |
| radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2); |
| radeon_emit(cs, va >> 8); |
| radeon_emit(cs, S_00B024_MEM_BASE(va >> 40)); |
| } |
| |
| /* MS */ |
| if (pipeline->base.shaders[MESA_SHADER_MESH]) { |
| const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH]; |
| |
| va = reloc->va[MESA_SHADER_MESH]; |
| |
| radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8); |
| } |
| } |
| |
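/* Return the VA that RGP should report for a shader: the relocated copy for
 * graphics pipelines, the original upload address otherwise.
 */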
| static uint64_t |
| radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage) |
| { |
| if (pipeline->type == RADV_PIPELINE_GRAPHICS) { |
| struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); |
| struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc; |
| return reloc->va[stage]; |
| } |
| |
| return radv_shader_get_va(pipeline->shaders[stage]); |
| } |
| |
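/* RGP expects all shaders of a pipeline to be contiguous in memory. Allocate
 * a single slab, copy every shader binary into it (going through the shader
 * DMA queue when binaries live in invisible VRAM) and record the new VAs.
 */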
| static VkResult |
| radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline) |
| { |
| struct radv_shader_dma_submission *submission = NULL; |
| struct radv_sqtt_shaders_reloc *reloc; |
| uint32_t code_size = 0; |
| VkResult result; |
| |
| reloc = calloc(1, sizeof(*reloc)); |
| if (!reloc) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| /* Compute the total code size. */ |
| for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { |
| const struct radv_shader *shader = pipeline->base.shaders[i]; |
| if (!shader) |
| continue; |
| |
| code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT); |
| } |
| |
| /* Allocate memory for all shader binaries. */ |
| reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline); |
| if (!reloc->alloc) { |
| result = VK_ERROR_OUT_OF_DEVICE_MEMORY; |
| goto fail; |
| } |
| |
| reloc->bo = reloc->alloc->arena->bo; |
| |
| /* Relocate shader binaries to be contiguous in memory as requested by RGP. */ |
| uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset; |
| char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset; |
| uint64_t offset = 0; |
| |
| if (device->shader_use_invisible_vram) { |
| submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size); |
| if (!submission) { |
| result = VK_ERROR_UNKNOWN; |
| goto fail; |
| } |
| } |
| |
| for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { |
| const struct radv_shader *shader = pipeline->base.shaders[i]; |
| void *dest_ptr; |
| if (!shader) |
| continue; |
| |
| reloc->va[i] = slab_va + offset; |
| |
| if (device->shader_use_invisible_vram) |
| dest_ptr = submission->ptr + offset; |
| else |
| dest_ptr = slab_ptr + offset; |
| |
| memcpy(dest_ptr, shader->code, shader->code_size); |
| |
| offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT); |
| } |
| |
| if (device->shader_use_invisible_vram) { |
| uint64_t upload_seq = 0; |
| |
| if (!radv_shader_dma_submit(device, submission, &upload_seq)) { |
| result = VK_ERROR_UNKNOWN; |
| goto fail; |
| } |
| |
| for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { |
| struct radv_shader *shader = pipeline->base.shaders[i]; |
| |
| if (!shader) |
| continue; |
| |
| shader->upload_seq = upload_seq; |
| } |
| |
| if (pipeline->base.gs_copy_shader) |
| pipeline->base.gs_copy_shader->upload_seq = upload_seq; |
| } |
| |
| pipeline->sqtt_shaders_reloc = reloc; |
| |
| return VK_SUCCESS; |
| |
| fail: |
| if (reloc->alloc) |
| radv_free_shader_memory(device, reloc->alloc); |
| free(reloc); |
| return result; |
| } |
| |
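/* Each helper below packs one RGP marker and emits it through the SQTT
 * userdata registers; RGP reassembles these packets into its event timeline.
 */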
| static void |
| radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type) |
| { |
| struct rgp_sqtt_marker_general_api marker = {0}; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; |
| marker.api_type = api_type; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
| static void |
| radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type) |
| { |
| struct rgp_sqtt_marker_general_api marker = {0}; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; |
| marker.api_type = api_type; |
| marker.is_end = 1; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
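/* Emit an event marker for a draw/dispatch. UINT_MAX means the corresponding
 * user data register index is unknown/unused and a default is written
 * instead.
 */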
| static void |
| radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type, |
| uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data, |
| uint32_t draw_index_user_data) |
| { |
| struct rgp_sqtt_marker_event marker = {0}; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; |
| marker.api_type = api_type; |
| marker.cmd_id = cmd_buffer->state.num_events++; |
| marker.cb_id = cmd_buffer->sqtt_cb_id; |
| |
| if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) { |
| vertex_offset_user_data = 0; |
| instance_offset_user_data = 0; |
| } |
| |
| if (draw_index_user_data == UINT_MAX) |
| draw_index_user_data = vertex_offset_user_data; |
| |
| marker.vertex_offset_reg_idx = vertex_offset_user_data; |
| marker.instance_offset_reg_idx = instance_offset_user_data; |
| marker.draw_index_reg_idx = draw_index_user_data; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
| static void |
| radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type, |
| uint32_t x, uint32_t y, uint32_t z) |
| { |
| struct rgp_sqtt_marker_event_with_dims marker = {0}; |
| |
| marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; |
| marker.event.api_type = api_type; |
| marker.event.cmd_id = cmd_buffer->state.num_events++; |
| marker.event.cb_id = cmd_buffer->sqtt_cb_id; |
| marker.event.has_thread_dims = 1; |
| |
| marker.thread_x = x; |
| marker.thread_y = y; |
| marker.thread_z = z; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
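/* Emit a user event marker (push/pop/trigger). Pop carries no payload; the
 * other types append the string, padded to a multiple of 4 bytes.
 */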
| void |
| radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type, |
| const char *str) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| if (type == UserEventPop) { |
| assert(str == NULL); |
| struct rgp_sqtt_marker_user_event marker = {0}; |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; |
| marker.data_type = type; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } else { |
| assert(str != NULL); |
| unsigned len = strlen(str); |
| struct rgp_sqtt_marker_user_event_with_length marker = {0}; |
| marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; |
| marker.user_event.data_type = type; |
| marker.length = align(len, 4); |
| |
| uint8_t *buffer = alloca(sizeof(marker) + marker.length); |
| memset(buffer, 0, sizeof(marker) + marker.length); |
| memcpy(buffer, &marker, sizeof(marker)); |
| memcpy(buffer + sizeof(marker), str, len); |
| |
| radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4); |
| } |
| } |
| |
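/* Emit the command buffer start marker. The device pointer doubles as the
 * device ID that lets RGP correlate markers from the same device.
 */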
| void |
| radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| uint64_t device_id = (uintptr_t)device; |
| struct rgp_sqtt_marker_cb_start marker = {0}; |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| /* Reserve a command buffer ID for SQTT. */ |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf); |
| union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&device->sqtt, ip_type); |
| cmd_buffer->sqtt_cb_id = cb_id.all; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START; |
| marker.cb_id = cmd_buffer->sqtt_cb_id; |
| marker.device_id_low = device_id; |
| marker.device_id_high = device_id >> 32; |
| marker.queue = cmd_buffer->qf; |
| marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT; |
| |
| if (cmd_buffer->qf == RADV_QUEUE_GENERAL) |
| marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT; |
| |
| if (!radv_sparse_queue_enabled(pdev)) |
| marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
| void |
| radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| uint64_t device_id = (uintptr_t)device; |
| struct rgp_sqtt_marker_cb_end marker = {0}; |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END; |
| marker.cb_id = cmd_buffer->sqtt_cb_id; |
| marker.device_id_low = device_id; |
| marker.device_id_high = device_id >> 32; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
| void |
| radv_describe_draw(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX); |
| } |
| |
| void |
| radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| if (info->indirect) { |
| radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX); |
| } else { |
| radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0], |
| info->blocks[1], info->blocks[2]); |
| } |
| } |
| |
| void |
| radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects) |
| { |
| cmd_buffer->state.current_event_type = |
| (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear; |
| } |
| |
| void |
| radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer) |
| { |
| cmd_buffer->state.current_event_type = EventInternalUnknown; |
| } |
| |
| void |
| radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer) |
| { |
| cmd_buffer->state.current_event_type = EventRenderPassResolve; |
| } |
| |
| void |
| radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer) |
| { |
| cmd_buffer->state.current_event_type = EventInternalUnknown; |
| } |
| |
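/* The barrier end marker is emitted lazily (see radv_describe_barrier_end) so
 * that sync operations recorded in sqtt_flush_bits after the barrier call
 * returns can still be attached to it.
 */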
| void |
| radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| struct rgp_sqtt_marker_barrier_end marker = {0}; |
| |
| if (likely(!device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end) |
| return; |
| |
| cmd_buffer->state.pending_sqtt_barrier_end = false; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END; |
| marker.cb_id = cmd_buffer->sqtt_cb_id; |
| |
| marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions; |
| |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS) |
| marker.wait_on_eop_ts = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH) |
| marker.vs_partial_flush = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH) |
| marker.ps_partial_flush = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH) |
| marker.cs_partial_flush = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME) |
| marker.pfp_sync_me = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA) |
| marker.sync_cp_dma = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0) |
| marker.inval_tcp = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE) |
| marker.inval_sqI = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0) |
| marker.inval_sqK = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2) |
| marker.flush_tcc = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2) |
| marker.inval_tcc = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB) |
| marker.flush_cb = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB) |
| marker.inval_cb = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB) |
| marker.flush_db = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB) |
| marker.inval_db = true; |
| if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1) |
| marker.inval_gl1 = true; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| |
| cmd_buffer->state.num_layout_transitions = 0; |
| } |
| |
| void |
| radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| struct rgp_sqtt_marker_barrier_start marker = {0}; |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| if (cmd_buffer->state.in_barrier) { |
| assert(!"attempted to start a barrier while already in a barrier"); |
| return; |
| } |
| |
| radv_describe_barrier_end_delayed(cmd_buffer); |
| cmd_buffer->state.sqtt_flush_bits = 0; |
| cmd_buffer->state.in_barrier = true; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START; |
| marker.cb_id = cmd_buffer->sqtt_cb_id; |
| marker.dword02 = reason; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
| void |
| radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) |
| { |
| cmd_buffer->state.in_barrier = false; |
| cmd_buffer->state.pending_sqtt_barrier_end = true; |
| } |
| |
| void |
| radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| struct rgp_sqtt_marker_layout_transition marker = {0}; |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| if (!cmd_buffer->state.in_barrier) { |
| assert(!"layout transition marker should be only emitted inside a barrier marker"); |
| return; |
| } |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION; |
| marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand; |
| marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand; |
| marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize; |
| marker.dcc_decompress = barrier->layout_transitions.dcc_decompress; |
| marker.fmask_decompress = barrier->layout_transitions.fmask_decompress; |
| marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate; |
| marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand; |
| marker.init_mask_ram = barrier->layout_transitions.init_mask_ram; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| |
| cmd_buffer->state.num_layout_transitions++; |
| } |
| |
| void |
| radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| char marker[64]; |
| snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count); |
| radv_write_user_event_marker(cmd_buffer, UserEventPush, marker); |
| } |
| |
| void |
| radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer) |
| { |
| radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL); |
| } |
| |
| static void |
| radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint, |
| struct radv_pipeline *pipeline) |
| { |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| struct rgp_sqtt_marker_pipeline_bind marker = {0}; |
| |
| if (likely(!device->sqtt.bo)) |
| return; |
| |
| marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE; |
| marker.cb_id = cmd_buffer->sqtt_cb_id; |
| marker.bind_point = pipelineBindPoint; |
| marker.api_pso_hash[0] = pipeline->pipeline_hash; |
| marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32; |
| |
| radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); |
| } |
| |
| /* Queue events */ |
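/* Queue timing records are appended to a device-global list under a lock and
 * consumed by ac_rgp when the trace is dumped.
 */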
| static void |
| radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record) |
| { |
| struct radv_device *device = radv_queue_device(queue); |
| struct ac_sqtt *sqtt = &device->sqtt; |
| struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event; |
| |
| simple_mtx_lock(&queue_event->lock); |
| list_addtail(&record->list, &queue_event->record); |
| queue_event->record_count++; |
| simple_mtx_unlock(&queue_event->lock); |
| } |
| |
| static VkResult |
| radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr) |
| { |
| struct rgp_queue_event_record *record; |
| |
| record = calloc(1, sizeof(struct rgp_queue_event_record)); |
| if (!record) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT; |
| record->cpu_timestamp = cpu_timestamp; |
| record->gpu_timestamps[0] = gpu_timestamp_ptr; |
| record->queue_info_index = queue->vk.queue_family_index; |
| |
| radv_describe_queue_event(queue, record); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx, |
| uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr) |
| { |
| struct radv_device *device = radv_queue_device(queue); |
| struct rgp_queue_event_record *record; |
| |
| record = calloc(1, sizeof(struct rgp_queue_event_record)); |
| if (!record) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT; |
| record->api_id = (uintptr_t)cmd_buffer; |
| record->cpu_timestamp = cpu_timestamp; |
| record->frame_index = device->vk.current_frame; |
| record->gpu_timestamps[0] = pre_gpu_timestamp_ptr; |
| record->gpu_timestamps[1] = post_gpu_timestamp_ptr; |
| record->queue_info_index = queue->vk.queue_family_index; |
| record->submit_sub_index = cmdbuf_idx; |
| |
| radv_describe_queue_event(queue, record); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync, |
| enum sqtt_queue_event_type event_type) |
| { |
| struct rgp_queue_event_record *record; |
| |
| record = calloc(1, sizeof(struct rgp_queue_event_record)); |
| if (!record) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| record->event_type = event_type; |
| record->api_id = (uintptr_t)sync; |
| record->cpu_timestamp = os_time_get_nano(); |
| record->queue_info_index = queue->vk.queue_family_index; |
| |
| radv_describe_queue_event(queue, record); |
| |
| return VK_SUCCESS; |
| } |
| |
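/* Called once per present: stop the in-flight capture if one is active, and
 * (re)start capturing when a trace was triggered or when stopping failed
 * because the buffer was too small.
 */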
| static void |
| radv_handle_sqtt(VkQueue _queue) |
| { |
| VK_FROM_HANDLE(radv_queue, queue, _queue); |
| struct radv_device *device = radv_queue_device(queue); |
| bool trigger = device->sqtt_triggered; |
| device->sqtt_triggered = false; |
| |
| if (device->sqtt_enabled) { |
| if (!radv_sqtt_stop_capturing(queue)) { |
| /* Try to capture the next frame if the buffer was too small initially. */ |
| trigger = true; |
| } |
| } |
| |
| if (trigger) { |
| radv_sqtt_start_capturing(queue); |
| } |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) |
| { |
| VK_FROM_HANDLE(radv_queue, queue, _queue); |
| struct radv_device *device = radv_queue_device(queue); |
| VkResult result; |
| |
| queue->sqtt_present = true; |
| |
| result = device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo); |
| if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) |
| return result; |
| |
| queue->sqtt_present = false; |
| |
| radv_handle_sqtt(_queue); |
| |
   return result;
| } |
| |
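/* Present path: prepend one timed command buffer that writes a top-of-pipe
 * GPU timestamp, so the present can be placed on the RGP queue timeline.
 */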
| static VkResult |
| radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence) |
| { |
| VK_FROM_HANDLE(radv_queue, queue, _queue); |
| struct radv_device *device = radv_queue_device(queue); |
| VkCommandBufferSubmitInfo *new_cmdbufs = NULL; |
| struct radeon_winsys_bo *gpu_timestamp_bo; |
| uint32_t gpu_timestamp_offset; |
| VkCommandBuffer timed_cmdbuf; |
| void *gpu_timestamp_ptr; |
| uint64_t cpu_timestamp; |
| VkResult result = VK_SUCCESS; |
| |
| assert(submitCount <= 1 && pSubmits != NULL); |
| |
| for (uint32_t i = 0; i < submitCount; i++) { |
| const VkSubmitInfo2 *pSubmit = &pSubmits[i]; |
| VkSubmitInfo2 sqtt_submit = *pSubmit; |
| |
| assert(sqtt_submit.commandBufferInfoCount <= 1); |
| |
| /* Command buffers */ |
| uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1; |
| |
| new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs)); |
| if (!new_cmdbufs) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| /* Sample the current CPU time before building the GPU timestamp cmdbuf. */ |
| cpu_timestamp = os_time_get_nano(); |
| |
| result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset, |
| VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| new_cmdbufs[0] = (VkCommandBufferSubmitInfo){ |
| .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, |
| .commandBuffer = timed_cmdbuf, |
| }; |
| |
| if (sqtt_submit.commandBufferInfoCount == 1) |
| new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0]; |
| |
| sqtt_submit.commandBufferInfoCount = new_cmdbuf_count; |
| sqtt_submit.pCommandBufferInfos = new_cmdbufs; |
| |
| radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr); |
| |
| result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
      free(new_cmdbufs);
| } |
| |
| return result; |
| |
| fail: |
   free(new_cmdbufs);
| return result; |
| } |
| |
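/* Submit path: bracket every application command buffer with two timed
 * command buffers (a top-of-pipe timestamp before, a bottom-of-pipe one
 * after) and record semaphore waits/signals as queue events.
 */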
| VKAPI_ATTR VkResult VKAPI_CALL |
| sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence) |
| { |
| VK_FROM_HANDLE(radv_queue, queue, _queue); |
| struct radv_device *device = radv_queue_device(queue); |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| const struct radv_instance *instance = radv_physical_device_instance(pdev); |
| const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE; |
| VkCommandBufferSubmitInfo *new_cmdbufs = NULL; |
| VkResult result = VK_SUCCESS; |
| |
   /* Pass the submit straight through unless queue events (or per-submit
    * tracing) are enabled on a graphics/compute queue.
    */
| if (((!device->sqtt_enabled || !radv_sqtt_queue_events_enabled()) && !instance->vk.trace_per_submit) || |
| !is_gfx_or_ace) |
| return device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence); |
| |
| for (uint32_t i = 0; i < submitCount; i++) { |
| const VkSubmitInfo2 *pSubmit = &pSubmits[i]; |
| |
| /* Wait semaphores */ |
| for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) { |
| const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j]; |
| VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore); |
| radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE); |
| } |
| } |
| |
| if (queue->sqtt_present) |
| return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence); |
| |
| if (instance->vk.trace_per_submit) |
| radv_sqtt_start_capturing(queue); |
| |
| for (uint32_t i = 0; i < submitCount; i++) { |
| const VkSubmitInfo2 *pSubmit = &pSubmits[i]; |
| VkSubmitInfo2 sqtt_submit = *pSubmit; |
| |
| /* Command buffers */ |
| uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3; |
| uint32_t cmdbuf_idx = 0; |
| |
| new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs)); |
| if (!new_cmdbufs) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) { |
| const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j]; |
| struct radeon_winsys_bo *gpu_timestamps_bo[2]; |
| uint32_t gpu_timestamps_offset[2]; |
| VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf; |
| void *gpu_timestamps_ptr[2]; |
| uint64_t cpu_timestamp; |
| |
| /* Sample the current CPU time before building the timed cmdbufs. */ |
| cpu_timestamp = os_time_get_nano(); |
| |
| result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0], |
| &gpu_timestamps_ptr[0]); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0], |
| VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){ |
| .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, |
| .commandBuffer = pre_timed_cmdbuf, |
| }; |
| |
| new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo; |
| |
| result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1], |
| &gpu_timestamps_ptr[1]); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1], |
| VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){ |
| .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, |
| .commandBuffer = post_timed_cmdbuf, |
| }; |
| |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer); |
| radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]); |
| } |
| |
| sqtt_submit.commandBufferInfoCount = new_cmdbuf_count; |
| sqtt_submit.pCommandBufferInfos = new_cmdbufs; |
| |
| result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| /* Signal semaphores */ |
| for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) { |
| const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j]; |
| VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore); |
| radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE); |
| } |
| |
      free(new_cmdbufs);
| } |
| |
| if (instance->vk.trace_per_submit) { |
| if (!radv_sqtt_stop_capturing(queue)) { |
| fprintf(stderr, |
| "radv: Failed to capture RGP for this submit because the buffer is too small and auto-resizing " |
| "is disabled. See RADV_THREAD_TRACE_BUFFER_SIZE for increasing the size.\n"); |
| } |
| } |
| |
| return result; |
| |
| fail: |
   free(new_cmdbufs);
| return result; |
| } |
| |
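/* EVENT_MARKER_BASE wraps a layer-dispatched command with begin/end general
 * API markers and tags the internal draws/dispatches it triggers with the
 * matching RGP event type. EVENT_MARKER(Draw, ...) roughly expands to:
 *
 *    radv_write_begin_general_api_marker(cmd_buffer, ApiCmdDraw);
 *    cmd_buffer->state.current_event_type = EventCmdDraw;
 *    device->layer_dispatch.rgp.CmdDraw(...);
 *    cmd_buffer->state.current_event_type = EventInternalUnknown;
 *    radv_write_end_general_api_marker(cmd_buffer, ApiCmdDraw);
 */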
| #define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...) \ |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); \ |
| radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ |
| cmd_buffer->state.current_event_type = EventCmd##event_name; \ |
| device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \ |
| cmd_buffer->state.current_event_type = EventInternalUnknown; \ |
| radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); |
| |
| #define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__); |
| |
| #define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, |
| uint32_t firstInstance) |
| { |
| EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, |
| int32_t vertexOffset, uint32_t firstInstance) |
| { |
| EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, |
| uint32_t stride) |
| { |
| EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, |
| uint32_t stride) |
| { |
| EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, |
| VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) |
| { |
| EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, |
| VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, |
| uint32_t stride) |
| { |
| EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, |
| stride); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) |
| { |
| EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) |
| { |
| EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo) |
| { |
| EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize, |
| uint32_t data) |
| { |
| EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, |
| const void *pData) |
| { |
| EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo) |
| { |
| EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) |
| { |
| EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) |
| { |
| EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo) |
| { |
| EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, |
| const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges) |
| { |
| EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, |
| const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, |
| uint32_t rectCount, const VkClearRect *pRects) |
| { |
| EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo) |
| { |
| EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, |
| const VkDependencyInfo *pDependencyInfos) |
| { |
| EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo) |
| { |
| EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount) |
| { |
| EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, |
| uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, |
| VkQueryResultFlags flags) |
| { |
| EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride, |
| flags); |
| } |
| |
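/* Ray tracing commands are reported as dispatches (ApiCmdDispatch); the event
 * type is the command's own event, optionally OR'ed with extra flags such as
 * ApiRayTracingSeparateCompiled.
 */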
| #define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__); |
| |
| #define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...) \ |
| EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__); |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, |
| const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable, |
| const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable, |
| const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width, |
| uint32_t height, uint32_t depth) |
| { |
| EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable, |
| pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer, |
| const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, |
| const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable, |
| const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable, |
| const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, |
| VkDeviceAddress indirectDeviceAddress) |
| { |
| EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable, |
| pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress) |
| { |
| EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, |
| indirectDeviceAddress); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo) |
| { |
| EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, |
| const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) |
| { |
| EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, |
| const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) |
| { |
| EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) |
| { |
| EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, |
| uint32_t drawCount, uint32_t stride) |
| { |
| EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, |
| VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, |
| uint32_t stride) |
| { |
| EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset, |
| maxDrawCount, stride); |
| } |
| |
| #undef EVENT_RT_MARKER_ALIAS |
| #undef EVENT_RT_MARKER |
| |
| #undef EVENT_MARKER |
| #undef EVENT_MARKER_ALIAS |
| #undef EVENT_MARKER_BASE |
| |
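/* API_MARKER only brackets the dispatched call with begin/end markers; it is
 * used for state-setting commands that do not generate events of their own.
 */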
| #define API_MARKER_ALIAS(cmd_name, api_name, ...) \ |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); \ |
| radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ |
| device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__); \ |
| radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); |
| |
| #define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) |
| { |
| VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); |
| |
| API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline); |
| |
| if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) { |
      /* RGP seems to expect a compute bind point to detect and report RT pipelines, which makes
       * sense given that RT shaders are compiled to a unified compute shader.
       */
| radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| } else { |
| radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline); |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, |
| VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, |
| const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, |
| const uint32_t *pDynamicOffsets) |
| { |
| API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount, |
| pDescriptorSets, dynamicOffsetCount, pDynamicOffsets); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType) |
| { |
| API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, |
| const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, |
| const VkDeviceSize *pStrides) |
| { |
| API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, |
| pOffsets, pSizes, pStrides); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags) |
| { |
| API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query) |
| { |
| API_MARKER(EndQuery, commandBuffer, queryPool, query); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool, |
| uint32_t query) |
| { |
| API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, |
| uint32_t offset, uint32_t size, const void *pValues) |
| { |
| API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo) |
| { |
| API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdEndRendering(VkCommandBuffer commandBuffer) |
| { |
| API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) |
| { |
| API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed, |
| const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo) |
| { |
| /* There is no ExecuteIndirect Vulkan event in RGP yet. */ |
| API_MARKER_ALIAS(ExecuteGeneratedCommandsEXT, ExecuteCommands, commandBuffer, isPreprocessed, |
| pGeneratedCommandsInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, |
| const VkViewport *pViewports) |
| { |
| API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, |
| const VkRect2D *pScissors) |
| { |
| API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) |
| { |
| API_MARKER(SetLineWidth, commandBuffer, lineWidth); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, |
| float depthBiasSlopeFactor) |
| { |
| API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4]) |
| { |
| API_MARKER(SetBlendConstants, commandBuffer, blendConstants); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds) |
| { |
| API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask) |
| { |
| API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask) |
| { |
| API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference) |
| { |
| API_MARKER(SetStencilReference, commandBuffer, faceMask, reference); |
| } |
| |
| /* VK_EXT_debug_marker */ |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) |
| { |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
| radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer) |
| { |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
| radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) |
| { |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
| radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo) |
| { |
| /* no-op */ |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo) |
| { |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName); |
| |
| device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer) |
| { |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL); |
| |
| device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo) |
| { |
| VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); |
| struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); |
| |
| radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName); |
| |
| device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo); |
| } |
| |
| /* Pipelines */ |
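/* Map a mesa shader stage to the RGP hardware stage it runs as, accounting
 * for merged (LS/ES) and NGG paths.
 */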
| static enum rgp_hardware_stages |
| radv_get_rgp_shader_stage(struct radv_shader *shader) |
| { |
| switch (shader->info.stage) { |
| case MESA_SHADER_VERTEX: |
| if (shader->info.vs.as_ls) |
| return RGP_HW_STAGE_LS; |
| else if (shader->info.vs.as_es) |
| return RGP_HW_STAGE_ES; |
| else if (shader->info.is_ngg) |
| return RGP_HW_STAGE_GS; |
| else |
| return RGP_HW_STAGE_VS; |
| case MESA_SHADER_TESS_CTRL: |
| return RGP_HW_STAGE_HS; |
| case MESA_SHADER_TESS_EVAL: |
| if (shader->info.tes.as_es) |
| return RGP_HW_STAGE_ES; |
| else if (shader->info.is_ngg) |
| return RGP_HW_STAGE_GS; |
| else |
| return RGP_HW_STAGE_VS; |
| case MESA_SHADER_MESH: |
| case MESA_SHADER_GEOMETRY: |
| return RGP_HW_STAGE_GS; |
| case MESA_SHADER_FRAGMENT: |
| return RGP_HW_STAGE_PS; |
| case MESA_SHADER_TASK: |
| case MESA_SHADER_COMPUTE: |
| case MESA_SHADER_RAYGEN: |
| case MESA_SHADER_CLOSEST_HIT: |
| case MESA_SHADER_ANY_HIT: |
| case MESA_SHADER_INTERSECTION: |
| case MESA_SHADER_MISS: |
| case MESA_SHADER_CALLABLE: |
| return RGP_HW_STAGE_CS; |
| default: |
| unreachable("invalid mesa shader stage"); |
| } |
| } |
| |
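/* Fill one RGP shader record from a compiled shader. The shader pointer is
 * used as a stable hash and the VA is masked to 48 bits.
 */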
| static void |
| radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data, |
| struct radv_shader *shader, uint64_t va) |
| { |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT |
| ? 1024 |
| : pdev->info.lds_encode_granularity; |
| |
| memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name)); |
| shader_data->hash[0] = (uint64_t)(uintptr_t)shader; |
| shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32; |
| shader_data->code_size = shader->code_size; |
| shader_data->code = shader->code; |
| shader_data->vgpr_count = shader->config.num_vgprs; |
| shader_data->sgpr_count = shader->config.num_sgprs; |
| shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave; |
| shader_data->lds_size = shader->config.lds_size * lds_increment; |
| shader_data->wavefront_size = shader->info.wave_size; |
| shader_data->base_address = va & 0xffffffffffff; |
| shader_data->elf_symbol_offset = 0; |
| shader_data->hw_stage = radv_get_rgp_shader_stage(shader); |
| shader_data->is_combined = false; |
| } |
| |
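/* Record a monolithic (non-RT) pipeline as a single RGP code object covering
 * all of its shaders.
 */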
| static VkResult |
| radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline) |
| { |
| struct ac_sqtt *sqtt = &device->sqtt; |
| struct rgp_code_object *code_object = &sqtt->rgp_code_object; |
| struct rgp_code_object_record *record; |
| |
| record = malloc(sizeof(struct rgp_code_object_record)); |
| if (!record) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| record->shader_stages_mask = 0; |
| record->num_shaders_combined = 0; |
| record->pipeline_hash[0] = pipeline->pipeline_hash; |
| record->pipeline_hash[1] = pipeline->pipeline_hash; |
| record->is_rt = false; |
| |
| for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { |
| struct radv_shader *shader = pipeline->shaders[i]; |
| |
| if (!shader) |
| continue; |
| |
| radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i)); |
| |
| record->shader_stages_mask |= (1 << i); |
| record->num_shaders_combined++; |
| } |
| |
| simple_mtx_lock(&code_object->lock); |
| list_addtail(&record->list, &code_object->record); |
| code_object->record_count++; |
| simple_mtx_unlock(&code_object->lock); |
| |
| return VK_SUCCESS; |
| } |
| |
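/* Record one RT shader as its own code object, named after its stage so it
 * can be identified in RGP.
 */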
| static VkResult |
| radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object, |
| struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size, |
| uint32_t index, uint64_t hash) |
| { |
| struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record)); |
| if (!record) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage]; |
| |
| record->shader_stages_mask = 0; |
| record->num_shaders_combined = 0; |
| record->pipeline_hash[0] = hash; |
| record->pipeline_hash[1] = hash; |
| |
| radv_fill_code_object_record(device, shader_data, shader, shader->va); |
| shader_data->rt_stack_size = stack_size; |
| |
| record->shader_stages_mask |= (1 << shader->info.stage); |
| record->is_rt = true; |
| switch (shader->info.stage) { |
| case MESA_SHADER_RAYGEN: |
| snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index); |
| break; |
| case MESA_SHADER_CLOSEST_HIT: |
| snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index); |
| break; |
| case MESA_SHADER_MISS: |
| snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index); |
| break; |
| case MESA_SHADER_INTERSECTION: |
| snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal"); |
| break; |
| case MESA_SHADER_CALLABLE: |
| snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index); |
| break; |
| case MESA_SHADER_COMPUTE: |
| snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main"); |
| break; |
| default: |
| unreachable("invalid rt stage"); |
| } |
| record->num_shaders_combined = 1; |
| |
| simple_mtx_lock(&code_object->lock); |
| list_addtail(&record->list, &code_object->record); |
| code_object->record_count++; |
| simple_mtx_unlock(&code_object->lock); |
| |
| return VK_SUCCESS; |
| } |
| |
| static void |
| compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH]) |
| { |
| struct mesa_sha1 ctx; |
| _mesa_sha1_init(&ctx); |
| _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash)); |
| _mesa_sha1_update(&ctx, &index, sizeof(index)); |
| _mesa_sha1_final(&ctx, sha1); |
| } |
| |
| static VkResult |
| radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index, |
| uint32_t stack_size, struct radv_shader *shader) |
| { |
   unsigned char sha1[SHA1_DIGEST_LENGTH];

   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);

   if (!ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index,
                             *(uint64_t *)sha1);
| } |
| |
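| /* Register all shaders of a ray tracing pipeline. The combined traversal |
| * shader is accounted the worst-case stack: the largest any-hit stack plus |
| * the largest intersection stack, since it can invoke both stage types. |
| */ |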
| static VkResult |
| radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) |
| { |
| VkResult result = VK_SUCCESS; |
| |
| uint32_t max_any_hit_stack_size = 0; |
| uint32_t max_intersection_stack_size = 0; |
| |
| for (unsigned i = 0; i < pipeline->stage_count; i++) { |
| struct radv_ray_tracing_stage *stage = &pipeline->stages[i]; |
| if (stage->stage == MESA_SHADER_ANY_HIT) |
| max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size); |
| else if (stage->stage == MESA_SHADER_INTERSECTION) |
| max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size); |
| |
| if (!stage->shader) |
| continue; |
| |
| result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| uint32_t idx = pipeline->stage_count; |
| |
| /* Combined traversal shader */ |
| if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) { |
| result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size, |
| pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| /* Prolog */ |
| result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog); |
| |
| return result; |
| } |
| |
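| /* Register a monolithic (graphics or compute) pipeline: one PSO correlation |
| * entry, one loader event at the lowest shader VA, and one code object |
| * record covering all stages. |
| */ |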
| static VkResult |
| radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline) |
| { |
| VkResult result; |
| bool ret; |
| uint64_t base_va = ~0ull; |
| |
| ret = ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash); |
| if (!ret) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| /* Find the lowest shader BO VA. */ |
| for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { |
| struct radv_shader *shader = pipeline->shaders[i]; |
| uint64_t va; |
| |
| if (!shader) |
| continue; |
| |
| va = radv_sqtt_shader_get_va_reloc(pipeline, i); |
| base_va = MIN2(base_va, va); |
| } |
| |
| ret = ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va); |
| if (!ret) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| result = radv_add_code_object(device, pipeline); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| return VK_SUCCESS; |
| } |
| |
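| /* Remove the PSO correlation, loader event and code object records that |
| * were created for the given hash at registration time. |
| */ |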
| static void |
| radv_unregister_records(struct radv_device *device, uint64_t hash) |
| { |
| struct ac_sqtt *sqtt = &device->sqtt; |
| struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation; |
| struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events; |
| struct rgp_code_object *code_object = &sqtt->rgp_code_object; |
| |
| /* Destroy the PSO correlation record. */ |
| simple_mtx_lock(&pso_correlation->lock); |
| list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) { |
| if (record->pipeline_hash[0] == hash) { |
| pso_correlation->record_count--; |
| list_del(&record->list); |
| free(record); |
| break; |
| } |
| } |
| simple_mtx_unlock(&pso_correlation->lock); |
| |
| /* Destroy the code object loader record. */ |
| simple_mtx_lock(&loader_events->lock); |
| list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) { |
| if (record->code_object_hash[0] == hash) { |
| loader_events->record_count--; |
| list_del(&record->list); |
| free(record); |
| break; |
| } |
| } |
| simple_mtx_unlock(&loader_events->lock); |
| |
| /* Destroy the code object record. */ |
| simple_mtx_lock(&code_object->lock); |
| list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) { |
| if (record->pipeline_hash[0] == hash) { |
| code_object->record_count--; |
| list_del(&record->list); |
| free(record); |
| break; |
| } |
| } |
| simple_mtx_unlock(&code_object->lock); |
| } |
| |
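| /* SQTT entrypoints wrap pipeline creation: the call is forwarded to the |
| * driver first, then each successfully created pipeline is registered with |
| * RGP. Graphics shaders are also relocated so that all shaders of a pipeline |
| * live in a single buffer, which lets the code object report one base VA. |
| * Pipeline libraries are skipped; they are captured when linked into an |
| * executable pipeline. |
| */ |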
| VKAPI_ATTR VkResult VKAPI_CALL |
| sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, |
| const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, |
| VkPipeline *pPipelines) |
| { |
| VK_FROM_HANDLE(radv_device, device, _device); |
| VkResult result; |
| |
| result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, |
| pPipelines); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| for (unsigned i = 0; i < count; i++) { |
| VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); |
| |
| if (!pipeline) |
| continue; |
| |
| const VkPipelineCreateFlagBits2 create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]); |
| if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) |
| continue; |
| |
| result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline)); |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| result = radv_register_pipeline(device, pipeline); |
| if (result != VK_SUCCESS) |
| goto fail; |
| } |
| |
| return VK_SUCCESS; |
| |
| fail: |
| for (unsigned i = 0; i < count; i++) { |
| sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); |
| pPipelines[i] = VK_NULL_HANDLE; |
| } |
| return result; |
| } |
| |
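| /* Compute pipelines contain a single shader, so no relocation pass is |
| * needed before registration. |
| */ |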
| VKAPI_ATTR VkResult VKAPI_CALL |
| sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, |
| const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, |
| VkPipeline *pPipelines) |
| { |
| VK_FROM_HANDLE(radv_device, device, _device); |
| VkResult result; |
| |
| result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, |
| pPipelines); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| for (unsigned i = 0; i < count; i++) { |
| VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); |
| |
| if (!pipeline) |
| continue; |
| |
| result = radv_register_pipeline(device, pipeline); |
| if (result != VK_SUCCESS) |
| goto fail; |
| } |
| |
| return VK_SUCCESS; |
| |
| fail: |
| for (unsigned i = 0; i < count; i++) { |
| sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); |
| pPipelines[i] = VK_NULL_HANDLE; |
| } |
| return result; |
| } |
| |
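| /* VK_OPERATION_DEFERRED_KHR is not treated as a failure: the registration |
| * loop still runs and simply skips pipeline handles that have not been |
| * populated (yet). RT libraries are skipped like graphics libraries. |
| */ |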
| VKAPI_ATTR VkResult VKAPI_CALL |
| sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, |
| VkPipelineCache pipelineCache, uint32_t count, |
| const VkRayTracingPipelineCreateInfoKHR *pCreateInfos, |
| const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) |
| { |
| VK_FROM_HANDLE(radv_device, device, _device); |
| VkResult result; |
| |
| result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count, |
| pCreateInfos, pAllocator, pPipelines); |
| if (result != VK_SUCCESS && result != VK_OPERATION_DEFERRED_KHR) |
| return result; |
| |
| /* Remember whether creation was deferred so the status can be returned |
| * after registration instead of being collapsed to VK_SUCCESS. |
| */ |
| const VkResult create_result = result; |
| |
| for (unsigned i = 0; i < count; i++) { |
| VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); |
| |
| if (!pipeline) |
| continue; |
| |
| const VkPipelineCreateFlagBits2 create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]); |
| if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) |
| continue; |
| |
| result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline)); |
| if (result != VK_SUCCESS) |
| goto fail; |
| } |
| |
| return create_result; |
| |
| fail: |
| for (unsigned i = 0; i < count; i++) { |
| sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); |
| pPipelines[i] = VK_NULL_HANDLE; |
| } |
| return result; |
| } |
| |
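| /* Drop every record created at pipeline creation time, then release the |
| * relocated shader memory for graphics pipelines before destroying the |
| * pipeline itself. |
| */ |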
| VKAPI_ATTR void VKAPI_CALL |
| sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) |
| { |
| VK_FROM_HANDLE(radv_device, device, _device); |
| VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); |
| |
| if (!_pipeline) |
| return; |
| |
| /* Ray tracing pipelines have multiple records, each with its own hash. */ |
| if (pipeline->type == RADV_PIPELINE_RAY_TRACING) { |
| /* We have one record for each stage, plus one for the traversal shader and one for the prolog */ |
| uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2; |
| unsigned char sha1[SHA1_DIGEST_LENGTH]; |
| for (uint32_t i = 0; i < record_count; ++i) { |
| compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1); |
| radv_unregister_records(device, *(uint64_t *)sha1); |
| } |
| } else |
| radv_unregister_records(device, pipeline->pipeline_hash); |
| |
| if (pipeline->type == RADV_PIPELINE_GRAPHICS) { |
| struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); |
| struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc; |
| |
| /* The reloc info can be NULL if shader relocation failed at creation time. */ |
| if (reloc) { |
| radv_free_shader_memory(device, reloc->alloc); |
| free(reloc); |
| } |
| } |
| |
| device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator); |
| } |
| |
| #undef API_MARKER |