| /* |
| * Copyright 2020 Advanced Micro Devices, Inc. |
| * Copyright 2020 Valve Corporation |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #ifndef AC_SQTT_H |
| #define AC_SQTT_H |
| |
| #include <stdint.h> |
| #include <stdbool.h> |
| |
| #include <assert.h> |
| #include "ac_pm4.h" |
| #include "ac_rgp.h" |
| #include "amd_family.h" |
| |
| struct radeon_cmdbuf; /* Forward declaration: this header only stores pointers to it (ac_sqtt::start_cs / stop_cs). */ |
| struct radeon_info; /* Forward declaration: only passed by const pointer to the functions declared in this header. */ |
| |
| /** |
| * SQ Thread tracing is a tracing mechanism that allows taking a detailed look |
| * at what the shader cores are doing. |
| * |
| * Among the things recorded are: |
| * - draws/dispatches + state |
| * - when each wave starts and stops. |
| * - for one SIMD per SE all instructions executed on that SIMD. |
| * |
| * The hardware stores all these as events in a buffer, so no manual barrier |
| * around each command is needed. The primary user of this is RGP. |
| * |
| * struct ac_sqtt groups the state kept for this: start/stop command streams, |
| * the trace buffer (bo / buffer_va / ptr / buffer_size), start_frame and |
| * trigger_file, per-IP command buffer ID counters, and the RGP records whose |
| * types reappear as const pointers in struct ac_sqtt_trace. |
| */ |
| struct ac_sqtt { |
| struct radeon_cmdbuf *start_cs[2]; /* Two entries; NOTE(review): presumably one per queue type (gfx / compute) - confirm. */ |
| struct radeon_cmdbuf *stop_cs[2]; /* Two entries; NOTE(review): presumably indexed like start_cs - confirm. */ |
| /* Untyped buffer handle: struct radeon_winsys_bo or struct pb_buffer */ |
| void *bo; |
| uint64_t buffer_va; /* NOTE(review): presumably the GPU virtual address of bo - confirm. */ |
| void *ptr; /* NOTE(review): presumably the CPU mapping of bo - confirm. */ |
| uint32_t buffer_size; /* 32-bit size field; NOTE(review): units/scope (per SE or total) not visible here. */ |
| int start_frame; /* NOTE(review): presumably the frame at which capture starts - confirm. */ |
| char *trigger_file; /* NOTE(review): presumably a file path used to trigger a capture; ownership not visible here. */ |
| bool instruction_timing_enabled; |
| |
| uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; /* One slot per AMD IP type; NOTE(review): presumably advanced by ac_sqtt_get_next_cmdbuf_id(). */ |
| |
| struct rgp_code_object rgp_code_object; /* RGP record types below are declared outside this header. */ |
| struct rgp_loader_events rgp_loader_events; |
| struct rgp_pso_correlation rgp_pso_correlation; |
| |
| struct rgp_queue_info rgp_queue_info; |
| struct rgp_queue_event rgp_queue_event; |
| |
| struct rgp_clock_calibration rgp_clock_calibration; |
| |
| struct hash_table_u64 *pipeline_bos; /* struct hash_table_u64 is not declared in this header; only a pointer is stored. */ |
| }; |
| |
| #define SQTT_BUFFER_ALIGN_SHIFT 12 /* Shift amount (1 << 12 = 4096); NOTE(review): presumably the trace buffer alignment - confirm at the use sites. */ |
| |
| struct ac_sqtt_data_info { /* Three dwords of per-trace status; consumed by ac_is_sqtt_complete(). */ |
| uint32_t cur_offset; |
| uint32_t trace_status; |
| union { /* Same dword under two names; NOTE(review): presumably selected by GPU generation - confirm. */ |
| uint32_t gfx9_write_counter; |
| uint32_t gfx10_dropped_cntr; |
| }; |
| }; |
| |
| struct ac_sqtt_data_se { /* One entry of ac_sqtt_trace::traces: status info plus a pointer to the trace data. */ |
| struct ac_sqtt_data_info info; |
| void *data_ptr; /* Untyped; NOTE(review): size and lifetime of the pointed-to data are not visible here. */ |
| uint32_t shader_engine; /* NOTE(review): presumably the SE index this trace came from - confirm. */ |
| uint32_t compute_unit; /* NOTE(review): presumably the CU that was traced on that SE - confirm. */ |
| }; |
| |
| #define SQTT_MAX_TRACES 6 /* Capacity of ac_sqtt_trace::traces. */ |
| |
| struct ac_sqtt_trace { /* Read-only view of a capture; filled through ac_sqtt_get_trace()'s sqtt_trace argument. */ |
| const struct rgp_code_object *rgp_code_object; /* These const pointers have the same types and names as the records embedded in struct ac_sqtt. */ |
| const struct rgp_loader_events *rgp_loader_events; |
| const struct rgp_pso_correlation *rgp_pso_correlation; |
| const struct rgp_queue_info *rgp_queue_info; |
| const struct rgp_queue_event *rgp_queue_event; |
| const struct rgp_clock_calibration *rgp_clock_calibration; |
| |
| uint32_t num_traces; /* NOTE(review): presumably the number of valid entries in traces[] (at most SQTT_MAX_TRACES) - confirm. */ |
| struct ac_sqtt_data_se traces[SQTT_MAX_TRACES]; |
| }; |
| |
| uint64_t ac_sqtt_get_info_offset(unsigned se); /* NOTE(review): presumably the buffer offset of the ac_sqtt_data_info for index `se` - confirm in the implementation. */ |
| |
| uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, |
| unsigned se); /* NOTE(review): presumably the buffer offset of the trace data for index `se`; both pointer arguments are read-only. */ |
| |
| void ac_sqtt_init(struct ac_sqtt *data); /* Takes a mutable ac_sqtt; NOTE(review): presumably must be paired with ac_sqtt_finish() - confirm. */ |
| |
| void ac_sqtt_finish(struct ac_sqtt *data); /* Counterpart of ac_sqtt_init() by name; what it releases is not visible here. */ |
| |
| bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, |
| const struct ac_sqtt_data_info *info); /* Predicate over one ac_sqtt_data_info; the exact completeness criterion lives in the implementation. */ |
| |
| /** |
| * Identifiers for RGP SQ thread-tracing markers (Table 1) |
| * |
| * All sixteen values 0x0-0xF are enumerated, so every enumerator fits the |
| * 4-bit `identifier` bitfield that starts each rgp_sqtt_marker_* struct in |
| * this header. Not every identifier has a matching struct here (for |
| * example the RESERVEDn entries). |
| */ |
| enum rgp_sqtt_marker_identifier |
| { |
| RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, |
| RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, |
| RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, |
| RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, |
| RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, |
| RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, |
| RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, |
| RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, |
| RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, |
| RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, |
| RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, |
| RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, |
| RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, |
| RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, |
| RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, |
| RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF |
| }; |
| |
| /** |
| * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits). |
| * |
| * Two layouts share the first bitfield, `per_frame`, as discriminator: |
| * - per_frame_cb_id: 1 + 7 + 12 = 20 used bits (frame index + index in frame) |
| * - global_cb_id: 1 + 19 = 20 used bits (a single global index) |
| * The remaining 12 bits are reserved in both layouts, and `all` exposes the |
| * raw 32-bit value. The marker structs in this header store the ID in |
| * 20-bit `cb_id` bitfields. |
| * |
| * Note: bitfield allocation order is implementation-defined in C, so the |
| * mapping of these fields onto bits of `all` depends on the compiler/ABI. |
| */ |
| union rgp_sqtt_marker_cb_id { |
| struct { |
| uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */ |
| uint32_t frame_index : 7; |
| uint32_t cb_index : 12; /* Command buffer index within the frame. */ |
| uint32_t reserved : 12; |
| } per_frame_cb_id; |
| |
| struct { |
| uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */ |
| uint32_t cb_index : 19; /* Global command buffer index. */ |
| uint32_t reserved : 12; |
| } global_cb_id; |
| |
| uint32_t all; |
| }; |
| |
| /** |
| * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) |
| * |
| * Four dwords; the static_assert after the struct pins the size to 16 bytes. |
| * The first dword is packed as identifier:4, ext_dwords:3, cb_id:20, queue:5. |
| * Every dword can also be accessed raw through its dwordNN alias (the alias |
| * numbering starts at 01). |
| * NOTE(review): ext_dwords presumably counts the dwords following the first |
| * one - confirm against the RGP specification. |
| */ |
| struct rgp_sqtt_marker_cb_start { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t cb_id : 20; |
| uint32_t queue : 5; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| uint32_t device_id_low; |
| uint32_t dword02; |
| }; |
| union { |
| uint32_t device_id_high; |
| uint32_t dword03; |
| }; |
| union { |
| uint32_t queue_flags; |
| uint32_t dword04; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, |
| "rgp_sqtt_marker_cb_start doesn't match RGP spec"); |
| |
| /** |
| * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) |
| * |
| * Three dwords; the static_assert after the struct pins the size to 12 |
| * bytes. Same first-dword packing as rgp_sqtt_marker_cb_start except that |
| * the top 5 bits are reserved instead of holding `queue`, and there is no |
| * queue_flags dword. |
| */ |
| struct rgp_sqtt_marker_cb_end { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t cb_id : 20; |
| uint32_t reserved : 5; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| uint32_t device_id_low; |
| uint32_t dword02; |
| }; |
| union { |
| uint32_t device_id_high; |
| uint32_t dword03; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, |
| "rgp_sqtt_marker_cb_end doesn't match RGP spec"); |
| |
| /** |
| * API types used in RGP SQ thread-tracing markers for the "General API" |
| * packet. |
| * |
| * Values 0-45 and 47-49 are assigned; 46 is skipped (marked as a gap). |
| * |
| * ApiRayTracingSeparateCompiled (0x800000, i.e. bit 23) does not fit the |
| * 20-bit api_type field of struct rgp_sqtt_marker_general_api, but it does |
| * fit the 24-bit api_type field of struct rgp_sqtt_marker_event. |
| * NOTE(review): presumably it is combined as a flag into the latter - |
| * confirm at the use sites. |
| * |
| * NOTE(review): ApiInvalid (0xffffffff) is not representable as a 32-bit |
| * int, which ISO C before C23 requires of enumerator values; the header |
| * relies on the compiler accepting it. |
| */ |
| enum rgp_sqtt_marker_general_api_type |
| { |
| ApiCmdBindPipeline = 0, |
| ApiCmdBindDescriptorSets = 1, |
| ApiCmdBindIndexBuffer = 2, |
| ApiCmdBindVertexBuffers = 3, |
| ApiCmdDraw = 4, |
| ApiCmdDrawIndexed = 5, |
| ApiCmdDrawIndirect = 6, |
| ApiCmdDrawIndexedIndirect = 7, |
| ApiCmdDrawIndirectCountAMD = 8, |
| ApiCmdDrawIndexedIndirectCountAMD = 9, |
| ApiCmdDispatch = 10, |
| ApiCmdDispatchIndirect = 11, |
| ApiCmdCopyBuffer = 12, |
| ApiCmdCopyImage = 13, |
| ApiCmdBlitImage = 14, |
| ApiCmdCopyBufferToImage = 15, |
| ApiCmdCopyImageToBuffer = 16, |
| ApiCmdUpdateBuffer = 17, |
| ApiCmdFillBuffer = 18, |
| ApiCmdClearColorImage = 19, |
| ApiCmdClearDepthStencilImage = 20, |
| ApiCmdClearAttachments = 21, |
| ApiCmdResolveImage = 22, |
| ApiCmdWaitEvents = 23, |
| ApiCmdPipelineBarrier = 24, |
| ApiCmdBeginQuery = 25, |
| ApiCmdEndQuery = 26, |
| ApiCmdResetQueryPool = 27, |
| ApiCmdWriteTimestamp = 28, |
| ApiCmdCopyQueryPoolResults = 29, |
| ApiCmdPushConstants = 30, |
| ApiCmdBeginRenderPass = 31, |
| ApiCmdNextSubpass = 32, |
| ApiCmdEndRenderPass = 33, |
| ApiCmdExecuteCommands = 34, |
| ApiCmdSetViewport = 35, |
| ApiCmdSetScissor = 36, |
| ApiCmdSetLineWidth = 37, |
| ApiCmdSetDepthBias = 38, |
| ApiCmdSetBlendConstants = 39, |
| ApiCmdSetDepthBounds = 40, |
| ApiCmdSetStencilCompareMask = 41, |
| ApiCmdSetStencilWriteMask = 42, |
| ApiCmdSetStencilReference = 43, |
| ApiCmdDrawIndirectCount = 44, |
| ApiCmdDrawIndexedIndirectCount = 45, |
| /* gap: value 46 is not assigned */ |
| ApiCmdDrawMeshTasksEXT = 47, |
| ApiCmdDrawMeshTasksIndirectCountEXT = 48, |
| ApiCmdDrawMeshTasksIndirectEXT = 49, |
| |
| ApiRayTracingSeparateCompiled = 0x800000, |
| ApiInvalid = 0xffffffff |
| }; |
| |
| /** |
| * RGP SQ thread-tracing marker for a "General API" instrumentation packet. |
| * |
| * A single dword (size asserted to be 4 bytes) packed as identifier:4, |
| * ext_dwords:3, api_type:20, is_end:1, reserved:4; `dword01` is the raw |
| * alias. |
| * NOTE(review): api_type presumably holds an |
| * enum rgp_sqtt_marker_general_api_type value and is_end presumably marks |
| * the closing marker of a begin/end pair - confirm at the use sites. |
| */ |
| struct rgp_sqtt_marker_general_api { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t api_type : 20; |
| uint32_t is_end : 1; |
| uint32_t reserved : 4; |
| }; |
| uint32_t dword01; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, |
| "rgp_sqtt_marker_general_api doesn't match RGP spec"); |
| |
| /** |
| * API types used in RGP SQ thread-tracing markers (Table 16). |
| * |
| * Values 0-28, 30-36 and 41-43 are assigned; 29 and 37-40 are skipped |
| * (marked as gaps). The names parallel the ApiCmd* enumerators of |
| * enum rgp_sqtt_marker_general_api_type, but the numbering is independent |
| * (for example EventCmdDraw is 0 while ApiCmdDraw is 4). |
| * NOTE(review): presumably stored in rgp_sqtt_marker_event::api_type - |
| * confirm at the use sites. |
| * NOTE(review): EventInvalid (0xffffffff) is not representable as a 32-bit |
| * int, which ISO C before C23 requires of enumerator values; the header |
| * relies on the compiler accepting it. |
| */ |
| enum rgp_sqtt_marker_event_type |
| { |
| EventCmdDraw = 0, |
| EventCmdDrawIndexed = 1, |
| EventCmdDrawIndirect = 2, |
| EventCmdDrawIndexedIndirect = 3, |
| EventCmdDrawIndirectCountAMD = 4, |
| EventCmdDrawIndexedIndirectCountAMD = 5, |
| EventCmdDispatch = 6, |
| EventCmdDispatchIndirect = 7, |
| EventCmdCopyBuffer = 8, |
| EventCmdCopyImage = 9, |
| EventCmdBlitImage = 10, |
| EventCmdCopyBufferToImage = 11, |
| EventCmdCopyImageToBuffer = 12, |
| EventCmdUpdateBuffer = 13, |
| EventCmdFillBuffer = 14, |
| EventCmdClearColorImage = 15, |
| EventCmdClearDepthStencilImage = 16, |
| EventCmdClearAttachments = 17, |
| EventCmdResolveImage = 18, |
| EventCmdWaitEvents = 19, |
| EventCmdPipelineBarrier = 20, |
| EventCmdResetQueryPool = 21, |
| EventCmdCopyQueryPoolResults = 22, |
| EventRenderPassColorClear = 23, |
| EventRenderPassDepthStencilClear = 24, |
| EventRenderPassResolve = 25, |
| EventInternalUnknown = 26, |
| EventCmdDrawIndirectCount = 27, |
| EventCmdDrawIndexedIndirectCount = 28, |
| /* gap: value 29 is not assigned */ |
| EventCmdTraceRaysKHR = 30, |
| EventCmdTraceRaysIndirectKHR = 31, |
| EventCmdBuildAccelerationStructuresKHR = 32, |
| EventCmdBuildAccelerationStructuresIndirectKHR = 33, |
| EventCmdCopyAccelerationStructureKHR = 34, |
| EventCmdCopyAccelerationStructureToMemoryKHR = 35, |
| EventCmdCopyMemoryToAccelerationStructureKHR = 36, |
| /* gap: values 37-40 are not assigned */ |
| EventCmdDrawMeshTasksEXT = 41, |
| EventCmdDrawMeshTasksIndirectCountEXT = 42, |
| EventCmdDrawMeshTasksIndirectEXT = 43, |
| EventUnknown = 0x7fff, |
| EventInvalid = 0xffffffff |
| }; |
| |
| /** |
| * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) |
| * |
| * Three dwords; the static_assert after the struct pins the size to 12 |
| * bytes. |
| * - dword01: identifier:4, ext_dwords:3, api_type:24, has_thread_dims:1 |
| * - dword02: cb_id:20 plus three 4-bit register indices |
| * - dword03: cmd_id |
| * NOTE(review): has_thread_dims presumably signals that the marker is |
| * followed by the thread_x/y/z dwords of |
| * struct rgp_sqtt_marker_event_with_dims - confirm at the use sites. |
| */ |
| struct rgp_sqtt_marker_event { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t api_type : 24; |
| uint32_t has_thread_dims : 1; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| struct { |
| uint32_t cb_id : 20; |
| uint32_t vertex_offset_reg_idx : 4; |
| uint32_t instance_offset_reg_idx : 4; |
| uint32_t draw_index_reg_idx : 4; |
| }; |
| uint32_t dword02; |
| }; |
| union { |
| uint32_t cmd_id; |
| uint32_t dword03; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, |
| "rgp_sqtt_marker_event doesn't match RGP spec"); |
| |
| /** |
| * Per-dispatch specific marker where workgroup dims are included. |
| * |
| * Appends three dwords (thread_x, thread_y, thread_z) to a |
| * struct rgp_sqtt_marker_event: 12 + 12 = 24 bytes, as the static_assert |
| * after the struct checks. |
| */ |
| struct rgp_sqtt_marker_event_with_dims { |
| struct rgp_sqtt_marker_event event; |
| uint32_t thread_x; |
| uint32_t thread_y; |
| uint32_t thread_z; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, |
| "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); |
| |
| /** |
| * "Barrier Start" RGP SQTT instrumentation marker (Table 5) |
| * |
| * Two dwords; the static_assert after the struct pins the size to 8 bytes. |
| * - dword01: identifier:4, ext_dwords:3, cb_id:20, reserved:5 |
| * - dword02: driver_reason:31, internal:1 |
| * NOTE(review): the set of valid driver_reason values is not defined in |
| * this header. |
| */ |
| struct rgp_sqtt_marker_barrier_start { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t cb_id : 20; |
| uint32_t reserved : 5; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| struct { |
| uint32_t driver_reason : 31; |
| uint32_t internal : 1; |
| }; |
| uint32_t dword02; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, |
| "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); |
| |
| /** |
| * "Barrier End" RGP SQTT instrumentation marker (Table 6) |
| * |
| * Two dwords; the static_assert after the struct pins the size to 8 bytes. |
| * - dword01: identifier:4, ext_dwords:3, cb_id:20, then five 1-bit flags |
| * (wait_on_eop_ts through pfp_sync_me). |
| * - dword02: ten 1-bit flags (sync_cp_dma through inval_db), a 16-bit |
| * num_layout_transitions count, five more 1-bit flags (inval_gl1 through |
| * eos_ts_cs_done) and one reserved bit. |
| * NOTE(review): by their names the flags record which waits and cache |
| * flushes/invalidations the barrier performed - confirm against the RGP |
| * specification. |
| */ |
| struct rgp_sqtt_marker_barrier_end { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t cb_id : 20; |
| uint32_t wait_on_eop_ts : 1; |
| uint32_t vs_partial_flush : 1; |
| uint32_t ps_partial_flush : 1; |
| uint32_t cs_partial_flush : 1; |
| uint32_t pfp_sync_me : 1; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| struct { |
| uint32_t sync_cp_dma : 1; |
| uint32_t inval_tcp : 1; |
| uint32_t inval_sqI : 1; |
| uint32_t inval_sqK : 1; |
| uint32_t flush_tcc : 1; |
| uint32_t inval_tcc : 1; |
| uint32_t flush_cb : 1; |
| uint32_t inval_cb : 1; |
| uint32_t flush_db : 1; |
| uint32_t inval_db : 1; |
| uint32_t num_layout_transitions : 16; |
| uint32_t inval_gl1 : 1; |
| uint32_t wait_on_ts : 1; |
| uint32_t eop_ts_bottom_of_pipe : 1; |
| uint32_t eos_ts_ps_done : 1; |
| uint32_t eos_ts_cs_done : 1; |
| uint32_t reserved : 1; |
| }; |
| uint32_t dword02; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, |
| "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); |
| |
| /** |
| * "Layout Transition" RGP SQTT instrumentation marker (Table 7) |
| * |
| * Two dwords; the static_assert after the struct pins the size to 8 bytes. |
| * - dword01: identifier:4, ext_dwords:3, eight 1-bit flags |
| * (depth_stencil_expand through init_mask_ram), reserved1:17. |
| * - dword02: entirely reserved. |
| * Unlike the barrier and command buffer markers, this one carries no cb_id. |
| */ |
| struct rgp_sqtt_marker_layout_transition { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t depth_stencil_expand : 1; |
| uint32_t htile_hiz_range_expand : 1; |
| uint32_t depth_stencil_resummarize : 1; |
| uint32_t dcc_decompress : 1; |
| uint32_t fmask_decompress : 1; |
| uint32_t fast_clear_eliminate : 1; |
| uint32_t fmask_color_expand : 1; |
| uint32_t init_mask_ram : 1; |
| uint32_t reserved1 : 17; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| struct { |
| uint32_t reserved2 : 32; |
| }; |
| uint32_t dword02; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, |
| "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); |
| |
| |
| /** |
| * "User Event" RGP SQTT instrumentation marker (Table 8) |
| * |
| * A single dword (size asserted to be 4 bytes) packed as identifier:4, |
| * reserved0:8, data_type:8, reserved1:12. Unlike the other markers in this |
| * header it has no ext_dwords field. |
| * |
| * struct rgp_sqtt_marker_user_event_with_length appends one `length` dword. |
| * NOTE(review): data_type presumably holds an |
| * enum rgp_sqtt_marker_user_event_type value, and `length` presumably gives |
| * the size of a payload that follows - confirm at the use sites. |
| * NOTE(review): only the base struct has a size assert; the _with_length |
| * variant (two uint32_t-sized members) is not checked. |
| */ |
| struct rgp_sqtt_marker_user_event { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t reserved0 : 8; |
| uint32_t data_type : 8; |
| uint32_t reserved1 : 12; |
| }; |
| uint32_t dword01; |
| }; |
| }; |
| struct rgp_sqtt_marker_user_event_with_length { |
| struct rgp_sqtt_marker_user_event user_event; |
| uint32_t length; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, |
| "rgp_sqtt_marker_user_event doesn't match RGP spec"); |
| |
| enum rgp_sqtt_marker_user_event_type /* NOTE(review): presumably the values stored in rgp_sqtt_marker_user_event::data_type - confirm. */ |
| { |
| UserEventTrigger = 0, |
| UserEventPop, /* = 1 (implicit) */ |
| UserEventPush, /* = 2 (implicit) */ |
| UserEventObjectName, /* = 3 (implicit) */ |
| }; |
| |
| /** |
| * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) |
| * |
| * Three dwords; the static_assert after the struct pins the size to 12 |
| * bytes. |
| * - dword01: identifier:4, ext_dwords:3, bind_point:1, cb_id:20, reserved:4 |
| * - dword02/dword03: aliased by api_pso_hash[0] / api_pso_hash[1] |
| * NOTE(review): api_pso_hash presumably carries a 64-bit API PSO hash split |
| * into two dwords; which half goes first is not visible here. The meaning |
| * of the two bind_point values is likewise not defined in this header. |
| */ |
| struct rgp_sqtt_marker_pipeline_bind { |
| union { |
| struct { |
| uint32_t identifier : 4; |
| uint32_t ext_dwords : 3; |
| uint32_t bind_point : 1; |
| uint32_t cb_id : 20; |
| uint32_t reserved : 4; |
| }; |
| uint32_t dword01; |
| }; |
| union { |
| uint32_t api_pso_hash[2]; |
| struct { |
| uint32_t dword02; |
| uint32_t dword03; |
| }; |
| }; |
| }; |
| |
| static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, |
| "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); |
| |
| bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); /* NOTE(review): presumably appends to sqtt->rgp_pso_correlation; the meaning of the bool result is defined by the implementation. */ |
| |
| bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash, |
| uint64_t base_address); /* NOTE(review): presumably appends to sqtt->rgp_loader_events - confirm. */ |
| |
| bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, |
| uint64_t gpu_timestamp); /* NOTE(review): presumably appends a CPU/GPU timestamp pair to sqtt->rgp_clock_calibration - confirm. */ |
| |
| bool ac_check_profile_state(const struct radeon_info *info); /* Read-only query on radeon_info; what is checked is not visible from this header. */ |
| |
| union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt, |
| enum amd_ip_type ip_type); /* Returns the ID union by value; takes a mutable sqtt. NOTE(review): presumably advances sqtt->cmdbuf_ids_per_queue[ip_type] - confirm. */ |
| |
| bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info, |
| struct ac_sqtt_trace *sqtt_trace); /* sqtt_trace is the only non-const output-capable argument besides sqtt; NOTE(review): presumably filled on success - confirm. */ |
| |
| uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable); /* NOTE(review): presumably builds a 32-bit control register value - confirm which register in the implementation. */ |
| |
| uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info); /* NOTE(review): presumably a bitmask of shader stages to trace - confirm. */ |
| |
| void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4, |
| const struct ac_sqtt *sqtt, bool is_compute_queue); /* The emit_* functions take a mutable ac_pm4_state and read-only info; NOTE(review): presumably they append packets to pm4 - confirm. */ |
| |
| void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4, |
| bool is_compute_queue); /* Unlike emit_start / emit_wait, this one takes no ac_sqtt argument. */ |
| |
| void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4, |
| const struct ac_sqtt *sqtt, bool is_compute_queue); /* Same parameter list as ac_sqtt_emit_start(). */ |
| |
| #endif |