| /* |
| * Copyright © 2022 Friedrich Vock |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #ifndef _WIN32 |
| #include <dirent.h> |
| #include <unistd.h> |
| #endif |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include "ac_gpu_info.h" |
| #include "radv_buffer.h" |
| #include "radv_descriptor_set.h" |
| #include "radv_device_memory.h" |
| #include "radv_event.h" |
| #include "radv_image.h" |
| #include "radv_pipeline_graphics.h" |
| #include "radv_pipeline_rt.h" |
| #include "radv_query.h" |
| #include "radv_rmv.h" |
| |
| #define RADV_FTRACE_INSTANCE_PATH "/sys/kernel/tracing/instances/amd_rmv" |
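
/* Events are read from a dedicated ftrace instance so RMV tracing doesn't
 * disturb the global trace buffer. The instance directory is assumed to exist
 * already; on a typical tracefs setup it can be created with, e.g.,
 *   mkdir /sys/kernel/tracing/instances/amd_rmv
 * run with sufficient privileges. */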
| |
| static FILE * |
| open_event_file(const char *event_name, const char *event_filename, const char *mode) |
| { |
| char filename[2048]; |
| snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename); |
| return fopen(filename, mode); |
| } |
| |
| static bool |
| set_event_tracing_enabled(const char *event_name, bool enabled) |
| { |
| FILE *file = open_event_file(event_name, "enable", "w"); |
| if (!file) |
| return false; |
| |
   size_t written_bytes = fwrite(enabled ? "1" : "0", 1, 1, file);
| fclose(file); |
| return written_bytes == 1; |
| } |
| |
| static uint16_t |
| trace_event_id(const char *event_name) |
| { |
   /* id is 16-bit, so at most 5 decimal digits plus a trailing newline */
   char data[8];
| |
| FILE *file = open_event_file(event_name, "id", "r"); |
| if (!file) |
| return (uint16_t)~0; |
| |
   size_t read_bytes = fread(data, 1, sizeof(data) - 1, file);
   fclose(file);

   if (!read_bytes)
      return (uint16_t)~0;

   /* fread does not NUL-terminate; do it before parsing. */
   data[read_bytes] = '\0';
   return (uint16_t)strtoul(data, NULL, 10);
| } |
| |
| static void |
| open_trace_pipe(uint32_t cpu_index, int *dst_fd) |
| { |
| #ifdef _WIN32 |
| *dst_fd = -1; |
| #else |
| char filename[2048]; |
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%u/trace_pipe_raw", cpu_index);
   /* I/O on the pipe needs to be non-blocking; otherwise, reading all available
    * data would block indefinitely, waiting for more data to be written to the pipe. */
| *dst_fd = open(filename, O_RDONLY | O_NONBLOCK); |
| #endif |
| } |
| |
| /* |
| * Kernel trace buffer parsing |
| */ |
| |
| struct trace_page_header { |
| uint64_t timestamp; |
| int32_t commit; |
| }; |
| |
| enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP }; |
| |
| struct trace_event_header { |
| uint32_t type_len : 5; |
| uint32_t time_delta : 27; |
| /* Only present if length is too big for type_len */ |
| uint32_t excess_length; |
| }; |
| |
| struct trace_event_common { |
| unsigned short type; |
| unsigned char flags; |
| unsigned char preempt_count; |
| int pid; |
| }; |
| |
| struct trace_event_amdgpu_vm_update_ptes { |
| struct trace_event_common common; |
| uint64_t start; |
| uint64_t end; |
| uint64_t flags; |
| unsigned int num_ptes; |
| uint64_t incr; |
| int pid; |
| uint64_t vm_ctx; |
| }; |
| |
| /* Represents a dynamic array of addresses in the ftrace buffer. */ |
| struct trace_event_address_array { |
| uint16_t data_size; |
| uint16_t reserved; |
| char data[]; |
| }; |
| |
| /* Possible flags for PTEs, taken from amdgpu_vm.h */ |
| #define AMDGPU_PTE_VALID (1ULL << 0) |
| #define AMDGPU_PTE_SYSTEM (1ULL << 1) |
| #define AMDGPU_PTE_PRT (1ULL << 51) |
| |
| /* The minimum size of a GPU page */ |
| #define MIN_GPU_PAGE_SIZE 4096 |
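
/* Used below to convert the page-unit addresses reported in the ftrace event
 * into byte addresses. */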
| |
| static void |
| emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp, |
| struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index) |
| { |
| struct vk_rmv_token token; |
| |
| uint64_t end_addr; |
   /* start/end are in 4 KiB page units, while incr is the byte stride covered by
    * each PTE. There may be more updated PTEs than the ones reported in the ftrace
    * buffer, so choose the reported end address for the last PTE to report the
    * correct total committed memory. */
| if (pte_index == event->num_ptes - 1) |
| end_addr = event->end; |
| else |
| end_addr = event->start + (pte_index + 1) * (event->incr / MIN_GPU_PAGE_SIZE); |
| uint64_t start_addr = event->start + pte_index * (event->incr / MIN_GPU_PAGE_SIZE); |
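
   /* Worked example with illustrative values: start = 0x100 (in 4 KiB page
    * units) and incr = 4096 make PTE i cover the single page at virtual address
    * (0x100 << 12) + i * 4096, so page_count below evaluates to 1. */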
| |
| token.type = VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE; |
| token.timestamp = timestamp; |
| token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE; |
| token.data.page_table_update.page_size = event->incr; |
| token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr; |
| token.data.page_table_update.pid = event->common.pid; |
| token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr; |
| /* RMV expects mappings to system memory to have a physical address of 0. |
| * Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to |
| * be marked as "committed to system memory". */ |
| token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index]; |
| |
| token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)); |
| util_dynarray_append(&data->tokens, struct vk_rmv_token, token); |
| } |
| |
| static void |
| evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens, |
| struct trace_event_amdgpu_vm_update_ptes *event) |
| { |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| |
| if (event->common.pid != getpid() && event->pid != getpid()) { |
| return; |
| } |
| |
| struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1); |
| |
| for (uint32_t i = 0; i < event->num_ptes; ++i) |
| emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event, |
| (uint64_t *)array->data, i); |
| } |
| |
| static void |
| append_trace_events(struct radv_device *device, int pipe_fd) |
| { |
| /* Assuming 4KB if os_get_page_size fails. */ |
| uint64_t page_size = 4096; |
| os_get_page_size(&page_size); |
| |
| uint64_t timestamp; |
| |
| /* |
| * Parse the trace ring buffer page by page. |
| */ |
| char *page = (char *)malloc(page_size); |
| if (!page) { |
| return; |
| } |
| int64_t read_bytes; |
| do { |
| read_bytes = (int64_t)read(pipe_fd, page, page_size); |
| if (read_bytes < (int64_t)sizeof(struct trace_page_header)) |
| break; |
| |
| struct trace_page_header *page_header = (struct trace_page_header *)page; |
| timestamp = page_header->timestamp; |
| |
| size_t data_size = MIN2((size_t)read_bytes, (size_t)page_header->commit); |
| |
| char *read_ptr = page + sizeof(struct trace_page_header); |
      while ((size_t)(read_ptr - page) < data_size) {
| struct trace_event_header *event_header = (struct trace_event_header *)read_ptr; |
| read_ptr += sizeof(struct trace_event_header); |
| |
| /* Handle special event type, see include/linux/ring_buffer.h in the |
| * kernel source */ |
| switch (event_header->type_len) { |
| case TRACE_EVENT_TYPE_PADDING: |
| if (event_header->time_delta) { |
| /* Specified size, skip past padding */ |
| read_ptr += event_header->excess_length; |
| timestamp += event_header->time_delta; |
| continue; |
| } else { |
| /* Padding is until end of page, skip until next page */ |
| read_ptr = page + data_size; |
| continue; |
| } |
| case TRACE_EVENT_TYPE_EXTENDED_DELTA: |
| timestamp += event_header->time_delta; |
| timestamp += (uint64_t)event_header->excess_length << 27ULL; |
| continue; |
| case TRACE_EVENT_TYPE_TIMESTAMP: |
| timestamp = event_header->time_delta; |
| timestamp |= (uint64_t)event_header->excess_length << 27ULL; |
| continue; |
| default: |
| break; |
| } |
| |
| timestamp += event_header->time_delta; |
| |
| /* If type_len is not one of the special types and not zero, it is |
| * the data length / 4. */ |
| size_t length; |
| struct trace_event_common *event; |
| if (event_header->type_len) { |
| length = event_header->type_len * 4 + 4; |
            /* The excess_length field itself already holds event data in this
             * case. */
| event = (struct trace_event_common *)&event_header->excess_length; |
| } else { |
| length = event_header->excess_length + 4; |
| event = (struct trace_event_common *)read_ptr; |
| } |
| |
| if (event->type == device->memory_trace.ftrace_update_ptes_id) |
| evaluate_trace_event(device, timestamp, &device->vk.memory_trace_data.tokens, |
| (struct trace_event_amdgpu_vm_update_ptes *)event); |
| |
| read_ptr += length - sizeof(struct trace_event_header); |
| } |
| } while (true); |
| |
| free(page); |
| } |
| |
| static void |
| close_pipe_fds(struct radv_device *device) |
| { |
| for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) { |
| close(device->memory_trace.pipe_fds[i]); |
| } |
| } |
| |
| void |
| radv_memory_trace_init(struct radv_device *device) |
| { |
| #ifndef _WIN32 |
| DIR *dir = opendir(RADV_FTRACE_INSTANCE_PATH); |
| if (!dir) { |
| fprintf(stderr, |
| "radv: Couldn't initialize memory tracing: " |
| "Can't access the tracing instance directory (%s)\n", |
| strerror(errno)); |
| goto error; |
| } |
| closedir(dir); |
| |
| device->memory_trace.num_cpus = 0; |
| |
   char cpuinfo_line[1024];
   FILE *cpuinfo_file = fopen("/proc/cpuinfo", "r");
   if (!cpuinfo_file) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't read /proc/cpuinfo (%s).\n",
              strerror(errno));
      goto error;
   }
   uint32_t num_physical_cores = 0;
   while (fgets(cpuinfo_line, sizeof(cpuinfo_line), cpuinfo_file)) {
      char *logical_core_string = strstr(cpuinfo_line, "siblings");
      if (logical_core_string)
         sscanf(logical_core_string, "siblings : %u", &device->memory_trace.num_cpus);
      char *physical_core_string = strstr(cpuinfo_line, "cpu cores");
      if (physical_core_string)
         sscanf(physical_core_string, "cpu cores : %u", &num_physical_cores);
   }
   fclose(cpuinfo_file);
   if (!device->memory_trace.num_cpus)
      device->memory_trace.num_cpus = num_physical_cores;
| |
| FILE *clock_file = fopen(RADV_FTRACE_INSTANCE_PATH "/trace_clock", "w"); |
| if (!clock_file) { |
| fprintf(stderr, |
| "radv: Couldn't initialize memory tracing: " |
| "Can't access the tracing control files (%s).\n", |
| strerror(errno)); |
| goto error; |
| } |
| |
| fprintf(clock_file, "mono"); |
| fclose(clock_file); |
| |
   device->memory_trace.pipe_fds = malloc(device->memory_trace.num_cpus * sizeof(int));
   if (!device->memory_trace.pipe_fds) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Out of memory.\n");
      goto error;
   }
| for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) { |
| open_trace_pipe(i, device->memory_trace.pipe_fds + i); |
| |
| if (device->memory_trace.pipe_fds[i] == -1) { |
| fprintf(stderr, |
| "radv: Couldn't initialize memory tracing: " |
| "Can't access the trace buffer pipes (%s).\n", |
| strerror(errno)); |
         /* Close the pipes opened so far (indices 0 .. i-1). */
         for (uint32_t j = 0; j < i; ++j) {
            close(device->memory_trace.pipe_fds[j]);
         }
| goto error; |
| } |
| } |
| |
| device->memory_trace.ftrace_update_ptes_id = trace_event_id("amdgpu_vm_update_ptes"); |
| if (device->memory_trace.ftrace_update_ptes_id == (uint16_t)~0U) { |
| fprintf(stderr, |
| "radv: Couldn't initialize memory tracing: " |
| "Can't access the trace event ID file (%s).\n", |
| strerror(errno)); |
| goto error_pipes; |
| } |
| |
| if (!set_event_tracing_enabled("amdgpu_vm_update_ptes", true)) { |
| fprintf(stderr, |
| "radv: Couldn't initialize memory tracing: " |
| "Can't enable trace events (%s).\n", |
| strerror(errno)); |
| goto error_pipes; |
| } |
| |
| fprintf(stderr, "radv: Enabled Memory Trace.\n"); |
| return; |
| |
| error_pipes: |
| close_pipe_fds(device); |
| error: |
| vk_memory_trace_finish(&device->vk); |
| #endif |
| } |
| |
| static void |
| fill_memory_info(const struct radeon_info *gpu_info, struct vk_rmv_memory_info *out_info, int32_t index) |
| { |
| switch (index) { |
| case VK_RMV_MEMORY_LOCATION_DEVICE: |
| out_info->physical_base_address = 0; |
| out_info->size = gpu_info->all_vram_visible ? (uint64_t)gpu_info->vram_size_kb * 1024ULL |
| : (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL; |
| break; |
| case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE: |
| out_info->physical_base_address = (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL; |
| out_info->size = gpu_info->all_vram_visible ? 0 : (uint64_t)gpu_info->vram_size_kb * 1024ULL; |
| break; |
| case VK_RMV_MEMORY_LOCATION_HOST: { |
      /* Effectively no cap if querying the total physical memory fails. */
      uint64_t ram_size = UINT64_MAX;
| os_get_total_physical_memory(&ram_size); |
| out_info->physical_base_address = 0; |
| out_info->size = MIN2((uint64_t)gpu_info->gart_size_kb * 1024ULL, ram_size); |
| } break; |
| default: |
| unreachable("invalid memory index"); |
| } |
| } |
| |
| static enum vk_rmv_memory_type |
| memory_type_from_vram_type(uint32_t vram_type) |
| { |
| switch (vram_type) { |
| case AMD_VRAM_TYPE_UNKNOWN: |
| return VK_RMV_MEMORY_TYPE_UNKNOWN; |
| case AMD_VRAM_TYPE_DDR2: |
| return VK_RMV_MEMORY_TYPE_DDR2; |
| case AMD_VRAM_TYPE_DDR3: |
| return VK_RMV_MEMORY_TYPE_DDR3; |
| case AMD_VRAM_TYPE_DDR4: |
| return VK_RMV_MEMORY_TYPE_DDR4; |
| case AMD_VRAM_TYPE_GDDR5: |
| return VK_RMV_MEMORY_TYPE_GDDR5; |
| case AMD_VRAM_TYPE_HBM: |
| return VK_RMV_MEMORY_TYPE_HBM; |
| case AMD_VRAM_TYPE_GDDR6: |
| return VK_RMV_MEMORY_TYPE_GDDR6; |
| case AMD_VRAM_TYPE_DDR5: |
| return VK_RMV_MEMORY_TYPE_DDR5; |
| case AMD_VRAM_TYPE_LPDDR4: |
| return VK_RMV_MEMORY_TYPE_LPDDR4; |
| case AMD_VRAM_TYPE_LPDDR5: |
| return VK_RMV_MEMORY_TYPE_LPDDR5; |
| default: |
| unreachable("Invalid vram type"); |
| } |
| } |
| |
| void |
| radv_rmv_fill_device_info(const struct radv_physical_device *pdev, struct vk_rmv_device_info *info) |
| { |
| const struct radeon_info *gpu_info = &pdev->info; |
| |
| for (int32_t i = 0; i < VK_RMV_MEMORY_LOCATION_COUNT; ++i) { |
| fill_memory_info(gpu_info, &info->memory_infos[i], i); |
| } |
| |
| if (gpu_info->marketing_name) |
| strncpy(info->device_name, gpu_info->marketing_name, sizeof(info->device_name) - 1); |
| info->pcie_family_id = gpu_info->family_id; |
| info->pcie_revision_id = gpu_info->pci_rev_id; |
| info->pcie_device_id = gpu_info->pci.dev; |
| info->minimum_shader_clock = 0; |
| info->maximum_shader_clock = gpu_info->max_gpu_freq_mhz; |
| info->vram_type = memory_type_from_vram_type(gpu_info->vram_type); |
| info->vram_bus_width = gpu_info->memory_bus_width; |
| info->vram_operations_per_clock = ac_memory_ops_per_clock(gpu_info->vram_type); |
| info->minimum_memory_clock = 0; |
| info->maximum_memory_clock = gpu_info->memory_freq_mhz; |
| info->vram_bandwidth = gpu_info->memory_bandwidth_gbps; |
| } |
| |
| void |
| radv_rmv_collect_trace_events(struct radv_device *device) |
| { |
| for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) { |
| append_trace_events(device, device->memory_trace.pipe_fds[i]); |
| } |
| } |
| |
| void |
| radv_memory_trace_finish(struct radv_device *device) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| set_event_tracing_enabled("amdgpu_vm_update_ptes", false); |
| close_pipe_fds(device); |
| } |
| |
| /* The token lock must be held when entering _locked functions */ |
| static void |
| log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset, |
| uint64_t size) |
| { |
| struct vk_rmv_resource_bind_token token = {0}; |
| token.address = bo->va + offset; |
| token.is_system_memory = bo->initial_domain & RADEON_DOMAIN_GTT; |
| token.size = size; |
| token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, resource); |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &token); |
| } |
| |
| void |
| radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal, |
| VkMemoryAllocateFlags alloc_flags) |
| { |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_device_memory, memory, heap); |
| |
| /* Do not log zero-sized device memory objects. */ |
| if (!memory->alloc_size) |
| return; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| |
| struct vk_rmv_resource_create_token token = {0}; |
| token.is_driver_internal = is_internal; |
| token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap); |
| token.type = VK_RMV_RESOURCE_TYPE_HEAP; |
| token.heap.alignment = pdev->info.max_alignment; |
| token.heap.size = memory->alloc_size; |
| token.heap.heap_index = memory->heap_index; |
| token.heap.alloc_flags = alloc_flags; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); |
| log_resource_bind_locked(device, (uint64_t)heap, memory->bo, 0, memory->alloc_size); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal) |
| { |
| const struct radv_physical_device *pdev = radv_device_physical(device); |
| |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| /* RMV doesn't seem to support GDS/OA domains. */ |
| if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT)) |
| return; |
| |
| struct vk_rmv_virtual_allocate_token token = {0}; |
| token.address = bo->va; |
| /* If all VRAM is visible, no bo will be in invisible memory. */ |
| token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible; |
| token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain; |
| token.is_driver_internal = is_internal; |
| token.page_count = DIV_ROUND_UP(bo->size, 4096); |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, &token); |
| radv_rmv_collect_trace_events(device); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| /* RMV doesn't seem to support GDS/OA domains. */ |
| if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT)) |
| return; |
| |
| struct vk_rmv_virtual_free_token token = {0}; |
| token.address = bo->va; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, &token); |
| radv_rmv_collect_trace_events(device); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_buffer, buffer, _buffer); |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| log_resource_bind_locked(device, (uint64_t)_buffer, buffer->bo, buffer->offset, buffer->vk.size); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal, |
| VkImage _image) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_image, image, _image); |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token token = {0}; |
| token.is_driver_internal = is_internal; |
| token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_image); |
| token.type = VK_RMV_RESOURCE_TYPE_IMAGE; |
| token.image.create_flags = create_info->flags; |
| token.image.usage_flags = create_info->usage; |
| token.image.type = create_info->imageType; |
| token.image.extent = create_info->extent; |
| token.image.format = create_info->format; |
| token.image.num_mips = create_info->mipLevels; |
| token.image.num_slices = create_info->arrayLayers; |
| token.image.tiling = create_info->tiling; |
| token.image.alignment_log2 = util_logbase2(image->alignment); |
| token.image.log2_samples = util_logbase2(image->vk.samples); |
| token.image.log2_storage_samples = util_logbase2(image->vk.samples); |
| token.image.metadata_alignment_log2 = image->planes[0].surface.meta_alignment_log2; |
| token.image.image_alignment_log2 = image->planes[0].surface.alignment_log2; |
| token.image.size = image->size; |
| token.image.metadata_size = image->planes[0].surface.meta_size; |
| token.image.metadata_header_size = 0; |
| token.image.metadata_offset = image->planes[0].surface.meta_offset; |
| token.image.metadata_header_offset = image->planes[0].surface.meta_offset; |
| token.image.presentable = image->planes[0].surface.is_displayable; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_image_bind(struct radv_device *device, uint32_t bind_idx, VkImage _image) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_image, image, _image); |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| log_resource_bind_locked(device, (uint64_t)_image, image->bindings[bind_idx].bo, image->bindings[bind_idx].offset, |
| image->bindings[bind_idx].range); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool _pool) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_query_pool, pool, _pool); |
| |
| if (pool->vk.query_type != VK_QUERY_TYPE_OCCLUSION && pool->vk.query_type != VK_QUERY_TYPE_PIPELINE_STATISTICS && |
| pool->vk.query_type != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) |
| return; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool); |
| create_token.type = VK_RMV_RESOURCE_TYPE_QUERY_HEAP; |
| create_token.query_pool.type = pool->vk.query_type; |
| create_token.query_pool.has_cpu_access = true; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size, |
| uint32_t data_size, uint32_t scratch_size) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| uint64_t upload_resource_identifier = (uint64_t)(uintptr_t)bo; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.is_driver_internal = true; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier); |
| create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR; |
| create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws); |
| create_token.command_buffer.executable_size = executable_size; |
| create_token.command_buffer.app_available_executable_size = executable_size; |
| create_token.command_buffer.embedded_data_size = data_size; |
| create_token.command_buffer.app_available_embedded_data_size = data_size; |
| create_token.command_buffer.scratch_size = scratch_size; |
| create_token.command_buffer.app_available_scratch_size = scratch_size; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| log_resource_bind_locked(device, upload_resource_identifier, bo, 0, bo->size); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_log_cpu_map(&device->vk, bo->va, false); |
| } |
| |
| void |
| radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_destroy_token destroy_token = {0}; |
| destroy_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo); |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &destroy_token); |
| vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_log_cpu_map(&device->vk, bo->va, true); |
| } |
| |
| void |
| radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| uint32_t resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo); |
| |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.is_driver_internal = true; |
| create_token.resource_id = resource_id; |
| create_token.type = VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE; |
| /* |
| * We have 4096 entries, but the corresponding RMV token only has 8 bits. |
| */ |
| create_token.border_color_palette.num_entries = 255; /* = RADV_BORDER_COLOR_COUNT; */ |
| |
   struct vk_rmv_resource_bind_token bind_token = {0};
| bind_token.address = bo->va; |
| bind_token.is_system_memory = false; |
| bind_token.resource_id = resource_id; |
| bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_log_cpu_map(&device->vk, bo->va, false); |
| } |
| |
| void |
| radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_destroy_token token = {0}; |
| /* same resource id as the create token */ |
| token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo); |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_log_cpu_map(&device->vk, bo->va, true); |
| } |
| |
| void |
| radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| struct vk_rmv_resource_reference_token token = {0}; |
| token.virtual_address = src_bo->va + offset; |
| token.residency_removed = false; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token); |
| radv_rmv_collect_trace_events(device); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| struct vk_rmv_resource_reference_token token = {0}; |
| token.virtual_address = src_bo->va + offset; |
| token.residency_removed = true; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token); |
| radv_rmv_collect_trace_events(device); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info, |
| VkDescriptorPool _pool) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_descriptor_pool, pool, _pool); |
| |
| if (pool->bo) |
| vk_rmv_log_cpu_map(&device->vk, pool->bo->va, false); |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.is_driver_internal = false; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool); |
| create_token.type = VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL; |
| create_token.descriptor_pool.max_sets = create_info->maxSets; |
| create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount; |
   /* This allocation is handed off with the token and freed automatically when
    * the trace data is destroyed. */
   create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
   if (!create_token.descriptor_pool.pool_sizes) {
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
      return;
   }
| |
| memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes, |
| create_info->poolSizeCount * sizeof(VkDescriptorPoolSize)); |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| |
| if (pool->bo) { |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
      struct vk_rmv_resource_bind_token bind_token = {0};
| bind_token.address = pool->bo->va; |
| bind_token.is_system_memory = false; |
| bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool); |
| bind_token.size = pool->size; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| } |
| |
| void |
| radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VkPipeline _pipeline = radv_pipeline_to_handle(pipeline); |
| struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.is_driver_internal = is_internal; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline); |
| create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE; |
| create_token.pipeline.is_internal = is_internal; |
| create_token.pipeline.hash_lo = pipeline->pipeline_hash; |
| create_token.pipeline.is_ngg = graphics_pipeline->is_ngg; |
| create_token.pipeline.shader_stages = graphics_pipeline->active_stages; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { |
| struct radv_shader *shader = pipeline->shaders[s]; |
| |
| if (!shader) |
| continue; |
| |
| log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size); |
| } |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VkPipeline _pipeline = radv_pipeline_to_handle(pipeline); |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.is_driver_internal = is_internal; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline); |
| create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE; |
| create_token.pipeline.is_internal = is_internal; |
| create_token.pipeline.hash_lo = pipeline->pipeline_hash; |
| create_token.pipeline.is_ngg = false; |
| create_token.pipeline.shader_stages = VK_SHADER_STAGE_COMPUTE_BIT; |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| struct radv_shader *shader = pipeline->shaders[MESA_SHADER_COMPUTE]; |
| log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VkPipeline _pipeline = radv_pipeline_to_handle(&pipeline->base.base); |
| |
| struct radv_shader *prolog = pipeline->prolog; |
| struct radv_shader *traversal = pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]; |
| |
| VkShaderStageFlagBits active_stages = traversal ? VK_SHADER_STAGE_INTERSECTION_BIT_KHR : 0; |
| if (prolog) |
| active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; |
| |
| for (uint32_t i = 0; i < pipeline->stage_count; i++) { |
| if (pipeline->stages[i].shader) |
| active_stages |= mesa_to_vk_shader_stage(pipeline->stages[i].stage); |
| } |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline); |
| create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE; |
| create_token.pipeline.hash_lo = pipeline->base.base.pipeline_hash; |
| create_token.pipeline.shader_stages = active_stages; |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| |
| if (prolog) |
| log_resource_bind_locked(device, (uint64_t)_pipeline, prolog->bo, prolog->alloc->offset, prolog->alloc->size); |
| |
| if (traversal) |
| log_resource_bind_locked(device, (uint64_t)_pipeline, traversal->bo, traversal->alloc->offset, |
| traversal->alloc->size); |
| |
| for (uint32_t i = 0; i < pipeline->non_imported_stage_count; i++) { |
| struct radv_shader *shader = pipeline->stages[i].shader; |
| if (shader) |
| log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size); |
| } |
| |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |
| |
| void |
| radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| VK_FROM_HANDLE(radv_event, event, _event); |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_create_token create_token = {0}; |
| create_token.is_driver_internal = is_internal; |
| create_token.type = VK_RMV_RESOURCE_TYPE_GPU_EVENT; |
| create_token.event.flags = flags; |
| create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event); |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token); |
| log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| |
| if (event->map) |
| vk_rmv_log_cpu_map(&device->vk, event->bo->va, false); |
| } |
| |
| void |
| radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type) |
| { |
| if (!device->vk.memory_trace_data.is_enabled) |
| return; |
| |
| switch (type) { |
| case AMD_IP_GFX: |
| vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_GRAPHICS); |
| break; |
| case AMD_IP_COMPUTE: |
| vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COMPUTE); |
| break; |
| case AMD_IP_SDMA: |
| vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COPY); |
| break; |
| default: |
| unreachable("invalid ip type"); |
| } |
| } |
| |
| void |
| radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle) |
| { |
| if (!device->vk.memory_trace_data.is_enabled || handle == 0) |
| return; |
| |
| simple_mtx_lock(&device->vk.memory_trace_data.token_mtx); |
| struct vk_rmv_resource_destroy_token token = {0}; |
| token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, handle); |
| |
| vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token); |
| vk_rmv_destroy_resource_id_locked(&device->vk, handle); |
| simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx); |
| } |