| /* |
 * Copyright © 2020 Intel Corporation
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "brw_kernel.h" |
| #include "brw_nir.h" |
| #include "elk/elk_nir_options.h" |
| #include "intel_nir.h" |
| |
| #include "intel_nir.h" |
| #include "nir_clc_helpers.h" |
| #include "compiler/nir/nir_builder.h" |
| #include "compiler/spirv/nir_spirv.h" |
| #include "compiler/spirv/spirv_info.h" |
| #include "dev/intel_debug.h" |
| #include "util/u_atomic.h" |
| #include "util/u_dynarray.h" |
| |
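/* Returns the libclc helper library as NIR, building it on first use and
 * caching it on the compiler.  The cmpxchg below makes the caching safe
 * against concurrent callers: only the winner's copy is kept.
 */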
| static const nir_shader * |
| load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache, |
| const nir_shader_compiler_options *nir_options, |
| const struct spirv_to_nir_options *spirv_options) |
| { |
| if (compiler->clc_shader) |
| return compiler->clc_shader; |
| |
| nir_shader *nir = nir_load_libclc_shader(64, disk_cache, |
| spirv_options, nir_options, |
| disk_cache != NULL); |
| if (nir == NULL) |
| return NULL; |
| |
| const nir_shader *old_nir = |
| p_atomic_cmpxchg(&compiler->clc_shader, NULL, nir); |
| if (old_nir == NULL) { |
| /* We won the race */ |
| ralloc_steal(compiler, nir); |
| return nir; |
| } else { |
| /* Someone else built the shader first */ |
| ralloc_free(nir); |
| return old_nir; |
| } |
| } |
| |
| static nir_builder |
| builder_init_new_impl(nir_function *func) |
| { |
| nir_function_impl *impl = nir_function_impl_create(func); |
| return nir_builder_at(nir_before_impl(impl)); |
| } |
| |
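/* Emits a NIR body for an atomic builtin that is declared but not defined in
 * the input.  Parameter 0 is a pointer through which the result is returned;
 * the remaining parameters map onto the sources of a deref_atomic intrinsic,
 * with the first source cast to a deref in the given variable mode.
 */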
| static void |
| implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op, |
| enum glsl_base_type data_base_type, |
| nir_variable_mode mode) |
| { |
| nir_builder b = builder_init_new_impl(func); |
| const struct glsl_type *data_type = glsl_scalar_type(data_base_type); |
| |
| unsigned p = 0; |
| |
   nir_deref_instr *ret =
      nir_build_deref_cast(&b, nir_load_param(&b, p++),
                           nir_var_function_temp, data_type, 0);
| |
| nir_intrinsic_op op = nir_intrinsic_deref_atomic; |
| nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op); |
| nir_intrinsic_set_atomic_op(atomic, atomic_op); |
| |
| for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) { |
| nir_def *src = nir_load_param(&b, p++); |
| if (i == 0) { |
| /* The first source is our deref */ |
| assert(nir_intrinsic_infos[op].src_components[i] == -1); |
| src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def; |
| } |
| atomic->src[i] = nir_src_for_ssa(src); |
| } |
| |
| nir_def_init_for_type(&atomic->instr, &atomic->def, data_type); |
| |
| nir_builder_instr_insert(&b, &atomic->instr); |
| nir_store_deref(&b, ret, &atomic->def, ~0); |
| } |
| |
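/* Emits a body for intel_sub_group_ballot(): a single NIR ballot intrinsic
 * whose 32-bit result is stored through the return-value pointer.
 */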
| static void |
| implement_sub_group_ballot_builtin(nir_function *func) |
| { |
| nir_builder b = builder_init_new_impl(func); |
| nir_deref_instr *ret = |
| nir_build_deref_cast(&b, nir_load_param(&b, 0), |
| nir_var_function_temp, glsl_uint_type(), 0); |
| nir_def *cond = nir_load_param(&b, 1); |
| |
| nir_intrinsic_instr *ballot = |
| nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot); |
| ballot->src[0] = nir_src_for_ssa(cond); |
| ballot->num_components = 1; |
| nir_def_init(&ballot->instr, &ballot->def, 1, 32); |
| nir_builder_instr_insert(&b, &ballot->instr); |
| |
| nir_store_deref(&b, ret, &ballot->def, ~0); |
| } |
| |
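/* Provides bodies for Intel-specific builtins that libclc does not supply.
 * The _Z-prefixed names are the mangled forms of the OpenCL prototypes
 * quoted in the per-case comments.
 */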
| static bool |
| implement_intel_builtins(nir_shader *nir) |
| { |
| bool progress = false; |
| |
| nir_foreach_function(func, nir) { |
| if (strcmp(func->name, "_Z10atomic_minPU3AS1Vff") == 0) { |
         /* float atomic_min(__global float volatile *p, float val) */
| implement_atomic_builtin(func, nir_atomic_op_fmin, |
| GLSL_TYPE_FLOAT, nir_var_mem_global); |
| progress = true; |
| } else if (strcmp(func->name, "_Z10atomic_maxPU3AS1Vff") == 0) { |
         /* float atomic_max(__global float volatile *p, float val) */
| implement_atomic_builtin(func, nir_atomic_op_fmax, |
| GLSL_TYPE_FLOAT, nir_var_mem_global); |
| progress = true; |
| } else if (strcmp(func->name, "_Z10atomic_minPU3AS3Vff") == 0) { |
         /* float atomic_min(__local float volatile *, float) */
| implement_atomic_builtin(func, nir_atomic_op_fmin, |
| GLSL_TYPE_FLOAT, nir_var_mem_shared); |
| progress = true; |
| } else if (strcmp(func->name, "_Z10atomic_maxPU3AS3Vff") == 0) { |
         /* float atomic_max(__local float volatile *, float) */
| implement_atomic_builtin(func, nir_atomic_op_fmax, |
| GLSL_TYPE_FLOAT, nir_var_mem_shared); |
| progress = true; |
| } else if (strcmp(func->name, "intel_sub_group_ballot") == 0) { |
| implement_sub_group_ballot_builtin(func); |
| progress = true; |
| } |
| } |
| |
| nir_shader_preserve_all_metadata(nir); |
| |
| return progress; |
| } |
| |
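/* Rewrites kernel intrinsics in terms of the push-constant layout used for
 * kernels: a struct brw_kernel_sysvals at offset 0 followed by the kernel
 * arguments, both read with load_uniform.
 */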
| static bool |
| lower_kernel_intrinsics(nir_shader *nir) |
| { |
| nir_function_impl *impl = nir_shader_get_entrypoint(nir); |
| |
| bool progress = false; |
| |
| unsigned kernel_sysvals_start = 0; |
| unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals); |
| nir->num_uniforms += kernel_arg_start; |
| |
| nir_builder b = nir_builder_create(impl); |
| |
| nir_foreach_block(block, impl) { |
| nir_foreach_instr_safe(instr, block) { |
| if (instr->type != nir_instr_type_intrinsic) |
| continue; |
| |
| nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); |
| switch (intrin->intrinsic) { |
| case nir_intrinsic_load_kernel_input: { |
| b.cursor = nir_instr_remove(&intrin->instr); |
| |
| nir_intrinsic_instr *load = |
| nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); |
| load->num_components = intrin->num_components; |
| load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa)); |
| nir_intrinsic_set_base(load, kernel_arg_start); |
| nir_intrinsic_set_range(load, nir->num_uniforms); |
| nir_def_init(&load->instr, &load->def, |
| intrin->def.num_components, |
| intrin->def.bit_size); |
| nir_builder_instr_insert(&b, &load->instr); |
| |
| nir_def_rewrite_uses(&intrin->def, &load->def); |
| progress = true; |
| break; |
| } |
| |
| case nir_intrinsic_load_constant_base_ptr: { |
| b.cursor = nir_instr_remove(&intrin->instr); |
| nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b, |
| nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW), |
| nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH)); |
| nir_def_rewrite_uses(&intrin->def, const_data_base_addr); |
| progress = true; |
| break; |
| } |
| |
| case nir_intrinsic_load_num_workgroups: { |
| b.cursor = nir_instr_remove(&intrin->instr); |
| |
| nir_intrinsic_instr *load = |
| nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); |
| load->num_components = 3; |
| load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); |
| nir_intrinsic_set_base(load, kernel_sysvals_start + |
| offsetof(struct brw_kernel_sysvals, num_work_groups)); |
| nir_intrinsic_set_range(load, 3 * 4); |
| nir_def_init(&load->instr, &load->def, 3, 32); |
| nir_builder_instr_insert(&b, &load->instr); |
| nir_def_rewrite_uses(&intrin->def, &load->def); |
| progress = true; |
| break; |
| } |
| |
| default: |
| break; |
| } |
| } |
| } |
| |
| if (progress) { |
| nir_metadata_preserve(impl, nir_metadata_control_flow); |
| } else { |
| nir_metadata_preserve(impl, nir_metadata_all); |
| } |
| |
| return progress; |
| } |
| |
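/* SPIR-V capabilities we accept in OpenCL kernels on this backend. */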
| static const struct spirv_capabilities spirv_caps = { |
| .Addresses = true, |
| .Float16 = true, |
| .Float64 = true, |
| .Groups = true, |
| .StorageImageWriteWithoutFormat = true, |
| .Int8 = true, |
| .Int16 = true, |
| .Int64 = true, |
| .Int64Atomics = true, |
| .Kernel = true, |
   .Linkage = true, /* We receive a linked kernel from clc */
| .DenormFlushToZero = true, |
| .DenormPreserve = true, |
| .SignedZeroInfNanPreserve = true, |
| .RoundingModeRTE = true, |
| .RoundingModeRTZ = true, |
| .GenericPointer = true, |
| .GroupNonUniform = true, |
| .GroupNonUniformArithmetic = true, |
| .GroupNonUniformClustered = true, |
| .GroupNonUniformBallot = true, |
| .GroupNonUniformQuad = true, |
| .GroupNonUniformShuffle = true, |
| .GroupNonUniformVote = true, |
| .SubgroupDispatch = true, |
| |
| .SubgroupShuffleINTEL = true, |
| .SubgroupBufferBlockIOINTEL = true, |
| }; |
| |
| bool |
| brw_kernel_from_spirv(struct brw_compiler *compiler, |
| struct disk_cache *disk_cache, |
| struct brw_kernel *kernel, |
| void *log_data, void *mem_ctx, |
| const uint32_t *spirv, size_t spirv_size, |
| const char *entrypoint_name, |
| char **error_str) |
| { |
| const struct intel_device_info *devinfo = compiler->devinfo; |
| const nir_shader_compiler_options *nir_options = |
| compiler->nir_options[MESA_SHADER_KERNEL]; |
| |
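   /* Generic (shared/scratch/global) pointers use
    * nir_address_format_62bit_generic: a 64-bit value whose top bits tag the
    * memory mode, leaving 62 bits of address, so the mode is resolved when
    * explicit I/O is lowered.  __constant pointers are plain 64-bit global
    * addresses.
    */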
| struct spirv_to_nir_options spirv_options = { |
| .environment = NIR_SPIRV_OPENCL, |
| .capabilities = &spirv_caps, |
| .printf = true, |
| .shared_addr_format = nir_address_format_62bit_generic, |
| .global_addr_format = nir_address_format_62bit_generic, |
| .temp_addr_format = nir_address_format_62bit_generic, |
| .constant_addr_format = nir_address_format_64bit_global, |
| }; |
| |
| spirv_options.clc_shader = load_clc_shader(compiler, disk_cache, |
| nir_options, &spirv_options); |
| if (spirv_options.clc_shader == NULL) { |
| fprintf(stderr, "ERROR: libclc shader missing." |
| " Consider installing the libclc package\n"); |
| abort(); |
| } |
| |
| assert(spirv_size % 4 == 0); |
| nir_shader *nir = |
| spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL, |
| entrypoint_name, &spirv_options, nir_options); |
| nir_validate_shader(nir, "after spirv_to_nir"); |
| nir_validate_ssa_dominance(nir, "after spirv_to_nir"); |
| ralloc_steal(mem_ctx, nir); |
| nir->info.name = ralloc_strdup(nir, entrypoint_name); |
| |
| if (INTEL_DEBUG(DEBUG_CS)) { |
| /* Re-index SSA defs so we print more sensible numbers. */ |
| nir_foreach_function_impl(impl, nir) { |
| nir_index_ssa_defs(impl); |
| } |
| |
| fprintf(stderr, "NIR (from SPIR-V) for kernel\n"); |
| nir_print_shader(nir, stderr); |
| } |
| |
| nir_lower_printf_options printf_opts = { |
| .ptr_bit_size = 64, |
| .max_buffer_size = 1024 * 1024, |
| .use_printf_base_identifier = true, |
| }; |
| NIR_PASS_V(nir, nir_lower_printf, &printf_opts); |
| |
| NIR_PASS_V(nir, implement_intel_builtins); |
| NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader); |
| |
| /* We have to lower away local constant initializers right before we |
| * inline functions. That way they get properly initialized at the top |
| * of the function and not at the top of its caller. |
| */ |
| NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); |
| NIR_PASS_V(nir, nir_lower_returns); |
| NIR_PASS_V(nir, nir_inline_functions); |
| NIR_PASS_V(nir, nir_copy_prop); |
| NIR_PASS_V(nir, nir_opt_deref); |
| |
| /* Pick off the single entrypoint that we want */ |
| nir_remove_non_entrypoints(nir); |
| |
| /* Now that we've deleted all but the main function, we can go ahead and |
| * lower the rest of the constant initializers. We do this here so that |
| * nir_remove_dead_variables and split_per_member_structs below see the |
| * corresponding stores. |
| */ |
| NIR_PASS_V(nir, nir_lower_variable_initializers, ~0); |
| |
   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s.  This results in a
    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
| NIR_PASS_V(nir, nir_lower_vec3_to_vec4, |
| nir_var_shader_temp | nir_var_function_temp | |
              nir_var_mem_shared | nir_var_mem_global |
| nir_var_mem_constant); |
| |
| /* We assign explicit types early so that the optimizer can take advantage |
| * of that information and hopefully get rid of some of our memcpys. |
| */ |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, |
| nir_var_uniform | |
| nir_var_shader_temp | nir_var_function_temp | |
| nir_var_mem_shared | nir_var_mem_global, |
| glsl_get_cl_type_size_align); |
| |
| struct brw_nir_compiler_opts opts = {}; |
| brw_preprocess_nir(compiler, nir, &opts); |
| |
| int max_arg_idx = -1; |
| nir_foreach_uniform_variable(var, nir) { |
| assert(var->data.location < 256); |
| max_arg_idx = MAX2(max_arg_idx, var->data.location); |
| } |
| |
| kernel->args_size = nir->num_uniforms; |
| kernel->arg_count = max_arg_idx + 1; |
| |
| /* No bindings */ |
| struct brw_kernel_arg_desc *args = |
| rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count); |
| kernel->args = args; |
| |
| nir_foreach_uniform_variable(var, nir) { |
| struct brw_kernel_arg_desc arg_desc = { |
| .offset = var->data.driver_location, |
| .size = glsl_get_explicit_size(var->type, false), |
| }; |
| assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms); |
| |
| assert(var->data.location >= 0); |
| args[var->data.location] = arg_desc; |
| } |
| |
| NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL); |
| |
   /* Lower again, this time after dead-variable removal, to get more compact
    * variable layouts.
    */
| nir->global_mem_size = 0; |
| nir->scratch_size = 0; |
| nir->info.shared_size = 0; |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, |
| nir_var_shader_temp | nir_var_function_temp | |
| nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant, |
| glsl_get_cl_type_size_align); |
| if (nir->constant_data_size > 0) { |
| assert(nir->constant_data == NULL); |
| nir->constant_data = rzalloc_size(nir, nir->constant_data_size); |
| nir_gather_explicit_io_initializers(nir, nir->constant_data, |
| nir->constant_data_size, |
| nir_var_mem_constant); |
| } |
| |
| if (INTEL_DEBUG(DEBUG_CS)) { |
| /* Re-index SSA defs so we print more sensible numbers. */ |
| nir_foreach_function_impl(impl, nir) { |
| nir_index_ssa_defs(impl); |
| } |
| |
| fprintf(stderr, "NIR (before I/O lowering) for kernel\n"); |
| nir_print_shader(nir, stderr); |
| } |
| |
| NIR_PASS_V(nir, nir_lower_memcpy); |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant, |
| nir_address_format_64bit_global); |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform, |
| nir_address_format_32bit_offset_as_64bit); |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, |
| nir_var_shader_temp | nir_var_function_temp | |
| nir_var_mem_shared | nir_var_mem_global, |
| nir_address_format_62bit_generic); |
| |
| NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL); |
| |
| NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL); |
| NIR_PASS_V(nir, lower_kernel_intrinsics); |
| |
| struct brw_cs_prog_key key = { }; |
| |
| memset(&kernel->prog_data, 0, sizeof(kernel->prog_data)); |
| kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4); |
| |
| struct brw_compile_cs_params params = { |
| .base = { |
| .nir = nir, |
| .stats = kernel->stats, |
| .log_data = log_data, |
| .mem_ctx = mem_ctx, |
| }, |
| .key = &key, |
| .prog_data = &kernel->prog_data, |
| }; |
| |
| kernel->code = brw_compile_cs(compiler, ¶ms); |
| |
| if (error_str) |
| *error_str = params.base.error_str; |
| |
| return kernel->code != NULL; |
| } |
| |
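/* Scans the collected constant-offset scratch stores for one whose range
 * fully covers [read_offset, read_offset + read_size) and returns the value
 * it stored.
 */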
| static nir_def * |
| rebuild_value_from_store(struct util_dynarray *stores, |
| nir_def *value, unsigned read_offset) |
| { |
| unsigned read_size = value->num_components * value->bit_size / 8; |
| |
| util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) { |
| nir_intrinsic_instr *store = *_store; |
| |
| unsigned write_offset = nir_src_as_uint(store->src[1]); |
| unsigned write_size = nir_src_num_components(store->src[0]) * |
| nir_src_bit_size(store->src[0]) / 8; |
| if (write_offset <= read_offset && |
| (write_offset + write_size) >= (read_offset + read_size)) { |
| assert(nir_block_dominates(store->instr.block, value->parent_instr->block)); |
| assert(write_size == read_size); |
| return store->src[0].ssa; |
| } |
| } |
| unreachable("Matching scratch store not found"); |
| } |
| |
/**
 * Removes temporary variables that are stored to scratch only to be
 * immediately reloaded, rewriting each load to use the stored SSA value.
 *
 * This workaround is only meant to be applied to shaders in
 * src/intel/shaders where we know there should be no issue.  More complex
 * cases might not work with this approach.
 */
| static bool |
| nir_remove_llvm17_scratch(nir_shader *nir) |
| { |
| struct util_dynarray scratch_stores; |
| void *mem_ctx = ralloc_context(NULL); |
| |
| util_dynarray_init(&scratch_stores, mem_ctx); |
| |
| nir_foreach_function_impl(func, nir) { |
| nir_foreach_block(block, func) { |
| nir_foreach_instr(instr, block) { |
| if (instr->type != nir_instr_type_intrinsic) |
| continue; |
| |
| nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); |
| |
| if (intrin->intrinsic != nir_intrinsic_store_scratch) |
| continue; |
| |
| nir_const_value *offset = nir_src_as_const_value(intrin->src[1]); |
| if (offset != NULL) { |
| util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin); |
| } |
| } |
| } |
| } |
| |
| bool progress = false; |
| if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) { |
| nir_foreach_function_impl(func, nir) { |
| nir_foreach_block(block, func) { |
| nir_foreach_instr_safe(instr, block) { |
| if (instr->type != nir_instr_type_intrinsic) |
| continue; |
| |
| nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); |
| |
| if (intrin->intrinsic != nir_intrinsic_load_scratch) |
| continue; |
| |
| nir_const_value *offset = nir_src_as_const_value(intrin->src[0]); |
| if (offset == NULL) |
| continue; |
| |
| nir_def_replace(&intrin->def, |
| rebuild_value_from_store(&scratch_stores, &intrin->def, nir_src_as_uint(intrin->src[0]))); |
| |
| progress = true; |
| } |
| } |
| } |
| } |
| |
| util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) { |
| nir_intrinsic_instr *store = *_store; |
| nir_instr_remove(&store->instr); |
| } |
| |
| /* Quick sanity check */ |
| assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 || |
| progress); |
| |
| ralloc_free(mem_ctx); |
| |
| return progress; |
| } |
| |
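/* Runs a copy-prop/DCE/constant-folding loop before and after
 * nir_remove_llvm17_scratch so that scratch offsets are constants when the
 * pass looks for them, and the code it orphans is cleaned up afterwards.
 */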
| static void |
| cleanup_llvm17_scratch(nir_shader *nir) |
| { |
| { |
| bool progress; |
| do { |
| progress = false; |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| NIR_PASS(progress, nir, nir_opt_algebraic); |
| } while (progress); |
| } |
| |
| nir_remove_llvm17_scratch(nir); |
| |
| { |
| bool progress; |
| do { |
| progress = false; |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| NIR_PASS(progress, nir, nir_opt_algebraic); |
| } while (progress); |
| } |
| } |
| |
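/* Translates a SPIR-V module into a NIR library (create_library = true, so
 * every entry point is kept), using brw compiler options on Gfx9+ and elk
 * options on older hardware.  llvm17_wa enables the scratch-removal
 * workaround above.
 */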
| nir_shader * |
| brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv, |
| size_t spirv_size, bool llvm17_wa) |
| { |
| struct spirv_to_nir_options spirv_options = { |
| .environment = NIR_SPIRV_OPENCL, |
| .capabilities = &spirv_caps, |
| .printf = true, |
| .shared_addr_format = nir_address_format_62bit_generic, |
| .global_addr_format = nir_address_format_62bit_generic, |
| .temp_addr_format = nir_address_format_62bit_generic, |
| .constant_addr_format = nir_address_format_64bit_global, |
| .create_library = true, |
| }; |
| |
| assert(spirv_size % 4 == 0); |
| |
| assert(gfx_version); |
| const nir_shader_compiler_options *nir_options = |
| gfx_version >= 9 ? &brw_scalar_nir_options |
| : &elk_scalar_nir_options; |
| |
| nir_shader *nir = |
| spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL, |
| "library", &spirv_options, nir_options); |
| nir_validate_shader(nir, "after spirv_to_nir"); |
| nir_validate_ssa_dominance(nir, "after spirv_to_nir"); |
| ralloc_steal(mem_ctx, nir); |
| nir->info.name = ralloc_strdup(nir, "library"); |
| |
| if (INTEL_DEBUG(DEBUG_CS)) { |
| /* Re-index SSA defs so we print more sensible numbers. */ |
| nir_foreach_function_impl(impl, nir) { |
| nir_index_ssa_defs(impl); |
| } |
| |
| fprintf(stderr, "NIR (from SPIR-V) for kernel\n"); |
| nir_print_shader(nir, stderr); |
| } |
| |
| nir_lower_printf_options printf_opts = { |
| .ptr_bit_size = 64, |
| .use_printf_base_identifier = true, |
| }; |
| NIR_PASS_V(nir, nir_lower_printf, &printf_opts); |
| |
| NIR_PASS_V(nir, implement_intel_builtins); |
| NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader); |
| |
| /* We have to lower away local constant initializers right before we |
| * inline functions. That way they get properly initialized at the top |
| * of the function and not at the top of its caller. |
| */ |
| NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp | |
| nir_var_function_temp)); |
| NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | |
| nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL); |
| { |
| bool progress; |
      do {
| progress = false; |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_copy_prop_vars); |
| NIR_PASS(progress, nir, nir_opt_deref); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| NIR_PASS(progress, nir, nir_opt_undef); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| NIR_PASS(progress, nir, nir_lower_vars_to_ssa); |
| NIR_PASS(progress, nir, nir_opt_algebraic); |
| } while (progress); |
| } |
| |
| NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); |
| NIR_PASS_V(nir, nir_lower_returns); |
| NIR_PASS_V(nir, nir_inline_functions); |
| |
| assert(nir->scratch_size == 0); |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align); |
| |
| { |
| bool progress; |
      do {
| progress = false; |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_copy_prop_vars); |
| NIR_PASS(progress, nir, nir_opt_deref); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| NIR_PASS(progress, nir, nir_opt_undef); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| NIR_PASS(progress, nir, nir_split_var_copies); |
| NIR_PASS(progress, nir, nir_lower_var_copies); |
| NIR_PASS(progress, nir, nir_lower_vars_to_ssa); |
| NIR_PASS(progress, nir, nir_opt_algebraic); |
| NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false); |
| NIR_PASS(progress, nir, nir_opt_dead_cf); |
| NIR_PASS(progress, nir, nir_opt_remove_phis); |
| NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); |
| NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform); |
| NIR_PASS(progress, nir, nir_opt_memcpy); |
| } while (progress); |
| } |
| |
| NIR_PASS_V(nir, nir_scale_fdiv); |
| |
| NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | |
| nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL); |
| |
| NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL); |
| |
| nir->scratch_size = 0; |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, |
| nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | |
| nir_var_mem_global | nir_var_mem_constant, |
| glsl_get_cl_type_size_align); |
| |
   /* Lower memcpy - this needs to wait until the types are sized. */
| { |
| bool progress; |
| do { |
| progress = false; |
| NIR_PASS(progress, nir, nir_opt_memcpy); |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_copy_prop_vars); |
| NIR_PASS(progress, nir, nir_opt_deref); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| NIR_PASS(progress, nir, nir_split_var_copies); |
| NIR_PASS(progress, nir, nir_lower_var_copies); |
| NIR_PASS(progress, nir, nir_lower_vars_to_ssa); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| NIR_PASS(progress, nir, nir_opt_cse); |
| } while (progress); |
| } |
| NIR_PASS_V(nir, nir_lower_memcpy); |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, |
| nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp | nir_var_uniform, |
| nir_address_format_32bit_offset_as_64bit); |
| |
| NIR_PASS_V(nir, nir_lower_system_values); |
| |
   /* Hopefully we can drop this once nir_lower_vars_to_ssa improves enough
    * to stop sending everything through scratch.
    */
| if (llvm17_wa) |
| cleanup_llvm17_scratch(nir); |
| |
   /* Lower again, this time after dead-variable removal, to get more compact
    * variable layouts.
    */
| nir->global_mem_size = 0; |
| nir->scratch_size = 0; |
| nir->info.shared_size = 0; |
| NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, |
| nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant, |
| glsl_get_cl_type_size_align); |
| if (nir->constant_data_size > 0) { |
| assert(nir->constant_data == NULL); |
| nir->constant_data = rzalloc_size(nir, nir->constant_data_size); |
| nir_gather_explicit_io_initializers(nir, nir->constant_data, |
| nir->constant_data_size, |
| nir_var_mem_constant); |
| } |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant, |
| nir_address_format_64bit_global); |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform, |
| nir_address_format_32bit_offset_as_64bit); |
| |
| NIR_PASS_V(nir, nir_lower_explicit_io, |
| nir_var_shader_temp | nir_var_function_temp | |
| nir_var_mem_shared | nir_var_mem_global, |
| nir_address_format_62bit_generic); |
| |
| if (INTEL_DEBUG(DEBUG_CS)) { |
| /* Re-index SSA defs so we print more sensible numbers. */ |
| nir_foreach_function_impl(impl, nir) { |
| nir_index_ssa_defs(impl); |
| } |
| |
| fprintf(stderr, "NIR (before I/O lowering) for kernel\n"); |
| nir_print_shader(nir, stderr); |
| } |
| |
| return nir; |
| } |