| /* |
| * Copyright © Microsoft Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "nir_to_dxil.h" |
| |
| #include "dxil_container.h" |
| #include "dxil_dump.h" |
| #include "dxil_enums.h" |
| #include "dxil_function.h" |
| #include "dxil_module.h" |
| #include "dxil_nir.h" |
| #include "dxil_signature.h" |
| |
| #include "nir/nir_builder.h" |
| #include "nir_deref.h" |
| #include "util/ralloc.h" |
| #include "util/u_debug.h" |
| #include "util/u_dynarray.h" |
| #include "util/u_math.h" |
| |
| #include "git_sha1.h" |
| |
| #include "vulkan/vulkan_core.h" |
| |
| #include <stdint.h> |
| |
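| /* Bitmask of DXIL_DEBUG_* flags, populated from the DXIL_DEBUG environment |
| * variable (see dxil_debug_options below). |
| */ |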
| int debug_dxil = 0; |
| |
| static const struct debug_named_value |
| dxil_debug_options[] = { |
| { "verbose", DXIL_DEBUG_VERBOSE, NULL }, |
| { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" }, |
| { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" }, |
| { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" }, |
| DEBUG_NAMED_VALUE_END |
| }; |
| |
| DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0) |
| |
| static void |
| log_nir_instr_unsupported(const struct dxil_logger *logger, |
| const char *message_prefix, const nir_instr *instr) |
| { |
| char *msg = NULL; |
| char *instr_str = nir_instr_as_str(instr, NULL); |
| asprintf(&msg, "%s: %s\n", message_prefix, instr_str); |
| ralloc_free(instr_str); |
| assert(msg); |
| logger->log(logger->priv, msg); |
| free(msg); |
| } |
| |
| static void |
| default_logger_func(void *priv, const char *msg) |
| { |
| fprintf(stderr, "%s", msg); |
| unreachable("Unhandled error"); |
| } |
| |
| static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func }; |
| |
| #define TRACE_CONVERSION(instr) \ |
| do { \ |
| if (debug_dxil & DXIL_DEBUG_TRACE) { \ |
| fprintf(stderr, "Convert '"); \ |
| nir_print_instr(instr, stderr); \ |
| fprintf(stderr, "'\n"); \ |
| } \ |
| } while (0) |
| |
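| /* Base NIR compiler options for the DXIL backend. Drivers obtain a copy via |
| * dxil_get_nir_compiler_options(), which further restricts 64-bit int/float |
| * support and enables the SM 6.4+ packed dot-product opcodes as appropriate. |
| */ |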
| static const nir_shader_compiler_options |
| nir_options = { |
| .compact_arrays = true, |
| .lower_ineg = true, |
| .lower_fneg = true, |
| .lower_ffma16 = true, |
| .lower_ffma32 = true, |
| .lower_isign = true, |
| .lower_fsign = true, |
| .lower_iabs = true, |
| .lower_fmod = true, |
| .lower_fpow = true, |
| .lower_scmp = true, |
| .lower_ldexp = true, |
| .lower_flrp16 = true, |
| .lower_flrp32 = true, |
| .lower_flrp64 = true, |
| .lower_bitfield_extract = true, |
| .lower_ifind_msb = true, |
| .lower_ufind_msb = true, |
| .lower_extract_word = true, |
| .lower_extract_byte = true, |
| .lower_insert_word = true, |
| .lower_insert_byte = true, |
| .lower_all_io_to_elements = true, |
| .lower_hadd = true, |
| .lower_uadd_sat = true, |
| .lower_usub_sat = true, |
| .lower_iadd_sat = true, |
| .lower_uadd_carry = true, |
| .lower_usub_borrow = true, |
| .lower_mul_high = true, |
| .lower_pack_half_2x16 = true, |
| .lower_pack_unorm_4x8 = true, |
| .lower_pack_snorm_4x8 = true, |
| .lower_pack_snorm_2x16 = true, |
| .lower_pack_unorm_2x16 = true, |
| .lower_pack_64_2x32_split = true, |
| .lower_pack_32_2x16_split = true, |
| .lower_pack_64_4x16 = true, |
| .lower_unpack_64_2x32_split = true, |
| .lower_unpack_32_2x16_split = true, |
| .lower_unpack_half_2x16 = true, |
| .lower_unpack_snorm_2x16 = true, |
| .lower_unpack_snorm_4x8 = true, |
| .lower_unpack_unorm_2x16 = true, |
| .lower_unpack_unorm_4x8 = true, |
| .lower_interpolate_at = true, |
| .has_fsub = true, |
| .has_isub = true, |
| .has_bfe = true, |
| .has_find_msb_rev = true, |
| .vertex_id_zero_based = true, |
| .lower_base_vertex = true, |
| .lower_helper_invocation = true, |
| .has_cs_global_id = true, |
| .lower_mul_2x32_64 = true, |
| .lower_doubles_options = |
| nir_lower_drcp | |
| nir_lower_dsqrt | |
| nir_lower_drsq | |
| nir_lower_dfract | |
| nir_lower_dtrunc | |
| nir_lower_dfloor | |
| nir_lower_dceil | |
| nir_lower_dround_even, |
| .lower_uniforms_to_ubo = true, |
| .max_unroll_iterations = 32, /* arbitrary */ |
| .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out), |
| .lower_device_index_to_zero = true, |
| .support_16bit_alu = true, |
| .preserve_mediump = true, |
| .discard_is_demote = true, |
| .scalarize_ddx = true, |
| .io_options = nir_io_dont_use_pos_for_non_fs_varyings | nir_io_mediump_is_32bit, |
| }; |
| |
| const nir_shader_compiler_options* |
| dxil_get_base_nir_compiler_options(void) |
| { |
| return &nir_options; |
| } |
| |
| void |
| dxil_get_nir_compiler_options(nir_shader_compiler_options *options, |
| enum dxil_shader_model shader_model_max, |
| unsigned supported_int_sizes, |
| unsigned supported_float_sizes) |
| { |
| *options = nir_options; |
| if (!(supported_int_sizes & 64)) { |
| options->lower_pack_64_2x32_split = false; |
| options->lower_unpack_64_2x32_split = false; |
| options->lower_int64_options = ~0; |
| } |
| if (!(supported_float_sizes & 64)) |
| options->lower_doubles_options = ~0; |
| if (shader_model_max >= SHADER_MODEL_6_4) { |
| options->has_sdot_4x8 = true; |
| options->has_udot_4x8 = true; |
| } |
| } |
| |
| static bool |
| emit_llvm_ident(struct dxil_module *m) |
| { |
| const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1); |
| if (!compiler) |
| return false; |
| |
| const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1); |
| return llvm_ident && |
| dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1); |
| } |
| |
| static bool |
| emit_named_version(struct dxil_module *m, const char *name, |
| int major, int minor) |
| { |
| const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major); |
| const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor); |
| const struct dxil_mdnode *version_nodes[] = { major_node, minor_node }; |
| const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes, |
| ARRAY_SIZE(version_nodes)); |
| return dxil_add_metadata_named_node(m, name, &version, 1); |
| } |
| |
| static const char * |
| get_shader_kind_str(enum dxil_shader_kind kind) |
| { |
| switch (kind) { |
| case DXIL_PIXEL_SHADER: |
| return "ps"; |
| case DXIL_VERTEX_SHADER: |
| return "vs"; |
| case DXIL_GEOMETRY_SHADER: |
| return "gs"; |
| case DXIL_HULL_SHADER: |
| return "hs"; |
| case DXIL_DOMAIN_SHADER: |
| return "ds"; |
| case DXIL_COMPUTE_SHADER: |
| return "cs"; |
| default: |
| unreachable("invalid shader kind"); |
| } |
| } |
| |
| static bool |
| emit_dx_shader_model(struct dxil_module *m) |
| { |
| const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind)); |
| const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version); |
| const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version); |
| const struct dxil_mdnode *shader_model[] = { type_node, major_node, |
| minor_node }; |
| const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model)); |
| |
| return dxil_add_metadata_named_node(m, "dx.shaderModel", |
| &dx_shader_model, 1); |
| } |
| |
| enum { |
| DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0, |
| DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1 |
| }; |
| |
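| /* DXIL operation opcodes, passed as the first (i32) argument of every |
| * dx.op.* call. The numbering is defined by the DXIL specification; only |
| * the opcodes emitted by this backend are listed here. |
| */ |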
| enum dxil_intr { |
| DXIL_INTR_LOAD_INPUT = 4, |
| DXIL_INTR_STORE_OUTPUT = 5, |
| DXIL_INTR_FABS = 6, |
| DXIL_INTR_SATURATE = 7, |
| |
| DXIL_INTR_ISFINITE = 10, |
| DXIL_INTR_ISNORMAL = 11, |
| |
| DXIL_INTR_FCOS = 12, |
| DXIL_INTR_FSIN = 13, |
| |
| DXIL_INTR_FEXP2 = 21, |
| DXIL_INTR_FRC = 22, |
| DXIL_INTR_FLOG2 = 23, |
| |
| DXIL_INTR_SQRT = 24, |
| DXIL_INTR_RSQRT = 25, |
| DXIL_INTR_ROUND_NE = 26, |
| DXIL_INTR_ROUND_NI = 27, |
| DXIL_INTR_ROUND_PI = 28, |
| DXIL_INTR_ROUND_Z = 29, |
| |
| DXIL_INTR_BFREV = 30, |
| DXIL_INTR_COUNTBITS = 31, |
| DXIL_INTR_FIRSTBIT_LO = 32, |
| DXIL_INTR_FIRSTBIT_HI = 33, |
| DXIL_INTR_FIRSTBIT_SHI = 34, |
| |
| DXIL_INTR_FMAX = 35, |
| DXIL_INTR_FMIN = 36, |
| DXIL_INTR_IMAX = 37, |
| DXIL_INTR_IMIN = 38, |
| DXIL_INTR_UMAX = 39, |
| DXIL_INTR_UMIN = 40, |
| |
| DXIL_INTR_FMA = 47, |
| |
| DXIL_INTR_IBFE = 51, |
| DXIL_INTR_UBFE = 52, |
| DXIL_INTR_BFI = 53, |
| |
| DXIL_INTR_CREATE_HANDLE = 57, |
| DXIL_INTR_CBUFFER_LOAD_LEGACY = 59, |
| |
| DXIL_INTR_SAMPLE = 60, |
| DXIL_INTR_SAMPLE_BIAS = 61, |
| DXIL_INTR_SAMPLE_LEVEL = 62, |
| DXIL_INTR_SAMPLE_GRAD = 63, |
| DXIL_INTR_SAMPLE_CMP = 64, |
| DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65, |
| |
| DXIL_INTR_TEXTURE_LOAD = 66, |
| DXIL_INTR_TEXTURE_STORE = 67, |
| |
| DXIL_INTR_BUFFER_LOAD = 68, |
| DXIL_INTR_BUFFER_STORE = 69, |
| |
| DXIL_INTR_TEXTURE_SIZE = 72, |
| DXIL_INTR_TEXTURE_GATHER = 73, |
| DXIL_INTR_TEXTURE_GATHER_CMP = 74, |
| |
| DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75, |
| DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76, |
| DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77, |
| |
| DXIL_INTR_ATOMIC_BINOP = 78, |
| DXIL_INTR_ATOMIC_CMPXCHG = 79, |
| DXIL_INTR_BARRIER = 80, |
| DXIL_INTR_TEXTURE_LOD = 81, |
| |
| DXIL_INTR_DISCARD = 82, |
| DXIL_INTR_DDX_COARSE = 83, |
| DXIL_INTR_DDY_COARSE = 84, |
| DXIL_INTR_DDX_FINE = 85, |
| DXIL_INTR_DDY_FINE = 86, |
| |
| DXIL_INTR_EVAL_SNAPPED = 87, |
| DXIL_INTR_EVAL_SAMPLE_INDEX = 88, |
| DXIL_INTR_EVAL_CENTROID = 89, |
| |
| DXIL_INTR_SAMPLE_INDEX = 90, |
| DXIL_INTR_COVERAGE = 91, |
| |
| DXIL_INTR_THREAD_ID = 93, |
| DXIL_INTR_GROUP_ID = 94, |
| DXIL_INTR_THREAD_ID_IN_GROUP = 95, |
| DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96, |
| |
| DXIL_INTR_EMIT_STREAM = 97, |
| DXIL_INTR_CUT_STREAM = 98, |
| |
| DXIL_INTR_GS_INSTANCE_ID = 100, |
| |
| DXIL_INTR_MAKE_DOUBLE = 101, |
| DXIL_INTR_SPLIT_DOUBLE = 102, |
| |
| DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103, |
| DXIL_INTR_LOAD_PATCH_CONSTANT = 104, |
| DXIL_INTR_DOMAIN_LOCATION = 105, |
| DXIL_INTR_STORE_PATCH_CONSTANT = 106, |
| DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107, |
| DXIL_INTR_PRIMITIVE_ID = 108, |
| |
| DXIL_INTR_WAVE_IS_FIRST_LANE = 110, |
| DXIL_INTR_WAVE_GET_LANE_INDEX = 111, |
| DXIL_INTR_WAVE_GET_LANE_COUNT = 112, |
| DXIL_INTR_WAVE_ANY_TRUE = 113, |
| DXIL_INTR_WAVE_ALL_TRUE = 114, |
| DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115, |
| DXIL_INTR_WAVE_ACTIVE_BALLOT = 116, |
| DXIL_INTR_WAVE_READ_LANE_AT = 117, |
| DXIL_INTR_WAVE_READ_LANE_FIRST = 118, |
| DXIL_INTR_WAVE_ACTIVE_OP = 119, |
| DXIL_INTR_WAVE_ACTIVE_BIT = 120, |
| DXIL_INTR_WAVE_PREFIX_OP = 121, |
| DXIL_INTR_QUAD_READ_LANE_AT = 122, |
| DXIL_INTR_QUAD_OP = 123, |
| |
| DXIL_INTR_LEGACY_F32TOF16 = 130, |
| DXIL_INTR_LEGACY_F16TOF32 = 131, |
| |
| DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137, |
| DXIL_INTR_VIEW_ID = 138, |
| |
| DXIL_INTR_RAW_BUFFER_LOAD = 139, |
| DXIL_INTR_RAW_BUFFER_STORE = 140, |
| |
| DXIL_INTR_DOT4_ADD_I8_PACKED = 163, |
| DXIL_INTR_DOT4_ADD_U8_PACKED = 164, |
| |
| DXIL_INTR_ANNOTATE_HANDLE = 216, |
| DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217, |
| DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218, |
| |
| DXIL_INTR_IS_HELPER_LANE = 221, |
| DXIL_INTR_SAMPLE_CMP_LEVEL = 224, |
| DXIL_INTR_SAMPLE_CMP_GRAD = 254, |
| DXIL_INTR_SAMPLE_CMP_BIAS = 255, |
| |
| DXIL_INTR_START_VERTEX_LOCATION = 256, |
| DXIL_INTR_START_INSTANCE_LOCATION = 257, |
| }; |
| |
| enum dxil_atomic_op { |
| DXIL_ATOMIC_ADD = 0, |
| DXIL_ATOMIC_AND = 1, |
| DXIL_ATOMIC_OR = 2, |
| DXIL_ATOMIC_XOR = 3, |
| DXIL_ATOMIC_IMIN = 4, |
| DXIL_ATOMIC_IMAX = 5, |
| DXIL_ATOMIC_UMIN = 6, |
| DXIL_ATOMIC_UMAX = 7, |
| DXIL_ATOMIC_EXCHANGE = 8, |
| }; |
| |
| static enum dxil_atomic_op |
| nir_atomic_to_dxil_atomic(nir_atomic_op op) |
| { |
| switch (op) { |
| case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD; |
| case nir_atomic_op_iand: return DXIL_ATOMIC_AND; |
| case nir_atomic_op_ior: return DXIL_ATOMIC_OR; |
| case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR; |
| case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN; |
| case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX; |
| case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN; |
| case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX; |
| case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE; |
| default: unreachable("Unsupported atomic op"); |
| } |
| } |
| |
| static enum dxil_rmw_op |
| nir_atomic_to_dxil_rmw(nir_atomic_op op) |
| { |
| switch (op) { |
| case nir_atomic_op_iadd: return DXIL_RMWOP_ADD; |
| case nir_atomic_op_iand: return DXIL_RMWOP_AND; |
| case nir_atomic_op_ior: return DXIL_RMWOP_OR; |
| case nir_atomic_op_ixor: return DXIL_RMWOP_XOR; |
| case nir_atomic_op_imin: return DXIL_RMWOP_MIN; |
| case nir_atomic_op_imax: return DXIL_RMWOP_MAX; |
| case nir_atomic_op_umin: return DXIL_RMWOP_UMIN; |
| case nir_atomic_op_umax: return DXIL_RMWOP_UMAX; |
| case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG; |
| default: unreachable("Unsupported atomic op"); |
| } |
| } |
| |
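| /* One contiguous range of resource bindings: the metadata record ID, the |
| * first binding (lower bound), the number of bindings in the range, and the |
| * register space. |
| */ |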
| typedef struct { |
| unsigned id; |
| unsigned binding; |
| unsigned size; |
| unsigned space; |
| } resource_array_layout; |
| |
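| /* Fills the six metadata fields shared by all resource classes (SRV, UAV, |
| * CBV, sampler); callers append the class-specific fields afterwards. |
| */ |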
| static void |
| fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields, |
| const struct dxil_type *struct_type, |
| const char *name, const resource_array_layout *layout) |
| { |
| const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type); |
| const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type); |
| |
| fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID |
| fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol |
| fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name |
| fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID |
| fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound |
| fields[5] = dxil_get_metadata_int32(m, layout->size); // range size |
| } |
| |
| static const struct dxil_mdnode * |
| emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type, |
| const char *name, const resource_array_layout *layout, |
| enum dxil_component_type comp_type, |
| enum dxil_resource_kind res_kind) |
| { |
| const struct dxil_mdnode *fields[9]; |
| |
| const struct dxil_mdnode *metadata_tag_nodes[2]; |
| |
| fill_resource_metadata(m, fields, elem_type, name, layout); |
| fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape |
| fields[7] = dxil_get_metadata_int1(m, 0); // sample count |
| if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && |
| res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { |
| metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); |
| metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); |
| fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata |
| } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) |
| fields[8] = NULL; |
| else |
| unreachable("Structured buffers not supported yet"); |
| |
| return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type, |
| const char *name, const resource_array_layout *layout, |
| enum dxil_component_type comp_type, |
| enum dxil_resource_kind res_kind, |
| enum gl_access_qualifier access) |
| { |
| const struct dxil_mdnode *fields[11]; |
| |
| const struct dxil_mdnode *metadata_tag_nodes[2]; |
| |
| fill_resource_metadata(m, fields, struct_type, name, layout); |
| fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape |
| fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent |
| fields[8] = dxil_get_metadata_int1(m, false); // has counter |
| fields[9] = dxil_get_metadata_int1(m, false); // is ROV |
| if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && |
| res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { |
| metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); |
| metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); |
| fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata |
| } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) |
| fields[10] = NULL; |
| else |
| unreachable("Structured buffers not supported yet"); |
| |
| return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type, |
| const char *name, const resource_array_layout *layout, |
| unsigned size) |
| { |
| const struct dxil_mdnode *fields[8]; |
| |
| fill_resource_metadata(m, fields, struct_type, name, layout); |
| fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size |
| fields[7] = NULL; // metadata |
| |
| return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type, |
| nir_variable *var, const resource_array_layout *layout) |
| { |
| const struct dxil_mdnode *fields[8]; |
| const struct glsl_type *type = glsl_without_array(var->type); |
| |
| fill_resource_metadata(m, fields, struct_type, var->name, layout); |
| enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ? |
| DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT; |
| fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind |
| fields[7] = NULL; // metadata |
| |
| return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); |
| } |
| |
| |
| #define MAX_SRVS 128 |
| #define MAX_UAVS 64 |
| #define MAX_CBVS 64 // ?? |
| #define MAX_SAMPLERS 64 // ?? |
| |
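| /* A translated NIR SSA def: one DXIL value per vector component. */ |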
| struct dxil_def { |
| const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS]; |
| }; |
| |
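| /* Per-shader state for the NIR-to-DXIL translation. */ |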
| struct ntd_context { |
| void *ralloc_ctx; |
| const struct nir_to_dxil_options *opts; |
| struct nir_shader *shader; |
| |
| struct dxil_module mod; |
| |
| struct util_dynarray srv_metadata_nodes; |
| const struct dxil_value *srv_handles[MAX_SRVS]; |
| |
| struct util_dynarray uav_metadata_nodes; |
| const struct dxil_value *ssbo_handles[MAX_UAVS]; |
| const struct dxil_value *image_handles[MAX_UAVS]; |
| uint32_t num_uavs; |
| |
| struct util_dynarray cbv_metadata_nodes; |
| const struct dxil_value *cbv_handles[MAX_CBVS]; |
| |
| struct util_dynarray sampler_metadata_nodes; |
| const struct dxil_value *sampler_handles[MAX_SAMPLERS]; |
| |
| struct util_dynarray resources; |
| |
| const struct dxil_mdnode *shader_property_nodes[6]; |
| size_t num_shader_property_nodes; |
| |
| struct dxil_def *defs; |
| unsigned num_defs; |
| struct hash_table *phis; |
| |
| const struct dxil_value **sharedvars; |
| const struct dxil_value **scratchvars; |
| const struct dxil_value **consts; |
| |
| nir_variable *system_value[SYSTEM_VALUE_MAX]; |
| |
| nir_function *tess_ctrl_patch_constant_func; |
| unsigned tess_input_control_point_count; |
| |
| struct dxil_func_def *main_func_def; |
| struct dxil_func_def *tess_ctrl_patch_constant_func_def; |
| unsigned unnamed_ubo_count; |
| |
| BITSET_WORD *float_types; |
| BITSET_WORD *int_types; |
| |
| const struct dxil_logger *logger; |
| }; |
| |
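| /* Most single-operand DXIL intrinsics go through dx.op.unary; bit-counting |
| * and special-float queries use dedicated overload groups. |
| */ |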
| static const char* |
| unary_func_name(enum dxil_intr intr) |
| { |
| switch (intr) { |
| case DXIL_INTR_COUNTBITS: |
| case DXIL_INTR_FIRSTBIT_HI: |
| case DXIL_INTR_FIRSTBIT_SHI: |
| case DXIL_INTR_FIRSTBIT_LO: |
| return "dx.op.unaryBits"; |
| case DXIL_INTR_ISFINITE: |
| case DXIL_INTR_ISNORMAL: |
| return "dx.op.isSpecialFloat"; |
| default: |
| return "dx.op.unary"; |
| } |
| } |
| |
| static const struct dxil_value * |
| emit_unary_call(struct ntd_context *ctx, enum overload_type overload, |
| enum dxil_intr intr, |
| const struct dxil_value *op0) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, |
| unary_func_name(intr), |
| overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| op0 |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_binary_call(struct ntd_context *ctx, enum overload_type overload, |
| enum dxil_intr intr, |
| const struct dxil_value *op0, const struct dxil_value *op1) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| op0, |
| op1 |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload, |
| enum dxil_intr intr, |
| const struct dxil_value *op0, |
| const struct dxil_value *op1, |
| const struct dxil_value *op2) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| op0, |
| op1, |
| op2 |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload, |
| enum dxil_intr intr, |
| const struct dxil_value *op0, |
| const struct dxil_value *op1, |
| const struct dxil_value *op2, |
| const struct dxil_value *op3) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| op0, |
| op1, |
| op2, |
| op3 |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_THREAD_ID); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| comp |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_threadidingroup_call(struct ntd_context *ctx, |
| const struct dxil_value *comp) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32); |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_THREAD_ID_IN_GROUP); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| comp |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_flattenedthreadidingroup_call(struct ntd_context *ctx) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32); |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32); |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_GROUP_ID); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| comp |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_raw_bufferload_call(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[2], |
| enum overload_type overload, |
| unsigned component_count, |
| unsigned alignment) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_RAW_BUFFER_LOAD); |
| const struct dxil_value *args[] = { |
| opcode, handle, coord[0], coord[1], |
| dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1), |
| dxil_module_get_int32_const(&ctx->mod, alignment), |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_bufferload_call(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[2], |
| enum overload_type overload) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_BUFFER_LOAD); |
| const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_raw_bufferstore_call(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[2], |
| const struct dxil_value *value[4], |
| const struct dxil_value *write_mask, |
| enum overload_type overload, |
| unsigned alignment) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload); |
| |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_RAW_BUFFER_STORE); |
| const struct dxil_value *args[] = { |
| opcode, handle, coord[0], coord[1], |
| value[0], value[1], value[2], value[3], |
| write_mask, |
| dxil_module_get_int32_const(&ctx->mod, alignment), |
| }; |
| |
| return dxil_emit_call_void(&ctx->mod, func, |
| args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_bufferstore_call(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[2], |
| const struct dxil_value *value[4], |
| const struct dxil_value *write_mask, |
| enum overload_type overload) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload); |
| |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_BUFFER_STORE); |
| const struct dxil_value *args[] = { |
| opcode, handle, coord[0], coord[1], |
| value[0], value[1], value[2], value[3], |
| write_mask |
| }; |
| |
| return dxil_emit_call_void(&ctx->mod, func, |
| args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_textureload_call(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[3], |
| enum overload_type overload) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload); |
| if (!func) |
| return NULL; |
| const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); |
| const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_TEXTURE_LOAD); |
| const struct dxil_value *args[] = { opcode, handle, |
| /*lod_or_sample*/ int_undef, |
| coord[0], coord[1], coord[2], |
| /* offsets */ int_undef, int_undef, int_undef}; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_texturestore_call(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[3], |
| const struct dxil_value *value[4], |
| const struct dxil_value *write_mask, |
| enum overload_type overload) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload); |
| |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, |
| DXIL_INTR_TEXTURE_STORE); |
| const struct dxil_value *args[] = { |
| opcode, handle, coord[0], coord[1], coord[2], |
| value[0], value[1], value[2], value[3], |
| write_mask |
| }; |
| |
| return dxil_emit_call_void(&ctx->mod, func, |
| args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_atomic_binop(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| enum dxil_atomic_op atomic_op, |
| const struct dxil_value *coord[3], |
| const struct dxil_value *value) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32); |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP); |
| const struct dxil_value *atomic_op_value = |
| dxil_module_get_int32_const(&ctx->mod, atomic_op); |
| const struct dxil_value *args[] = { |
| opcode, handle, atomic_op_value, |
| coord[0], coord[1], coord[2], value |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_atomic_cmpxchg(struct ntd_context *ctx, |
| const struct dxil_value *handle, |
| const struct dxil_value *coord[3], |
| const struct dxil_value *cmpval, |
| const struct dxil_value *newval) |
| { |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32); |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG); |
| const struct dxil_value *args[] = { |
| opcode, handle, coord[0], coord[1], coord[2], cmpval, newval |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
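| /* Resource handle creation for SM < 6.6: dx.op.createHandle takes the |
| * resource class, the metadata range ID and the index within that range. |
| */ |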
| static const struct dxil_value * |
| emit_createhandle_call_pre_6_6(struct ntd_context *ctx, |
| enum dxil_resource_class resource_class, |
| unsigned lower_bound, |
| unsigned upper_bound, |
| unsigned space, |
| unsigned resource_range_id, |
| const struct dxil_value *resource_range_index, |
| bool non_uniform_resource_index) |
| { |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE); |
| const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class); |
| const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id); |
| const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index); |
| if (!opcode || !resource_class_value || !resource_range_id_value || |
| !non_uniform_resource_index_value) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| resource_class_value, |
| resource_range_id_value, |
| resource_range_index, |
| non_uniform_resource_index_value |
| }; |
| |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE); |
| |
| if (!func) |
| return NULL; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_annotate_handle(struct ntd_context *ctx, |
| const struct dxil_value *unannotated_handle, |
| const struct dxil_value *res_props) |
| { |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| unannotated_handle, |
| res_props |
| }; |
| |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE); |
| |
| if (!func) |
| return NULL; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_annotate_handle_from_metadata(struct ntd_context *ctx, |
| enum dxil_resource_class resource_class, |
| unsigned resource_range_id, |
| const struct dxil_value *unannotated_handle) |
| { |
| |
| const struct util_dynarray *mdnodes; |
| switch (resource_class) { |
| case DXIL_RESOURCE_CLASS_SRV: |
| mdnodes = &ctx->srv_metadata_nodes; |
| break; |
| case DXIL_RESOURCE_CLASS_UAV: |
| mdnodes = &ctx->uav_metadata_nodes; |
| break; |
| case DXIL_RESOURCE_CLASS_CBV: |
| mdnodes = &ctx->cbv_metadata_nodes; |
| break; |
| case DXIL_RESOURCE_CLASS_SAMPLER: |
| mdnodes = &ctx->sampler_metadata_nodes; |
| break; |
| default: |
| unreachable("Invalid resource class"); |
| } |
| |
| const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id); |
| const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode); |
| if (!res_props) |
| return NULL; |
| |
| return emit_annotate_handle(ctx, unannotated_handle, res_props); |
| } |
| |
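| /* Resource handle creation for SM 6.6+: dx.op.createHandleFromBinding |
| * returns an unannotated handle that must then be annotated with the |
| * resource properties via dx.op.annotateHandle. |
| */ |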
| static const struct dxil_value * |
| emit_createhandle_and_annotate(struct ntd_context *ctx, |
| enum dxil_resource_class resource_class, |
| unsigned lower_bound, |
| unsigned upper_bound, |
| unsigned space, |
| unsigned resource_range_id, |
| const struct dxil_value *resource_range_index, |
| bool non_uniform_resource_index) |
| { |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING); |
| const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class); |
| const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index); |
| if (!opcode || !res_bind || !non_uniform_resource_index_value) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| res_bind, |
| resource_range_index, |
| non_uniform_resource_index_value |
| }; |
| |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE); |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!unannotated_handle) |
| return NULL; |
| |
| return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle); |
| } |
| |
| static const struct dxil_value * |
| emit_createhandle_call(struct ntd_context *ctx, |
| enum dxil_resource_class resource_class, |
| unsigned lower_bound, |
| unsigned upper_bound, |
| unsigned space, |
| unsigned resource_range_id, |
| const struct dxil_value *resource_range_index, |
| bool non_uniform_resource_index) |
| { |
| if (ctx->mod.minor_version < 6) |
| return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index); |
| else |
| return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index); |
| } |
| |
| static const struct dxil_value * |
| emit_createhandle_call_const_index(struct ntd_context *ctx, |
| enum dxil_resource_class resource_class, |
| unsigned lower_bound, |
| unsigned upper_bound, |
| unsigned space, |
| unsigned resource_range_id, |
| unsigned resource_range_index, |
| bool non_uniform_resource_index) |
| { |
| |
| const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index); |
| if (!resource_range_index_value) |
| return NULL; |
| |
| return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space, |
| resource_range_id, resource_range_index_value, |
| non_uniform_resource_index); |
| } |
| |
| static const struct dxil_value * |
| emit_createhandle_heap(struct ntd_context *ctx, |
| const struct dxil_value *resource_range_index, |
| bool is_sampler, |
| bool non_uniform_resource_index) |
| { |
| if (is_sampler) |
| ctx->mod.feats.sampler_descriptor_heap_indexing = true; |
| else |
| ctx->mod.feats.resource_descriptor_heap_indexing = true; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP); |
| const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler); |
| const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index); |
| if (!opcode || !sampler || !non_uniform_resource_index_value) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| resource_range_index, |
| sampler, |
| non_uniform_resource_index_value |
| }; |
| |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE); |
| |
| if (!func) |
| return NULL; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
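| /* Records a binding range in the resource table that accompanies the |
| * module; validator 1.6+ uses the v1 record layout, older validators v0. |
| */ |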
| static void |
| add_resource(struct ntd_context *ctx, enum dxil_resource_type type, |
| enum dxil_resource_kind kind, |
| const resource_array_layout *layout) |
| { |
| struct dxil_resource_v0 *resource_v0 = NULL; |
| struct dxil_resource_v1 *resource_v1 = NULL; |
| if (ctx->mod.minor_validator >= 6) { |
| resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1); |
| resource_v0 = &resource_v1->v0; |
| } else { |
| resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1); |
| } |
| resource_v0->resource_type = type; |
| resource_v0->space = layout->space; |
| resource_v0->lower_bound = layout->binding; |
| if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX) |
| resource_v0->upper_bound = UINT_MAX; |
| else |
| resource_v0->upper_bound = layout->binding + layout->size - 1; |
| if (type == DXIL_RES_UAV_TYPED || |
| type == DXIL_RES_UAV_RAW || |
| type == DXIL_RES_UAV_STRUCTURED) { |
| uint32_t new_uav_count = ctx->num_uavs + layout->size; |
| if (layout->size == 0 || new_uav_count < ctx->num_uavs) |
| ctx->num_uavs = UINT_MAX; |
| else |
| ctx->num_uavs = new_uav_count; |
| if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8) |
| ctx->mod.feats.use_64uavs = 1; |
| } |
| |
| if (resource_v1) { |
| resource_v1->resource_kind = kind; |
| /* No flags supported yet */ |
| resource_v1->resource_flags = 0; |
| } |
| } |
| |
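| /* Creates a handle for a dynamically indexed binding by searching the |
| * declared ranges of the given class for the one containing (space, |
| * binding). Ranges are stored in ctx->resources in CBV, sampler, SRV, UAV |
| * order, so the range ID is the index relative to the start of the class. |
| */ |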
| static const struct dxil_value * |
| emit_createhandle_call_dynamic(struct ntd_context *ctx, |
| enum dxil_resource_class resource_class, |
| unsigned space, |
| unsigned binding, |
| const struct dxil_value *resource_range_index, |
| bool non_uniform_resource_index) |
| { |
| unsigned offset = 0; |
| unsigned count = 0; |
| |
| unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *); |
| unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *); |
| unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *); |
| unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *); |
| |
| switch (resource_class) { |
| case DXIL_RESOURCE_CLASS_UAV: |
| offset = num_srvs + num_samplers + num_cbvs; |
| count = num_uavs; |
| break; |
| case DXIL_RESOURCE_CLASS_SRV: |
| offset = num_samplers + num_cbvs; |
| count = num_srvs; |
| break; |
| case DXIL_RESOURCE_CLASS_SAMPLER: |
| offset = num_cbvs; |
| count = num_samplers; |
| break; |
| case DXIL_RESOURCE_CLASS_CBV: |
| offset = 0; |
| count = num_cbvs; |
| break; |
| } |
| |
| unsigned resource_element_size = ctx->mod.minor_validator >= 6 ? |
| sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); |
| assert(offset + count <= ctx->resources.size / resource_element_size); |
| for (unsigned i = offset; i < offset + count; ++i) { |
| const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i); |
| if (resource->space == space && |
| resource->lower_bound <= binding && |
| resource->upper_bound >= binding) { |
| return emit_createhandle_call(ctx, resource_class, resource->lower_bound, |
| resource->upper_bound, space, |
| i - offset, |
| resource_range_index, |
| non_uniform_resource_index); |
| } |
| } |
| |
| unreachable("Resource access for undeclared range"); |
| } |
| |
| static bool |
| emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count) |
| { |
| unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *); |
| unsigned binding = var->data.binding; |
| resource_array_layout layout = {id, binding, count, var->data.descriptor_set}; |
| |
| enum dxil_component_type comp_type; |
| enum dxil_resource_kind res_kind; |
| enum dxil_resource_type res_type; |
| if (var->data.mode == nir_var_mem_ssbo) { |
| comp_type = DXIL_COMP_TYPE_INVALID; |
| res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER; |
| res_type = DXIL_RES_SRV_RAW; |
| } else { |
| comp_type = dxil_get_comp_type(var->type); |
| res_kind = dxil_get_resource_kind(var->type); |
| res_type = DXIL_RES_SRV_TYPED; |
| } |
| const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */); |
| |
| if (glsl_type_is_array(var->type)) |
| res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count); |
| |
| const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name, |
| &layout, comp_type, res_kind); |
| |
| if (!srv_meta) |
| return false; |
| |
| util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta); |
| add_resource(ctx, res_type, res_kind, &layout); |
| if (res_type == DXIL_RES_SRV_RAW) |
| ctx->mod.raw_and_structured_buffers = true; |
| |
| return true; |
| } |
| |
| static bool |
| emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count, |
| enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind, |
| enum gl_access_qualifier access, const char *name) |
| { |
| unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *); |
| resource_array_layout layout = { id, binding, count, space }; |
| |
| const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */); |
| res_type = dxil_module_get_array_type(&ctx->mod, res_type, count); |
| const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name, |
| &layout, comp_type, res_kind, access); |
| |
| if (!uav_meta) |
| return false; |
| |
| util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta); |
| if (ctx->mod.minor_validator < 6 && |
| util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8) |
| ctx->mod.feats.use_64uavs = 1; |
| |
| add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout); |
| if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) |
| ctx->mod.raw_and_structured_buffers = true; |
| if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER && |
| ctx->mod.shader_kind != DXIL_COMPUTE_SHADER) |
| ctx->mod.feats.uavs_at_every_stage = true; |
| |
| return true; |
| } |
| |
| static bool |
| emit_globals(struct ntd_context *ctx, unsigned size) |
| { |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) |
| size++; |
| |
| if (!size) |
| return true; |
| |
| if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals")) |
| return false; |
| |
| return true; |
| } |
| |
| static bool |
| emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count) |
| { |
| unsigned binding, space; |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) { |
| /* For GL, the image intrinsics are already lowered, using driver_location |
| * as the 0-based image index. Use space 1 so that we can keep using these |
| * NIR constants without having to remap them, and so they don't overlap |
| * SSBOs, which are also 0-based UAV bindings. |
| */ |
| binding = var->data.driver_location; |
| space = 1; |
| } else { |
| binding = var->data.binding; |
| space = var->data.descriptor_set; |
| } |
| enum dxil_component_type comp_type = dxil_get_comp_type(var->type); |
| enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type); |
| const char *name = var->name; |
| |
| return emit_uav(ctx, binding, space, count, comp_type, |
| util_format_get_nr_components(var->data.image.format), |
| res_kind, var->data.access, name); |
| } |
| |
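| /* Converting constants also records which optional DXIL features |
| * (min-precision, int64, doubles) the module relies on. |
| */ |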
| static const struct dxil_value * |
| get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type) |
| { |
| if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b); |
| if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32); |
| if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32); |
| if (type == mod->int16_type) { |
| mod->feats.min_precision = true; |
| return dxil_module_get_int16_const(mod, c->i16); |
| } |
| if (type == mod->int64_type) { |
| mod->feats.int64_ops = true; |
| return dxil_module_get_int64_const(mod, c->i64); |
| } |
| if (type == mod->float16_type) { |
| mod->feats.min_precision = true; |
| return dxil_module_get_float16_const(mod, c->u16); |
| } |
| if (type == mod->float64_type) { |
| mod->feats.doubles = true; |
| return dxil_module_get_double_const(mod, c->f64); |
| } |
| unreachable("Invalid type"); |
| } |
| |
| static const struct dxil_type * |
| get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type) |
| { |
| uint32_t bit_size = glsl_base_type_bit_size(type); |
| if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float) |
| return dxil_module_get_float_type(mod, bit_size); |
| return dxil_module_get_int_type(mod, bit_size); |
| } |
| |
| static const struct dxil_type * |
| get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type) |
| { |
| if (glsl_type_is_scalar(type)) |
| return get_type_for_glsl_base_type(mod, glsl_get_base_type(type)); |
| |
| if (glsl_type_is_vector(type)) |
| return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)), |
| glsl_get_vector_elements(type)); |
| |
| if (glsl_type_is_array(type)) |
| return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)), |
| glsl_array_size(type)); |
| |
| assert(glsl_type_is_struct(type)); |
| uint32_t size = glsl_get_length(type); |
| const struct dxil_type **fields = calloc(size, sizeof(const struct dxil_type *)); |
| for (uint32_t i = 0; i < size; ++i) |
| fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i)); |
| const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size); |
| free((void *)fields); |
| return ret; |
| } |
| |
| static const struct dxil_value * |
| get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type) |
| { |
| const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type); |
| if (glsl_type_is_vector_or_scalar(type)) { |
| const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type)); |
| const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS]; |
| for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i) |
| elements[i] = get_value_for_const(mod, &c->values[i], element_type); |
| if (glsl_type_is_scalar(type)) |
| return elements[0]; |
| return dxil_module_get_vector_const(mod, dxil_type, elements); |
| } |
| |
| uint32_t num_values = glsl_get_length(type); |
| assert(num_values == c->num_elements); |
| const struct dxil_value **values = calloc(num_values, sizeof(const struct dxil_value *)); |
| const struct dxil_value *ret; |
| if (glsl_type_is_array(type)) { |
| const struct glsl_type *element_type = glsl_get_array_element(type); |
| for (uint32_t i = 0; i < num_values; ++i) |
| values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type); |
| ret = dxil_module_get_array_const(mod, dxil_type, values); |
| } else { |
| for (uint32_t i = 0; i < num_values; ++i) |
| values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i)); |
| ret = dxil_module_get_struct_const(mod, dxil_type, values); |
| } |
| free((void *)values); |
| return ret; |
| } |
| |
| static bool |
| emit_global_consts(struct ntd_context *ctx) |
| { |
| uint32_t index = 0; |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) { |
| assert(var->constant_initializer); |
| var->data.driver_location = index++; |
| } |
| |
| ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index); |
| |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) { |
| if (!var->name) |
| var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location); |
| |
| const struct dxil_value *agg_vals = |
| get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type); |
| if (!agg_vals) |
| return false; |
| |
| const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, |
| dxil_value_get_type(agg_vals), |
| DXIL_AS_DEFAULT, 16, |
| agg_vals); |
| if (!gvar) |
| return false; |
| |
| ctx->consts[var->data.driver_location] = gvar; |
| } |
| |
| return true; |
| } |
| |
| static bool |
| emit_shared_vars(struct ntd_context *ctx) |
| { |
| uint32_t index = 0; |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) |
| var->data.driver_location = index++; |
| |
| ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index); |
| |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) { |
| if (!var->name) |
| var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location); |
| const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, |
| get_type_for_glsl_type(&ctx->mod, var->type), |
| DXIL_AS_GROUPSHARED, 16, |
| NULL); |
| if (!gvar) |
| return false; |
| |
| ctx->sharedvars[var->data.driver_location] = gvar; |
| } |
| |
| return true; |
| } |
| |
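| /* Declares a constant buffer of `size` 32-bit elements, modeled as a struct |
| * wrapping a float array; the metadata records the size in bytes. |
| */ |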
| static bool |
| emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space, |
| unsigned size, unsigned count, char *name) |
| { |
| assert(count != 0); |
| |
| unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *); |
| |
| const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32); |
| const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size); |
| const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name, |
| &array_type, 1); |
| // All ubo[1]s should have been lowered to ubo with static indexing |
| const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type; |
| resource_array_layout layout = {idx, binding, count, space}; |
| const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type, |
| name, &layout, 4 * size); |
| |
| if (!cbv_meta) |
| return false; |
| |
| util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta); |
| add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout); |
| |
| return true; |
| } |
| |
| static bool |
| emit_ubo_var(struct ntd_context *ctx, nir_variable *var) |
| { |
| unsigned count = 1; |
| if (glsl_type_is_array(var->type)) |
| count = glsl_get_length(var->type); |
| |
| char *name = var->name; |
| char temp_name[30]; |
| if (name && strlen(name) == 0) { |
| snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d", |
| ctx->unnamed_ubo_count++); |
| name = temp_name; |
| } |
| |
| const struct glsl_type *type = glsl_without_array(var->type); |
| assert(glsl_type_is_struct(type) || glsl_type_is_interface(type)); |
| unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4; |
| |
| return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, |
| dwords, count, name); |
| } |
| |
| static bool |
| emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count) |
| { |
| unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *); |
| unsigned binding = var->data.binding; |
| resource_array_layout layout = {id, binding, count, var->data.descriptor_set}; |
| const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); |
| const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1); |
| |
| if (glsl_type_is_array(var->type)) |
| sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count); |
| |
| const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout); |
| |
| if (!sampler_meta) |
| return false; |
| |
| util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta); |
| add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout); |
| |
| return true; |
| } |
| |
| static bool |
| emit_static_indexing_handles(struct ntd_context *ctx) |
| { |
| /* Vulkan always uses dynamic handles, from instructions in the NIR */ |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) |
| return true; |
| |
| unsigned last_res_class = -1; |
| unsigned id = 0; |
| |
| unsigned resource_element_size = ctx->mod.minor_validator >= 6 ? |
| sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); |
| for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data; |
| res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size); |
| res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) { |
| enum dxil_resource_class res_class; |
| const struct dxil_value **handle_array; |
| switch (res->resource_type) { |
| case DXIL_RES_SRV_TYPED: |
| case DXIL_RES_SRV_RAW: |
| case DXIL_RES_SRV_STRUCTURED: |
| res_class = DXIL_RESOURCE_CLASS_SRV; |
| handle_array = ctx->srv_handles; |
| break; |
| case DXIL_RES_CBV: |
| res_class = DXIL_RESOURCE_CLASS_CBV; |
| handle_array = ctx->cbv_handles; |
| break; |
| case DXIL_RES_SAMPLER: |
| res_class = DXIL_RESOURCE_CLASS_SAMPLER; |
| handle_array = ctx->sampler_handles; |
| break; |
| case DXIL_RES_UAV_RAW: |
| res_class = DXIL_RESOURCE_CLASS_UAV; |
| handle_array = ctx->ssbo_handles; |
| break; |
| case DXIL_RES_UAV_TYPED: |
| case DXIL_RES_UAV_STRUCTURED: |
| case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER: |
| res_class = DXIL_RESOURCE_CLASS_UAV; |
| handle_array = ctx->image_handles; |
| break; |
| default: |
| unreachable("Unexpected resource type"); |
| } |
| |
| if (last_res_class != res_class) |
| id = 0; |
| else |
| id++; |
| last_res_class = res_class; |
| |
| if (res->space > 1) |
| continue; |
| assert(res->space == 0 || |
| (res->space == 1 && |
| res->resource_type != DXIL_RES_UAV_RAW && |
| ctx->opts->environment == DXIL_ENVIRONMENT_GL)); |
| |
| /* CL uses dynamic handles for the "globals" UAV array, but uses static |
| * handles for UBOs, textures, and samplers. |
| */ |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_CL && |
| res->resource_type == DXIL_RES_UAV_RAW) |
| continue; |
| |
| for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) { |
| handle_array[i] = emit_createhandle_call_const_index(ctx, |
| res_class, |
| res->lower_bound, |
| res->upper_bound, |
| res->space, |
| id, |
| i, |
| false); |
| if (!handle_array[i]) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| static const struct dxil_mdnode * |
| emit_gs_state(struct ntd_context *ctx) |
| { |
| const struct dxil_mdnode *gs_state_nodes[5]; |
| const nir_shader *s = ctx->shader; |
| |
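| /* GS state entries, in order: input primitive, max output vertex count, |
| * active stream mask (at least one stream), output topology, and the |
| * number of GS instances (invocations). |
| */ |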
| gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive)); |
| gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out); |
| gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1)); |
| gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive)); |
| gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations); |
| |
| for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) { |
| if (!gs_state_nodes[i]) |
| return NULL; |
| } |
| |
| return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes)); |
| } |
| |
| static enum dxil_tessellator_domain |
| get_tessellator_domain(enum tess_primitive_mode primitive_mode) |
| { |
| switch (primitive_mode) { |
| case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD; |
| case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI; |
| case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE; |
| default: |
| unreachable("Invalid tessellator primitive mode"); |
| } |
| } |
| |
| static enum dxil_tessellator_partitioning |
| get_tessellator_partitioning(enum gl_tess_spacing spacing) |
| { |
| switch (spacing) { |
| default: |
| case TESS_SPACING_EQUAL: |
| return DXIL_TESSELLATOR_PARTITIONING_INTEGER; |
| case TESS_SPACING_FRACTIONAL_EVEN: |
| return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; |
| case TESS_SPACING_FRACTIONAL_ODD: |
| return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; |
| } |
| } |
| |
| static enum dxil_tessellator_output_primitive |
| get_tessellator_output_primitive(const struct shader_info *info) |
| { |
| if (info->tess.point_mode) |
| return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT; |
| if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) |
| return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE; |
| /* Note: GL tessellation domain is inverted from D3D, which means triangle |
| * winding needs to be inverted. |
| */ |
| if (info->tess.ccw) |
| return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW; |
| return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW; |
| } |
| |
| static const struct dxil_mdnode * |
| emit_hs_state(struct ntd_context *ctx) |
| { |
| const struct dxil_mdnode *hs_state_nodes[7]; |
| |
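| /* HS state entries, in order: patch-constant function, input control point |
| * count, output control point count, tessellator domain, partitioning, |
| * output primitive, and the maximum tessellation factor (64.0). |
| */ |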
| hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func); |
| hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count); |
| hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out); |
| hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode)); |
| hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing)); |
| hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info)); |
| hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f); |
| |
| return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_ds_state(struct ntd_context *ctx) |
| { |
| const struct dxil_mdnode *ds_state_nodes[2]; |
| |
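| /* DS state: tessellator domain plus the number of control points in the |
| * input patch (the TCS output count). |
| */ |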
| ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode)); |
| ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out); |
| |
| return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_threads(struct ntd_context *ctx) |
| { |
| const nir_shader *s = ctx->shader; |
| const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1)); |
| const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1)); |
| const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1)); |
| if (!threads_x || !threads_y || !threads_z) |
| return NULL; |
| |
| const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z }; |
| return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_wave_size(struct ntd_context *ctx) |
| { |
| const nir_shader *s = ctx->shader; |
| const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size); |
| return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_wave_size_range(struct ntd_context *ctx) |
| { |
| const nir_shader *s = ctx->shader; |
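| /* The wave-size-range node carries three entries (minimum, maximum, and |
| * preferred size); all three are set to the requested subgroup size here. |
| */ |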
| const struct dxil_mdnode *wave_size_nodes[3]; |
| wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size); |
| wave_size_nodes[1] = wave_size_nodes[0]; |
| wave_size_nodes[2] = wave_size_nodes[0]; |
| return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes)); |
| } |
| |
| static int64_t |
| get_module_flags(struct ntd_context *ctx) |
| { |
| /* See the DXIL documentation for the definition of these flags: |
| * |
| * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags |
| */ |
| |
| uint64_t flags = 0; |
| if (ctx->mod.feats.doubles) |
| flags |= (1 << 2); |
| if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT && |
| ctx->shader->info.fs.early_fragment_tests) |
| flags |= (1 << 3); |
| if (ctx->mod.raw_and_structured_buffers) |
| flags |= (1 << 4); |
| if (ctx->mod.feats.min_precision) |
| flags |= (1 << 5); |
| if (ctx->mod.feats.dx11_1_double_extensions) |
| flags |= (1 << 6); |
| if (ctx->mod.feats.array_layer_from_vs_or_ds) |
| flags |= (1 << 9); |
| if (ctx->mod.feats.inner_coverage) |
| flags |= (1 << 10); |
| if (ctx->mod.feats.stencil_ref) |
| flags |= (1 << 11); |
| if (ctx->mod.feats.tiled_resources) |
| flags |= (1 << 12); |
| if (ctx->mod.feats.typed_uav_load_additional_formats) |
| flags |= (1 << 13); |
| if (ctx->mod.feats.use_64uavs) |
| flags |= (1 << 15); |
| if (ctx->mod.feats.uavs_at_every_stage) |
| flags |= (1 << 16); |
| if (ctx->mod.feats.cs_4x_raw_sb) |
| flags |= (1 << 17); |
| if (ctx->mod.feats.rovs) |
| flags |= (1 << 18); |
| if (ctx->mod.feats.wave_ops) |
| flags |= (1 << 19); |
| if (ctx->mod.feats.int64_ops) |
| flags |= (1 << 20); |
| if (ctx->mod.feats.view_id) |
| flags |= (1 << 21); |
| if (ctx->mod.feats.barycentrics) |
| flags |= (1 << 22); |
| if (ctx->mod.feats.native_low_precision) |
| flags |= (1 << 23) | (1 << 5); |
| if (ctx->mod.feats.shading_rate) |
| flags |= (1 << 24); |
| if (ctx->mod.feats.raytracing_tier_1_1) |
| flags |= (1 << 25); |
| if (ctx->mod.feats.sampler_feedback) |
| flags |= (1 << 26); |
| if (ctx->mod.feats.atomic_int64_typed) |
| flags |= (1 << 27); |
| if (ctx->mod.feats.atomic_int64_tgsm) |
| flags |= (1 << 28); |
| if (ctx->mod.feats.derivatives_in_mesh_or_amp) |
| flags |= (1 << 29); |
| if (ctx->mod.feats.resource_descriptor_heap_indexing) |
| flags |= (1 << 30); |
| if (ctx->mod.feats.sampler_descriptor_heap_indexing) |
| flags |= (1ull << 31); |
| if (ctx->mod.feats.atomic_int64_heap_resource) |
| flags |= (1ull << 32); |
| if (ctx->mod.feats.advanced_texture_ops) |
| flags |= (1ull << 34); |
| if (ctx->mod.feats.writable_msaa) |
| flags |= (1ull << 35); |
| // Bit 36 is wave MMA |
| if (ctx->mod.feats.sample_cmp_bias_gradient) |
| flags |= (1ull << 37); |
| if (ctx->mod.feats.extended_command_info) |
| flags |= (1ull << 38); |
| |
| if (ctx->opts->disable_math_refactoring) |
| flags |= (1 << 1); |
| |
| /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616 |
| * When targeting SM6.7 with at least one UAV and no other flags set, set the |
| * resources-may-not-alias flag; otherwise the DXIL validator can read |
| * uninitialized memory in place of the missing flag and fail validation. |
| */ |
| if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0) |
| flags |= (1ull << 33); |
| |
| return flags; |
| } |
| |
| static const struct dxil_mdnode * |
| emit_entrypoint(struct ntd_context *ctx, |
| const struct dxil_func *func, const char *name, |
| const struct dxil_mdnode *signatures, |
| const struct dxil_mdnode *resources, |
| const struct dxil_mdnode *shader_props) |
| { |
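| /* Copy the entry point name into a fixed-size buffer; overly long names |
| * are silently truncated. |
| */ |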
| char truncated_name[254] = { 0 }; |
| strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1); |
| |
| const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func); |
| const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name); |
| const struct dxil_mdnode *nodes[] = { |
| func_md, |
| name_md, |
| signatures, |
| resources, |
| shader_props |
| }; |
| return dxil_get_metadata_node(&ctx->mod, nodes, |
| ARRAY_SIZE(nodes)); |
| } |
| |
| static const struct dxil_mdnode * |
| emit_resources(struct ntd_context *ctx) |
| { |
| bool emit_resources = false; |
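| /* The four slots are, in order: SRVs, UAVs, CBVs and samplers; classes |
| * with no resources stay NULL. |
| */ |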
| const struct dxil_mdnode *resources_nodes[] = { |
| NULL, NULL, NULL, NULL |
| }; |
| |
| #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *) |
| |
| if (ctx->srv_metadata_nodes.size) { |
| resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes)); |
| emit_resources = true; |
| } |
| |
| if (ctx->uav_metadata_nodes.size) { |
| resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes)); |
| emit_resources = true; |
| } |
| |
| if (ctx->cbv_metadata_nodes.size) { |
| resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes)); |
| emit_resources = true; |
| } |
| |
| if (ctx->sampler_metadata_nodes.size) { |
| resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes)); |
| emit_resources = true; |
| } |
| |
| #undef ARRAY_AND_SIZE |
| |
| return emit_resources ? |
| dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) : NULL; |
| } |
| |
| static bool |
| emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag, |
| const struct dxil_mdnode *value_node) |
| { |
| const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag); |
| if (!tag_node || !value_node) |
| return false; |
| assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2); |
| ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node; |
| ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node; |
| |
| return true; |
| } |
| |
| static bool |
| emit_metadata(struct ntd_context *ctx) |
| { |
| /* DXIL versions are 1.x for shader model 6.x */ |
| assert(ctx->mod.major_version == 6); |
| unsigned dxilMajor = 1; |
| unsigned dxilMinor = ctx->mod.minor_version; |
| unsigned valMajor = ctx->mod.major_validator; |
| unsigned valMinor = ctx->mod.minor_validator; |
| if (!emit_llvm_ident(&ctx->mod) || |
| !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) || |
| !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) || |
| !emit_dx_shader_model(&ctx->mod)) |
| return false; |
| |
| const struct dxil_func_def *main_func_def = ctx->main_func_def; |
| if (!main_func_def) |
| return false; |
| const struct dxil_func *main_func = main_func_def->func; |
| |
| const struct dxil_mdnode *resources_node = emit_resources(ctx); |
| |
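| /* Build a minimal dx.typeAnnotations entry for the entry point: tag 1 |
| * (function annotation), the function itself, and an annotation list that |
| * carries no actual type information, which is enough to satisfy the |
| * validator. |
| */ |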
| const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func); |
| const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0); |
| |
| const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0); |
| const struct dxil_mdnode *nodes_4_27_27[] = { |
| node4, node27, node27 |
| }; |
| const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27, |
| ARRAY_SIZE(nodes_4_27_27)); |
| |
| const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1); |
| |
| const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1); |
| const struct dxil_mdnode *main_type_annotation_nodes[] = { |
| node3, main_entrypoint, node29 |
| }; |
| const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes, |
| ARRAY_SIZE(main_type_annotation_nodes)); |
| |
| if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx))) |
| return false; |
| } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) { |
| ctx->tess_input_control_point_count = 32; |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { |
| if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) { |
| ctx->tess_input_control_point_count = glsl_array_size(var->type); |
| break; |
| } |
| } |
| |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx))) |
| return false; |
| } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) { |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx))) |
| return false; |
| } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) { |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx))) |
| return false; |
| if (ctx->mod.minor_version >= 6 && |
| ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) { |
| if (ctx->mod.minor_version < 8) { |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx))) |
| return false; |
| } else { |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx))) |
| return false; |
| } |
| } |
| } |
| |
| uint64_t flags = get_module_flags(ctx); |
| if (flags != 0) { |
| if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags))) |
| return false; |
| } |
| const struct dxil_mdnode *shader_properties = NULL; |
| if (ctx->num_shader_property_nodes > 0) { |
| shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes, |
| ctx->num_shader_property_nodes); |
| if (!shader_properties) |
| return false; |
| } |
| |
| nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader); |
| const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func, |
| entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties); |
| if (!dx_entry_point) |
| return false; |
| |
| if (resources_node) { |
| const struct dxil_mdnode *dx_resources = resources_node; |
| dxil_add_metadata_named_node(&ctx->mod, "dx.resources", |
| &dx_resources, 1); |
| } |
| |
| if (ctx->mod.minor_version >= 2 && |
| dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) { |
| const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32); |
| if (!i32_type) |
| return false; |
| |
| const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size); |
| if (!array_type) |
| return false; |
| |
| const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size); |
| if (!array_entries) |
| return false; |
| |
| for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i) |
| array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]); |
| const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries); |
| free((void *)array_entries); |
| |
| const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val); |
| if (!view_id_state_val) |
| return false; |
| |
| const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1); |
| |
| dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1); |
| } |
| |
| const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation }; |
| return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations", |
| dx_type_annotations, |
| ARRAY_SIZE(dx_type_annotations)) && |
| dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints", |
| &dx_entry_point, 1); |
| } |
| |
| static const struct dxil_value * |
| bitcast_to_int(struct ntd_context *ctx, unsigned bit_size, |
| const struct dxil_value *value) |
| { |
| const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size); |
| if (!type) |
| return NULL; |
| |
| return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); |
| } |
| |
| static const struct dxil_value * |
| bitcast_to_float(struct ntd_context *ctx, unsigned bit_size, |
| const struct dxil_value *value) |
| { |
| const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size); |
| if (!type) |
| return NULL; |
| |
| return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); |
| } |
| |
| static bool |
| is_phi_src(nir_def *ssa) |
| { |
| nir_foreach_use(src, ssa) |
| if (nir_src_parent_instr(src)->type == nir_instr_type_phi) |
| return true; |
| return false; |
| } |
| |
| static void |
| store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan, |
| const struct dxil_value *value) |
| { |
| assert(ssa->index < ctx->num_defs); |
| assert(chan < ssa->num_components); |
| /* Insert bitcasts for phi srcs in the parent block */ |
| if (is_phi_src(ssa)) { |
| /* Prefer ints over floats if it could be both or if we have no type info */ |
| nir_alu_type expect_type = |
| BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int : |
| (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float : |
| nir_type_int); |
| assert(ssa->bit_size != 1 || expect_type == nir_type_int); |
| if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value))) |
| value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, |
| expect_type == nir_type_int ? |
| dxil_module_get_int_type(&ctx->mod, ssa->bit_size) : |
| dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value); |
| if (ssa->bit_size == 64) { |
| if (expect_type == nir_type_int) |
| ctx->mod.feats.int64_ops = true; |
| if (expect_type == nir_type_float) |
| ctx->mod.feats.doubles = true; |
| } |
| } |
| ctx->defs[ssa->index].chans[chan] = value; |
| } |
| |
| static void |
| store_def(struct ntd_context *ctx, nir_def *def, unsigned chan, |
| const struct dxil_value *value) |
| { |
| const struct dxil_type *type = dxil_value_get_type(value); |
| if (type == ctx->mod.float64_type) |
| ctx->mod.feats.doubles = true; |
| if (type == ctx->mod.float16_type || |
| type == ctx->mod.int16_type) |
| ctx->mod.feats.min_precision = true; |
| if (type == ctx->mod.int64_type) |
| ctx->mod.feats.int64_ops = true; |
| store_ssa_def(ctx, def, chan, value); |
| } |
| |
| static void |
| store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan, |
| const struct dxil_value *value) |
| { |
| store_def(ctx, &alu->def, chan, value); |
| } |
| |
| static const struct dxil_value * |
| get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan) |
| { |
| assert(ssa->index < ctx->num_defs); |
| assert(chan < ssa->num_components); |
| assert(ctx->defs[ssa->index].chans[chan]); |
| return ctx->defs[ssa->index].chans[chan]; |
| } |
| |
| static const struct dxil_value * |
| get_src(struct ntd_context *ctx, nir_src *src, unsigned chan, |
| nir_alu_type type) |
| { |
| const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan); |
| |
| const int bit_size = nir_src_bit_size(*src); |
| |
| switch (nir_alu_type_get_base_type(type)) { |
| case nir_type_int: |
| case nir_type_uint: { |
| const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size); |
| /* nothing to do */ |
| if (dxil_value_type_equal_to(value, expect_type)) { |
| assert(bit_size != 64 || ctx->mod.feats.int64_ops); |
| return value; |
| } |
| if (bit_size == 64) { |
| assert(ctx->mod.feats.doubles); |
| ctx->mod.feats.int64_ops = true; |
| } |
| if (bit_size == 16) |
| ctx->mod.feats.native_low_precision = true; |
| assert(dxil_value_type_bitsize_equal_to(value, bit_size)); |
| return bitcast_to_int(ctx, bit_size, value); |
| } |
| |
| case nir_type_float: |
| assert(nir_src_bit_size(*src) >= 16); |
| if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) { |
| assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles); |
| return value; |
| } |
| if (bit_size == 64) { |
| assert(ctx->mod.feats.int64_ops); |
| ctx->mod.feats.doubles = true; |
| } |
| if (bit_size == 16) |
| ctx->mod.feats.native_low_precision = true; |
| assert(dxil_value_type_bitsize_equal_to(value, bit_size)); |
| return bitcast_to_float(ctx, bit_size, value); |
| |
| case nir_type_bool: |
| if (!dxil_value_type_bitsize_equal_to(value, 1)) { |
| return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC, |
| dxil_module_get_int_type(&ctx->mod, 1), value); |
| } |
| return value; |
| |
| default: |
| unreachable("unexpected nir_alu_type"); |
| } |
| } |
| |
| static const struct dxil_value * |
| get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) |
| { |
| unsigned chan = alu->src[src].swizzle[0]; |
| return get_src(ctx, &alu->src[src].src, chan, |
| nir_op_infos[alu->op].input_types[src]); |
| } |
| |
| static bool |
| emit_binop(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_bin_opcode opcode, |
| const struct dxil_value *op0, const struct dxil_value *op1) |
| { |
| bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float; |
| |
| enum dxil_opt_flags flags = 0; |
| if (is_float_op && !alu->exact) |
| flags |= DXIL_UNSAFE_ALGEBRA; |
| |
| const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_shift(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_bin_opcode opcode, |
| const struct dxil_value *op0, const struct dxil_value *op1) |
| { |
| unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src); |
| unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src); |
| |
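| /* Shifts by amounts >= the bit width are undefined in LLVM/DXIL, so mask |
| * non-constant shift amounts to bit_size - 1 (and fold constant amounts |
| * with the same mask). |
| */ |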
| uint64_t shift_mask = op0_bit_size - 1; |
| if (!nir_src_is_const(alu->src[1].src)) { |
| if (op0_bit_size != op1_bit_size) { |
| const struct dxil_type *type = |
| dxil_module_get_int_type(&ctx->mod, op0_bit_size); |
| enum dxil_cast_opcode cast_op = |
| op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC; |
| op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1); |
| } |
| op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, |
| op1, |
| dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size), |
| 0); |
| } else { |
| uint64_t val = nir_scalar_as_uint( |
| nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1)); |
| op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size); |
| } |
| |
| const struct dxil_value *v = |
| dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_cmp_pred pred, |
| const struct dxil_value *op0, const struct dxil_value *op1) |
| { |
| const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static enum dxil_cast_opcode |
| get_cast_op(nir_alu_instr *alu) |
| { |
| unsigned dst_bits = alu->def.bit_size; |
| unsigned src_bits = nir_src_bit_size(alu->src[0].src); |
| |
| switch (alu->op) { |
| /* bool -> int */ |
| case nir_op_b2i16: |
| case nir_op_b2i32: |
| case nir_op_b2i64: |
| return DXIL_CAST_ZEXT; |
| |
| /* float -> float */ |
| case nir_op_f2f16_rtz: |
| case nir_op_f2f16: |
| case nir_op_f2fmp: |
| case nir_op_f2f32: |
| case nir_op_f2f64: |
| assert(dst_bits != src_bits); |
| if (dst_bits < src_bits) |
| return DXIL_CAST_FPTRUNC; |
| else |
| return DXIL_CAST_FPEXT; |
| |
| /* int -> int */ |
| case nir_op_i2i1: |
| case nir_op_i2i16: |
| case nir_op_i2imp: |
| case nir_op_i2i32: |
| case nir_op_i2i64: |
| assert(dst_bits != src_bits); |
| if (dst_bits < src_bits) |
| return DXIL_CAST_TRUNC; |
| else |
| return DXIL_CAST_SEXT; |
| |
| /* uint -> uint */ |
| case nir_op_u2u1: |
| case nir_op_u2u16: |
| case nir_op_u2u32: |
| case nir_op_u2u64: |
| assert(dst_bits != src_bits); |
| if (dst_bits < src_bits) |
| return DXIL_CAST_TRUNC; |
| else |
| return DXIL_CAST_ZEXT; |
| |
| /* float -> int */ |
| case nir_op_f2i16: |
| case nir_op_f2imp: |
| case nir_op_f2i32: |
| case nir_op_f2i64: |
| return DXIL_CAST_FPTOSI; |
| |
| /* float -> uint */ |
| case nir_op_f2u16: |
| case nir_op_f2ump: |
| case nir_op_f2u32: |
| case nir_op_f2u64: |
| return DXIL_CAST_FPTOUI; |
| |
| /* int -> float */ |
| case nir_op_i2f16: |
| case nir_op_i2fmp: |
| case nir_op_i2f32: |
| case nir_op_i2f64: |
| return DXIL_CAST_SITOFP; |
| |
| /* uint -> float */ |
| case nir_op_u2f16: |
| case nir_op_u2fmp: |
| case nir_op_u2f32: |
| case nir_op_u2f64: |
| return DXIL_CAST_UITOFP; |
| |
| default: |
| unreachable("unexpected cast op"); |
| } |
| } |
| |
| static const struct dxil_type * |
| get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu) |
| { |
| unsigned dst_bits = alu->def.bit_size; |
| switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) { |
| case nir_type_bool: |
| assert(dst_bits == 1); |
| FALLTHROUGH; |
| case nir_type_int: |
| case nir_type_uint: |
| return dxil_module_get_int_type(&ctx->mod, dst_bits); |
| |
| case nir_type_float: |
| return dxil_module_get_float_type(&ctx->mod, dst_bits); |
| |
| default: |
| unreachable("unknown nir_alu_type"); |
| } |
| } |
| |
| static bool |
| is_double(nir_alu_type alu_type, unsigned bit_size) |
| { |
| return nir_alu_type_get_base_type(alu_type) == nir_type_float && |
| bit_size == 64; |
| } |
| |
| static bool |
| emit_cast(struct ntd_context *ctx, nir_alu_instr *alu, |
| const struct dxil_value *value) |
| { |
| enum dxil_cast_opcode opcode = get_cast_op(alu); |
| const struct dxil_type *type = get_cast_dest_type(ctx, alu); |
| if (!type) |
| return false; |
| |
| const nir_op_info *info = &nir_op_infos[alu->op]; |
| switch (opcode) { |
| case DXIL_CAST_UITOFP: |
| case DXIL_CAST_SITOFP: |
| if (is_double(info->output_type, alu->def.bit_size)) |
| ctx->mod.feats.dx11_1_double_extensions = true; |
| break; |
| case DXIL_CAST_FPTOUI: |
| case DXIL_CAST_FPTOSI: |
| if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src))) |
| ctx->mod.feats.dx11_1_double_extensions = true; |
| break; |
| default: |
| break; |
| } |
| |
| if (alu->def.bit_size == 16) { |
| switch (alu->op) { |
| case nir_op_f2fmp: |
| case nir_op_i2imp: |
| case nir_op_f2imp: |
| case nir_op_f2ump: |
| case nir_op_i2fmp: |
| case nir_op_u2fmp: |
| break; |
| default: |
| ctx->mod.feats.native_low_precision = true; |
| } |
| } |
| |
| const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type, |
| value); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static enum overload_type |
| get_overload(nir_alu_type alu_type, unsigned bit_size) |
| { |
| switch (nir_alu_type_get_base_type(alu_type)) { |
| case nir_type_int: |
| case nir_type_uint: |
| case nir_type_bool: |
| switch (bit_size) { |
| case 1: return DXIL_I1; |
| case 16: return DXIL_I16; |
| case 32: return DXIL_I32; |
| case 64: return DXIL_I64; |
| default: |
| unreachable("unexpected bit_size"); |
| } |
| case nir_type_float: |
| switch (bit_size) { |
| case 16: return DXIL_F16; |
| case 32: return DXIL_F32; |
| case 64: return DXIL_F64; |
| default: |
| unreachable("unexpected bit_size"); |
| } |
| case nir_type_invalid: |
| return DXIL_NONE; |
| default: |
| unreachable("unexpected output type"); |
| } |
| } |
| |
| static enum overload_type |
| get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr, |
| enum overload_type default_type) |
| { |
| if (BITSET_TEST(ctx->int_types, intr->def.index)) |
| return get_overload(nir_type_int, intr->def.bit_size); |
| if (BITSET_TEST(ctx->float_types, intr->def.index)) |
| return get_overload(nir_type_float, intr->def.bit_size); |
| return default_type; |
| } |
| |
| static enum overload_type |
| get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr, |
| nir_alu_type alu_type) |
| { |
| return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size)); |
| } |
| |
| static bool |
| emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_intr intr, const struct dxil_value *op) |
| { |
| const nir_op_info *info = &nir_op_infos[alu->op]; |
| unsigned src_bits = nir_src_bit_size(alu->src[0].src); |
| enum overload_type overload = get_overload(info->input_types[0], src_bits); |
| |
| const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_intr intr, |
| const struct dxil_value *op0, const struct dxil_value *op1) |
| { |
| const nir_op_info *info = &nir_op_infos[alu->op]; |
| assert(info->output_type == info->input_types[0]); |
| assert(info->output_type == info->input_types[1]); |
| unsigned dst_bits = alu->def.bit_size; |
| assert(nir_src_bit_size(alu->src[0].src) == dst_bits); |
| assert(nir_src_bit_size(alu->src[1].src) == dst_bits); |
| enum overload_type overload = get_overload(info->output_type, dst_bits); |
| |
| const struct dxil_value *v = emit_binary_call(ctx, overload, intr, |
| op0, op1); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_intr intr, |
| const struct dxil_value *op0, |
| const struct dxil_value *op1, |
| const struct dxil_value *op2) |
| { |
| const nir_op_info *info = &nir_op_infos[alu->op]; |
| unsigned dst_bits = alu->def.bit_size; |
| assert(nir_src_bit_size(alu->src[0].src) == dst_bits); |
| assert(nir_src_bit_size(alu->src[1].src) == dst_bits); |
| assert(nir_src_bit_size(alu->src[2].src) == dst_bits); |
| |
| assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits)); |
| assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits)); |
| assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits)); |
| |
| enum overload_type overload = get_overload(info->output_type, dst_bits); |
| |
| const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr, |
| op0, op1, op2); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr, |
| enum dxil_intr dxil_intr) |
| { |
| const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float); |
| enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size); |
| const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src); |
| if (!v) |
| return false; |
| store_def(ctx, &intr->def, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu, |
| const struct dxil_value *base, |
| const struct dxil_value *insert, |
| const struct dxil_value *offset, |
| const struct dxil_value *width) |
| { |
| /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */ |
| const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI, |
| width, offset, insert, base); |
| if (!v) |
| return false; |
| |
| /* DXIL uses only the 5 LSBs of width/offset. Special-case width >= 32 to copy the insert value through unchanged. */ |
| const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE, |
| width, dxil_module_get_int32_const(&ctx->mod, 32)); |
| v = dxil_emit_select(&ctx->mod, compare_width, insert, v); |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu, |
| enum dxil_intr intr, |
| const struct dxil_value *src0, |
| const struct dxil_value *src1, |
| const struct dxil_value *accum) |
| { |
| const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32); |
| if (!f) |
| return false; |
| const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 }; |
| const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs)); |
| if (!v) |
| return false; |
| |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_select(struct ntd_context *ctx, nir_alu_instr *alu, |
| const struct dxil_value *sel, |
| const struct dxil_value *val_true, |
| const struct dxil_value *val_false) |
| { |
| assert(sel); |
| assert(val_true); |
| assert(val_false); |
| |
| const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false); |
| if (!v) |
| return false; |
| |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) |
| { |
| assert(val); |
| |
| struct dxil_module *m = &ctx->mod; |
| |
| const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00); |
| const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0); |
| |
| if (!c0 || !c1) |
| return false; |
| |
| return emit_select(ctx, alu, val, c1, c0); |
| } |
| |
| static bool |
| emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) |
| { |
| assert(val); |
| |
| struct dxil_module *m = &ctx->mod; |
| |
| const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f); |
| const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f); |
| |
| if (!c0 || !c1) |
| return false; |
| |
| return emit_select(ctx, alu, val, c1, c0); |
| } |
| |
| static bool |
| emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) |
| { |
| assert(val); |
| |
| struct dxil_module *m = &ctx->mod; |
| |
| const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0); |
| const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0); |
| |
| if (!c0 || !c1) |
| return false; |
| |
| ctx->mod.feats.doubles = 1; |
| return emit_select(ctx, alu, val, c1, c0); |
| } |
| |
| static bool |
| emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift) |
| { |
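| /* legacyF16ToF32 converts the low 16 bits of its operand; for the high |
| * (.y) half, shift the packed word right by 16 first. |
| */ |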
| if (shift) { |
| val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val, |
| dxil_module_get_int32_const(&ctx->mod, 16), 0); |
| if (!val) |
| return false; |
| } |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, |
| "dx.op.legacyF16ToF32", |
| DXIL_NONE); |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32); |
| if (!opcode) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| val |
| }; |
| |
| const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!v) |
| return false; |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, |
| "dx.op.legacyF32ToF16", |
| DXIL_NONE); |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16); |
| if (!opcode) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| val0 |
| }; |
| |
| const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!v) |
| return false; |
| |
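| /* The low half is always converted; only convert, shift and OR in the |
| * high half when the second source can be nonzero. |
| */ |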
| if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) { |
| args[1] = val1; |
| const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!v_high) |
| return false; |
| |
| v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high, |
| dxil_module_get_int32_const(&ctx->mod, 16), 0); |
| if (!v_high) |
| return false; |
| |
| v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0); |
| if (!v) |
| return false; |
| } |
| |
| store_alu_dest(ctx, alu, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs) |
| { |
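| /* NIR vecN sources are already scalar DXIL values, so just record each |
| * source scalar as one destination component; no DXIL instruction is |
| * emitted. |
| */ |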
| for (unsigned i = 0; i < num_inputs; i++) { |
| const struct dxil_value *src = |
| get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]); |
| if (!src) |
| return false; |
| |
| store_alu_dest(ctx, alu, i, src); |
| } |
| return true; |
| } |
| |
| static bool |
| emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64); |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE); |
| if (!opcode) |
| return false; |
| |
| const struct dxil_value *args[3] = { |
| opcode, |
| get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32), |
| get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32), |
| }; |
| if (!args[1] || !args[2]) |
| return false; |
| |
| const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!v) |
| return false; |
| store_def(ctx, &alu->def, 0, v); |
| return true; |
| } |
| |
| static bool |
| emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64); |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE); |
| if (!opcode) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64) |
| }; |
| if (!args[1]) |
| return false; |
| |
| const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!v) |
| return false; |
| |
| const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0); |
| const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1); |
| if (!hi || !lo) |
| return false; |
| |
| store_def(ctx, &alu->def, 0, hi); |
| store_def(ctx, &alu->def, 1, lo); |
| return true; |
| } |
| |
| static bool |
| emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) |
| { |
| /* handle vec-instructions first; they are the only ones that produce |
| * vector results. |
| */ |
| switch (alu->op) { |
| case nir_op_vec2: |
| case nir_op_vec3: |
| case nir_op_vec4: |
| case nir_op_vec8: |
| case nir_op_vec16: |
| return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs); |
| case nir_op_mov: { |
| assert(alu->def.num_components == 1); |
| store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx, |
| alu->src->src.ssa, alu->src->swizzle[0])); |
| return true; |
| } |
| case nir_op_pack_double_2x32_dxil: |
| return emit_make_double(ctx, alu); |
| case nir_op_unpack_double_2x32_dxil: |
| return emit_split_double(ctx, alu); |
| case nir_op_bcsel: { |
| /* Handled here to avoid forcing a bitcast to int, since bcsel is used for both ints and floats. |
| * Ideally the back-typing made both sources match; if it didn't, explicitly reuse src1's type for src2. */ |
| const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]); |
| nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1)); |
| return emit_select(ctx, alu, |
| get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool), |
| src1, |
| get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type)); |
| } |
| default: |
| /* silence warnings */ |
| ; |
| } |
| |
| /* other ops should be scalar */ |
| const struct dxil_value *src[4]; |
| assert(nir_op_infos[alu->op].num_inputs <= 4); |
| for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { |
| src[i] = get_alu_src(ctx, alu, i); |
| if (!src[i]) |
| return false; |
| } |
| |
| switch (alu->op) { |
| case nir_op_iadd: |
| case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]); |
| |
| case nir_op_isub: |
| case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]); |
| |
| case nir_op_imul: |
| case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]); |
| |
| case nir_op_fdiv: |
| if (alu->def.bit_size == 64) |
| ctx->mod.feats.dx11_1_double_extensions = 1; |
| return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]); |
| |
| case nir_op_idiv: |
| case nir_op_udiv: |
| if (nir_src_is_const(alu->src[1].src)) { |
| /* It's illegal to emit a literal divide by 0 in DXIL */ |
| nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1); |
| if (nir_scalar_as_int(divisor) == 0) { |
| store_alu_dest(ctx, alu, 0, |
| dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size)); |
| return true; |
| } |
| } |
| return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]); |
| |
| case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]); |
| case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); |
| case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); |
| case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]); |
| case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]); |
| case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]); |
| case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]); |
| case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]); |
| case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]); |
| case nir_op_inot: { |
| unsigned bit_size = alu->def.bit_size; |
| intmax_t val = bit_size == 1 ? 1 : -1; |
| const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size); |
| return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one); |
| } |
| case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]); |
| case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]); |
| case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]); |
| case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]); |
| case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]); |
| case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]); |
| case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]); |
| case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]); |
| case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]); |
| case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]); |
| case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]); |
| case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]); |
| case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]); |
| case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]); |
| case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]); |
| case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]); |
| case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]); |
| case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]); |
| case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]); |
| case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]); |
| case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]); |
| |
| case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]); |
| case nir_op_frcp: { |
| const struct dxil_value *one; |
| switch (alu->def.bit_size) { |
| case 16: |
| one = dxil_module_get_float16_const(&ctx->mod, 0x3C00); |
| break; |
| case 32: |
| one = dxil_module_get_float_const(&ctx->mod, 1.0f); |
| break; |
| case 64: |
| one = dxil_module_get_double_const(&ctx->mod, 1.0); |
| break; |
| default: unreachable("Invalid float size"); |
| } |
| return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]); |
| } |
| case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]); |
| case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]); |
| case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]); |
| case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]); |
| case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]); |
| case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]); |
| case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]); |
| case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]); |
| case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]); |
| case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]); |
| case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]); |
| case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]); |
| case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); |
| case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); |
| case nir_op_ffma: |
| if (alu->def.bit_size == 64) |
| ctx->mod.feats.dx11_1_double_extensions = 1; |
| return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]); |
| |
| case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]); |
| case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]); |
| case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]); |
| |
| case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false); |
| case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true); |
| case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]); |
| |
| case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]); |
| case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]); |
| |
| case nir_op_i2i1: |
| case nir_op_u2u1: |
| case nir_op_b2i16: |
| case nir_op_i2i16: |
| case nir_op_i2imp: |
| case nir_op_f2i16: |
| case nir_op_f2imp: |
| case nir_op_f2u16: |
| case nir_op_f2ump: |
| case nir_op_u2u16: |
| case nir_op_u2f16: |
| case nir_op_u2fmp: |
| case nir_op_i2f16: |
| case nir_op_i2fmp: |
| case nir_op_f2f16_rtz: |
| case nir_op_f2f16: |
| case nir_op_f2fmp: |
| case nir_op_b2i32: |
| case nir_op_f2f32: |
| case nir_op_f2i32: |
| case nir_op_f2u32: |
| case nir_op_i2f32: |
| case nir_op_i2i32: |
| case nir_op_u2f32: |
| case nir_op_u2u32: |
| case nir_op_b2i64: |
| case nir_op_f2f64: |
| case nir_op_f2i64: |
| case nir_op_f2u64: |
| case nir_op_i2f64: |
| case nir_op_i2i64: |
| case nir_op_u2f64: |
| case nir_op_u2u64: |
| return emit_cast(ctx, alu, src[0]); |
| |
| case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]); |
| case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]); |
| case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]); |
| default: |
| log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction", |
| &alu->instr); |
| return false; |
| } |
| } |
| |
| static const struct dxil_value * |
| load_ubo(struct ntd_context *ctx, const struct dxil_value *handle, |
| const struct dxil_value *offset, enum overload_type overload) |
| { |
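| /* cbufferLoadLegacy returns one 16-byte constant-buffer row; the offset |
| * operand is a row index, not a byte offset. |
| */ |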
| assert(handle && offset); |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| opcode, handle, offset |
| }; |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload); |
| if (!func) |
| return NULL; |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope) |
| { |
| const struct dxil_value *opcode, *mode; |
| const struct dxil_func *func; |
| uint32_t flags = 0; |
| |
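| /* Map NIR barrier semantics onto DXIL barrier mode bits: a workgroup |
| * execution scope adds a thread-group sync; UAV-visible modes at device |
| * scope (or outside compute) add a global UAV fence, otherwise a |
| * group-local UAV fence is used; shared memory in compute adds a TGSM |
| * fence. |
| */ |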
| if (execution_scope == SCOPE_WORKGROUP) |
| flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP; |
| |
| bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER; |
| |
| if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) && |
| (mem_scope > SCOPE_WORKGROUP || !is_compute)) { |
| flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL; |
| } else { |
| flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP; |
| } |
| |
| if ((modes & nir_var_mem_shared) && is_compute) |
| flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE; |
| |
| func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE); |
| if (!func) |
| return false; |
| |
| opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER); |
| if (!opcode) |
| return false; |
| |
| mode = dxil_module_get_int32_const(&ctx->mod, flags); |
| if (!mode) |
| return false; |
| |
| const struct dxil_value *args[] = { opcode, mode }; |
| |
| return dxil_emit_call_void(&ctx->mod, func, |
| args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| return emit_barrier_impl(ctx, |
| nir_intrinsic_memory_modes(intr), |
| nir_intrinsic_execution_scope(intr), |
| nir_intrinsic_memory_scope(intr)); |
| } |
| |
| static bool |
| emit_load_global_invocation_id(struct ntd_context *ctx, |
| nir_intrinsic_instr *intr) |
| { |
| nir_component_mask_t comps = nir_def_components_read(&intr->def); |
| |
| for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { |
| if (comps & (1 << i)) { |
| const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); |
| if (!idx) |
| return false; |
| const struct dxil_value *globalid = emit_threadid_call(ctx, idx); |
| |
| if (!globalid) |
| return false; |
| |
| store_def(ctx, &intr->def, i, globalid); |
| } |
| } |
| return true; |
| } |
| |
| static bool |
| emit_load_local_invocation_id(struct ntd_context *ctx, |
| nir_intrinsic_instr *intr) |
| { |
| nir_component_mask_t comps = nir_def_components_read(&intr->def); |
| |
| for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { |
| if (comps & (1 << i)) { |
| const struct dxil_value |
| *idx = dxil_module_get_int32_const(&ctx->mod, i); |
| if (!idx) |
| return false; |
| const struct dxil_value |
| *threadidingroup = emit_threadidingroup_call(ctx, idx); |
| if (!threadidingroup) |
| return false; |
| store_def(ctx, &intr->def, i, threadidingroup); |
| } |
| } |
| return true; |
| } |
| |
| static bool |
| emit_load_local_invocation_index(struct ntd_context *ctx, |
| nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value |
| *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx); |
| if (!flattenedthreadidingroup) |
| return false; |
| store_def(ctx, &intr->def, 0, flattenedthreadidingroup); |
| |
| return true; |
| } |
| |
| static bool |
| emit_load_local_workgroup_id(struct ntd_context *ctx, |
| nir_intrinsic_instr *intr) |
| { |
| nir_component_mask_t comps = nir_def_components_read(&intr->def); |
| |
| for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { |
| if (comps & (1 << i)) { |
| const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); |
| if (!idx) |
| return false; |
| const struct dxil_value *groupid = emit_groupid_call(ctx, idx); |
| if (!groupid) |
| return false; |
| store_def(ctx, &intr->def, i, groupid); |
| } |
| } |
| return true; |
| } |
| |
| static const struct dxil_value * |
| call_unary_external_function(struct ntd_context *ctx, |
| const char *name, |
| int32_t dxil_intr, |
| enum overload_type overload) |
| { |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, name, overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *opcode = |
| dxil_module_get_int32_const(&ctx->mod, dxil_intr); |
| if (!opcode) |
| return NULL; |
| |
| const struct dxil_value *args[] = {opcode}; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_load_unary_external_function(struct ntd_context *ctx, |
| nir_intrinsic_instr *intr, const char *name, |
| int32_t dxil_intr, |
| nir_alu_type type) |
| { |
| const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr, |
| get_overload(type, intr->def.bit_size)); |
| store_def(ctx, &intr->def, 0, value); |
| |
| return true; |
| } |
| |
| static bool |
| emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *value = call_unary_external_function(ctx, |
| "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32); |
| |
| /* Mask coverage with (1 << sample index). Note: this is done as an AND to handle extrapolation cases. */ |
| if (ctx->mod.info.has_per_sample_input) { |
| value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value, |
| dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, |
| dxil_module_get_int32_const(&ctx->mod, 1), |
| call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0); |
| } |
| |
| store_def(ctx, &intr->def, 0, value); |
| return true; |
| } |
| |
| static bool |
| emit_load_tess_coord(struct ntd_context *ctx, |
| nir_intrinsic_instr *intr) |
| { |
| const struct dxil_func *func = |
| dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32); |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION); |
| if (!opcode) |
| return false; |
| |
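| /* domainLocation returns one coordinate per call: triangles use three |
| * domain coordinates, other domains use two, and any remaining NIR |
| * components are padded with 0.0. |
| */ |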
| unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2; |
| for (unsigned i = 0; i < num_coords; ++i) { |
| unsigned component_idx = i; |
| |
| const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx); |
| if (!component) |
| return false; |
| |
| const struct dxil_value *args[] = { opcode, component }; |
| |
| const struct dxil_value *value = |
| dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| store_def(ctx, &intr->def, i, value); |
| } |
| |
| for (unsigned i = num_coords; i < intr->def.num_components; ++i) { |
| const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f); |
| store_def(ctx, &intr->def, i, value); |
| } |
| |
| return true; |
| } |
| |
| static const struct dxil_value * |
| get_int32_undef(struct dxil_module *m) |
| { |
| const struct dxil_type *int32_type = |
| dxil_module_get_int_type(m, 32); |
| if (!int32_type) |
| return NULL; |
| |
| return dxil_module_get_undef(m, int32_type); |
| } |
| |
| static const struct dxil_value * |
| get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, |
| enum dxil_resource_kind kind) |
| { |
| /* This source might be one of: |
| * 1. Constant resource index - just look it up in precomputed handle arrays |
| * If it's null in that array, create a handle |
| * 2. A handle from load_vulkan_descriptor - just get the stored SSA value |
| * 3. Dynamic resource index - create a handle for it here |
| */ |
| assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32); |
| nir_const_value *const_block_index = nir_src_as_const_value(*src); |
| const struct dxil_value *handle_entry = NULL; |
| if (const_block_index) { |
| assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN); |
| switch (kind) { |
| case DXIL_RESOURCE_KIND_CBUFFER: |
| handle_entry = ctx->cbv_handles[const_block_index->u32]; |
| break; |
| case DXIL_RESOURCE_KIND_RAW_BUFFER: |
| if (class == DXIL_RESOURCE_CLASS_UAV) |
| handle_entry = ctx->ssbo_handles[const_block_index->u32]; |
| else |
| handle_entry = ctx->srv_handles[const_block_index->u32]; |
| break; |
| case DXIL_RESOURCE_KIND_SAMPLER: |
| handle_entry = ctx->sampler_handles[const_block_index->u32]; |
| break; |
| default: |
| if (class == DXIL_RESOURCE_CLASS_UAV) |
| handle_entry = ctx->image_handles[const_block_index->u32]; |
| else |
| handle_entry = ctx->srv_handles[const_block_index->u32]; |
| break; |
| } |
| } |
| |
| if (handle_entry) |
| return handle_entry; |
| |
| if (nir_src_as_deref(*src) || |
| ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { |
| return get_src_ssa(ctx, src->ssa, 0); |
| } |
| |
| unsigned space = 0; |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_GL && |
| class == DXIL_RESOURCE_CLASS_UAV) { |
| if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER) |
| space = 2; |
| else |
| space = 1; |
| } |
| |
| /* The base binding here will almost always be zero. The only cases where we end |
| * up in this type of dynamic indexing are: |
| * 1. GL UBOs |
| * 2. GL SSBOs |
| * 3. CL SSBOs |
| * In all cases except GL UBOs, the resources are a single zero-based array. |
| * In that case (GL UBOs), the base is 1, because uniforms use binding 0 and cannot be dynamically |
| * indexed. All other cases should either fall into static indexing (first early return), |
| * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or |
| * load_vulkan_descriptor handle creation. |
| */ |
| unsigned base_binding = 0; |
| if (ctx->shader->info.first_ubo_is_default_ubo && |
| class == DXIL_RESOURCE_CLASS_CBV) |
| base_binding = 1; |
| |
| const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint); |
| const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class, |
| space, base_binding, value, !const_block_index); |
| |
| return handle; |
| } |
| |
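| /* Bindless handle creation (images, SRVs, samplers): build an unannotated handle |
| * from the descriptor-heap index, then annotate it with the matching resource |
| * properties so later operations know what kind of resource is being accessed. |
| */ |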
| static const struct dxil_value * |
| create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr) |
| { |
| const struct dxil_value *unannotated_handle = |
| emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/); |
| const struct dxil_value *res_props = |
| dxil_module_get_uav_res_props_const(&ctx->mod, image_intr); |
| |
| if (!unannotated_handle || !res_props) |
| return NULL; |
| |
| return emit_annotate_handle(ctx, unannotated_handle, res_props); |
| } |
| |
| static const struct dxil_value * |
| create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src) |
| { |
| const struct dxil_value *unannotated_handle = |
| emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/); |
| const struct dxil_value *res_props = |
| dxil_module_get_srv_res_props_const(&ctx->mod, tex); |
| |
| if (!unannotated_handle || !res_props) |
| return NULL; |
| |
| return emit_annotate_handle(ctx, unannotated_handle, res_props); |
| } |
| |
| static const struct dxil_value * |
| create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src) |
| { |
| const struct dxil_value *unannotated_handle = |
| emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/); |
| const struct dxil_value *res_props = |
| dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow); |
| |
| if (!unannotated_handle || !res_props) |
| return NULL; |
| |
| return emit_annotate_handle(ctx, unannotated_handle, res_props); |
| } |
| |
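| /* Loads up to four components from an SSBO. In the Vulkan environment, buffers |
| * marked non-writeable are accessed as SRVs instead of UAVs. Uses |
| * dx.op.rawBufferLoad when the module's minor version is >= 2, and falls back to |
| * dx.op.bufferLoad otherwise. |
| */ |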
| static bool |
| emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| |
| enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { |
| nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); |
| if (var && var->data.access & ACCESS_NON_WRITEABLE) |
| class = DXIL_RESOURCE_CLASS_SRV; |
| } |
| |
| const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER); |
| const struct dxil_value *offset = |
| get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| if (!int32_undef || !handle || !offset) |
| return false; |
| |
| assert(nir_src_bit_size(intr->src[0]) == 32); |
| assert(nir_intrinsic_dest_components(intr) <= 4); |
| |
| const struct dxil_value *coord[2] = { |
| offset, |
| int32_undef |
| }; |
| |
| enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint); |
| const struct dxil_value *load = ctx->mod.minor_version >= 2 ? |
| emit_raw_bufferload_call(ctx, handle, coord, |
| overload, |
| nir_intrinsic_dest_components(intr), |
| intr->def.bit_size / 8) : |
| emit_bufferload_call(ctx, handle, coord, overload); |
| if (!load) |
| return false; |
| |
| for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { |
| const struct dxil_value *val = |
| dxil_emit_extractval(&ctx->mod, load, i); |
| if (!val) |
| return false; |
| store_def(ctx, &intr->def, i, val); |
| } |
| if (intr->def.bit_size == 16) |
| ctx->mod.feats.native_low_precision = true; |
| return true; |
| } |
| |
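| /* Stores up to four components to an SSBO, padding unused value slots with undef |
| * and masking the write accordingly. Uses dx.op.rawBufferStore when the module's |
| * minor version is >= 2, and dx.op.bufferStore otherwise. |
| */ |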
| static bool |
| emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); |
| const struct dxil_value *offset = |
| get_src(ctx, &intr->src[2], 0, nir_type_uint); |
| if (!handle || !offset) |
| return false; |
| |
| unsigned num_components = nir_src_num_components(intr->src[0]); |
| assert(num_components <= 4); |
| if (nir_src_bit_size(intr->src[0]) == 16) |
| ctx->mod.feats.native_low_precision = true; |
| |
| nir_alu_type type = |
| dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0))); |
| const struct dxil_value *value[4] = { 0 }; |
| for (unsigned i = 0; i < num_components; ++i) { |
| value[i] = get_src(ctx, &intr->src[0], i, type); |
| if (!value[i]) |
| return false; |
| } |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[2] = { |
| offset, |
| int32_undef |
| }; |
| |
| enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size); |
| if (num_components < 4) { |
| const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0])); |
| if (!value_undef) |
| return false; |
| |
| for (int i = num_components; i < 4; ++i) |
| value[i] = value_undef; |
| } |
| |
| const struct dxil_value *write_mask = |
| dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); |
| if (!write_mask) |
| return false; |
| |
| return ctx->mod.minor_version >= 2 ? |
| emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) : |
| emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload); |
| } |
| |
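| /* Loads one vec4-aligned row from a constant buffer and extracts the requested |
| * components, honoring the intrinsic's component offset when present. |
| */ |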
| static bool |
| emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER); |
| const struct dxil_value *offset = |
| get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| |
| if (!handle || !offset) |
| return false; |
| |
| enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint); |
| const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload); |
| if (!agg) |
| return false; |
| |
| unsigned first_component = nir_intrinsic_has_component(intr) ? |
| nir_intrinsic_component(intr) : 0; |
| for (unsigned i = 0; i < intr->def.num_components; i++) |
| store_def(ctx, &intr->def, i, |
| dxil_emit_extractval(&ctx->mod, agg, i + first_component)); |
| |
| if (intr->def.bit_size == 16) |
| ctx->mod.feats.native_low_precision = true; |
| return true; |
| } |
| |
| /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique |
| * between control points and patch variables in HS/DS |
| */ |
| static nir_variable * |
| find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch) |
| { |
| nir_foreach_variable_with_modes(var, s, mode) { |
| if (var->data.driver_location == driver_location && |
| var->data.patch == patch) |
| return var; |
| } |
| return NULL; |
| } |
| |
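| /* Stores shader outputs via dx.op.storeOutput, or dx.op.storePatchConstant for |
| * hull-shader patch constants, one call per enabled writemask component. Tess |
| * levels are written as N rows x 1 column. When minor_validator >= 5, the |
| * signature's never-written and dynamic-index masks are updated as well. |
| */ |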
| static bool |
| emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| assert(intr->intrinsic == nir_intrinsic_store_output || |
| ctx->mod.shader_kind == DXIL_HULL_SHADER); |
| bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output && |
| ctx->mod.shader_kind == DXIL_HULL_SHADER; |
| nir_alu_type out_type = nir_intrinsic_src_type(intr); |
| enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size); |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ? |
| "dx.op.storePatchConstant" : "dx.op.storeOutput", |
| overload); |
| |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ? |
| DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT); |
| uint8_t *io_mappings = is_patch_constant ? ctx->mod.patch_mappings : ctx->mod.output_mappings; |
| uint8_t io_index = io_mappings[nir_intrinsic_base(intr)]; |
| const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, io_index); |
| unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2; |
| |
| /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature |
| * generation, so muck with them here too. |
| */ |
| nir_io_semantics semantics = nir_intrinsic_io_semantics(intr); |
| bool is_tess_level = is_patch_constant && |
| (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER || |
| semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER); |
| |
| const struct dxil_value *row = NULL; |
| const struct dxil_value *col = NULL; |
| if (is_tess_level) |
| col = dxil_module_get_int8_const(&ctx->mod, 0); |
| else |
| row = get_src(ctx, &intr->src[row_index], 0, nir_type_int); |
| |
| bool success = true; |
| uint32_t writemask = nir_intrinsic_write_mask(intr); |
| |
| nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant); |
| unsigned var_base_component = var->data.location_frac; |
| unsigned base_component = nir_intrinsic_component(intr) - var_base_component; |
| |
| if (ctx->mod.minor_validator >= 5) { |
| struct dxil_signature_record *sig_rec = is_patch_constant ? |
| &ctx->mod.patch_consts[io_index] : |
| &ctx->mod.outputs[io_index]; |
| unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1; |
| unsigned comp_mask = 0; |
| if (is_tess_level) |
| comp_mask = 1; |
| else if (comp_size == 1) |
| comp_mask = writemask << var_base_component; |
| else { |
| for (unsigned i = 0; i < intr->num_components; ++i) |
| if ((writemask & (1 << i))) |
| comp_mask |= 3 << ((i + var_base_component) * comp_size); |
| } |
| for (unsigned r = 0; r < sig_rec->num_elements; ++r) |
| sig_rec->elements[r].never_writes_mask &= ~comp_mask; |
| |
| if (!nir_src_is_const(intr->src[row_index])) { |
| struct dxil_psv_signature_element *psv_rec = is_patch_constant ? |
| &ctx->mod.psv_patch_consts[io_index] : |
| &ctx->mod.psv_outputs[io_index]; |
| psv_rec->dynamic_mask_and_stream |= comp_mask; |
| } |
| } |
| |
| for (unsigned i = 0; i < intr->num_components && success; ++i) { |
| if (writemask & (1 << i)) { |
| if (is_tess_level) |
| row = dxil_module_get_int32_const(&ctx->mod, i + base_component); |
| else |
| col = dxil_module_get_int8_const(&ctx->mod, i + base_component); |
| const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type); |
| if (!col || !row || !value) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, output_id, row, col, value |
| }; |
| success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| } |
| |
| return success; |
| } |
| |
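| /* Loads shader inputs (or HS/DS patch constants and output control points) via |
| * the matching dx.op.loadInput / loadPatchConstant / loadOutputControlPoint call, |
| * one call per component. Flat pixel-shader inputs with a non-default provoking |
| * vertex go through dx.op.attributeAtVertex instead. |
| */ |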
| static bool |
| emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| bool attr_at_vertex = false; |
| if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER && |
| ctx->opts->interpolate_at_vertex && |
| ctx->opts->provoking_vertex != 0 && |
| (nir_intrinsic_dest_type(intr) & nir_type_float)) { |
| nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr)); |
| |
| attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT; |
| } |
| |
| bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER && |
| intr->intrinsic == nir_intrinsic_load_input) || |
| (ctx->mod.shader_kind == DXIL_HULL_SHADER && |
| intr->intrinsic == nir_intrinsic_load_output); |
| bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output; |
| |
| unsigned opcode_val; |
| const char *func_name; |
| if (attr_at_vertex) { |
| opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX; |
| func_name = "dx.op.attributeAtVertex"; |
| if (ctx->mod.minor_validator >= 6) |
| ctx->mod.feats.barycentrics = 1; |
| } else if (is_patch_constant) { |
| opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT; |
| func_name = "dx.op.loadPatchConstant"; |
| } else if (is_output_control_point) { |
| opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT; |
| func_name = "dx.op.loadOutputControlPoint"; |
| } else { |
| opcode_val = DXIL_INTR_LOAD_INPUT; |
| func_name = "dx.op.loadInput"; |
| } |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val); |
| if (!opcode) |
| return false; |
| |
| uint8_t *io_mappings = |
| is_patch_constant ? ctx->mod.patch_mappings : |
| is_output_control_point ? ctx->mod.output_mappings : |
| ctx->mod.input_mappings; |
| uint8_t io_index = io_mappings[nir_intrinsic_base(intr)]; |
| const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, io_index); |
| if (!input_id) |
| return false; |
| |
| bool is_per_vertex = |
| intr->intrinsic == nir_intrinsic_load_per_vertex_input || |
| intr->intrinsic == nir_intrinsic_load_per_vertex_output; |
| int row_index = is_per_vertex ? 1 : 0; |
| const struct dxil_value *vertex_id = NULL; |
| if (!is_patch_constant) { |
| if (is_per_vertex) { |
| vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int); |
| } else if (attr_at_vertex) { |
| vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex); |
| } else { |
| const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); |
| if (!int32_type) |
| return false; |
| |
| vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); |
| } |
| if (!vertex_id) |
| return false; |
| } |
| |
| /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature |
| * generation, so muck with them here too. |
| */ |
| nir_io_semantics semantics = nir_intrinsic_io_semantics(intr); |
| bool is_tess_level = is_patch_constant && |
| (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER || |
| semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER); |
| |
| const struct dxil_value *row = NULL; |
| const struct dxil_value *comp = NULL; |
| if (is_tess_level) |
| comp = dxil_module_get_int8_const(&ctx->mod, 0); |
| else |
| row = get_src(ctx, &intr->src[row_index], 0, nir_type_int); |
| |
| nir_alu_type out_type = nir_intrinsic_dest_type(intr); |
| enum overload_type overload = get_overload(out_type, intr->def.bit_size); |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload); |
| |
| if (!func) |
| return false; |
| |
| nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant); |
| unsigned var_base_component = var ? var->data.location_frac : 0; |
| unsigned base_component = nir_intrinsic_component(intr) - var_base_component; |
| |
| if (ctx->mod.minor_validator >= 5 && |
| !is_output_control_point && |
| intr->intrinsic != nir_intrinsic_load_output) { |
| struct dxil_signature_record *sig_rec = is_patch_constant ? |
| &ctx->mod.patch_consts[io_index] : |
| &ctx->mod.inputs[io_index]; |
| unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1; |
| unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1; |
| comp_mask <<= (var_base_component * comp_size); |
| if (is_tess_level) |
| comp_mask = 1; |
| for (unsigned r = 0; r < sig_rec->num_elements; ++r) |
| sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask); |
| |
| if (!nir_src_is_const(intr->src[row_index])) { |
| struct dxil_psv_signature_element *psv_rec = is_patch_constant ? |
| &ctx->mod.psv_patch_consts[io_index] : |
| &ctx->mod.psv_inputs[io_index]; |
| psv_rec->dynamic_mask_and_stream |= comp_mask; |
| } |
| } |
| |
| for (unsigned i = 0; i < intr->num_components; ++i) { |
| if (is_tess_level) |
| row = dxil_module_get_int32_const(&ctx->mod, i + base_component); |
| else |
| comp = dxil_module_get_int8_const(&ctx->mod, i + base_component); |
| |
| if (!row || !comp) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, input_id, row, comp, vertex_id |
| }; |
| |
| unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0); |
| const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args); |
| if (!retval) |
| return false; |
| store_def(ctx, &intr->def, i, retval); |
| } |
| return true; |
| } |
| |
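| /* Loads an interpolated input, mapping the NIR barycentric source to the |
| * corresponding DXIL eval opcode: evalSnapped (pixel/offset), evalSampleIndex, |
| * or evalCentroid. |
| */ |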
| static bool |
| emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]); |
| |
| const struct dxil_value *args[6] = { 0 }; |
| |
| unsigned opcode_val; |
| const char *func_name; |
| unsigned num_args; |
| switch (barycentric->intrinsic) { |
| case nir_intrinsic_load_barycentric_at_offset: |
| opcode_val = DXIL_INTR_EVAL_SNAPPED; |
| func_name = "dx.op.evalSnapped"; |
| num_args = 6; |
| for (unsigned i = 0; i < 2; ++i) { |
| const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float); |
| /* GLSL offsets are in [-0.5f, 0.5f); DXIL evalSnapped takes integer offsets in [-8, 7] (1/16-pixel units) */ |
| const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod, |
| DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0); |
| args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI, |
| dxil_module_get_int_type(&ctx->mod, 32), offset_16); |
| } |
| break; |
| case nir_intrinsic_load_barycentric_pixel: |
| opcode_val = DXIL_INTR_EVAL_SNAPPED; |
| func_name = "dx.op.evalSnapped"; |
| num_args = 6; |
| args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0); |
| break; |
| case nir_intrinsic_load_barycentric_at_sample: |
| opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX; |
| func_name = "dx.op.evalSampleIndex"; |
| num_args = 5; |
| args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int); |
| break; |
| case nir_intrinsic_load_barycentric_centroid: |
| opcode_val = DXIL_INTR_EVAL_CENTROID; |
| func_name = "dx.op.evalCentroid"; |
| num_args = 4; |
| break; |
| default: |
| unreachable("Unsupported interpolation barycentric intrinsic"); |
| } |
| uint8_t io_index = ctx->mod.input_mappings[nir_intrinsic_base(intr)]; |
| args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val); |
| args[1] = dxil_module_get_int32_const(&ctx->mod, io_index); |
| args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int); |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32); |
| |
| if (!func) |
| return false; |
| |
| nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false); |
| unsigned var_base_component = var ? var->data.location_frac : 0; |
| unsigned base_component = nir_intrinsic_component(intr) - var_base_component; |
| |
| if (ctx->mod.minor_validator >= 5) { |
| struct dxil_signature_record *sig_rec = &ctx->mod.inputs[io_index]; |
| unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1; |
| unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1; |
| comp_mask <<= (var_base_component * comp_size); |
| for (unsigned r = 0; r < sig_rec->num_elements; ++r) |
| sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask); |
| |
| if (!nir_src_is_const(intr->src[1])) { |
| struct dxil_psv_signature_element *psv_rec = &ctx->mod.psv_inputs[io_index]; |
| psv_rec->dynamic_mask_and_stream |= comp_mask; |
| } |
| } |
| |
| for (unsigned i = 0; i < intr->num_components; ++i) { |
| args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component); |
| |
| const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args); |
| if (!retval) |
| return false; |
| store_def(ctx, &intr->def, i, retval); |
| } |
| return true; |
| } |
| |
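| /* Walks a NIR deref chain rooted at a variable and turns it into an inbounds |
| * GEP on the corresponding DXIL global (constant, shared, or scratch storage). |
| */ |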
| static const struct dxil_value * |
| deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref) |
| { |
| nir_deref_path path; |
| nir_deref_path_init(&path, deref, ctx->ralloc_ctx); |
| assert(path.path[0]->deref_type == nir_deref_type_var); |
| uint32_t count = 0; |
| while (path.path[count]) |
| ++count; |
| |
| const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx, |
| const struct dxil_value *, |
| count + 1); |
| nir_variable *var = path.path[0]->var; |
| const struct dxil_value **var_array; |
| switch (deref->modes) { |
| case nir_var_mem_constant: var_array = ctx->consts; break; |
| case nir_var_mem_shared: var_array = ctx->sharedvars; break; |
| case nir_var_function_temp: var_array = ctx->scratchvars; break; |
| default: unreachable("Invalid deref mode"); |
| } |
| gep_indices[0] = var_array[var->data.driver_location]; |
| |
| for (uint32_t i = 0; i < count; ++i) |
| gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0); |
| |
| return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1); |
| } |
| |
| static bool |
| emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0])); |
| if (!ptr) |
| return false; |
| |
| const struct dxil_value *retval = |
| dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false); |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
| static bool |
| emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); |
| const struct dxil_value *ptr = deref_to_gep(ctx, deref); |
| if (!ptr) |
| return false; |
| |
| const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type)); |
| return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false); |
| } |
| |
| static bool |
| emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0])); |
| if (!ptr) |
| return false; |
| |
| const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| if (!value) |
| return false; |
| |
| enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr)); |
| const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false, |
| DXIL_ATOMIC_ORDERING_ACQREL, |
| DXIL_SYNC_SCOPE_CROSSTHREAD); |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
| static bool |
| emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0])); |
| if (!ptr) |
| return false; |
| |
| const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint); |
| if (!cmp || !value) |
| return false; |
| |
| const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false, |
| DXIL_ATOMIC_ORDERING_ACQREL, |
| DXIL_SYNC_SCOPE_CROSSTHREAD); |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
| static bool |
| emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value) |
| { |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD); |
| if (!opcode) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| value |
| }; |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE); |
| if (!func) |
| return false; |
| |
| return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool); |
| if (!value) |
| return false; |
| |
| return emit_discard_if_with_value(ctx, value); |
| } |
| |
| static bool |
| emit_discard(struct ntd_context *ctx) |
| { |
| const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true); |
| return emit_discard_if_with_value(ctx, value); |
| } |
| |
| static bool |
| emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM); |
| const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); |
| if (!opcode || !stream_id) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| stream_id |
| }; |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE); |
| if (!func) |
| return false; |
| |
| return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM); |
| const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); |
| if (!opcode || !stream_id) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| stream_id |
| }; |
| |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE); |
| if (!func) |
| return false; |
| |
| return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ? |
| create_image_handle(ctx, intr) : |
| get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); |
| if (!handle) |
| return false; |
| |
| bool is_array = false; |
| if (intr->intrinsic == nir_intrinsic_image_deref_store) |
| is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); |
| else |
| is_array = nir_intrinsic_image_array(intr); |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; |
| enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ? |
| glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) : |
| nir_intrinsic_image_dim(intr); |
| unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); |
| if (is_array) |
| ++num_coords; |
| |
| assert(num_coords <= nir_src_num_components(intr->src[1])); |
| for (unsigned i = 0; i < num_coords; ++i) { |
| coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); |
| if (!coord[i]) |
| return false; |
| } |
| |
| nir_alu_type in_type = nir_intrinsic_src_type(intr); |
| enum overload_type overload = get_overload(in_type, 32); |
| |
| assert(nir_src_bit_size(intr->src[3]) == 32); |
| unsigned num_components = nir_src_num_components(intr->src[3]); |
| assert(num_components <= 4); |
| const struct dxil_value *value[4]; |
| for (unsigned i = 0; i < num_components; ++i) { |
| value[i] = get_src(ctx, &intr->src[3], i, in_type); |
| if (!value[i]) |
| return false; |
| } |
| |
| for (int i = num_components; i < 4; ++i) |
| value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0])); |
| |
| const struct dxil_value *write_mask = |
| dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); |
| if (!write_mask) |
| return false; |
| |
| if (image_dim == GLSL_SAMPLER_DIM_BUF) { |
| coord[1] = int32_undef; |
| return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload); |
| } else |
| return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload); |
| } |
| |
| static bool |
| emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ? |
| create_image_handle(ctx, intr) : |
| get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); |
| if (!handle) |
| return false; |
| |
| bool is_array = false; |
| if (intr->intrinsic == nir_intrinsic_image_deref_load) |
| is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); |
| else |
| is_array = nir_intrinsic_image_array(intr); |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; |
| enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ? |
| glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) : |
| nir_intrinsic_image_dim(intr); |
| unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); |
| if (is_array) |
| ++num_coords; |
| |
| assert(num_coords <= nir_src_num_components(intr->src[1])); |
| for (unsigned i = 0; i < num_coords; ++i) { |
| coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); |
| if (!coord[i]) |
| return false; |
| } |
| |
| nir_alu_type out_type = nir_intrinsic_dest_type(intr); |
| enum overload_type overload = get_overload(out_type, 32); |
| |
| const struct dxil_value *load_result; |
| if (image_dim == GLSL_SAMPLER_DIM_BUF) { |
| coord[1] = int32_undef; |
| load_result = emit_bufferload_call(ctx, handle, coord, overload); |
| } else |
| load_result = emit_textureload_call(ctx, handle, coord, overload); |
| |
| if (!load_result) |
| return false; |
| |
| assert(intr->def.bit_size == 32); |
| unsigned num_components = intr->def.num_components; |
| assert(num_components <= 4); |
| for (unsigned i = 0; i < num_components; ++i) { |
| const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i); |
| if (!component) |
| return false; |
| store_def(ctx, &intr->def, i, component); |
| } |
| |
| if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1) |
| ctx->mod.feats.typed_uav_load_additional_formats = true; |
| |
| return true; |
| } |
| |
| static bool |
| emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ? |
| create_image_handle(ctx, intr) : |
| get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); |
| if (!handle) |
| return false; |
| |
| bool is_array = false; |
| if (intr->intrinsic == nir_intrinsic_image_deref_atomic) |
| is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); |
| else |
| is_array = nir_intrinsic_image_array(intr); |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; |
| enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ? |
| glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) : |
| nir_intrinsic_image_dim(intr); |
| unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); |
| if (is_array) |
| ++num_coords; |
| |
| assert(num_coords <= nir_src_num_components(intr->src[1])); |
| for (unsigned i = 0; i < num_coords; ++i) { |
| coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); |
| if (!coord[i]) |
| return false; |
| } |
| |
| nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr); |
| enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op); |
| nir_alu_type type = nir_atomic_op_type(nir_op); |
| const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type); |
| if (!value) |
| return false; |
| |
| const struct dxil_value *retval = |
| emit_atomic_binop(ctx, handle, dxil_op, coord, value); |
| |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
| static bool |
| emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ? |
| create_image_handle(ctx, intr) : |
| get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); |
| if (!handle) |
| return false; |
| |
| bool is_array = false; |
| if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) |
| is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); |
| else |
| is_array = nir_intrinsic_image_array(intr); |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; |
| enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ? |
| glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) : |
| nir_intrinsic_image_dim(intr); |
| unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); |
| if (is_array) |
| ++num_coords; |
| |
| assert(num_coords <= nir_src_num_components(intr->src[1])); |
| for (unsigned i = 0; i < num_coords; ++i) { |
| coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); |
| if (!coord[i]) |
| return false; |
| } |
| |
| const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint); |
| const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint); |
| if (!cmpval || !newval) |
| return false; |
| |
| const struct dxil_value *retval = |
| emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); |
| |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
| struct texop_parameters { |
| const struct dxil_value *tex; |
| const struct dxil_value *sampler; |
| const struct dxil_value *bias, *lod_or_sample, *min_lod; |
| const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; |
| const struct dxil_value *cmp; |
| enum overload_type overload; |
| }; |
| |
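| /* Queries resource dimensions via dx.op.getDimensions; also used to query the |
| * size of raw buffers (see emit_get_ssbo_size). |
| */ |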
| static const struct dxil_value * |
| emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE), |
| params->tex, |
| params->lod_or_sample |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static bool |
| emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ? |
| create_image_handle(ctx, intr) : |
| get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); |
| if (!handle) |
| return false; |
| |
| enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ? |
| glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) : |
| nir_intrinsic_image_dim(intr); |
| const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ? |
| dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) : |
| get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| if (!lod) |
| return false; |
| |
| struct texop_parameters params = { |
| .tex = handle, |
| .lod_or_sample = lod |
| }; |
| const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); |
| if (!dimensions) |
| return false; |
| |
| for (unsigned i = 0; i < intr->def.num_components; ++i) { |
| const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i); |
| store_def(ctx, &intr->def, i, retval); |
| } |
| |
| return true; |
| } |
| |
| static bool |
| emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { |
| nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); |
| if (var && var->data.access & ACCESS_NON_WRITEABLE) |
| class = DXIL_RESOURCE_CLASS_SRV; |
| } |
| |
| const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER); |
| if (!handle) |
| return false; |
| |
| struct texop_parameters params = { |
| .tex = handle, |
| .lod_or_sample = dxil_module_get_undef( |
| &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) |
| }; |
| |
| const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); |
| if (!dimensions) |
| return false; |
| |
| const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0); |
| store_def(ctx, &intr->def, 0, retval); |
| |
| return true; |
| } |
| |
| static bool |
| emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr); |
| enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op); |
| nir_alu_type type = nir_atomic_op_type(nir_op); |
| const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); |
| const struct dxil_value *offset = |
| get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| const struct dxil_value *value = |
| get_src(ctx, &intr->src[2], 0, type); |
| |
| if (!value || !handle || !offset) |
| return false; |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[3] = { |
| offset, int32_undef, int32_undef |
| }; |
| |
| const struct dxil_value *retval = |
| emit_atomic_binop(ctx, handle, dxil_op, coord, value); |
| |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
| static bool |
| emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); |
| const struct dxil_value *offset = |
| get_src(ctx, &intr->src[1], 0, nir_type_uint); |
| const struct dxil_value *cmpval = |
| get_src(ctx, &intr->src[2], 0, nir_type_int); |
| const struct dxil_value *newval = |
| get_src(ctx, &intr->src[3], 0, nir_type_int); |
| |
| if (!cmpval || !newval || !handle || !offset) |
| return false; |
| |
| const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); |
| if (!int32_undef) |
| return false; |
| |
| const struct dxil_value *coord[3] = { |
| offset, int32_undef, int32_undef |
| }; |
| |
| const struct dxil_value *retval = |
| emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); |
| |
| if (!retval) |
| return false; |
| |
| store_def(ctx, &intr->def, 0, retval); |
| return true; |
| } |
| |
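| /* Lowers vulkan_resource_index: a constant index is folded into the binding, a |
| * dynamic index is added at runtime; the second destination component is always 0. |
| */ |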
| static bool |
| emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| unsigned int binding = nir_intrinsic_binding(intr); |
| |
| bool const_index = nir_src_is_const(intr->src[0]); |
| if (const_index) { |
| binding += nir_src_as_const_value(intr->src[0])->u32; |
| } |
| |
| const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding); |
| if (!index_value) |
| return false; |
| |
| if (!const_index) { |
| const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32); |
| if (!offset) |
| return false; |
| |
| index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0); |
| if (!index_value) |
| return false; |
| } |
| |
| store_def(ctx, &intr->def, 0, index_value); |
| store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0)); |
| return true; |
| } |
| |
| static bool |
| emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]); |
| const struct dxil_value *handle = NULL; |
| |
| enum dxil_resource_class resource_class; |
| enum dxil_resource_kind resource_kind; |
| switch (nir_intrinsic_desc_type(intr)) { |
| case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: |
| resource_class = DXIL_RESOURCE_CLASS_CBV; |
| resource_kind = DXIL_RESOURCE_KIND_CBUFFER; |
| break; |
| case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: |
| resource_class = DXIL_RESOURCE_CLASS_UAV; |
| resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER; |
| break; |
| default: |
| unreachable("unknown descriptor type"); |
| return false; |
| } |
| |
| if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) { |
| unsigned binding = nir_intrinsic_binding(index); |
| unsigned space = nir_intrinsic_desc_set(index); |
| |
| /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */ |
| assert(space < 32); |
| |
| nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); |
| if (resource_class == DXIL_RESOURCE_CLASS_UAV && |
| (var->data.access & ACCESS_NON_WRITEABLE)) |
| resource_class = DXIL_RESOURCE_CLASS_SRV; |
| |
| const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32); |
| if (!index_value) |
| return false; |
| |
| handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false); |
| } else { |
| const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32); |
| if (!heap_index_value) |
| return false; |
| const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true); |
| const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind); |
| if (!unannotated_handle || !res_props) |
| return false; |
| handle = emit_annotate_handle(ctx, unannotated_handle, res_props); |
| } |
| |
| store_def(ctx, &intr->def, 0, handle); |
| store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32)); |
| |
| return true; |
| } |
| |
| static bool |
| emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE); |
| if (!func) |
| return false; |
| |
| const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION); |
| if (!opcode) |
| return false; |
| |
| const struct dxil_value *args[] = { |
| opcode, |
| get_src(ctx, &intr->src[0], 0, nir_type_uint32), |
| }; |
| if (!args[1]) |
| return false; |
| |
| const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!v) |
| return false; |
| |
| for (unsigned i = 0; i < 2; ++i) { |
| /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */ |
| const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, |
| dxil_emit_extractval(&ctx->mod, v, i), |
| dxil_module_get_float_const(&ctx->mod, 0.5f), 0); |
| store_def(ctx, &intr->def, i, coord); |
| } |
| return true; |
| } |
| |
| static bool |
| emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| assert(ctx->mod.info.has_per_sample_input || |
| intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample); |
| |
| if (ctx->mod.info.has_per_sample_input) |
| return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex", |
| DXIL_INTR_SAMPLE_INDEX, nir_type_int); |
| |
| store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0)); |
| return true; |
| } |
| |
| static bool |
| emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst", |
| get_overload(nir_type_uint, intr->def.bit_size)); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST), |
| get_src(ctx, intr->src, 0, nir_type_uint), |
| }; |
| if (!func || !args[0] || !args[1]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static bool |
| emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt", |
| get_overload(nir_type_uint, intr->def.bit_size)); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT), |
| get_src(ctx, &intr->src[0], 0, nir_type_uint), |
| get_src(ctx, &intr->src[1], 0, nir_type_uint), |
| }; |
| if (!func || !args[0] || !args[1] || !args[2]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static bool |
| emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual", |
| get_overload(alu_type, intr->src[0].ssa->bit_size)); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL), |
| get_src(ctx, intr->src, 0, alu_type), |
| }; |
| if (!func || !args[0] || !args[1]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static bool |
| emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| bool any = intr->intrinsic == nir_intrinsic_vote_any; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, |
| any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue", |
| DXIL_NONE); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE), |
| get_src(ctx, intr->src, 0, nir_type_bool), |
| }; |
| if (!func || !args[0] || !args[1]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static bool |
| emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT), |
| get_src(ctx, intr->src, 0, nir_type_bool), |
| }; |
| if (!func || !args[0] || !args[1]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| for (uint32_t i = 0; i < 4; ++i) |
| store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i)); |
| return true; |
| } |
| |
| static bool |
| emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp", |
| get_overload(nir_type_uint, intr->def.bit_size)); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP), |
| get_src(ctx, intr->src, 0, nir_type_uint), |
| dxil_module_get_int8_const(&ctx->mod, op), |
| }; |
| if (!func || !args[0] || !args[1] || !args[2]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static enum dxil_wave_bit_op_kind |
| get_reduce_bit_op(nir_op op) |
| { |
| switch (op) { |
| case nir_op_ior: return DXIL_WAVE_BIT_OP_OR; |
| case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR; |
| case nir_op_iand: return DXIL_WAVE_BIT_OP_AND; |
| default: |
| unreachable("Invalid bit op"); |
| } |
| } |
| |
| static bool |
| emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr)); |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit", |
| get_overload(nir_type_uint, intr->def.bit_size)); |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT), |
| get_src(ctx, intr->src, 0, nir_type_uint), |
| dxil_module_get_int8_const(&ctx->mod, wave_bit_op), |
| }; |
| if (!func || !args[0] || !args[1] || !args[2]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static enum dxil_wave_op_kind |
| get_reduce_op(nir_op op) |
| { |
| switch (op) { |
| case nir_op_iadd: |
| case nir_op_fadd: |
| return DXIL_WAVE_OP_SUM; |
| case nir_op_imul: |
| case nir_op_fmul: |
| return DXIL_WAVE_OP_PRODUCT; |
| case nir_op_imax: |
| case nir_op_umax: |
| case nir_op_fmax: |
| return DXIL_WAVE_OP_MAX; |
| case nir_op_imin: |
| case nir_op_umin: |
| case nir_op_fmin: |
| return DXIL_WAVE_OP_MIN; |
| default: |
| unreachable("Unexpected reduction op"); |
| } |
| } |
| |
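| /* Lowers reduce/exclusive_scan: bitwise reductions go through dx.op.waveActiveBit, |
| * arithmetic ones through dx.op.waveActiveOp or dx.op.wavePrefixOp, with the |
| * signedness flag derived from the reduction op's input type. |
| */ |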
| static bool |
| emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| ctx->mod.feats.wave_ops = 1; |
| bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan; |
| nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr); |
| switch (reduction_op) { |
| case nir_op_ior: |
| case nir_op_ixor: |
| case nir_op_iand: |
| assert(!is_prefix); |
| return emit_reduce_bitwise(ctx, intr); |
| default: |
| break; |
| } |
| nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0]; |
| enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op); |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp", |
| get_overload(alu_type, intr->def.bit_size)); |
| bool is_unsigned = alu_type == nir_type_uint; |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP), |
| get_src(ctx, intr->src, 0, alu_type), |
| dxil_module_get_int8_const(&ctx->mod, wave_op), |
| dxil_module_get_int8_const(&ctx->mod, is_unsigned), |
| }; |
| if (!func || !args[0] || !args[1] || !args[2] || !args[3]) |
| return false; |
| |
| const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| if (!ret) |
| return false; |
| store_def(ctx, &intr->def, 0, ret); |
| return true; |
| } |
| |
| static bool |
| emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) |
| { |
| switch (intr->intrinsic) { |
| case nir_intrinsic_load_global_invocation_id: |
| return emit_load_global_invocation_id(ctx, intr); |
| case nir_intrinsic_load_local_invocation_id: |
| return emit_load_local_invocation_id(ctx, intr); |
| case nir_intrinsic_load_local_invocation_index: |
| return emit_load_local_invocation_index(ctx, intr); |
| case nir_intrinsic_load_workgroup_id: |
| return emit_load_local_workgroup_id(ctx, intr); |
| case nir_intrinsic_load_ssbo: |
| return emit_load_ssbo(ctx, intr); |
| case nir_intrinsic_store_ssbo: |
| return emit_store_ssbo(ctx, intr); |
| case nir_intrinsic_load_deref: |
| return emit_load_deref(ctx, intr); |
| case nir_intrinsic_store_deref: |
| return emit_store_deref(ctx, intr); |
| case nir_intrinsic_deref_atomic: |
| return emit_atomic_deref(ctx, intr); |
| case nir_intrinsic_deref_atomic_swap: |
| return emit_atomic_deref_swap(ctx, intr); |
| case nir_intrinsic_load_ubo_vec4: |
| return emit_load_ubo_vec4(ctx, intr); |
| case nir_intrinsic_load_primitive_id: |
| return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID", |
| DXIL_INTR_PRIMITIVE_ID, nir_type_int); |
| case nir_intrinsic_load_sample_id: |
| case nir_intrinsic_load_sample_id_no_per_sample: |
| return emit_load_sample_id(ctx, intr); |
| case nir_intrinsic_load_invocation_id: |
| switch (ctx->mod.shader_kind) { |
| case DXIL_HULL_SHADER: |
| return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID", |
| DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int); |
| case DXIL_GEOMETRY_SHADER: |
| return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID", |
| DXIL_INTR_GS_INSTANCE_ID, nir_type_int); |
| default: |
| unreachable("Unexpected shader kind for invocation ID"); |
| } |
| case nir_intrinsic_load_view_index: |
| ctx->mod.feats.view_id = true; |
| return emit_load_unary_external_function(ctx, intr, "dx.op.viewID", |
| DXIL_INTR_VIEW_ID, nir_type_int); |
| case nir_intrinsic_load_sample_mask_in: |
| return emit_load_sample_mask_in(ctx, intr); |
| case nir_intrinsic_load_tess_coord: |
| return emit_load_tess_coord(ctx, intr); |
| case nir_intrinsic_terminate_if: |
| case nir_intrinsic_demote_if: |
| return emit_discard_if(ctx, intr); |
| case nir_intrinsic_terminate: |
| case nir_intrinsic_demote: |
| return emit_discard(ctx); |
| case nir_intrinsic_emit_vertex: |
| return emit_emit_vertex(ctx, intr); |
| case nir_intrinsic_end_primitive: |
| return emit_end_primitive(ctx, intr); |
| case nir_intrinsic_barrier: |
| return emit_barrier(ctx, intr); |
| case nir_intrinsic_ssbo_atomic: |
| return emit_ssbo_atomic(ctx, intr); |
| case nir_intrinsic_ssbo_atomic_swap: |
| return emit_ssbo_atomic_comp_swap(ctx, intr); |
| case nir_intrinsic_image_deref_atomic: |
| case nir_intrinsic_image_atomic: |
| case nir_intrinsic_bindless_image_atomic: |
| return emit_image_atomic(ctx, intr); |
| case nir_intrinsic_image_deref_atomic_swap: |
| case nir_intrinsic_image_atomic_swap: |
| case nir_intrinsic_bindless_image_atomic_swap: |
| return emit_image_atomic_comp_swap(ctx, intr); |
| case nir_intrinsic_image_store: |
| case nir_intrinsic_image_deref_store: |
| case nir_intrinsic_bindless_image_store: |
| return emit_image_store(ctx, intr); |
| case nir_intrinsic_image_load: |
| case nir_intrinsic_image_deref_load: |
| case nir_intrinsic_bindless_image_load: |
| return emit_image_load(ctx, intr); |
| case nir_intrinsic_image_size: |
| case nir_intrinsic_image_deref_size: |
| case nir_intrinsic_bindless_image_size: |
| return emit_image_size(ctx, intr); |
| case nir_intrinsic_get_ssbo_size: |
| return emit_get_ssbo_size(ctx, intr); |
| case nir_intrinsic_load_input: |
| case nir_intrinsic_load_per_vertex_input: |
| case nir_intrinsic_load_output: |
| case nir_intrinsic_load_per_vertex_output: |
| return emit_load_input_via_intrinsic(ctx, intr); |
| case nir_intrinsic_store_output: |
| case nir_intrinsic_store_per_vertex_output: |
| return emit_store_output_via_intrinsic(ctx, intr); |
| |
| case nir_intrinsic_load_barycentric_at_offset: |
| case nir_intrinsic_load_barycentric_at_sample: |
| case nir_intrinsic_load_barycentric_centroid: |
| case nir_intrinsic_load_barycentric_pixel: |
| /* Emit nothing, we only support these as inputs to load_interpolated_input */ |
| return true; |
| case nir_intrinsic_load_interpolated_input: |
| return emit_load_interpolated_input(ctx, intr); |
| |
| case nir_intrinsic_vulkan_resource_index: |
| return emit_vulkan_resource_index(ctx, intr); |
| case nir_intrinsic_load_vulkan_descriptor: |
| return emit_load_vulkan_descriptor(ctx, intr); |
| |
| case nir_intrinsic_load_sample_pos_from_id: |
| return emit_load_sample_pos_from_id(ctx, intr); |
| |
| case nir_intrinsic_is_helper_invocation: |
| return emit_load_unary_external_function( |
| ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int); |
| case nir_intrinsic_elect: |
| ctx->mod.feats.wave_ops = 1; |
| return emit_load_unary_external_function( |
| ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid); |
| case nir_intrinsic_load_subgroup_size: |
| ctx->mod.feats.wave_ops = 1; |
| return emit_load_unary_external_function( |
| ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid); |
| case nir_intrinsic_load_subgroup_invocation: |
| ctx->mod.feats.wave_ops = 1; |
| return emit_load_unary_external_function( |
| ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid); |
| |
| case nir_intrinsic_vote_feq: |
| case nir_intrinsic_vote_ieq: |
| return emit_vote_eq(ctx, intr); |
| case nir_intrinsic_vote_any: |
| case nir_intrinsic_vote_all: |
| return emit_vote(ctx, intr); |
| |
| case nir_intrinsic_ballot: |
| return emit_ballot(ctx, intr); |
| |
| case nir_intrinsic_read_first_invocation: |
| return emit_read_first_invocation(ctx, intr); |
| case nir_intrinsic_read_invocation: |
| case nir_intrinsic_shuffle: |
| case nir_intrinsic_quad_broadcast: |
| return emit_read_invocation(ctx, intr); |
| |
| case nir_intrinsic_quad_swap_horizontal: |
| return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X); |
| case nir_intrinsic_quad_swap_vertical: |
| return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y); |
| case nir_intrinsic_quad_swap_diagonal: |
| return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL); |
| |
| case nir_intrinsic_reduce: |
| case nir_intrinsic_exclusive_scan: |
| return emit_reduce(ctx, intr); |
| |
| case nir_intrinsic_ddx: |
| case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE); |
| case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE); |
| case nir_intrinsic_ddy: |
| case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE); |
| case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE); |
| |
| case nir_intrinsic_load_first_vertex: |
| ctx->mod.feats.extended_command_info = true; |
| return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation", |
| DXIL_INTR_START_VERTEX_LOCATION, nir_type_int); |
| case nir_intrinsic_load_base_instance: |
| ctx->mod.feats.extended_command_info = true; |
| return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation", |
| DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int); |
| |
| case nir_intrinsic_load_num_workgroups: |
| case nir_intrinsic_load_workgroup_size: |
| default: |
| log_nir_instr_unsupported( |
| ctx->logger, "Unimplemented intrinsic instruction", &intr->instr); |
| return false; |
| } |
| } |
| |
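/* Pick a DXIL int or float type for an SSA def based on the type bitsets
 * gathered by nir_gather_types(); defs not classified as float-only fall
 * back to an integer type of the same bit size.
 */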
| static const struct dxil_type * |
| dxil_type_for_const(struct ntd_context *ctx, nir_def *def) |
| { |
| if (BITSET_TEST(ctx->int_types, def->index) || |
| !BITSET_TEST(ctx->float_types, def->index)) |
| return dxil_module_get_int_type(&ctx->mod, def->bit_size); |
| return dxil_module_get_float_type(&ctx->mod, def->bit_size); |
| } |
| |
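/* Constants are stored per component, using the int/float type inferred by
 * dxil_type_for_const().
 */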
| static bool |
| emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const) |
| { |
| for (uint32_t i = 0; i < load_const->def.num_components; ++i) { |
| const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def); |
| store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type)); |
| } |
| return true; |
| } |
| |
| static bool |
| emit_deref(struct ntd_context* ctx, nir_deref_instr* instr) |
| { |
   /* There are two possible reasons we might be walking through derefs:
| * 1. Computing an index to be used for a texture/sampler/image binding, which |
| * can only do array indexing and should compute the indices along the way with |
| * array-of-array sizes. |
| * 2. Storing an index to be used in a GEP for access to a variable. |
| */ |
| nir_variable *var = nir_deref_instr_get_variable(instr); |
| assert(var); |
| |
| bool is_aoa_size = |
| glsl_type_is_sampler(glsl_without_array(var->type)) || |
| glsl_type_is_image(glsl_without_array(var->type)) || |
| glsl_type_is_texture(glsl_without_array(var->type)); |
| |
| if (!is_aoa_size) { |
| /* Just store the values, we'll use these to build a GEP in the load or store */ |
| switch (instr->deref_type) { |
| case nir_deref_type_var: |
| store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size)); |
| return true; |
| case nir_deref_type_array: |
| store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int)); |
| return true; |
| case nir_deref_type_struct: |
| store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32)); |
| return true; |
| default: |
| unreachable("Other deref types not supported"); |
| } |
| } |
| |
| /* In the CL environment, there's nothing to emit. Any references to |
| * derefs will emit the necessary logic to handle scratch/shared GEP addressing |
| */ |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_CL) |
| return true; |
| |
| const struct glsl_type *type = instr->type; |
| const struct dxil_value *binding; |
| unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ? |
| var->data.driver_location : var->data.binding; |
| |
| if (instr->deref_type == nir_deref_type_var) { |
| binding = dxil_module_get_int32_const(&ctx->mod, binding_val); |
| } else { |
| const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32); |
| const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32); |
| if (!base || !offset) |
| return false; |
| |
| if (glsl_type_is_array(instr->type)) { |
| offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset, |
| dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0); |
| if (!offset) |
| return false; |
| } |
| binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0); |
| } |
| |
| if (!binding) |
| return false; |
| |
| /* Haven't finished chasing the deref chain yet, just store the value */ |
| if (glsl_type_is_array(type)) { |
| store_def(ctx, &instr->def, 0, binding); |
| return true; |
| } |
| |
| assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type)); |
| enum dxil_resource_class res_class; |
| if (glsl_type_is_image(type)) |
| res_class = DXIL_RESOURCE_CLASS_UAV; |
| else if (glsl_type_is_sampler(type)) |
| res_class = DXIL_RESOURCE_CLASS_SAMPLER; |
| else |
| res_class = DXIL_RESOURCE_CLASS_SRV; |
| |
| unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ? |
| var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0); |
| const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class, |
| descriptor_set, binding_val, binding, false); |
| if (!handle) |
| return false; |
| |
| store_ssa_def(ctx, &instr->def, 0, handle); |
| return true; |
| } |
| |
| static bool |
| emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond, |
| int true_block, int false_block) |
| { |
| assert(cond); |
| assert(true_block >= 0); |
| assert(false_block >= 0); |
| return dxil_emit_branch(&ctx->mod, cond, true_block, false_block); |
| } |
| |
| static bool |
| emit_branch(struct ntd_context *ctx, int block) |
| { |
| assert(block >= 0); |
| return dxil_emit_branch(&ctx->mod, NULL, block, -1); |
| } |
| |
| static bool |
| emit_jump(struct ntd_context *ctx, nir_jump_instr *instr) |
| { |
| switch (instr->type) { |
| case nir_jump_break: |
| case nir_jump_continue: |
| assert(instr->instr.block->successors[0]); |
| assert(!instr->instr.block->successors[1]); |
| return emit_branch(ctx, instr->instr.block->successors[0]->index); |
| |
| default: |
| unreachable("Unsupported jump type\n"); |
| } |
| } |
| |
| struct phi_block { |
| unsigned num_components; |
| struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS]; |
| }; |
| |
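/* Emit empty phi nodes for each component now; not all incoming values are
 * available yet, so they are filled in later by fixup_phi() once every block
 * has been emitted.
 */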
| static bool |
| emit_phi(struct ntd_context *ctx, nir_phi_instr *instr) |
| { |
| const struct dxil_type *type = NULL; |
| nir_foreach_phi_src(src, instr) { |
| /* All sources have the same type, just use the first one */ |
| type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]); |
| break; |
| } |
| |
| struct phi_block *vphi = ralloc(ctx->phis, struct phi_block); |
| vphi->num_components = instr->def.num_components; |
| |
| for (unsigned i = 0; i < vphi->num_components; ++i) { |
| struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type); |
| if (!phi) |
| return false; |
| store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi)); |
| } |
| _mesa_hash_table_insert(ctx->phis, instr, vphi); |
| return true; |
| } |
| |
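/* Add the incoming (value, predecessor block) pairs to the phi nodes created
 * by emit_phi(), flushing in batches that fit the local scratch arrays.
 */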
| static bool |
| fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr, |
| struct phi_block *vphi) |
| { |
| const struct dxil_value *values[16]; |
| unsigned blocks[16]; |
| for (unsigned i = 0; i < vphi->num_components; ++i) { |
| size_t num_incoming = 0; |
| nir_foreach_phi_src(src, instr) { |
| const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i); |
| values[num_incoming] = val; |
| blocks[num_incoming] = src->pred->index; |
| ++num_incoming; |
| if (num_incoming == ARRAY_SIZE(values)) { |
| if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks, |
| num_incoming)) |
| return false; |
| num_incoming = 0; |
| } |
| } |
| if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values, |
| blocks, num_incoming)) |
| return false; |
| } |
| return true; |
| } |
| |
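/* Fetch up to max_components values from a texture source and return how
 * many components were read; returns 0 on failure.
 */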
| static unsigned |
| get_n_src(struct ntd_context *ctx, const struct dxil_value **values, |
| unsigned max_components, nir_tex_src *src, nir_alu_type type) |
| { |
| unsigned num_components = nir_src_num_components(src->src); |
| unsigned i = 0; |
| |
| assert(num_components <= max_components); |
| |
| for (i = 0; i < num_components; ++i) { |
| values[i] = get_src(ctx, &src->src, i, type); |
| if (!values[i]) |
| return 0; |
| } |
| |
| return num_components; |
| } |
| |
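/* Pad the remaining components of a source array with an undef value so the
 * full argument list of a DXIL texture op can be filled in.
 */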
| #define PAD_SRC(ctx, array, components, undef) \ |
| for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \ |
| array[i] = undef; \ |
| } |
| |
| static const struct dxil_value * |
| emit_sample(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload); |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *args[11] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->min_lod |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload); |
| if (!func) |
| return NULL; |
| |
| assert(params->bias != NULL); |
| |
| const struct dxil_value *args[12] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->bias, params->min_lod |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload); |
| if (!func) |
| return NULL; |
| |
| assert(params->lod_or_sample != NULL); |
| |
| const struct dxil_value *args[11] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->lod_or_sample |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func; |
| enum dxil_intr opcode; |
| |
| func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32); |
| opcode = DXIL_INTR_SAMPLE_CMP; |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *args[12] = { |
| dxil_module_get_int32_const(&ctx->mod, opcode), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->cmp, params->min_lod |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func; |
| enum dxil_intr opcode; |
| |
| func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32); |
| opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO; |
| |
| if (!func) |
| return NULL; |
| |
| const struct dxil_value *args[11] = { |
| dxil_module_get_int32_const(&ctx->mod, opcode), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->cmp |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| ctx->mod.feats.advanced_texture_ops = true; |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload); |
| if (!func) |
| return NULL; |
| |
| assert(params->lod_or_sample != NULL); |
| |
| const struct dxil_value *args[12] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->cmp, params->lod_or_sample |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload); |
| if (!func) |
| return NULL; |
| |
| assert(params->bias != NULL); |
| ctx->mod.feats.sample_cmp_bias_gradient = 1; |
| |
| const struct dxil_value *args[13] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->cmp, params->bias, params->min_lod |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload); |
| if (!func) |
      return NULL;
| |
| const struct dxil_value *args[17] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->dx[0], params->dx[1], params->dx[2], |
| params->dy[0], params->dy[1], params->dy[2], |
| params->min_lod |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload); |
| if (!func) |
      return NULL;
| |
| ctx->mod.feats.sample_cmp_bias_gradient = 1; |
| |
| const struct dxil_value *args[18] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD), |
| params->tex, params->sampler, |
| params->coord[0], params->coord[1], params->coord[2], params->coord[3], |
| params->offset[0], params->offset[1], params->offset[2], |
| params->cmp, |
| params->dx[0], params->dx[1], params->dx[2], |
| params->dy[0], params->dy[1], params->dy[2], |
| params->min_lod |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload); |
| if (!func) |
      return NULL;
| |
| if (!params->lod_or_sample) |
| params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)); |
| |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD), |
| params->tex, |
| params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2], |
| params->offset[0], params->offset[1], params->offset[2] |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32); |
| if (!func) |
      return NULL;
| |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD), |
| params->tex, |
| params->sampler, |
| params->coord[0], |
| params->coord[1], |
| params->coord[2], |
| dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0) |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); |
| } |
| |
| static const struct dxil_value * |
| emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component) |
| { |
| const struct dxil_func *func = dxil_get_function(&ctx->mod, |
| params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload); |
| if (!func) |
      return NULL;
| |
| const struct dxil_value *args[] = { |
| dxil_module_get_int32_const(&ctx->mod, params->cmp ? |
| DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER), |
| params->tex, |
| params->sampler, |
| params->coord[0], |
| params->coord[1], |
| params->coord[2], |
| params->coord[3], |
| params->offset[0], |
| params->offset[1], |
| dxil_module_get_int32_const(&ctx->mod, component), |
| params->cmp |
| }; |
| |
| return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1)); |
| } |
| |
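/* Translate a NIR texture instruction: gather coordinates, offsets, LOD,
 * derivatives and comparison values into texop_parameters, then dispatch to
 * the matching DXIL sample, load, gather or query op and store the returned
 * components.
 */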
| static bool |
| emit_tex(struct ntd_context *ctx, nir_tex_instr *instr) |
| { |
| struct texop_parameters params; |
| memset(¶ms, 0, sizeof(struct texop_parameters)); |
| if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) { |
| params.tex = ctx->srv_handles[instr->texture_index]; |
| params.sampler = ctx->sampler_handles[instr->sampler_index]; |
| } |
| |
| const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); |
| const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32); |
| const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); |
| const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type); |
| |
| unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0; |
| params.overload = get_overload(instr->dest_type, 32); |
| |
| bool lod_is_zero = false; |
| for (unsigned i = 0; i < instr->num_srcs; i++) { |
| nir_alu_type type = nir_tex_instr_src_type(instr, i); |
| |
| switch (instr->src[i].src_type) { |
| case nir_tex_src_coord: |
| coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord), |
| &instr->src[i], type); |
| if (!coord_components) |
| return false; |
| break; |
| |
| case nir_tex_src_offset: |
| offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset), |
| &instr->src[i], nir_type_int); |
| if (!offset_components) |
| return false; |
| |
         /* Dynamic offsets were only allowed with gather until "advanced texture ops" in SM 6.7 */
| if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4) |
| ctx->mod.feats.advanced_texture_ops = true; |
| break; |
| |
| case nir_tex_src_bias: |
| assert(instr->op == nir_texop_txb); |
| assert(nir_src_num_components(instr->src[i].src) == 1); |
| params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float); |
| if (!params.bias) |
| return false; |
| break; |
| |
| case nir_tex_src_lod: |
| assert(nir_src_num_components(instr->src[i].src) == 1); |
| if (instr->op == nir_texop_txf_ms) { |
| assert(nir_src_as_int(instr->src[i].src) == 0); |
| break; |
| } |
| |
| /* Buffers don't have a LOD */ |
| if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) |
| params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type); |
| else |
| params.lod_or_sample = int_undef; |
| if (!params.lod_or_sample) |
| return false; |
| |
| if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f) |
| lod_is_zero = true; |
| break; |
| |
| case nir_tex_src_min_lod: |
| assert(nir_src_num_components(instr->src[i].src) == 1); |
| params.min_lod = get_src(ctx, &instr->src[i].src, 0, type); |
| if (!params.min_lod) |
| return false; |
| break; |
| |
| case nir_tex_src_comparator: |
| assert(nir_src_num_components(instr->src[i].src) == 1); |
| params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float); |
| if (!params.cmp) |
| return false; |
| break; |
| |
| case nir_tex_src_ddx: |
| dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx), |
| &instr->src[i], nir_type_float); |
| if (!dx_components) |
| return false; |
| break; |
| |
| case nir_tex_src_ddy: |
| dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy), |
| &instr->src[i], nir_type_float); |
| if (!dy_components) |
| return false; |
| break; |
| |
| case nir_tex_src_ms_index: |
| params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int); |
| if (!params.lod_or_sample) |
| return false; |
| break; |
| |
| case nir_tex_src_texture_deref: |
| assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN); |
| params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0); |
| break; |
| |
| case nir_tex_src_sampler_deref: |
| assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN); |
| params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0); |
| break; |
| |
| case nir_tex_src_texture_offset: |
| params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV, |
| 0, instr->texture_index, |
| dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, |
| get_src(ctx, &instr->src[i].src, 0, nir_type_uint), |
| dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0), |
| instr->texture_non_uniform); |
| break; |
| |
| case nir_tex_src_sampler_offset: |
| if (nir_tex_instr_need_sampler(instr)) { |
| params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER, |
| 0, instr->sampler_index, |
| dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, |
| get_src(ctx, &instr->src[i].src, 0, nir_type_uint), |
| dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0), |
| instr->sampler_non_uniform); |
| } |
| break; |
| |
| case nir_tex_src_texture_handle: |
| params.tex = create_srv_handle(ctx, instr, &instr->src[i].src); |
| break; |
| |
| case nir_tex_src_sampler_handle: |
| if (nir_tex_instr_need_sampler(instr)) |
| params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src); |
| break; |
| |
| case nir_tex_src_projector: |
| unreachable("Texture projector should have been lowered"); |
| |
| default: |
| fprintf(stderr, "texture source: %d\n", instr->src[i].src_type); |
| unreachable("unknown texture source"); |
| } |
| } |
| |
| assert(params.tex != NULL); |
| assert(instr->op == nir_texop_txf || |
| instr->op == nir_texop_txf_ms || |
| nir_tex_instr_is_query(instr) || |
| params.sampler != NULL); |
| |
| PAD_SRC(ctx, params.coord, coord_components, float_undef); |
| PAD_SRC(ctx, params.offset, offset_components, int_undef); |
| if (!params.min_lod) params.min_lod = float_undef; |
| |
| const struct dxil_value *sample = NULL; |
| switch (instr->op) { |
| case nir_texop_txb: |
| if (params.cmp != NULL && ctx->mod.minor_version >= 8) |
| sample = emit_sample_cmp_bias(ctx, ¶ms); |
| else |
| sample = emit_sample_bias(ctx, ¶ms); |
| break; |
| |
| case nir_texop_tex: |
| if (params.cmp != NULL) { |
| sample = emit_sample_cmp(ctx, ¶ms); |
| break; |
| } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { |
| sample = emit_sample(ctx, ¶ms); |
| break; |
| } |
| params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0); |
| lod_is_zero = true; |
| FALLTHROUGH; |
| case nir_texop_txl: |
| if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) { |
         /* Prior to SM 6.7, dx.op.sampleCmpLevel is not available, so when
          * the LOD is a constant 0.0 drop the LOD argument and use the
          * level-less dx.op.sampleCmpLevelZero instead.
          */
| sample = emit_sample_cmp_level_zero(ctx, ¶ms); |
| } else { |
| if (params.cmp != NULL) |
| sample = emit_sample_cmp_level(ctx, ¶ms); |
| else |
| sample = emit_sample_level(ctx, ¶ms); |
| } |
| break; |
| |
| case nir_texop_txd: |
| PAD_SRC(ctx, params.dx, dx_components, float_undef); |
      PAD_SRC(ctx, params.dy, dy_components, float_undef);
| if (params.cmp != NULL && ctx->mod.minor_version >= 8) |
| sample = emit_sample_cmp_grad(ctx, ¶ms); |
| else |
| sample = emit_sample_grad(ctx, ¶ms); |
| break; |
| |
| case nir_texop_txf: |
| case nir_texop_txf_ms: |
| if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { |
| params.coord[1] = int_undef; |
| sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload); |
| } else { |
| PAD_SRC(ctx, params.coord, coord_components, int_undef); |
| sample = emit_texel_fetch(ctx, ¶ms); |
| } |
| break; |
| |
| case nir_texop_txs: |
| sample = emit_texture_size(ctx, ¶ms); |
| break; |
| |
| case nir_texop_tg4: |
| sample = emit_texture_gather(ctx, ¶ms, instr->component); |
| break; |
| |
| case nir_texop_lod: |
| sample = emit_texture_lod(ctx, ¶ms, true); |
| store_def(ctx, &instr->def, 0, sample); |
| sample = emit_texture_lod(ctx, ¶ms, false); |
| store_def(ctx, &instr->def, 1, sample); |
| return true; |
| |
| case nir_texop_query_levels: { |
| params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32); |
| sample = emit_texture_size(ctx, ¶ms); |
| const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3); |
| store_def(ctx, &instr->def, 0, retval); |
| return true; |
| } |
| |
| case nir_texop_texture_samples: { |
| params.lod_or_sample = int_undef; |
| sample = emit_texture_size(ctx, ¶ms); |
| const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3); |
| store_def(ctx, &instr->def, 0, retval); |
| return true; |
| } |
| |
| default: |
| fprintf(stderr, "texture op: %d\n", instr->op); |
| unreachable("unknown texture op"); |
| } |
| |
| if (!sample) |
| return false; |
| |
| for (unsigned i = 0; i < instr->def.num_components; ++i) { |
| const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i); |
| store_def(ctx, &instr->def, i, retval); |
| } |
| |
| return true; |
| } |
| |
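/* Materialize NIR undefs as 32-bit zero constants so that later uses read a
 * defined value.
 */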
| static bool |
| emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef) |
| { |
| for (unsigned i = 0; i < undef->def.num_components; ++i) |
| store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0)); |
| return true; |
| } |
| |
| static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr) |
| { |
| switch (instr->type) { |
| case nir_instr_type_alu: |
| return emit_alu(ctx, nir_instr_as_alu(instr)); |
| case nir_instr_type_intrinsic: |
| return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); |
| case nir_instr_type_load_const: |
| return emit_load_const(ctx, nir_instr_as_load_const(instr)); |
| case nir_instr_type_deref: |
| return emit_deref(ctx, nir_instr_as_deref(instr)); |
| case nir_instr_type_jump: |
| return emit_jump(ctx, nir_instr_as_jump(instr)); |
| case nir_instr_type_phi: |
| return emit_phi(ctx, nir_instr_as_phi(instr)); |
| case nir_instr_type_tex: |
| return emit_tex(ctx, nir_instr_as_tex(instr)); |
| case nir_instr_type_undef: |
| return emit_undefined(ctx, nir_instr_as_undef(instr)); |
| default: |
| log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type", |
| instr); |
| return false; |
| } |
| } |
| |
| |
| static bool |
| emit_block(struct ntd_context *ctx, struct nir_block *block) |
| { |
| assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids); |
| ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block; |
| |
| nir_foreach_instr(instr, block) { |
| TRACE_CONVERSION(instr); |
| |
| if (!emit_instr(ctx, instr)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| static bool |
| emit_cf_list(struct ntd_context *ctx, struct exec_list *list); |
| |
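/* Emit the conditional branch for a nir_if, then the then- and else-lists,
 * adding an explicit branch to the common successor when a list does not
 * already end in a jump.
 */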
| static bool |
| emit_if(struct ntd_context *ctx, struct nir_if *if_stmt) |
| { |
| assert(nir_src_num_components(if_stmt->condition) == 1); |
| const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0, |
| nir_type_bool); |
| if (!cond) |
| return false; |
| |
| /* prepare blocks */ |
| nir_block *then_block = nir_if_first_then_block(if_stmt); |
| assert(nir_if_last_then_block(if_stmt)->successors[0]); |
| assert(!nir_if_last_then_block(if_stmt)->successors[1]); |
| int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index; |
| |
| nir_block *else_block = NULL; |
| int else_succ = -1; |
| if (!exec_list_is_empty(&if_stmt->else_list)) { |
| else_block = nir_if_first_else_block(if_stmt); |
| assert(nir_if_last_else_block(if_stmt)->successors[0]); |
| assert(!nir_if_last_else_block(if_stmt)->successors[1]); |
| else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index; |
| } |
| |
| if (!emit_cond_branch(ctx, cond, then_block->index, |
| else_block ? else_block->index : then_succ)) |
| return false; |
| |
| /* handle then-block */ |
| if (!emit_cf_list(ctx, &if_stmt->then_list) || |
| (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) && |
| !emit_branch(ctx, then_succ))) |
| return false; |
| |
| if (else_block) { |
| /* handle else-block */ |
| if (!emit_cf_list(ctx, &if_stmt->else_list) || |
| (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) && |
| !emit_branch(ctx, else_succ))) |
| return false; |
| } |
| |
| return true; |
| } |
| |
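/* Loops become explicit branches: jump to the loop header, emit the body,
 * and add the implicit back-edge when the last block does not already end
 * in a jump of its own.
 */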
| static bool |
| emit_loop(struct ntd_context *ctx, nir_loop *loop) |
| { |
| assert(!nir_loop_has_continue_construct(loop)); |
| nir_block *first_block = nir_loop_first_block(loop); |
| nir_block *last_block = nir_loop_last_block(loop); |
| |
| assert(last_block->successors[0]); |
| assert(!last_block->successors[1]); |
| |
| if (!emit_branch(ctx, first_block->index)) |
| return false; |
| |
| if (!emit_cf_list(ctx, &loop->body)) |
| return false; |
| |
| /* If the loop's last block doesn't explicitly jump somewhere, then there's |
| * an implicit continue that should take it back to the first loop block |
| */ |
| nir_instr *last_instr = nir_block_last_instr(last_block); |
| if ((!last_instr || last_instr->type != nir_instr_type_jump) && |
| !emit_branch(ctx, first_block->index)) |
| return false; |
| |
| return true; |
| } |
| |
| static bool |
| emit_cf_list(struct ntd_context *ctx, struct exec_list *list) |
| { |
| foreach_list_typed(nir_cf_node, node, node, list) { |
| switch (node->type) { |
| case nir_cf_node_block: |
| if (!emit_block(ctx, nir_cf_node_as_block(node))) |
| return false; |
| break; |
| |
| case nir_cf_node_if: |
| if (!emit_if(ctx, nir_cf_node_as_if(node))) |
| return false; |
| break; |
| |
| case nir_cf_node_loop: |
| if (!emit_loop(ctx, nir_cf_node_as_loop(node))) |
| return false; |
| break; |
| |
| default: |
| unreachable("unsupported cf-list node"); |
| break; |
| } |
| } |
| return true; |
| } |
| |
| static void |
| insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var) |
| { |
| nir_foreach_variable_in_list(var, var_list) { |
| if (var->data.binding > new_var->data.binding) { |
| exec_node_insert_node_before(&var->node, &new_var->node); |
| return; |
| } |
| } |
| exec_list_push_tail(var_list, &new_var->node); |
| } |
| |
| |
| static void |
| sort_uniforms_by_binding_and_remove_structs(nir_shader *s) |
| { |
| struct exec_list new_list; |
| exec_list_make_empty(&new_list); |
| |
| nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) { |
| exec_node_remove(&var->node); |
| const struct glsl_type *type = glsl_without_array(var->type); |
| if (!glsl_type_is_struct(type)) |
| insert_sorted_by_binding(&new_list, var); |
| } |
| exec_list_append(&s->variables, &new_list); |
| } |
| |
| static bool |
| emit_cbvs(struct ntd_context *ctx) |
| { |
| if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) { |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) { |
| if (!emit_ubo_var(ctx, var)) |
| return false; |
| } |
| } else { |
| if (ctx->shader->info.num_ubos) { |
| const unsigned ubo_size = 16384 /*4096 vec4's*/; |
| uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0; |
| bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo; |
| bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed; |
| unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base - |
| (has_state_vars ? 1 : 0); |
| |
| if (has_ubo0 && |
| !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms")) |
| return false; |
| if (ubo1_array_size && |
| !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos")) |
| return false; |
| if (has_state_vars && |
| !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars")) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
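/* Emit one alloca per function-temp variable and record the resulting
 * pointers in ctx->scratchvars, indexed by driver_location.
 */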
| static bool |
| emit_scratch(struct ntd_context *ctx, nir_function_impl *impl) |
| { |
| uint32_t index = 0; |
| nir_foreach_function_temp_variable(var, impl) |
| var->data.driver_location = index++; |
| |
| if (ctx->scratchvars) |
| ralloc_free((void *)ctx->scratchvars); |
| |
| ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index); |
| |
| nir_foreach_function_temp_variable(var, impl) { |
| const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type); |
| const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1); |
| const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16); |
| if (!ptr) |
| return false; |
| |
| ctx->scratchvars[var->data.driver_location] = ptr; |
| } |
| |
| return true; |
| } |
| |
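/* Emit one DXIL function definition for a NIR function: set up per-function
 * bookkeeping (defs, type bitsets, phi table), emit scratch allocas and
 * statically-indexed resource handles, walk the control-flow list, and patch
 * up phis before terminating with a void return.
 */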
| static bool |
| emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl) |
| { |
| assert(func->num_params == 0); |
| nir_metadata_require(impl, nir_metadata_block_index); |
| |
| const char *attr_keys[2] = { NULL }; |
| const char *attr_values[2] = { NULL }; |
| if (ctx->shader->info.float_controls_execution_mode & |
| (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32)) |
| attr_keys[0] = "fp32-denorm-mode"; |
| if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) |
| attr_values[0] = "ftz"; |
| else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) |
| attr_values[0] = "preserve"; |
| |
| const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod); |
| const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0); |
| struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values); |
| if (!func_def) |
| return false; |
| |
| if (func->is_entrypoint) |
| ctx->main_func_def = func_def; |
| else if (func == ctx->tess_ctrl_patch_constant_func) |
| ctx->tess_ctrl_patch_constant_func_def = func_def; |
| |
| ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc); |
| ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc)); |
| ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc)); |
| if (!ctx->defs || !ctx->float_types || !ctx->int_types) |
| return false; |
| ctx->num_defs = impl->ssa_alloc; |
| |
| ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); |
| if (!ctx->phis) |
| return false; |
| |
| nir_gather_types(impl, ctx->float_types, ctx->int_types); |
| |
| if (!emit_scratch(ctx, impl)) |
| return false; |
| |
| if (!emit_static_indexing_handles(ctx)) |
| return false; |
| |
| if (!emit_cf_list(ctx, &impl->body)) |
| return false; |
| |
| hash_table_foreach(ctx->phis, entry) { |
| if (!fixup_phi(ctx, (nir_phi_instr *)entry->key, |
| (struct phi_block *)entry->data)) |
| return false; |
| } |
| |
| if (!dxil_emit_ret_void(&ctx->mod)) |
| return false; |
| |
| ralloc_free(ctx->defs); |
| ctx->defs = NULL; |
| _mesa_hash_table_destroy(ctx->phis, NULL); |
| return true; |
| } |
| |
| static bool |
| emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts) |
| { |
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove stale
    * struct uniforms; they have been lowered but might not have been removed. */
| sort_uniforms_by_binding_and_remove_structs(ctx->shader); |
| |
| /* CBVs */ |
| if (!emit_cbvs(ctx)) |
| return false; |
| |
| /* Samplers */ |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { |
| unsigned count = glsl_type_get_sampler_count(var->type); |
| assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type))); |
| if (count > 0 && !emit_sampler(ctx, var, count)) |
| return false; |
| } |
| |
| /* SRVs */ |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { |
| unsigned count = glsl_type_get_texture_count(var->type); |
| assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type))); |
| if (count > 0 && !emit_srv(ctx, var, count)) |
| return false; |
| } |
| |
| /* Handle read-only SSBOs as SRVs */ |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { |
| if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) { |
| unsigned count = 1; |
| if (glsl_type_is_array(var->type)) |
| count = glsl_get_length(var->type); |
| if (!emit_srv(ctx, var, count)) |
| return false; |
| } |
| } |
| } |
| |
| if (!emit_shared_vars(ctx)) |
| return false; |
| if (!emit_global_consts(ctx)) |
| return false; |
| |
| /* UAVs */ |
| if (ctx->shader->info.stage == MESA_SHADER_KERNEL) { |
| if (!emit_globals(ctx, opts->num_kernel_globals)) |
| return false; |
| |
| } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { |
| /* Handle read/write SSBOs as UAVs */ |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { |
| if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) { |
| unsigned count = 1; |
| if (glsl_type_is_array(var->type)) |
| count = glsl_get_length(var->type); |
| if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set, |
| count, DXIL_COMP_TYPE_INVALID, 1, |
| DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name)) |
| return false; |
| |
| } |
| } |
| } else { |
| for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) { |
| char name[64]; |
| snprintf(name, sizeof(name), "__ssbo%d", i); |
| if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1, |
| DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name)) |
| return false; |
| } |
| /* To work around a WARP bug, bind these descriptors a second time in descriptor |
| * space 2. Space 0 will be used for static indexing, while space 2 will be used |
| * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while |
| * space 2 will be a single array. |
| */ |
| if (ctx->shader->info.num_ssbos && |
| !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1, |
| DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic")) |
| return false; |
| } |
| |
| nir_foreach_image_variable(var, ctx->shader) { |
| if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type))) |
| return false; |
| } |
| |
| ctx->mod.info.has_per_sample_input = |
| BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || |
| ctx->shader->info.fs.uses_sample_shading || |
| ctx->shader->info.fs.uses_sample_qualifier; |
| if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) { |
| if (var->data.sample) { |
| ctx->mod.info.has_per_sample_input = true; |
| break; |
| } |
| } |
| } |
| |
| /* From the Vulkan spec 1.3.238, section 15.8: |
| * When Sample Shading is enabled, the x and y components of FragCoord reflect the location |
| * of one of the samples corresponding to the shader invocation. |
| * |
| * In other words, if the fragment shader is executing per-sample, then the position variable |
    * should always be per-sample.
| * |
| * Also: |
| * The Centroid interpolation decoration is ignored, but allowed, on FragCoord. |
| */ |
| if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { |
| nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS); |
| if (pos_var) { |
| if (ctx->mod.info.has_per_sample_input) |
| pos_var->data.sample = true; |
| pos_var->data.centroid = false; |
| } |
| } |
| |
| unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ? |
| ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size; |
| preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size); |
| |
| nir_foreach_function_with_impl(func, impl, ctx->shader) { |
| if (!emit_function(ctx, func, impl)) |
| return false; |
| } |
| |
| if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) { |
| if (var->data.location == FRAG_RESULT_STENCIL) { |
| ctx->mod.feats.stencil_ref = true; |
| } |
| } |
| } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX || |
| ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) { |
| if (ctx->shader->info.outputs_written & |
| (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER)) |
| ctx->mod.feats.array_layer_from_vs_or_ds = true; |
| } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY || |
| ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) { |
| if (ctx->shader->info.inputs_read & |
| (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER)) |
| ctx->mod.feats.array_layer_from_vs_or_ds = true; |
| } |
| |
| if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) { |
| ctx->logger->log(ctx->logger->priv, |
| "Shader uses 16bit, which requires shader model 6.2, but 6.2 is unsupported\n"); |
| return false; |
| } |
| |
| return emit_metadata(ctx) && |
| dxil_emit_module(&ctx->mod); |
| } |
| |
| static unsigned int |
| get_dxil_shader_kind(struct nir_shader *s) |
| { |
| switch (s->info.stage) { |
| case MESA_SHADER_VERTEX: |
| return DXIL_VERTEX_SHADER; |
| case MESA_SHADER_TESS_CTRL: |
| return DXIL_HULL_SHADER; |
| case MESA_SHADER_TESS_EVAL: |
| return DXIL_DOMAIN_SHADER; |
| case MESA_SHADER_GEOMETRY: |
| return DXIL_GEOMETRY_SHADER; |
| case MESA_SHADER_FRAGMENT: |
| return DXIL_PIXEL_SHADER; |
| case MESA_SHADER_KERNEL: |
| case MESA_SHADER_COMPUTE: |
| return DXIL_COMPUTE_SHADER; |
| default: |
| unreachable("unknown shader stage in nir_to_dxil"); |
| return DXIL_COMPUTE_SHADER; |
| } |
| } |
| |
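/* Callback for nir_lower_bit_size: ALU ops (other than conversions and
 * vec/mov) whose sources are narrower than the minimum supported bit size
 * get lowered up to that size.
 */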
| static unsigned |
| lower_bit_size_callback(const nir_instr* instr, void *data) |
| { |
| if (instr->type != nir_instr_type_alu) |
| return 0; |
| nir_alu_instr *alu = nir_instr_as_alu(instr); |
| |
| if (nir_op_infos[alu->op].is_conversion) |
| return 0; |
| |
| if (nir_op_is_vec_or_mov(alu->op)) |
| return 0; |
| |
| unsigned num_inputs = nir_op_infos[alu->op].num_inputs; |
| const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data; |
| unsigned min_bit_size = opts->lower_int16 ? 32 : 16; |
| |
| unsigned ret = 0; |
| for (unsigned i = 0; i < num_inputs; i++) { |
| unsigned bit_size = nir_src_bit_size(alu->src[i].src); |
| if (bit_size != 1 && bit_size < min_bit_size) |
| ret = min_bit_size; |
| } |
| |
| return ret; |
| } |
| |
| static bool |
| vectorize_filter( |
| unsigned align_mul, |
| unsigned align_offset, |
| unsigned bit_size, |
| unsigned num_components, |
| int64_t hole_size, |
| nir_intrinsic_instr *low, nir_intrinsic_instr *high, |
| void *data) |
| { |
| return hole_size <= 0 && util_is_power_of_two_nonzero(num_components); |
| } |
| |
| struct lower_mem_bit_sizes_data { |
| const nir_shader_compiler_options *nir_options; |
| const struct nir_to_dxil_options *dxil_options; |
| }; |
| |
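/* Decide the bit size, component count and alignment to use for UBO and
 * SSBO accesses, clamped to what the backend can emit given the 16-bit
 * lowering options and the access alignment.
 */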
| static nir_mem_access_size_align |
| lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin, |
| uint8_t bytes, |
| uint8_t bit_size_in, |
| uint32_t align_mul, |
| uint32_t align_offset, |
| bool offset_is_const, |
| enum gl_access_qualifier access, |
| const void *cb_data) |
| { |
| const struct lower_mem_bit_sizes_data *data = cb_data; |
| unsigned max_bit_size = 32; |
| unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16; |
| unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in)); |
| if (intrin == nir_intrinsic_load_ubo) { |
| /* UBO loads can be done at whatever (supported) bit size, but require 16 byte |
| * alignment and can load up to 16 bytes per instruction. However this pass requires |
| * loading 16 bytes of data to get 16-byte alignment. We're going to run lower_ubo_vec4 |
| * which can deal with unaligned vec4s, so for this pass let's just deal with bit size |
| * and total size restrictions. */ |
| return (nir_mem_access_size_align) { |
| .align = closest_bit_size / 8, |
| .bit_size = closest_bit_size, |
| .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size), |
| .shift = nir_mem_access_shift_method_scalar, |
| }; |
| } |
| |
| assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo); |
| uint32_t align = nir_combined_align(align_mul, align_offset); |
| if (align < min_bit_size / 8) { |
| /* Unaligned load/store, use the minimum bit size, up to 4 components */ |
| unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ? |
| DIV_ROUND_UP(bytes * 8, min_bit_size) : |
| (32 / min_bit_size); |
| return (nir_mem_access_size_align) { |
| .align = min_bit_size / 8, |
| .bit_size = min_bit_size, |
| .num_components = MIN2(4, ideal_num_components), |
| .shift = nir_mem_access_shift_method_scalar, |
| }; |
| } |
| |
| /* Increase/decrease bit size to try to get closer to the requested byte size/align */ |
| unsigned bit_size = closest_bit_size; |
| unsigned target = MIN2(bytes, align); |
| while (target < bit_size / 8 && bit_size > min_bit_size) |
| bit_size /= 2; |
| while (target > bit_size / 8 * 4 && bit_size < max_bit_size) |
| bit_size *= 2; |
| |
| /* This is the best we can do */ |
| unsigned num_components = intrin == nir_intrinsic_load_ssbo ? |
| DIV_ROUND_UP(bytes * 8, bit_size) : |
| MAX2(1, (bytes * 8 / bit_size)); |
| return (nir_mem_access_size_align) { |
| .align = bit_size / 8, |
| .bit_size = bit_size, |
| .num_components = MIN2(4, num_components), |
| .shift = nir_mem_access_shift_method_scalar, |
| }; |
| } |
| |
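/* Run the optimization/lowering loop until no pass reports progress, then
 * apply late algebraic optimizations and turn remaining undefs into zero.
 */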
| static void |
| optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts) |
| { |
| bool progress; |
| do { |
| progress = false; |
| NIR_PASS_V(s, nir_lower_vars_to_ssa); |
| NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4); |
| NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); |
| NIR_PASS(progress, s, nir_copy_prop); |
| NIR_PASS(progress, s, nir_opt_copy_prop_vars); |
| NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts); |
| NIR_PASS(progress, s, dxil_nir_lower_8bit_conv); |
| if (opts->lower_int16) |
| NIR_PASS(progress, s, dxil_nir_lower_16bit_conv); |
| NIR_PASS(progress, s, nir_opt_remove_phis); |
| NIR_PASS(progress, s, nir_opt_dce); |
| NIR_PASS(progress, s, nir_opt_if, |
| nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis); |
| NIR_PASS(progress, s, nir_opt_dead_cf); |
| NIR_PASS(progress, s, nir_opt_cse); |
| NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); |
| NIR_PASS(progress, s, nir_opt_algebraic); |
| NIR_PASS(progress, s, dxil_nir_algebraic); |
| if (s->options->lower_int64_options) |
| NIR_PASS(progress, s, nir_lower_int64); |
| NIR_PASS(progress, s, nir_lower_alu); |
| NIR_PASS(progress, s, nir_opt_constant_folding); |
| NIR_PASS(progress, s, nir_opt_undef); |
| NIR_PASS(progress, s, nir_opt_deref); |
| NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16); |
| NIR_PASS(progress, s, nir_lower_64bit_phis); |
| NIR_PASS(progress, s, nir_lower_phis_to_scalar, true); |
| NIR_PASS(progress, s, nir_opt_loop_unroll); |
| NIR_PASS(progress, s, nir_lower_pack); |
| NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses); |
| NIR_PASS_V(s, nir_lower_system_values); |
| } while (progress); |
| |
| do { |
| progress = false; |
| NIR_PASS(progress, s, nir_opt_algebraic_late); |
| } while (progress); |
| |
| NIR_PASS_V(s, nir_lower_undef_to_zero); |
| } |
| |
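/* Fill in the validation-state (PSV) data that accompanies the module:
 * resource list, expected wave lane counts, signature element counts and
 * the per-shader-kind fields.
 */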
static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
| { |
| unsigned resource_element_size = ctx->mod.minor_validator >= 6 ? |
| sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); |
| state->num_resources = ctx->resources.size / resource_element_size; |
| state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data; |
| if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) { |
| state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size; |
| state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size; |
| } else { |
| state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX; |
| } |
| state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind; |
| state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id; |
| state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs; |
| state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs; |
| state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts; |
| |
| switch (ctx->mod.shader_kind) { |
| case DXIL_VERTEX_SHADER: |
| state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position; |
| break; |
| case DXIL_PIXEL_SHADER: |
| /* TODO: handle depth outputs */ |
| state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth; |
| state->state.psv1.psv0.ps.sample_frequency = |
| ctx->mod.info.has_per_sample_input; |
| break; |
| case DXIL_COMPUTE_SHADER: |
| state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1); |
| state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1); |
| state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1); |
| break; |
| case DXIL_GEOMETRY_SHADER: |
| state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out; |
| state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive); |
| state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive); |
| state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1); |
| state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position; |
| break; |
| case DXIL_HULL_SHADER: |
| state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count; |
| state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out; |
| state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode); |
| state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info); |
| state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts; |
| break; |
| case DXIL_DOMAIN_SHADER: |
| state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out; |
| state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode); |
| state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position; |
| state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts; |
| break; |
| default: |
| assert(0 && "Shader type not (yet) supported"); |
| } |
| } |
| |
| static nir_variable * |
| add_sysvalue(struct ntd_context *ctx, |
| uint8_t value, char *name, |
| int driver_location) |
| { |
| |
| nir_variable *var = rzalloc(ctx->shader, nir_variable); |
| if (!var) |
| return NULL; |
| var->data.driver_location = driver_location; |
| var->data.location = value; |
| var->type = glsl_uint_type(); |
| var->name = name; |
| var->data.mode = nir_var_system_value; |
| var->data.interpolation = INTERP_MODE_FLAT; |
| return var; |
| } |
| |
| static bool |
| append_input_or_sysvalue(struct ntd_context *ctx, |
| int input_loc, int sv_slot, |
| char *name, int driver_location) |
| { |
| if (input_loc >= 0) { |
      /* Check whether an input variable is available that corresponds
       * to the sysvalue */
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { |
| if (var->data.location == input_loc) { |
| ctx->system_value[sv_slot] = var; |
| return true; |
| } |
| } |
| } |
| |
| ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location); |
| if (!ctx->system_value[sv_slot]) |
| return false; |
| |
| nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]); |
| return true; |
| } |
| |
| struct sysvalue_name { |
| gl_system_value value; |
| int slot; |
| char *name; |
| gl_shader_stage only_in_shader; |
| } possible_sysvalues[] = { |
| {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE}, |
| {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE}, |
| {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE}, |
| {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY}, |
| {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE}, |
| }; |
| |
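/* Ensure every system value read by the shader is backed by a variable,
 * reusing a matching input when one exists and otherwise appending a new
 * system-value variable at the next free driver location.
 */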
| static bool |
| allocate_sysvalues(struct ntd_context *ctx) |
| { |
| unsigned driver_location = 0; |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) |
| driver_location = MAX2(driver_location, var->data.driver_location + 1); |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value) |
| driver_location = MAX2(driver_location, var->data.driver_location + 1); |
| |
| if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT && |
| !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) { |
| bool need_sample_id = ctx->shader->info.fs.uses_sample_shading; |
| |
| /* "var->data.sample = true" sometimes just mean, "I want per-sample |
| * shading", which explains why we can end up with vars having flat |
| * interpolation with the per-sample bit set. If there's only such |
| * type of variables, we need to tell DXIL that we read SV_SampleIndex |
| * to make DXIL validation happy. |
| */ |
| nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { |
| bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT; |
| /* If there's an input that will actually force sample-rate shading, then we don't |
| * need SV_SampleIndex. */ |
| if (var->data.sample && var_can_be_sample_rate) { |
| need_sample_id = false; |
| break; |
| } |
| /* If there's an input that wants to be sample-rate, but can't be, then we might |
| * need SV_SampleIndex. */ |
| if (var->data.sample && !var_can_be_sample_rate) |
| need_sample_id = true; |
| } |
| |
| if (need_sample_id) |
| BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID); |
| } |
| |
| for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) { |
| struct sysvalue_name *info = &possible_sysvalues[i]; |
| if (info->only_in_shader != MESA_SHADER_NONE && |
| info->only_in_shader != ctx->shader->info.stage) |
| continue; |
| if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) { |
| if (!append_input_or_sysvalue(ctx, info->slot, |
| info->value, info->name, |
| driver_location++)) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
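/* I/O size callback for nir_lower_io: count slots in vec4 units. */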
| static int |
| type_size_vec4(const struct glsl_type *type, bool bindless) |
| { |
| return glsl_count_attribute_slots(type, false); |
| } |
| |
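/* Range of DXIL validator versions and shader models this backend knows how
 * to emit for. */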
| static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4; |
| static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8; |
| static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0; |
| static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8; |
| |
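/* Main entry point: translate the NIR shader `s` into a DXIL container and
 * write it to `blob` (initialized here via blob_init()). `logger` may be
 * NULL, in which case a default logger is used. A minimal usage sketch, with
 * the option values picked purely for illustration:
 *
 *    struct nir_to_dxil_options opts = {
 *       .shader_model_max = SHADER_MODEL_6_0,
 *       .validator_version_max = DXIL_VALIDATOR_1_4,
 *    };
 *    struct blob dxil_blob;
 *    if (nir_to_dxil(nir, &opts, NULL, &dxil_blob)) {
 *       ... consume dxil_blob.data / dxil_blob.size ...
 *    }
 */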
| bool |
| nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts, |
| const struct dxil_logger *logger, struct blob *blob) |
| { |
| assert(opts); |
| bool retval = true; |
| debug_dxil = (int)debug_get_option_debug_dxil(); |
| blob_init(blob); |
| |
| if (opts->shader_model_max < dxil_min_shader_model) { |
| debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n", |
| dxil_min_shader_model >> 16, |
| dxil_min_shader_model & 0xffff); |
| return false; |
| } |
| |
| if (opts->shader_model_max > dxil_max_shader_model) { |
      debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
| dxil_max_shader_model >> 16, |
| dxil_max_shader_model & 0xffff); |
| return false; |
| } |
| |
| if (opts->validator_version_max != NO_DXIL_VALIDATION && |
| opts->validator_version_max < dxil_validator_min_capable_version) { |
      debug_printf("D3D12: Invalid validator version %d.%d, must be %d.%d or greater\n",
                   opts->validator_version_max >> 16,
                   opts->validator_version_max & 0xffff,
                   dxil_validator_min_capable_version >> 16,
                   dxil_validator_min_capable_version & 0xffff);
| return false; |
| } |
| |
   /* If validation is disabled, or the requested validator is newer than the
    * newest one we know how to target, write the blob as if it were going to
    * be validated by the newest validator we understand.
    */
| uint32_t validator_version = |
| opts->validator_version_max == NO_DXIL_VALIDATION || |
| opts->validator_version_max > dxil_validator_max_capable_version ? |
| dxil_validator_max_capable_version : opts->validator_version_max; |
| |
| struct ntd_context *ctx = calloc(1, sizeof(*ctx)); |
| if (!ctx) |
| return false; |
| |
| ctx->opts = opts; |
| ctx->shader = s; |
| ctx->logger = logger ? logger : &default_logger; |
| |
| ctx->ralloc_ctx = ralloc_context(NULL); |
| if (!ctx->ralloc_ctx) { |
| retval = false; |
| goto out; |
| } |
| |
| util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx); |
| util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx); |
| util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx); |
| util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx); |
| util_dynarray_init(&ctx->resources, ctx->ralloc_ctx); |
| dxil_module_init(&ctx->mod, ctx->ralloc_ctx); |
| ctx->mod.shader_kind = get_dxil_shader_kind(s); |
| ctx->mod.major_version = 6; |
| /* Use the highest shader model that's supported and can be validated */ |
| ctx->mod.minor_version = |
| MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff); |
| ctx->mod.major_validator = validator_version >> 16; |
| ctx->mod.minor_validator = validator_version & 0xffff; |
| |
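   /* Force the builtins covered by these masks (primitive ID, viewport/layer
    * indices, stencil-ref and sample-mask outputs) to be uint-typed, as DXIL
    * expects, fixing up any that were declared otherwise. */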
| if (s->info.stage <= MESA_SHADER_FRAGMENT) { |
| uint64_t in_mask = |
| s->info.stage == MESA_SHADER_VERTEX ? |
| 0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER); |
| uint64_t out_mask = |
| s->info.stage == MESA_SHADER_FRAGMENT ? |
| ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) : |
| (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER); |
| |
| NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask); |
| } |
| |
| NIR_PASS_V(s, dxil_nir_lower_fquantize2f16); |
| NIR_PASS_V(s, nir_lower_frexp); |
| NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true); |
| NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); |
| NIR_PASS_V(s, dxil_nir_ensure_position_writes); |
| NIR_PASS_V(s, dxil_nir_lower_system_values); |
| NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL); |
| |
| /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads |
| * might be too opaque for the pass to see that they're next to each other. */ |
| optimize_nir(s, opts); |
| |
   /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment, which lets us do better
    * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here;
    * we'll address them with lower_mem_access_bit_sizes. */
| nir_load_store_vectorize_options vectorize_opts = { |
| .callback = vectorize_filter, |
| .modes = nir_var_mem_ubo | nir_var_mem_ssbo, |
| }; |
| NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts); |
| |
| /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for |
| * a single load/store op. */ |
| struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts }; |
| nir_lower_mem_access_bit_sizes_options mem_size_options = { |
| .modes = nir_var_mem_ubo | nir_var_mem_ssbo, |
| .callback = lower_mem_access_bit_sizes_cb, |
| .may_lower_unaligned_stores_to_atomics = true, |
| .cb_data = &mem_size_data |
| }; |
| NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options); |
| |
   /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This pass also handles selecting
    * sub-components from the load and splitting loads that straddle a vec4 boundary. */
| NIR_PASS_V(s, nir_lower_ubo_vec4); |
| |
| if (opts->shader_model_max < SHADER_MODEL_6_6) { |
      /* In a later pass, load_helper_invocation will be lowered to a sample-mask-based
       * fallback, so both load_helper_invocation and is_helper_invocation end up emulated.
       */
| NIR_PASS_V(s, nir_lower_is_helper_invocation); |
| } |
| |
| if (ctx->mod.shader_kind == DXIL_HULL_SHADER) |
| NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func); |
| |
| if (ctx->mod.shader_kind == DXIL_HULL_SHADER || |
| ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) { |
| /* Make sure any derefs are gone after lower_io before updating tess level vars */ |
| NIR_PASS_V(s, nir_opt_dce); |
| NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain); |
| } |
| |
| optimize_nir(s, opts); |
| |
| NIR_PASS_V(s, nir_remove_dead_variables, |
| nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL); |
| |
   if (!allocate_sysvalues(ctx)) {
      retval = false;
      goto out;
   }
| |
| NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value); |
| NIR_PASS_V(s, nir_opt_dce); |
| |
| /* This needs to be after any copy prop is done to prevent these movs from being erased */ |
| NIR_PASS_V(s, dxil_nir_move_consts); |
| NIR_PASS_V(s, nir_opt_dce); |
| |
| NIR_PASS_V(s, dxil_nir_guess_image_formats); |
| |
| if (debug_dxil & DXIL_DEBUG_VERBOSE) |
| nir_print_shader(s, stderr); |
| |
| if (!emit_module(ctx, opts)) { |
| debug_printf("D3D12: dxil_container_add_module failed\n"); |
| retval = false; |
| goto out; |
| } |
| |
| if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) { |
| struct dxil_dumper *dumper = dxil_dump_create(); |
| dxil_dump_module(dumper, &ctx->mod); |
| fprintf(stderr, "\n"); |
| dxil_dump_buf_to_file(dumper, stderr); |
| fprintf(stderr, "\n\n"); |
| dxil_dump_free(dumper); |
| } |
| |
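   /* Assemble the final DXIL container: feature flags, the input/output (and,
    * for tessellation stages, patch-constant) signatures, state-validation
    * data, and the module bitcode itself. */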
| struct dxil_container container; |
| dxil_container_init(&container); |
| /* Native low precision disables min-precision */ |
| if (ctx->mod.feats.native_low_precision) |
| ctx->mod.feats.min_precision = false; |
| if (!dxil_container_add_features(&container, &ctx->mod.feats)) { |
| debug_printf("D3D12: dxil_container_add_features failed\n"); |
| retval = false; |
| goto out; |
| } |
| |
| if (!dxil_container_add_io_signature(&container, |
| DXIL_ISG1, |
| ctx->mod.num_sig_inputs, |
| ctx->mod.inputs, |
| ctx->mod.minor_validator >= 7)) { |
| debug_printf("D3D12: failed to write input signature\n"); |
| retval = false; |
| goto out; |
| } |
| |
| if (!dxil_container_add_io_signature(&container, |
| DXIL_OSG1, |
| ctx->mod.num_sig_outputs, |
| ctx->mod.outputs, |
| ctx->mod.minor_validator >= 7)) { |
| debug_printf("D3D12: failed to write output signature\n"); |
| retval = false; |
| goto out; |
| } |
| |
| if ((ctx->mod.shader_kind == DXIL_HULL_SHADER || |
| ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) && |
| !dxil_container_add_io_signature(&container, |
| DXIL_PSG1, |
| ctx->mod.num_sig_patch_consts, |
| ctx->mod.patch_consts, |
| ctx->mod.minor_validator >= 7)) { |
| debug_printf("D3D12: failed to write patch constant signature\n"); |
| retval = false; |
| goto out; |
| } |
| |
| struct dxil_validation_state validation_state; |
| memset(&validation_state, 0, sizeof(validation_state)); |
| dxil_fill_validation_state(ctx, &validation_state); |
| |
   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
| debug_printf("D3D12: failed to write state-validation\n"); |
| retval = false; |
| goto out; |
| } |
| |
| if (!dxil_container_add_module(&container, &ctx->mod)) { |
| debug_printf("D3D12: failed to write module\n"); |
| retval = false; |
| goto out; |
| } |
| |
| if (!dxil_container_write(&container, blob)) { |
| debug_printf("D3D12: dxil_container_write failed\n"); |
| retval = false; |
| goto out; |
| } |
| dxil_container_finish(&container); |
| |
| if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) { |
| static int shader_id = 0; |
| char buffer[64]; |
| snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob", |
| get_shader_kind_str(ctx->mod.shader_kind), shader_id++); |
      debug_printf("Trying to write blob to %s\n", buffer);
| FILE *f = fopen(buffer, "wb"); |
| if (f) { |
| fwrite(blob->data, 1, blob->size, f); |
| fclose(f); |
| } |
| } |
| |
| out: |
| dxil_module_release(&ctx->mod); |
| ralloc_free(ctx->ralloc_ctx); |
| free(ctx); |
| return retval; |
| } |