| /* |
| * Copyright © 2017 Connor Abbott |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "nir_serialize.h" |
| #include "util/u_dynarray.h" |
| #include "util/u_math.h" |
| #include "nir_control_flow.h" |
| #include "nir_xfb_info.h" |
| |
| #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) |
| #define MAX_OBJECT_IDS (1 << 20) |
| |
| typedef struct { |
| size_t blob_offset; |
| nir_def *src; |
| nir_block *block; |
| } write_phi_fixup; |
| |
| typedef struct { |
| const nir_shader *nir; |
| |
| struct blob *blob; |
| |
| /* maps pointer to index */ |
| struct hash_table *remap_table; |
| |
| /* the next index to assign to a NIR in-memory object */ |
| uint32_t next_idx; |
| |
| /* Array of write_phi_fixup structs representing phi sources that need to |
| * be resolved in the second pass. |
| */ |
| struct util_dynarray phi_fixups; |
| |
| /* The last serialized type. */ |
| const struct glsl_type *last_type; |
| const struct glsl_type *last_interface_type; |
| struct nir_variable_data last_var_data; |
| |
| /* For skipping equal ALU headers (typical after scalarization). */ |
| nir_instr_type last_instr_type; |
| uintptr_t last_alu_header_offset; |
| uint32_t last_alu_header; |
| |
| /* Don't write optional data such as variable names. */ |
| bool strip; |
| } write_ctx; |
| |
| typedef struct { |
| nir_shader *nir; |
| |
| struct blob_reader *blob; |
| |
| /* the next index to assign to a NIR in-memory object */ |
| uint32_t next_idx; |
| |
| /* The length of the index -> object table */ |
| uint32_t idx_table_len; |
| |
| /* map from index to deserialized pointer */ |
| void **idx_table; |
| |
| /* List of phi sources. */ |
| struct list_head phi_srcs; |
| |
| /* The last deserialized type. */ |
| const struct glsl_type *last_type; |
| const struct glsl_type *last_interface_type; |
| struct nir_variable_data last_var_data; |
| } read_ctx; |
| |
| static void |
| write_add_object(write_ctx *ctx, const void *obj) |
| { |
| uint32_t index = ctx->next_idx++; |
| assert(index != MAX_OBJECT_IDS); |
| _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t)index); |
| } |
| |
| static uint32_t |
| write_lookup_object(write_ctx *ctx, const void *obj) |
| { |
| struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); |
| assert(entry); |
| return (uint32_t)(uintptr_t)entry->data; |
| } |
| |
| static void |
| read_add_object(read_ctx *ctx, void *obj) |
| { |
| assert(ctx->next_idx < ctx->idx_table_len); |
| ctx->idx_table[ctx->next_idx++] = obj; |
| } |
| |
| static void * |
| read_lookup_object(read_ctx *ctx, uint32_t idx) |
| { |
| assert(idx < ctx->idx_table_len); |
| return ctx->idx_table[idx]; |
| } |
| |
| static void * |
| read_object(read_ctx *ctx) |
| { |
| return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); |
| } |
| |
| static uint32_t |
| encode_bit_size_3bits(uint8_t bit_size) |
| { |
| /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */ |
| assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size)); |
| if (bit_size) |
| return util_logbase2(bit_size) + 1; |
| return 0; |
| } |
| |
| static uint8_t |
| decode_bit_size_3bits(uint8_t bit_size) |
| { |
| if (bit_size) |
| return 1 << (bit_size - 1); |
| return 0; |
| } |
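| |
| /* Illustrative round trip for the 3-bit bit-size encoding: |
| * encode_bit_size_3bits(32) == util_logbase2(32) + 1 == 6 and |
| * decode_bit_size_3bits(6) == 1 << (6 - 1) == 32; a bit_size of 0 encodes |
| * and decodes back to 0. |
| */ |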
| |
| #define NUM_COMPONENTS_IS_SEPARATE_7 7 |
| |
| static uint8_t |
| encode_num_components_in_3bits(uint8_t num_components) |
| { |
| if (num_components <= 4) |
| return num_components; |
| if (num_components == 8) |
| return 5; |
| if (num_components == 16) |
| return 6; |
| |
| /* special value indicating that num_components is in the next uint32 */ |
| return NUM_COMPONENTS_IS_SEPARATE_7; |
| } |
| |
| static uint8_t |
| decode_num_components_in_3bits(uint8_t value) |
| { |
| if (value <= 4) |
| return value; |
| if (value == 5) |
| return 8; |
| if (value == 6) |
| return 16; |
| |
| unreachable("invalid num_components encoding"); |
| return 0; |
| } |
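| |
| /* Illustrative mapping for the 3-bit num_components encoding: 1..4 encode as |
| * themselves, 8 encodes as 5 and 16 as 6. Any other count uses the special |
| * value NUM_COMPONENTS_IS_SEPARATE_7, in which case write_def()/read_def() |
| * transfer the real count as a separate uint32 and the decoder above is |
| * never reached for it. |
| */ |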
| |
| static void |
| write_constant(write_ctx *ctx, const nir_constant *c) |
| { |
| blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); |
| blob_write_uint32(ctx->blob, c->num_elements); |
| for (unsigned i = 0; i < c->num_elements; i++) |
| write_constant(ctx, c->elements[i]); |
| } |
| |
| static nir_constant * |
| read_constant(read_ctx *ctx, nir_variable *nvar) |
| { |
| nir_constant *c = ralloc(nvar, nir_constant); |
| |
| static const nir_const_value zero_vals[ARRAY_SIZE(c->values)] = { 0 }; |
| blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); |
| c->is_null_constant = memcmp(c->values, zero_vals, sizeof(c->values)) == 0; |
| c->num_elements = blob_read_uint32(ctx->blob); |
| c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); |
| for (unsigned i = 0; i < c->num_elements; i++) { |
| c->elements[i] = read_constant(ctx, nvar); |
| c->is_null_constant &= c->elements[i]->is_null_constant; |
| } |
| |
| return c; |
| } |
| |
| enum var_data_encoding { |
| var_encode_full, |
| var_encode_location_diff, |
| }; |
| |
| union packed_var { |
| uint32_t u32; |
| struct { |
| unsigned has_name : 1; |
| unsigned has_constant_initializer : 1; |
| unsigned has_pointer_initializer : 1; |
| unsigned has_interface_type : 1; |
| unsigned num_state_slots : 7; |
| unsigned data_encoding : 2; |
| unsigned type_same_as_last : 1; |
| unsigned interface_type_same_as_last : 1; |
| unsigned ray_query : 1; |
| unsigned num_members : 16; |
| } u; |
| }; |
| |
| union packed_var_data_diff { |
| uint32_t u32; |
| struct { |
| int location : 13; |
| int location_frac : 3; |
| int driver_location : 16; |
| } u; |
| }; |
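| |
| /* Illustrative use of the diff encoding above: if the previous variable had |
| * location 4, location_frac 0 and driver_location 16, and the current one |
| * differs only by location 5 and driver_location 20, write_variable() emits |
| * a single packed_var_data_diff word (+1, 0, +4) instead of the whole |
| * nir_variable_data struct. |
| */ |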
| |
| static void |
| write_variable(write_ctx *ctx, const nir_variable *var) |
| { |
| write_add_object(ctx, var); |
| |
| assert(var->num_state_slots < (1 << 7)); |
| |
| STATIC_ASSERT(sizeof(union packed_var) == 4); |
| union packed_var flags; |
| flags.u32 = 0; |
| |
| flags.u.has_name = !ctx->strip && var->name; |
| flags.u.has_constant_initializer = !!(var->constant_initializer); |
| flags.u.has_pointer_initializer = !!(var->pointer_initializer); |
| flags.u.has_interface_type = !!(var->interface_type); |
| flags.u.type_same_as_last = var->type == ctx->last_type; |
| flags.u.interface_type_same_as_last = |
| var->interface_type && var->interface_type == ctx->last_interface_type; |
| flags.u.num_state_slots = var->num_state_slots; |
| flags.u.num_members = var->num_members; |
| |
| struct nir_variable_data data = var->data; |
| |
| /* When stripping, we expect the location to no longer be needed, which is |
| * typically the case after shaders are linked. |
| */ |
| if (ctx->strip && |
| data.mode != nir_var_system_value && |
| data.mode != nir_var_shader_in && |
| data.mode != nir_var_shader_out) |
| data.location = 0; |
| |
| struct nir_variable_data tmp = data; |
| |
| tmp.location = ctx->last_var_data.location; |
| tmp.location_frac = ctx->last_var_data.location_frac; |
| tmp.driver_location = ctx->last_var_data.driver_location; |
| |
| /* See if we can encode only the difference in locations from the last |
| * variable. |
| */ |
| if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 && |
| abs((int)data.location - |
| (int)ctx->last_var_data.location) < (1 << 12) && |
| abs((int)data.driver_location - |
| (int)ctx->last_var_data.driver_location) < (1 << 15)) |
| flags.u.data_encoding = var_encode_location_diff; |
| else |
| flags.u.data_encoding = var_encode_full; |
| |
| flags.u.ray_query = var->data.ray_query; |
| |
| blob_write_uint32(ctx->blob, flags.u32); |
| |
| if (!flags.u.type_same_as_last) { |
| encode_type_to_blob(ctx->blob, var->type); |
| ctx->last_type = var->type; |
| } |
| |
| if (var->interface_type && !flags.u.interface_type_same_as_last) { |
| encode_type_to_blob(ctx->blob, var->interface_type); |
| ctx->last_interface_type = var->interface_type; |
| } |
| |
| if (flags.u.has_name) |
| blob_write_string(ctx->blob, var->name); |
| |
| if (flags.u.data_encoding == var_encode_full) { |
| blob_write_bytes(ctx->blob, &data, sizeof(data)); |
| } else { |
| /* Serialize only the difference in locations from the last variable. |
| */ |
| union packed_var_data_diff diff; |
| |
| diff.u.location = data.location - ctx->last_var_data.location; |
| diff.u.location_frac = data.location_frac - |
| ctx->last_var_data.location_frac; |
| diff.u.driver_location = data.driver_location - |
| ctx->last_var_data.driver_location; |
| |
| blob_write_uint32(ctx->blob, diff.u32); |
| } |
| |
| ctx->last_var_data = data; |
| |
| for (unsigned i = 0; i < var->num_state_slots; i++) { |
| blob_write_bytes(ctx->blob, &var->state_slots[i], |
| sizeof(var->state_slots[i])); |
| } |
| if (var->constant_initializer) |
| write_constant(ctx, var->constant_initializer); |
| if (var->pointer_initializer) |
| blob_write_uint32(ctx->blob, |
| write_lookup_object(ctx, var->pointer_initializer)); |
| if (var->num_members > 0) { |
| blob_write_bytes(ctx->blob, (uint8_t *)var->members, |
| var->num_members * sizeof(*var->members)); |
| } |
| } |
| |
| static nir_variable * |
| read_variable(read_ctx *ctx) |
| { |
| nir_variable *var = rzalloc(ctx->nir, nir_variable); |
| read_add_object(ctx, var); |
| |
| union packed_var flags; |
| flags.u32 = blob_read_uint32(ctx->blob); |
| |
| if (flags.u.type_same_as_last) { |
| var->type = ctx->last_type; |
| } else { |
| var->type = decode_type_from_blob(ctx->blob); |
| ctx->last_type = var->type; |
| } |
| |
| if (flags.u.has_interface_type) { |
| if (flags.u.interface_type_same_as_last) { |
| var->interface_type = ctx->last_interface_type; |
| } else { |
| var->interface_type = decode_type_from_blob(ctx->blob); |
| ctx->last_interface_type = var->interface_type; |
| } |
| } |
| |
| if (flags.u.has_name) { |
| const char *name = blob_read_string(ctx->blob); |
| var->name = ralloc_strdup(var, name); |
| } else { |
| var->name = NULL; |
| } |
| |
| if (flags.u.data_encoding == var_encode_full) { |
| blob_copy_bytes(ctx->blob, (uint8_t *)&var->data, sizeof(var->data)); |
| ctx->last_var_data = var->data; |
| } else { /* var_encode_location_diff */ |
| union packed_var_data_diff diff; |
| diff.u32 = blob_read_uint32(ctx->blob); |
| |
| var->data = ctx->last_var_data; |
| var->data.location += diff.u.location; |
| var->data.location_frac += diff.u.location_frac; |
| var->data.driver_location += diff.u.driver_location; |
| |
| ctx->last_var_data = var->data; |
| } |
| |
| var->data.ray_query = flags.u.ray_query; |
| |
| var->num_state_slots = flags.u.num_state_slots; |
| if (var->num_state_slots != 0) { |
| var->state_slots = ralloc_array(var, nir_state_slot, |
| var->num_state_slots); |
| for (unsigned i = 0; i < var->num_state_slots; i++) { |
| blob_copy_bytes(ctx->blob, &var->state_slots[i], |
| sizeof(var->state_slots[i])); |
| } |
| } |
| if (flags.u.has_constant_initializer) |
| var->constant_initializer = read_constant(ctx, var); |
| else |
| var->constant_initializer = NULL; |
| |
| if (flags.u.has_pointer_initializer) |
| var->pointer_initializer = read_object(ctx); |
| else |
| var->pointer_initializer = NULL; |
| |
| var->num_members = flags.u.num_members; |
| if (var->num_members > 0) { |
| var->members = ralloc_array(var, struct nir_variable_data, |
| var->num_members); |
| blob_copy_bytes(ctx->blob, (uint8_t *)var->members, |
| var->num_members * sizeof(*var->members)); |
| } |
| |
| return var; |
| } |
| |
| static void |
| write_var_list(write_ctx *ctx, const struct exec_list *src) |
| { |
| blob_write_uint32(ctx->blob, exec_list_length(src)); |
| foreach_list_typed(nir_variable, var, node, src) { |
| write_variable(ctx, var); |
| } |
| } |
| |
| static void |
| read_var_list(read_ctx *ctx, struct exec_list *dst) |
| { |
| exec_list_make_empty(dst); |
| unsigned num_vars = blob_read_uint32(ctx->blob); |
| for (unsigned i = 0; i < num_vars; i++) { |
| nir_variable *var = read_variable(ctx); |
| exec_list_push_tail(dst, &var->node); |
| } |
| } |
| |
| union packed_src { |
| uint32_t u32; |
| struct { |
| unsigned _pad : 2; /* <-- Header */ |
| unsigned object_idx : 20; |
| unsigned _footer : 10; /* <-- Footer */ |
| } any; |
| struct { |
| unsigned _header : 22; /* <-- Header */ |
| unsigned _pad : 2; /* <-- Footer */ |
| unsigned swizzle_x : 2; |
| unsigned swizzle_y : 2; |
| unsigned swizzle_z : 2; |
| unsigned swizzle_w : 2; |
| } alu; |
| struct { |
| unsigned _header : 22; /* <-- Header */ |
| unsigned src_type : 5; /* <-- Footer */ |
| unsigned _pad : 5; |
| } tex; |
| }; |
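| |
| /* The variants above overlay the same uint32: the low 22 bits are common |
| * (2 pad bits plus the 20-bit object index filled in by write_src_full), |
| * and the top 10 bits carry per-instruction data such as ALU swizzles or |
| * the texture src_type. |
| */ |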
| |
| static void |
| write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) |
| { |
| header.any.object_idx = write_lookup_object(ctx, src->ssa); |
| blob_write_uint32(ctx->blob, header.u32); |
| } |
| |
| static void |
| write_src(write_ctx *ctx, const nir_src *src) |
| { |
| union packed_src header = { 0 }; |
| write_src_full(ctx, src, header); |
| } |
| |
| static union packed_src |
| read_src(read_ctx *ctx, nir_src *src) |
| { |
| STATIC_ASSERT(sizeof(union packed_src) == 4); |
| union packed_src header; |
| header.u32 = blob_read_uint32(ctx->blob); |
| |
| src->ssa = read_lookup_object(ctx, header.any.object_idx); |
| return header; |
| } |
| |
| union packed_def { |
| uint8_t u8; |
| struct { |
| uint8_t num_components : 3; |
| uint8_t bit_size : 3; |
| uint8_t divergent : 1; |
| uint8_t loop_invariant : 1; |
| }; |
| }; |
| |
| enum intrinsic_const_indices_encoding { |
| /* Use packed_const_indices to store tightly packed indices. |
| * |
| * The common case for load_ubo is 0, 0, 0, which is trivially represented. |
| * The common cases for load_interpolated_input also fit here, e.g.: 7, 3 |
| */ |
| const_indices_all_combined, |
| |
| const_indices_8bit, /* 8 bits per element */ |
| const_indices_16bit, /* 16 bits per element */ |
| const_indices_32bit, /* 32 bits per element */ |
| }; |
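| |
| /* Illustrative example of const_indices_all_combined: for indices {7, 3}, |
| * util_last_bit() gives max_bits == 3 and 3 * 2 <= 8, so both indices are |
| * packed into packed_const_indices as 7 | (3 << 4) == 0x37 with 4 bits per |
| * index, and no extra dwords are written for them. |
| */ |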
| |
| enum load_const_packing { |
| /* Constants are not packed and are stored in following dwords. */ |
| load_const_full, |
| |
| /* packed_value contains high 19 bits, low bits are 0, |
| * good for floating-point decimals |
| */ |
| load_const_scalar_hi_19bits, |
| |
| /* packed_value contains low 19 bits, high bits are sign-extended */ |
| load_const_scalar_lo_19bits_sext, |
| }; |
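| |
| /* Illustrative examples of the scalar packings above: 1.0f (0x3f800000) has |
| * its low 13 bits clear, so it is stored as load_const_scalar_hi_19bits with |
| * packed_value == 0x3f800000 >> 13; a 32-bit -1 survives sign-extension from |
| * 19 bits, so it is stored as load_const_scalar_lo_19bits_sext. Anything |
| * else falls back to load_const_full. |
| */ |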
| |
| union packed_instr { |
| uint32_t u32; |
| struct { |
| unsigned instr_type : 4; /* always present */ |
| unsigned _pad : 20; |
| unsigned def : 8; /* always last */ |
| } any; |
| struct { |
| unsigned instr_type : 4; |
| unsigned exact : 1; |
| unsigned no_signed_wrap : 1; |
| unsigned no_unsigned_wrap : 1; |
| unsigned padding : 1; |
| /* Swizzles for 2 srcs */ |
| unsigned two_swizzles : 4; |
| unsigned op : 9; |
| unsigned packed_src_ssa_16bit : 1; |
| /* Scalarized ALUs typically share the same header. */ |
| unsigned num_followup_alu_sharing_header : 2; |
| unsigned def : 8; |
| } alu; |
| struct { |
| unsigned instr_type : 4; |
| unsigned deref_type : 3; |
| unsigned cast_type_same_as_last : 1; |
| unsigned modes : 6; /* See (de|en)code_deref_modes() */ |
| unsigned _pad : 8; |
| unsigned in_bounds : 1; |
| unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */ |
| unsigned def : 8; |
| } deref; |
| struct { |
| unsigned instr_type : 4; |
| unsigned deref_type : 3; |
| unsigned _pad : 1; |
| unsigned object_idx : 16; /* if 0, the object ID is a separate uint32 */ |
| unsigned def : 8; |
| } deref_var; |
| struct { |
| unsigned instr_type : 4; |
| unsigned intrinsic : 10; |
| unsigned const_indices_encoding : 2; |
| unsigned packed_const_indices : 8; |
| unsigned def : 8; |
| } intrinsic; |
| struct { |
| unsigned instr_type : 4; |
| unsigned last_component : 4; |
| unsigned bit_size : 3; |
| unsigned packing : 2; /* enum load_const_packing */ |
| unsigned packed_value : 19; /* meaning determined by packing */ |
| } load_const; |
| struct { |
| unsigned instr_type : 4; |
| unsigned last_component : 4; |
| unsigned bit_size : 3; |
| unsigned _pad : 21; |
| } undef; |
| struct { |
| unsigned instr_type : 4; |
| unsigned num_srcs : 4; |
| unsigned op : 5; |
| unsigned _pad : 11; |
| unsigned def : 8; |
| } tex; |
| struct { |
| unsigned instr_type : 4; |
| unsigned num_srcs : 20; |
| unsigned def : 8; |
| } phi; |
| struct { |
| unsigned instr_type : 4; |
| unsigned type : 2; |
| unsigned _pad : 26; |
| } jump; |
| struct { |
| unsigned instr_type : 4; |
| unsigned type : 4; |
| unsigned string_length : 16; |
| unsigned def : 8; |
| } debug_info; |
| }; |
| |
| /* Write "lo24" as low 24 bits in the first uint32. */ |
| static void |
| write_def(write_ctx *ctx, const nir_def *def, union packed_instr header, |
| nir_instr_type instr_type) |
| { |
| STATIC_ASSERT(sizeof(union packed_def) == 1); |
| union packed_def pdef; |
| pdef.u8 = 0; |
| |
| pdef.num_components = |
| encode_num_components_in_3bits(def->num_components); |
| pdef.bit_size = encode_bit_size_3bits(def->bit_size); |
| pdef.divergent = def->divergent; |
| pdef.loop_invariant = def->loop_invariant; |
| header.any.def = pdef.u8; |
| |
| /* Check if the current ALU instruction has the same header as the previous |
| * ALU instruction. If so, we don't have to write the current header. This |
| * is a typical occurrence after scalarization. |
| */ |
| if (instr_type == nir_instr_type_alu) { |
| bool equal_header = false; |
| |
| if (ctx->last_instr_type == nir_instr_type_alu) { |
| assert(ctx->last_alu_header_offset); |
| union packed_instr last_header; |
| last_header.u32 = ctx->last_alu_header; |
| |
| /* Clear the field that counts ALUs with equal headers. */ |
| union packed_instr clean_header; |
| clean_header.u32 = last_header.u32; |
| clean_header.alu.num_followup_alu_sharing_header = 0; |
| |
| /* There can be at most 4 consecutive ALU instructions |
| * sharing the same header. |
| */ |
| if (last_header.alu.num_followup_alu_sharing_header < 3 && |
| header.u32 == clean_header.u32) { |
| last_header.alu.num_followup_alu_sharing_header++; |
| blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, |
| last_header.u32); |
| ctx->last_alu_header = last_header.u32; |
| equal_header = true; |
| } |
| } |
| |
| if (!equal_header) { |
| ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob); |
| blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32); |
| ctx->last_alu_header = header.u32; |
| } |
| } else { |
| blob_write_uint32(ctx->blob, header.u32); |
| } |
| |
| if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7) |
| blob_write_uint32(ctx->blob, def->num_components); |
| |
| write_add_object(ctx, def); |
| } |
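| |
| /* Illustrative effect of the header sharing above: after scalarization it is |
| * common for consecutive scalar ALU instructions to end up with byte- |
| * identical headers (same opcode, flags, def layout and packed swizzles). |
| * Only the first header is written; up to three followers just bump |
| * num_followup_alu_sharing_header in that already-written word, and |
| * read_instr() recreates all of them from that single header (each still |
| * has its own sources in the blob). |
| */ |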
| |
| static void |
| read_def(read_ctx *ctx, nir_def *def, nir_instr *instr, |
| union packed_instr header) |
| { |
| union packed_def pdef; |
| pdef.u8 = header.any.def; |
| |
| unsigned bit_size = decode_bit_size_3bits(pdef.bit_size); |
| unsigned num_components; |
| if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7) |
| num_components = blob_read_uint32(ctx->blob); |
| else |
| num_components = decode_num_components_in_3bits(pdef.num_components); |
| nir_def_init(instr, def, num_components, bit_size); |
| def->divergent = pdef.divergent; |
| def->loop_invariant = pdef.loop_invariant; |
| read_add_object(ctx, def); |
| } |
| |
| static bool |
| are_object_ids_16bit(write_ctx *ctx) |
| { |
| /* Check the highest object ID, because IDs are assigned monotonically. */ |
| return ctx->next_idx < (1 << 16); |
| } |
| |
| static bool |
| is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) |
| { |
| unsigned num_srcs = nir_op_infos[alu->op].num_inputs; |
| |
| for (unsigned i = 0; i < num_srcs; i++) { |
| unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); |
| |
| for (unsigned chan = 0; chan < src_components; chan++) { |
| /* The swizzles for src0.x and src1.x are stored |
| * in two_swizzles for SSA ALUs. |
| */ |
| if (i < 2 && chan == 0 && alu->src[i].swizzle[chan] < 4) |
| continue; |
| |
| if (alu->src[i].swizzle[chan] != chan) |
| return false; |
| } |
| } |
| |
| return are_object_ids_16bit(ctx); |
| } |
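| |
| /* Illustrative effect of the check above: when every object ID still fits in |
| * 16 bits and all source swizzles are trivial (identity, except src0.x and |
| * src1.x, which travel in header.alu.two_swizzles), each ALU source is |
| * written as a single uint16 object index instead of a full packed_src |
| * uint32. |
| */ |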
| |
| static void |
| write_alu(write_ctx *ctx, const nir_alu_instr *alu) |
| { |
| unsigned num_srcs = nir_op_infos[alu->op].num_inputs; |
| |
| /* 9 bits for nir_op */ |
| STATIC_ASSERT(nir_num_opcodes <= 512); |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.alu.instr_type = alu->instr.type; |
| header.alu.exact = alu->exact; |
| header.alu.no_signed_wrap = alu->no_signed_wrap; |
| header.alu.no_unsigned_wrap = alu->no_unsigned_wrap; |
| header.alu.op = alu->op; |
| header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); |
| |
| if (header.alu.packed_src_ssa_16bit) { |
| /* For packed srcs of SSA ALUs, this field stores the swizzles. */ |
| header.alu.two_swizzles = alu->src[0].swizzle[0]; |
| if (num_srcs > 1) |
| header.alu.two_swizzles |= alu->src[1].swizzle[0] << 2; |
| } |
| |
| write_def(ctx, &alu->def, header, alu->instr.type); |
| blob_write_uint32(ctx->blob, alu->fp_fast_math); |
| |
| if (header.alu.packed_src_ssa_16bit) { |
| for (unsigned i = 0; i < num_srcs; i++) { |
| unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa); |
| assert(idx < (1 << 16)); |
| blob_write_uint16(ctx->blob, idx); |
| } |
| } else { |
| for (unsigned i = 0; i < num_srcs; i++) { |
| unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); |
| unsigned src_components = nir_src_num_components(alu->src[i].src); |
| union packed_src src; |
| bool packed = src_components <= 4 && src_channels <= 4; |
| src.u32 = 0; |
| |
| if (packed) { |
| src.alu.swizzle_x = alu->src[i].swizzle[0]; |
| src.alu.swizzle_y = alu->src[i].swizzle[1]; |
| src.alu.swizzle_z = alu->src[i].swizzle[2]; |
| src.alu.swizzle_w = alu->src[i].swizzle[3]; |
| } |
| |
| write_src_full(ctx, &alu->src[i].src, src); |
| |
| /* Store swizzles for vec8 and vec16. */ |
| if (!packed) { |
| for (unsigned o = 0; o < src_channels; o += 8) { |
| unsigned value = 0; |
| |
| for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { |
| value |= (uint32_t)alu->src[i].swizzle[o + j] << (4 * j); /* 4 bits per swizzle */ |
| } |
| |
| blob_write_uint32(ctx->blob, value); |
| } |
| } |
| } |
| } |
| } |
| |
| static nir_alu_instr * |
| read_alu(read_ctx *ctx, union packed_instr header) |
| { |
| unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs; |
| nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op); |
| |
| alu->exact = header.alu.exact; |
| alu->no_signed_wrap = header.alu.no_signed_wrap; |
| alu->no_unsigned_wrap = header.alu.no_unsigned_wrap; |
| |
| read_def(ctx, &alu->def, &alu->instr, header); |
| alu->fp_fast_math = blob_read_uint32(ctx->blob); |
| |
| if (header.alu.packed_src_ssa_16bit) { |
| for (unsigned i = 0; i < num_srcs; i++) { |
| nir_alu_src *src = &alu->src[i]; |
| src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); |
| |
| memset(&src->swizzle, 0, sizeof(src->swizzle)); |
| |
| unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); |
| |
| for (unsigned chan = 0; chan < src_components; chan++) |
| src->swizzle[chan] = chan; |
| } |
| } else { |
| for (unsigned i = 0; i < num_srcs; i++) { |
| union packed_src src = read_src(ctx, &alu->src[i].src); |
| unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); |
| unsigned src_components = nir_src_num_components(alu->src[i].src); |
| bool packed = src_components <= 4 && src_channels <= 4; |
| |
| memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle)); |
| |
| if (packed) { |
| alu->src[i].swizzle[0] = src.alu.swizzle_x; |
| alu->src[i].swizzle[1] = src.alu.swizzle_y; |
| alu->src[i].swizzle[2] = src.alu.swizzle_z; |
| alu->src[i].swizzle[3] = src.alu.swizzle_w; |
| } else { |
| /* Load swizzles for vec8 and vec16. */ |
| for (unsigned o = 0; o < src_channels; o += 8) { |
| unsigned value = blob_read_uint32(ctx->blob); |
| |
| for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { |
| alu->src[i].swizzle[o + j] = |
| (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */ |
| } |
| } |
| } |
| } |
| } |
| |
| if (header.alu.packed_src_ssa_16bit) { |
| alu->src[0].swizzle[0] = header.alu.two_swizzles & 0x3; |
| if (num_srcs > 1) |
| alu->src[1].swizzle[0] = header.alu.two_swizzles >> 2; |
| } |
| |
| return alu; |
| } |
| |
| #define NUM_GENERIC_MODES 4 |
| #define MODE_ENC_GENERIC_BIT (1 << 5) |
| |
| static nir_variable_mode |
| decode_deref_modes(unsigned modes) |
| { |
| if (modes & MODE_ENC_GENERIC_BIT) { |
| modes &= ~MODE_ENC_GENERIC_BIT; |
| return modes << (ffs(nir_var_mem_generic) - 1); |
| } else { |
| return 1 << modes; |
| } |
| } |
| |
| static unsigned |
| encode_deref_modes(nir_variable_mode modes) |
| { |
| /* Mode sets on derefs generally come in two forms. For certain OpenCL |
| * cases, we can have more than one of the generic modes set. In this |
| * case, we need the full bitfield. Fortunately, there are only 4 of |
| * these. For all other modes, we can only have one mode at a time so we |
| * can compress them by only storing the bit position. This, plus one bit |
| * to select encoding, lets us pack the entire bitfield in 6 bits. |
| */ |
| |
| /* Assert that the modes we are compressing fit along with the generic bit |
| */ |
| STATIC_ASSERT((nir_num_variable_modes - NUM_GENERIC_MODES) < |
| MODE_ENC_GENERIC_BIT); |
| |
| /* Assert that the generic modes are defined at the end of the modes enum |
| */ |
| STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) < |
| (1 << (nir_num_variable_modes - NUM_GENERIC_MODES))); |
| |
| unsigned enc; |
| if (modes == 0 || (modes & nir_var_mem_generic)) { |
| assert(!(modes & ~nir_var_mem_generic)); |
| enc = modes >> (ffs(nir_var_mem_generic) - 1); |
| assert(enc < MODE_ENC_GENERIC_BIT); |
| enc |= MODE_ENC_GENERIC_BIT; |
| } else { |
| assert(util_is_power_of_two_nonzero(modes)); |
| enc = ffs(modes) - 1; |
| assert(enc < MODE_ENC_GENERIC_BIT); |
| } |
| assert(modes == decode_deref_modes(enc)); |
| return enc; |
| } |
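| |
| /* Illustrative example of the mode encoding above (actual bit positions |
| * depend on the nir_variable_mode enum and are not spelled out here): a |
| * single non-generic mode at bit position N encodes as just N, while a set |
| * of generic modes is shifted down so the first generic mode lands at bit 0 |
| * and MODE_ENC_GENERIC_BIT is ORed in, letting decode_deref_modes() tell |
| * the two cases apart. |
| */ |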
| |
| static void |
| write_deref(write_ctx *ctx, const nir_deref_instr *deref) |
| { |
| assert(deref->deref_type < 8); |
| |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.deref.instr_type = deref->instr.type; |
| header.deref.deref_type = deref->deref_type; |
| |
| if (deref->deref_type == nir_deref_type_cast) { |
| header.deref.modes = encode_deref_modes(deref->modes); |
| header.deref.cast_type_same_as_last = deref->type == ctx->last_type; |
| } |
| |
| unsigned var_idx = 0; |
| if (deref->deref_type == nir_deref_type_var) { |
| var_idx = write_lookup_object(ctx, deref->var); |
| if (var_idx && var_idx < (1 << 16)) |
| header.deref_var.object_idx = var_idx; |
| } |
| |
| if (deref->deref_type == nir_deref_type_array || |
| deref->deref_type == nir_deref_type_ptr_as_array) { |
| header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx); |
| |
| header.deref.in_bounds = deref->arr.in_bounds; |
| } |
| |
| write_def(ctx, &deref->def, header, deref->instr.type); |
| |
| switch (deref->deref_type) { |
| case nir_deref_type_var: |
| if (!header.deref_var.object_idx) |
| blob_write_uint32(ctx->blob, var_idx); |
| break; |
| |
| case nir_deref_type_struct: |
| write_src(ctx, &deref->parent); |
| blob_write_uint32(ctx->blob, deref->strct.index); |
| break; |
| |
| case nir_deref_type_array: |
| case nir_deref_type_ptr_as_array: |
| if (header.deref.packed_src_ssa_16bit) { |
| blob_write_uint16(ctx->blob, |
| write_lookup_object(ctx, deref->parent.ssa)); |
| blob_write_uint16(ctx->blob, |
| write_lookup_object(ctx, deref->arr.index.ssa)); |
| } else { |
| write_src(ctx, &deref->parent); |
| write_src(ctx, &deref->arr.index); |
| } |
| break; |
| |
| case nir_deref_type_cast: |
| write_src(ctx, &deref->parent); |
| blob_write_uint32(ctx->blob, deref->cast.ptr_stride); |
| blob_write_uint32(ctx->blob, deref->cast.align_mul); |
| blob_write_uint32(ctx->blob, deref->cast.align_offset); |
| if (!header.deref.cast_type_same_as_last) { |
| encode_type_to_blob(ctx->blob, deref->type); |
| ctx->last_type = deref->type; |
| } |
| break; |
| |
| case nir_deref_type_array_wildcard: |
| write_src(ctx, &deref->parent); |
| break; |
| |
| default: |
| unreachable("Invalid deref type"); |
| } |
| } |
| |
| static nir_deref_instr * |
| read_deref(read_ctx *ctx, union packed_instr header) |
| { |
| nir_deref_type deref_type = header.deref.deref_type; |
| nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); |
| |
| read_def(ctx, &deref->def, &deref->instr, header); |
| |
| nir_deref_instr *parent; |
| |
| switch (deref->deref_type) { |
| case nir_deref_type_var: |
| if (header.deref_var.object_idx) |
| deref->var = read_lookup_object(ctx, header.deref_var.object_idx); |
| else |
| deref->var = read_object(ctx); |
| |
| deref->type = deref->var->type; |
| break; |
| |
| case nir_deref_type_struct: |
| read_src(ctx, &deref->parent); |
| parent = nir_src_as_deref(deref->parent); |
| deref->strct.index = blob_read_uint32(ctx->blob); |
| deref->type = glsl_get_struct_field(parent->type, deref->strct.index); |
| break; |
| |
| case nir_deref_type_array: |
| case nir_deref_type_ptr_as_array: |
| if (header.deref.packed_src_ssa_16bit) { |
| deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); |
| deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); |
| } else { |
| read_src(ctx, &deref->parent); |
| read_src(ctx, &deref->arr.index); |
| } |
| |
| deref->arr.in_bounds = header.deref.in_bounds; |
| |
| parent = nir_src_as_deref(deref->parent); |
| if (deref->deref_type == nir_deref_type_array) |
| deref->type = glsl_get_array_element(parent->type); |
| else |
| deref->type = parent->type; |
| break; |
| |
| case nir_deref_type_cast: |
| read_src(ctx, &deref->parent); |
| deref->cast.ptr_stride = blob_read_uint32(ctx->blob); |
| deref->cast.align_mul = blob_read_uint32(ctx->blob); |
| deref->cast.align_offset = blob_read_uint32(ctx->blob); |
| if (header.deref.cast_type_same_as_last) { |
| deref->type = ctx->last_type; |
| } else { |
| deref->type = decode_type_from_blob(ctx->blob); |
| ctx->last_type = deref->type; |
| } |
| break; |
| |
| case nir_deref_type_array_wildcard: |
| read_src(ctx, &deref->parent); |
| parent = nir_src_as_deref(deref->parent); |
| deref->type = glsl_get_array_element(parent->type); |
| break; |
| |
| default: |
| unreachable("Invalid deref type"); |
| } |
| |
| if (deref_type == nir_deref_type_var) { |
| deref->modes = deref->var->data.mode; |
| } else if (deref->deref_type == nir_deref_type_cast) { |
| deref->modes = decode_deref_modes(header.deref.modes); |
| } else { |
| deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes; |
| } |
| |
| return deref; |
| } |
| |
| static void |
| write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) |
| { |
| /* 10 bits for nir_intrinsic_op */ |
| STATIC_ASSERT(nir_num_intrinsics <= 1024); |
| unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; |
| unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; |
| assert(intrin->intrinsic < 1024); |
| |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.intrinsic.instr_type = intrin->instr.type; |
| header.intrinsic.intrinsic = intrin->intrinsic; |
| |
| /* Analyze constant indices to decide how to encode them. */ |
| if (num_indices) { |
| unsigned max_bits = 0; |
| for (unsigned i = 0; i < num_indices; i++) { |
| unsigned max = util_last_bit(intrin->const_index[i]); |
| max_bits = MAX2(max_bits, max); |
| } |
| |
| if (max_bits * num_indices <= 8) { |
| header.intrinsic.const_indices_encoding = const_indices_all_combined; |
| |
| /* Pack all const indices into 8 bits. */ |
| unsigned bit_size = 8 / num_indices; |
| for (unsigned i = 0; i < num_indices; i++) { |
| header.intrinsic.packed_const_indices |= |
| intrin->const_index[i] << (i * bit_size); |
| } |
| } else if (max_bits <= 8) |
| header.intrinsic.const_indices_encoding = const_indices_8bit; |
| else if (max_bits <= 16) |
| header.intrinsic.const_indices_encoding = const_indices_16bit; |
| else |
| header.intrinsic.const_indices_encoding = const_indices_32bit; |
| } |
| |
| if (nir_intrinsic_infos[intrin->intrinsic].has_dest) |
| write_def(ctx, &intrin->def, header, intrin->instr.type); |
| else |
| blob_write_uint32(ctx->blob, header.u32); |
| |
| for (unsigned i = 0; i < num_srcs; i++) |
| write_src(ctx, &intrin->src[i]); |
| |
| if (num_indices) { |
| switch (header.intrinsic.const_indices_encoding) { |
| case const_indices_8bit: |
| for (unsigned i = 0; i < num_indices; i++) |
| blob_write_uint8(ctx->blob, intrin->const_index[i]); |
| break; |
| case const_indices_16bit: |
| for (unsigned i = 0; i < num_indices; i++) |
| blob_write_uint16(ctx->blob, intrin->const_index[i]); |
| break; |
| case const_indices_32bit: |
| for (unsigned i = 0; i < num_indices; i++) |
| blob_write_uint32(ctx->blob, intrin->const_index[i]); |
| break; |
| } |
| } |
| } |
| |
| static nir_intrinsic_instr * |
| read_intrinsic(read_ctx *ctx, union packed_instr header) |
| { |
| nir_intrinsic_op op = header.intrinsic.intrinsic; |
| nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op); |
| |
| unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; |
| unsigned num_indices = nir_intrinsic_infos[op].num_indices; |
| |
| if (nir_intrinsic_infos[op].has_dest) |
| read_def(ctx, &intrin->def, &intrin->instr, header); |
| |
| for (unsigned i = 0; i < num_srcs; i++) |
| read_src(ctx, &intrin->src[i]); |
| |
| /* Vectorized intrinsics take their num_components from the dest or from the |
| * src whose component count is 0 in the intrinsic info. Find it. |
| */ |
| if (nir_intrinsic_infos[op].has_dest && |
| nir_intrinsic_infos[op].dest_components == 0) { |
| intrin->num_components = intrin->def.num_components; |
| } else { |
| for (unsigned i = 0; i < num_srcs; i++) { |
| if (nir_intrinsic_infos[op].src_components[i] == 0) { |
| intrin->num_components = nir_src_num_components(intrin->src[i]); |
| break; |
| } |
| } |
| } |
| |
| if (num_indices) { |
| switch (header.intrinsic.const_indices_encoding) { |
| case const_indices_all_combined: { |
| unsigned bit_size = 8 / num_indices; |
| unsigned bit_mask = u_bit_consecutive(0, bit_size); |
| for (unsigned i = 0; i < num_indices; i++) { |
| intrin->const_index[i] = |
| (header.intrinsic.packed_const_indices >> (i * bit_size)) & |
| bit_mask; |
| } |
| break; |
| } |
| case const_indices_8bit: |
| for (unsigned i = 0; i < num_indices; i++) |
| intrin->const_index[i] = blob_read_uint8(ctx->blob); |
| break; |
| case const_indices_16bit: |
| for (unsigned i = 0; i < num_indices; i++) |
| intrin->const_index[i] = blob_read_uint16(ctx->blob); |
| break; |
| case const_indices_32bit: |
| for (unsigned i = 0; i < num_indices; i++) |
| intrin->const_index[i] = blob_read_uint32(ctx->blob); |
| break; |
| } |
| } |
| |
| return intrin; |
| } |
| |
| static void |
| write_load_const(write_ctx *ctx, const nir_load_const_instr *lc) |
| { |
| assert(lc->def.num_components >= 1 && lc->def.num_components <= 16); |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.load_const.instr_type = lc->instr.type; |
| header.load_const.last_component = lc->def.num_components - 1; |
| header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size); |
| header.load_const.packing = load_const_full; |
| |
| /* Try to pack 1-component constants into the 19 free bits in the header. */ |
| if (lc->def.num_components == 1) { |
| switch (lc->def.bit_size) { |
| case 64: |
| if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) { |
| /* packed_value contains high 19 bits, low bits are 0 */ |
| header.load_const.packing = load_const_scalar_hi_19bits; |
| header.load_const.packed_value = lc->value[0].u64 >> 45; |
| } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) { |
| /* packed_value contains low 19 bits, high bits are sign-extended */ |
| header.load_const.packing = load_const_scalar_lo_19bits_sext; |
| header.load_const.packed_value = lc->value[0].u64; |
| } |
| break; |
| |
| case 32: |
| if ((lc->value[0].u32 & 0x1fff) == 0) { |
| header.load_const.packing = load_const_scalar_hi_19bits; |
| header.load_const.packed_value = lc->value[0].u32 >> 13; |
| } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) { |
| header.load_const.packing = load_const_scalar_lo_19bits_sext; |
| header.load_const.packed_value = lc->value[0].u32; |
| } |
| break; |
| |
| case 16: |
| header.load_const.packing = load_const_scalar_lo_19bits_sext; |
| header.load_const.packed_value = lc->value[0].u16; |
| break; |
| case 8: |
| header.load_const.packing = load_const_scalar_lo_19bits_sext; |
| header.load_const.packed_value = lc->value[0].u8; |
| break; |
| case 1: |
| header.load_const.packing = load_const_scalar_lo_19bits_sext; |
| header.load_const.packed_value = lc->value[0].b; |
| break; |
| default: |
| unreachable("invalid bit_size"); |
| } |
| } |
| |
| blob_write_uint32(ctx->blob, header.u32); |
| |
| if (header.load_const.packing == load_const_full) { |
| switch (lc->def.bit_size) { |
| case 64: |
| blob_write_bytes(ctx->blob, lc->value, |
| sizeof(*lc->value) * lc->def.num_components); |
| break; |
| |
| case 32: |
| for (unsigned i = 0; i < lc->def.num_components; i++) |
| blob_write_uint32(ctx->blob, lc->value[i].u32); |
| break; |
| |
| case 16: |
| for (unsigned i = 0; i < lc->def.num_components; i++) |
| blob_write_uint16(ctx->blob, lc->value[i].u16); |
| break; |
| |
| default: |
| assert(lc->def.bit_size <= 8); |
| for (unsigned i = 0; i < lc->def.num_components; i++) |
| blob_write_uint8(ctx->blob, lc->value[i].u8); |
| break; |
| } |
| } |
| |
| write_add_object(ctx, &lc->def); |
| } |
| |
| static nir_load_const_instr * |
| read_load_const(read_ctx *ctx, union packed_instr header) |
| { |
| nir_load_const_instr *lc = |
| nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1, |
| decode_bit_size_3bits(header.load_const.bit_size)); |
| lc->def.divergent = false; |
| lc->def.loop_invariant = true; |
| |
| switch (header.load_const.packing) { |
| case load_const_scalar_hi_19bits: |
| switch (lc->def.bit_size) { |
| case 64: |
| lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45; |
| break; |
| case 32: |
| lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13; |
| break; |
| default: |
| unreachable("invalid bit_size"); |
| } |
| break; |
| |
| case load_const_scalar_lo_19bits_sext: |
| switch (lc->def.bit_size) { |
| case 64: |
| lc->value[0].u64 = header.load_const.packed_value; |
| if (lc->value[0].u64 >> 18) |
| lc->value[0].u64 |= UINT64_C(0xfffffffffff80000); |
| break; |
| case 32: |
| lc->value[0].u32 = header.load_const.packed_value; |
| if (lc->value[0].u32 >> 18) |
| lc->value[0].u32 |= 0xfff80000; |
| break; |
| case 16: |
| lc->value[0].u16 = header.load_const.packed_value; |
| break; |
| case 8: |
| lc->value[0].u8 = header.load_const.packed_value; |
| break; |
| case 1: |
| lc->value[0].b = header.load_const.packed_value; |
| break; |
| default: |
| unreachable("invalid bit_size"); |
| } |
| break; |
| |
| case load_const_full: |
| switch (lc->def.bit_size) { |
| case 64: |
| blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); |
| break; |
| |
| case 32: |
| for (unsigned i = 0; i < lc->def.num_components; i++) |
| lc->value[i].u32 = blob_read_uint32(ctx->blob); |
| break; |
| |
| case 16: |
| for (unsigned i = 0; i < lc->def.num_components; i++) |
| lc->value[i].u16 = blob_read_uint16(ctx->blob); |
| break; |
| |
| default: |
| assert(lc->def.bit_size <= 8); |
| for (unsigned i = 0; i < lc->def.num_components; i++) |
| lc->value[i].u8 = blob_read_uint8(ctx->blob); |
| break; |
| } |
| break; |
| } |
| |
| read_add_object(ctx, &lc->def); |
| return lc; |
| } |
| |
| static void |
| write_ssa_undef(write_ctx *ctx, const nir_undef_instr *undef) |
| { |
| assert(undef->def.num_components >= 1 && undef->def.num_components <= 16); |
| |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.undef.instr_type = undef->instr.type; |
| header.undef.last_component = undef->def.num_components - 1; |
| header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size); |
| |
| blob_write_uint32(ctx->blob, header.u32); |
| write_add_object(ctx, &undef->def); |
| } |
| |
| static nir_undef_instr * |
| read_ssa_undef(read_ctx *ctx, union packed_instr header) |
| { |
| nir_undef_instr *undef = |
| nir_undef_instr_create(ctx->nir, header.undef.last_component + 1, |
| decode_bit_size_3bits(header.undef.bit_size)); |
| |
| undef->def.divergent = false; |
| undef->def.loop_invariant = true; |
| |
| read_add_object(ctx, &undef->def); |
| return undef; |
| } |
| |
| union packed_tex_data { |
| uint32_t u32; |
| struct { |
| unsigned sampler_dim : 4; |
| unsigned dest_type : 8; |
| unsigned coord_components : 3; |
| unsigned is_array : 1; |
| unsigned is_shadow : 1; |
| unsigned is_new_style_shadow : 1; |
| unsigned is_sparse : 1; |
| unsigned component : 2; |
| unsigned texture_non_uniform : 1; |
| unsigned sampler_non_uniform : 1; |
| unsigned array_is_lowered_cube : 1; |
| unsigned is_gather_implicit_lod : 1; |
| unsigned unused : 5; /* Mark unused for valgrind. */ |
| } u; |
| }; |
| |
| static void |
| write_tex(write_ctx *ctx, const nir_tex_instr *tex) |
| { |
| assert(tex->num_srcs < 16); |
| assert(tex->op < 32); |
| |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.tex.instr_type = tex->instr.type; |
| header.tex.num_srcs = tex->num_srcs; |
| header.tex.op = tex->op; |
| |
| write_def(ctx, &tex->def, header, tex->instr.type); |
| |
| blob_write_uint32(ctx->blob, tex->texture_index); |
| blob_write_uint32(ctx->blob, tex->sampler_index); |
| blob_write_uint32(ctx->blob, tex->backend_flags); |
| if (tex->op == nir_texop_tg4) |
| blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); |
| |
| STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t)); |
| union packed_tex_data packed = { |
| .u.sampler_dim = tex->sampler_dim, |
| .u.dest_type = tex->dest_type, |
| .u.coord_components = tex->coord_components, |
| .u.is_array = tex->is_array, |
| .u.is_shadow = tex->is_shadow, |
| .u.is_new_style_shadow = tex->is_new_style_shadow, |
| .u.is_sparse = tex->is_sparse, |
| .u.component = tex->component, |
| .u.texture_non_uniform = tex->texture_non_uniform, |
| .u.sampler_non_uniform = tex->sampler_non_uniform, |
| .u.array_is_lowered_cube = tex->array_is_lowered_cube, |
| .u.is_gather_implicit_lod = tex->is_gather_implicit_lod, |
| }; |
| blob_write_uint32(ctx->blob, packed.u32); |
| |
| for (unsigned i = 0; i < tex->num_srcs; i++) { |
| union packed_src src; |
| src.u32 = 0; |
| src.tex.src_type = tex->src[i].src_type; |
| write_src_full(ctx, &tex->src[i].src, src); |
| } |
| } |
| |
| static nir_tex_instr * |
| read_tex(read_ctx *ctx, union packed_instr header) |
| { |
| nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs); |
| |
| read_def(ctx, &tex->def, &tex->instr, header); |
| |
| tex->op = header.tex.op; |
| tex->texture_index = blob_read_uint32(ctx->blob); |
| tex->sampler_index = blob_read_uint32(ctx->blob); |
| tex->backend_flags = blob_read_uint32(ctx->blob); |
| if (tex->op == nir_texop_tg4) |
| blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); |
| |
| union packed_tex_data packed; |
| packed.u32 = blob_read_uint32(ctx->blob); |
| tex->sampler_dim = packed.u.sampler_dim; |
| tex->dest_type = packed.u.dest_type; |
| tex->coord_components = packed.u.coord_components; |
| tex->is_array = packed.u.is_array; |
| tex->is_shadow = packed.u.is_shadow; |
| tex->is_new_style_shadow = packed.u.is_new_style_shadow; |
| tex->is_sparse = packed.u.is_sparse; |
| tex->component = packed.u.component; |
| tex->texture_non_uniform = packed.u.texture_non_uniform; |
| tex->sampler_non_uniform = packed.u.sampler_non_uniform; |
| tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; |
| tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod; |
| |
| for (unsigned i = 0; i < tex->num_srcs; i++) { |
| union packed_src src = read_src(ctx, &tex->src[i].src); |
| tex->src[i].src_type = src.tex.src_type; |
| } |
| |
| return tex; |
| } |
| |
| static void |
| write_phi(write_ctx *ctx, const nir_phi_instr *phi) |
| { |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.phi.instr_type = phi->instr.type; |
| header.phi.num_srcs = exec_list_length(&phi->srcs); |
| |
| /* Phi nodes are special, since they may reference SSA definitions and |
| * basic blocks that don't exist yet. We leave two empty uint32_t's here, |
| * and then store enough information so that a later fixup pass can fill |
| * them in correctly. |
| */ |
| write_def(ctx, &phi->def, header, phi->instr.type); |
| |
| nir_foreach_phi_src(src, phi) { |
| size_t blob_offset = blob_reserve_uint32(ctx->blob); |
| ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob); |
| assert(blob_offset + sizeof(uint32_t) == blob_offset2); |
| write_phi_fixup fixup = { |
| .blob_offset = blob_offset, |
| .src = src->src.ssa, |
| .block = src->pred, |
| }; |
| util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup); |
| } |
| } |
| |
| static void |
| write_fixup_phis(write_ctx *ctx) |
| { |
| util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) { |
| blob_overwrite_uint32(ctx->blob, fixup->blob_offset, |
| write_lookup_object(ctx, fixup->src)); |
| blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t), |
| write_lookup_object(ctx, fixup->block)); |
| } |
| |
| util_dynarray_clear(&ctx->phi_fixups); |
| } |
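| |
| /* Illustrative timeline for the phi handling above: when write_phi() runs on |
| * a loop header phi, the back-edge source's def and block may not have been |
| * assigned indices yet, so two placeholder uint32s are reserved per source. |
| * write_fixup_phis() is called at the end of write_function_impl(), once |
| * every def and block has an index, and patches them in. |
| */ |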
| |
| static nir_phi_instr * |
| read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header) |
| { |
| nir_phi_instr *phi = nir_phi_instr_create(ctx->nir); |
| |
| read_def(ctx, &phi->def, &phi->instr, header); |
| |
| /* For similar reasons as before, we just store the index directly into the |
| * pointer, and let a later pass resolve the phi sources. |
| * |
| * In order to ensure that the copied sources (which are just the indices |
| * from the blob for now) don't get inserted into the old shader's use-def |
| * lists, we have to add the phi instruction *before* we set up its |
| * sources. |
| */ |
| nir_instr_insert_after_block(blk, &phi->instr); |
| |
| for (unsigned i = 0; i < header.phi.num_srcs; i++) { |
| nir_def *def = (nir_def *)(uintptr_t)blob_read_uint32(ctx->blob); |
| nir_block *pred = (nir_block *)(uintptr_t)blob_read_uint32(ctx->blob); |
| nir_phi_src *src = nir_phi_instr_add_src(phi, pred, def); |
| |
| /* Since we're not letting nir_insert_instr handle use/def stuff for us, |
| * we have to set the parent_instr manually. It doesn't really matter |
| * when we do it, so we might as well do it here. |
| */ |
| nir_src_set_parent_instr(&src->src, &phi->instr); |
| |
| /* Stash it in the list of phi sources. We'll walk this list and fix up |
| * sources at the very end of read_function_impl. |
| */ |
| list_add(&src->src.use_link, &ctx->phi_srcs); |
| } |
| |
| return phi; |
| } |
| |
| static void |
| read_fixup_phis(read_ctx *ctx) |
| { |
| list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) { |
| src->pred = read_lookup_object(ctx, (uintptr_t)src->pred); |
| src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa); |
| |
| /* Remove from this list */ |
| list_del(&src->src.use_link); |
| |
| list_addtail(&src->src.use_link, &src->src.ssa->uses); |
| } |
| assert(list_is_empty(&ctx->phi_srcs)); |
| } |
| |
| static void |
| write_jump(write_ctx *ctx, const nir_jump_instr *jmp) |
| { |
| /* These aren't handled because they require special block linking */ |
| assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if); |
| |
| assert(jmp->type < 4); |
| |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.jump.instr_type = jmp->instr.type; |
| header.jump.type = jmp->type; |
| |
| blob_write_uint32(ctx->blob, header.u32); |
| } |
| |
| static nir_jump_instr * |
| read_jump(read_ctx *ctx, union packed_instr header) |
| { |
| /* These aren't handled because they require special block linking */ |
| assert(header.jump.type != nir_jump_goto && |
| header.jump.type != nir_jump_goto_if); |
| |
| nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type); |
| return jmp; |
| } |
| |
| static void |
| write_call(write_ctx *ctx, const nir_call_instr *call) |
| { |
| blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee)); |
| |
| for (unsigned i = 0; i < call->num_params; i++) |
| write_src(ctx, &call->params[i]); |
| } |
| |
| static nir_call_instr * |
| read_call(read_ctx *ctx) |
| { |
| nir_function *callee = read_object(ctx); |
| nir_call_instr *call = nir_call_instr_create(ctx->nir, callee); |
| |
| for (unsigned i = 0; i < call->num_params; i++) |
| read_src(ctx, &call->params[i]); |
| |
| return call; |
| } |
| |
| static void |
| write_debug_info(write_ctx *ctx, const nir_debug_info_instr *di) |
| { |
| union packed_instr header; |
| header.u32 = 0; |
| |
| header.debug_info.instr_type = nir_instr_type_debug_info; |
| header.debug_info.type = di->type; |
| header.debug_info.string_length = di->string_length; |
| |
| switch (di->type) { |
| case nir_debug_info_src_loc: |
| blob_write_uint32(ctx->blob, header.u32); |
| blob_write_uint32(ctx->blob, di->src_loc.line); |
| blob_write_uint32(ctx->blob, di->src_loc.column); |
| blob_write_uint32(ctx->blob, di->src_loc.spirv_offset); |
| blob_write_uint8(ctx->blob, di->src_loc.source); |
| if (di->src_loc.line) |
| write_src(ctx, &di->src_loc.filename); |
| return; |
| case nir_debug_info_string: |
| write_def(ctx, &di->def, header, di->instr.type); |
| blob_write_bytes(ctx->blob, di->string, di->string_length); |
| return; |
| } |
| |
| unreachable("Unimplemented nir_debug_info_type"); |
| } |
| |
| static nir_debug_info_instr * |
| read_debug_info(read_ctx *ctx, union packed_instr header) |
| { |
| nir_debug_info_type type = header.debug_info.type; |
| |
| switch (type) { |
| case nir_debug_info_src_loc: { |
| nir_debug_info_instr *di = nir_debug_info_instr_create(ctx->nir, type, 0); |
| di->src_loc.line = blob_read_uint32(ctx->blob); |
| di->src_loc.column = blob_read_uint32(ctx->blob); |
| di->src_loc.spirv_offset = blob_read_uint32(ctx->blob); |
| di->src_loc.source = blob_read_uint8(ctx->blob); |
| if (di->src_loc.line) |
| read_src(ctx, &di->src_loc.filename); |
| return di; |
| } |
| case nir_debug_info_string: { |
| nir_debug_info_instr *di = |
| nir_debug_info_instr_create(ctx->nir, type, header.debug_info.string_length); |
| read_def(ctx, &di->def, &di->instr, header); |
| memcpy(di->string, blob_read_bytes(ctx->blob, di->string_length), di->string_length); |
| return di; |
| } |
| } |
| |
| unreachable("Unimplemented nir_debug_info_type"); |
| } |
| |
| static void |
| write_instr(write_ctx *ctx, const nir_instr *instr) |
| { |
| /* We have only 4 bits for the instruction type. */ |
| assert(instr->type < 16); |
| |
| switch (instr->type) { |
| case nir_instr_type_alu: |
| write_alu(ctx, nir_instr_as_alu(instr)); |
| break; |
| case nir_instr_type_deref: |
| write_deref(ctx, nir_instr_as_deref(instr)); |
| break; |
| case nir_instr_type_intrinsic: |
| write_intrinsic(ctx, nir_instr_as_intrinsic(instr)); |
| break; |
| case nir_instr_type_load_const: |
| write_load_const(ctx, nir_instr_as_load_const(instr)); |
| break; |
| case nir_instr_type_undef: |
| write_ssa_undef(ctx, nir_instr_as_undef(instr)); |
| break; |
| case nir_instr_type_tex: |
| write_tex(ctx, nir_instr_as_tex(instr)); |
| break; |
| case nir_instr_type_phi: |
| write_phi(ctx, nir_instr_as_phi(instr)); |
| break; |
| case nir_instr_type_jump: |
| write_jump(ctx, nir_instr_as_jump(instr)); |
| break; |
| case nir_instr_type_call: |
| blob_write_uint32(ctx->blob, instr->type); |
| write_call(ctx, nir_instr_as_call(instr)); |
| break; |
| case nir_instr_type_debug_info: |
| write_debug_info(ctx, nir_instr_as_debug_info(instr)); |
| break; |
| case nir_instr_type_parallel_copy: |
| unreachable("Cannot write parallel copies"); |
| default: |
| unreachable("bad instr type"); |
| } |
| } |
| |
| /* Return the number of instructions read. */ |
| static unsigned |
| read_instr(read_ctx *ctx, nir_block *block) |
| { |
| STATIC_ASSERT(sizeof(union packed_instr) == 4); |
| union packed_instr header; |
| header.u32 = blob_read_uint32(ctx->blob); |
| nir_instr *instr; |
| |
| switch (header.any.instr_type) { |
| case nir_instr_type_alu: |
| for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++) |
| nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr); |
| return header.alu.num_followup_alu_sharing_header + 1; |
| case nir_instr_type_deref: |
| instr = &read_deref(ctx, header)->instr; |
| break; |
| case nir_instr_type_intrinsic: |
| instr = &read_intrinsic(ctx, header)->instr; |
| break; |
| case nir_instr_type_load_const: |
| instr = &read_load_const(ctx, header)->instr; |
| break; |
| case nir_instr_type_undef: |
| instr = &read_ssa_undef(ctx, header)->instr; |
| break; |
| case nir_instr_type_tex: |
| instr = &read_tex(ctx, header)->instr; |
| break; |
| case nir_instr_type_phi: |
| /* Phi instructions are a bit of a special case when reading because we |
| * don't want inserting the instruction to automatically handle use/defs |
| * for us. Instead, we need to wait until all the blocks/instructions |
| * are read so that we can set their sources up. |
| */ |
| read_phi(ctx, block, header); |
| return 1; |
| case nir_instr_type_jump: |
| instr = &read_jump(ctx, header)->instr; |
| break; |
| case nir_instr_type_call: |
| instr = &read_call(ctx)->instr; |
| break; |
| case nir_instr_type_debug_info: |
| instr = &read_debug_info(ctx, header)->instr; |
| break; |
| case nir_instr_type_parallel_copy: |
| unreachable("Cannot read parallel copies"); |
| default: |
| unreachable("bad instr type"); |
| } |
| |
| nir_instr_insert_after_block(block, instr); |
| return 1; |
| } |
| |
| static void |
| write_block(write_ctx *ctx, const nir_block *block) |
| { |
| write_add_object(ctx, block); |
| blob_write_uint8(ctx->blob, block->divergent); |
| blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); |
| |
| ctx->last_instr_type = ~0; |
| ctx->last_alu_header_offset = 0; |
| |
| nir_foreach_instr(instr, block) { |
| write_instr(ctx, instr); |
| ctx->last_instr_type = instr->type; |
| } |
| } |
| |
| static void |
| read_block(read_ctx *ctx, struct exec_list *cf_list) |
| { |
| /* Don't actually create a new block. Just use the one from the tail of |
| * the list. NIR guarantees that the tail of the list is a block and that |
| * no two blocks are side-by-side in the IR; it should be empty. |
| */ |
| nir_block *block = |
| exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); |
| |
| read_add_object(ctx, block); |
| block->divergent = blob_read_uint8(ctx->blob); |
| unsigned num_instrs = blob_read_uint32(ctx->blob); |
| for (unsigned i = 0; i < num_instrs;) { |
| i += read_instr(ctx, block); |
| } |
| } |
| |
| static void |
| write_cf_list(write_ctx *ctx, const struct exec_list *cf_list); |
| |
| static void |
| read_cf_list(read_ctx *ctx, struct exec_list *cf_list); |
| |
| static void |
| write_if(write_ctx *ctx, nir_if *nif) |
| { |
| write_src(ctx, &nif->condition); |
| blob_write_uint8(ctx->blob, nif->control); |
| |
| write_cf_list(ctx, &nif->then_list); |
| write_cf_list(ctx, &nif->else_list); |
| } |
| |
| static void |
| read_if(read_ctx *ctx, struct exec_list *cf_list) |
| { |
| nir_if *nif = nir_if_create(ctx->nir); |
| |
| read_src(ctx, &nif->condition); |
| nif->control = blob_read_uint8(ctx->blob); |
| |
| nir_cf_node_insert_end(cf_list, &nif->cf_node); |
| |
| read_cf_list(ctx, &nif->then_list); |
| read_cf_list(ctx, &nif->else_list); |
| } |
| |
| static void |
| write_loop(write_ctx *ctx, nir_loop *loop) |
| { |
| blob_write_uint8(ctx->blob, loop->control); |
| blob_write_uint8(ctx->blob, loop->divergent_continue); |
| blob_write_uint8(ctx->blob, loop->divergent_break); |
| bool has_continue_construct = nir_loop_has_continue_construct(loop); |
| blob_write_uint8(ctx->blob, has_continue_construct); |
| |
| write_cf_list(ctx, &loop->body); |
| if (has_continue_construct) { |
| write_cf_list(ctx, &loop->continue_list); |
| } |
| } |
| |
| static void |
| read_loop(read_ctx *ctx, struct exec_list *cf_list) |
| { |
| nir_loop *loop = nir_loop_create(ctx->nir); |
| |
| nir_cf_node_insert_end(cf_list, &loop->cf_node); |
| |
| loop->control = blob_read_uint8(ctx->blob); |
| loop->divergent_continue = blob_read_uint8(ctx->blob); |
| loop->divergent_break = blob_read_uint8(ctx->blob); |
| bool has_continue_construct = blob_read_uint8(ctx->blob); |
| |
| read_cf_list(ctx, &loop->body); |
| if (has_continue_construct) { |
| nir_loop_add_continue_construct(loop); |
| read_cf_list(ctx, &loop->continue_list); |
| } |
| } |
| |
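| /* Each CF node is prefixed with its nir_cf_node_type so that |
| * read_cf_node() can dispatch to the matching reader. |
| */ |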
| static void |
| write_cf_node(write_ctx *ctx, nir_cf_node *cf) |
| { |
| blob_write_uint32(ctx->blob, cf->type); |
| |
| switch (cf->type) { |
| case nir_cf_node_block: |
| write_block(ctx, nir_cf_node_as_block(cf)); |
| break; |
| case nir_cf_node_if: |
| write_if(ctx, nir_cf_node_as_if(cf)); |
| break; |
| case nir_cf_node_loop: |
| write_loop(ctx, nir_cf_node_as_loop(cf)); |
| break; |
| default: |
| unreachable("bad cf type"); |
| } |
| } |
| |
| static void |
| read_cf_node(read_ctx *ctx, struct exec_list *list) |
| { |
| nir_cf_node_type type = blob_read_uint32(ctx->blob); |
| |
| switch (type) { |
| case nir_cf_node_block: |
| read_block(ctx, list); |
| break; |
| case nir_cf_node_if: |
| read_if(ctx, list); |
| break; |
| case nir_cf_node_loop: |
| read_loop(ctx, list); |
| break; |
| default: |
| unreachable("bad cf type"); |
| } |
| } |
| |
| static void |
| write_cf_list(write_ctx *ctx, const struct exec_list *cf_list) |
| { |
| blob_write_uint32(ctx->blob, exec_list_length(cf_list)); |
| foreach_list_typed(nir_cf_node, cf, node, cf_list) { |
| write_cf_node(ctx, cf); |
| } |
| } |
| |
| static void |
| read_cf_list(read_ctx *ctx, struct exec_list *cf_list) |
| { |
| uint32_t num_cf_nodes = blob_read_uint32(ctx->blob); |
| for (unsigned i = 0; i < num_cf_nodes; i++) |
| read_cf_node(ctx, cf_list); |
| } |
| |
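| /* A function_impl is serialized as: the structured flag, whether it has a |
| * preamble (and, if so, the remap-table index of the preamble function, |
| * which write_function() registered earlier), the local variables, the |
| * body CF list and, finally, the deferred phi-source fixups. |
| */ |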
| static void |
| write_function_impl(write_ctx *ctx, const nir_function_impl *fi) |
| { |
| blob_write_uint8(ctx->blob, fi->structured); |
| blob_write_uint8(ctx->blob, !!fi->preamble); |
| |
| if (fi->preamble) |
| blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble)); |
| |
| write_var_list(ctx, &fi->locals); |
| |
| write_cf_list(ctx, &fi->body); |
| write_fixup_phis(ctx); |
| } |
| |
| static nir_function_impl * |
| read_function_impl(read_ctx *ctx) |
| { |
| nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir); |
| |
| fi->structured = blob_read_uint8(ctx->blob); |
| bool preamble = blob_read_uint8(ctx->blob); |
| |
| if (preamble) |
| fi->preamble = read_object(ctx); |
| |
| read_var_list(ctx, &fi->locals); |
| |
| read_cf_list(ctx, &fi->body); |
| read_fixup_phis(ctx); |
| |
| fi->valid_metadata = 0; |
| |
| return fi; |
| } |
| |
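| /* A function's boolean properties and "has_*" indicators are packed into |
| * one bitfield; the layout must be kept in sync with read_function(): |
| * |
| *    0x1   is_entrypoint          0x20   dont_inline |
| *    0x2   is_preamble            0x40   is_subroutine |
| *    0x4   has a name             0x80   is_tmp_globals_wrapper |
| *    0x8   has an impl            0x100  non-zero workgroup_size |
| *    0x10  should_inline |
| * |
| * Each parameter is packed similarly: num_components in bits 0-7, |
| * bit_size in bits 8-15, and bit 16 set when the parameter has a name. |
| */ |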
| static void |
| write_function(write_ctx *ctx, const nir_function *fxn) |
| { |
| uint32_t flags = 0; |
| if (fxn->is_entrypoint) |
| flags |= 0x1; |
| if (fxn->is_preamble) |
| flags |= 0x2; |
| if (fxn->name) |
| flags |= 0x4; |
| if (fxn->impl) |
| flags |= 0x8; |
| if (fxn->should_inline) |
| flags |= 0x10; |
| if (fxn->dont_inline) |
| flags |= 0x20; |
| if (fxn->is_subroutine) |
| flags |= 0x40; |
| if (fxn->is_tmp_globals_wrapper) |
| flags |= 0x80; |
| if (fxn->workgroup_size[0] || fxn->workgroup_size[1] || fxn->workgroup_size[2]) |
| flags |= 0x100; |
| blob_write_uint32(ctx->blob, flags); |
| if (fxn->name) |
| blob_write_string(ctx->blob, fxn->name); |
| |
| if (flags & 0x100) { |
| blob_write_uint32(ctx->blob, fxn->workgroup_size[0]); |
| blob_write_uint32(ctx->blob, fxn->workgroup_size[1]); |
| blob_write_uint32(ctx->blob, fxn->workgroup_size[2]); |
| } |
| |
| blob_write_uint32(ctx->blob, fxn->subroutine_index); |
| blob_write_uint32(ctx->blob, fxn->num_subroutine_types); |
| for (unsigned i = 0; i < fxn->num_subroutine_types; i++) { |
| encode_type_to_blob(ctx->blob, fxn->subroutine_types[i]); |
| } |
| |
| write_add_object(ctx, fxn); |
| |
| blob_write_uint32(ctx->blob, fxn->num_params); |
| for (unsigned i = 0; i < fxn->num_params; i++) { |
| uint32_t val = |
| ((uint32_t)fxn->params[i].num_components) | |
| ((uint32_t)fxn->params[i].bit_size) << 8; |
| |
| bool has_name = fxn->params[i].name && !ctx->strip; |
| if (has_name) |
| val |= 0x10000; |
| |
| blob_write_uint32(ctx->blob, val); |
| if (has_name) |
| blob_write_string(ctx->blob, fxn->params[i].name); |
| |
| encode_type_to_blob(ctx->blob, fxn->params[i].type); |
| blob_write_uint32(ctx->blob, encode_deref_modes(fxn->params[i].mode)); |
| } |
| |
| /* At first glance, it looks like we should write the function_impl here. |
| * However, call instructions need to be able to reference their callee |
| * functions, so every function must be in the remap table before any |
| * function_impl is written.  We stop here and write the function_impls |
| * in a second pass. |
| */ |
| } |
| |
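| /* Must mirror the layout produced by write_function() exactly.  The impl |
| * is not read here: functions that had one are marked with the |
| * NIR_SERIALIZE_FUNC_HAS_IMPL sentinel and get their real impl attached |
| * by nir_deserialize() in a second pass. |
| */ |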
| static void |
| read_function(read_ctx *ctx) |
| { |
| uint32_t flags = blob_read_uint32(ctx->blob); |
| |
| bool has_name = flags & 0x4; |
| char *name = has_name ? blob_read_string(ctx->blob) : NULL; |
| |
| nir_function *fxn = nir_function_create(ctx->nir, name); |
| |
| if (flags & 0x100) { |
| fxn->workgroup_size[0] = blob_read_uint32(ctx->blob); |
| fxn->workgroup_size[1] = blob_read_uint32(ctx->blob); |
| fxn->workgroup_size[2] = blob_read_uint32(ctx->blob); |
| } |
| |
| fxn->subroutine_index = blob_read_uint32(ctx->blob); |
| fxn->num_subroutine_types = blob_read_uint32(ctx->blob); |
| for (unsigned i = 0; i < fxn->num_subroutine_types; i++) { |
| fxn->subroutine_types[i] = decode_type_from_blob(ctx->blob); |
| } |
| |
| read_add_object(ctx, fxn); |
| |
| fxn->num_params = blob_read_uint32(ctx->blob); |
| fxn->params = rzalloc_array(fxn, nir_parameter, fxn->num_params); |
| for (unsigned i = 0; i < fxn->num_params; i++) { |
| uint32_t val = blob_read_uint32(ctx->blob); |
| bool has_name = (val & 0x10000); |
| /* Copy the name out of the blob; its buffer may not outlive the shader. */ |
| if (has_name) |
| fxn->params[i].name = ralloc_strdup(fxn, blob_read_string(ctx->blob)); |
| |
| fxn->params[i].num_components = val & 0xff; |
| fxn->params[i].bit_size = (val >> 8) & 0xff; |
| fxn->params[i].type = decode_type_from_blob(ctx->blob); |
| fxn->params[i].mode = decode_deref_modes(blob_read_uint32(ctx->blob)); |
| } |
| |
| fxn->is_entrypoint = flags & 0x1; |
| fxn->is_preamble = flags & 0x2; |
| if (flags & 0x8) |
| fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL; |
| fxn->should_inline = flags & 0x10; |
| fxn->dont_inline = flags & 0x20; |
| fxn->is_subroutine = flags & 0x40; |
| fxn->is_tmp_globals_wrapper = flags & 0x80; |
| } |
| |
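| /* nir_xfb_info is a flat, variable-length structure, so it is serialized |
| * as a size-prefixed raw byte copy.  A size of zero means the shader has |
| * no xfb info. |
| */ |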
| static void |
| write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb) |
| { |
| if (xfb == NULL) { |
| blob_write_uint32(ctx->blob, 0); |
| } else { |
| size_t size = nir_xfb_info_size(xfb->output_count); |
| assert(size <= UINT32_MAX); |
| blob_write_uint32(ctx->blob, size); |
| blob_write_bytes(ctx->blob, xfb, size); |
| } |
| } |
| |
| static nir_xfb_info * |
| read_xfb_info(read_ctx *ctx) |
| { |
| uint32_t size = blob_read_uint32(ctx->blob); |
| if (size == 0) |
| return NULL; |
| |
| struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size); |
| blob_copy_bytes(ctx->blob, (void *)xfb, size); |
| |
| return xfb; |
| } |
| |
| /** |
| * Serialize NIR into a binary blob. |
| * |
| * \param strip Don't serialize information only useful for debugging, |
| * such as variable names, making cache hits from similar |
| * shaders more likely. |
| */ |
| void |
| nir_serialize(struct blob *blob, const nir_shader *nir, bool strip) |
| { |
| write_ctx ctx = { 0 }; |
| ctx.remap_table = _mesa_pointer_hash_table_create(NULL); |
| ctx.blob = blob; |
| ctx.nir = nir; |
| ctx.strip = strip; |
| util_dynarray_init(&ctx.phi_fixups, NULL); |
| |
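| /* The total number of remap-table indices is only known once the whole |
| * shader has been written, so reserve space for it now and patch it in |
| * with blob_overwrite_uint32() at the end.  The reader uses it to size |
| * its index -> object table. |
| */ |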
| size_t idx_size_offset = blob_reserve_uint32(blob); |
| |
| struct shader_info info = nir->info; |
| uint32_t strings = 0; |
| if (!strip && info.name) |
| strings |= 0x1; |
| if (!strip && info.label) |
| strings |= 0x2; |
| blob_write_uint32(blob, strings); |
| if (!strip && info.name) |
| blob_write_string(blob, info.name); |
| if (!strip && info.label) |
| blob_write_string(blob, info.label); |
| info.name = info.label = NULL; |
| blob_write_bytes(blob, (uint8_t *)&info, sizeof(info)); |
| |
| write_var_list(&ctx, &nir->variables); |
| |
| blob_write_uint32(blob, nir->num_inputs); |
| blob_write_uint32(blob, nir->num_uniforms); |
| blob_write_uint32(blob, nir->num_outputs); |
| blob_write_uint32(blob, nir->scratch_size); |
| |
| blob_write_uint32(blob, exec_list_length(&nir->functions)); |
| nir_foreach_function(fxn, nir) { |
| write_function(&ctx, fxn); |
| } |
| |
| nir_foreach_function_impl(impl, nir) { |
| write_function_impl(&ctx, impl); |
| } |
| |
| blob_write_uint32(blob, nir->constant_data_size); |
| if (nir->constant_data_size > 0) |
| blob_write_bytes(blob, nir->constant_data, nir->constant_data_size); |
| |
| write_xfb_info(&ctx, nir->xfb_info); |
| |
| if (nir->info.uses_printf) |
| u_printf_serialize_info(blob, nir->printf_info, nir->printf_info_count); |
| |
| blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx); |
| |
| _mesa_hash_table_destroy(ctx.remap_table, NULL); |
| util_dynarray_fini(&ctx.phi_fixups); |
| } |
| |
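| /** |
| * Deserialize a shader previously written with nir_serialize(). |
| * |
| * A minimal round-trip sketch (error handling omitted; driver shader |
| * caches are the primary real-world users of this pair of functions): |
| * |
| *    struct blob w; |
| *    blob_init(&w); |
| *    nir_serialize(&w, nir, true); |
| * |
| *    struct blob_reader r; |
| *    blob_reader_init(&r, w.data, w.size); |
| *    nir_shader *clone = nir_deserialize(mem_ctx, nir->options, &r); |
| *    blob_finish(&w); |
| */ |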
| nir_shader * |
| nir_deserialize(void *mem_ctx, |
| const struct nir_shader_compiler_options *options, |
| struct blob_reader *blob) |
| { |
| read_ctx ctx = { 0 }; |
| ctx.blob = blob; |
| list_inithead(&ctx.phi_srcs); |
| ctx.idx_table_len = blob_read_uint32(blob); |
| ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t)); |
| |
| uint32_t strings = blob_read_uint32(blob); |
| char *name = (strings & 0x1) ? blob_read_string(blob) : NULL; |
| char *label = (strings & 0x2) ? blob_read_string(blob) : NULL; |
| |
| struct shader_info info; |
| blob_copy_bytes(blob, (uint8_t *)&info, sizeof(info)); |
| |
| ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL); |
| |
| info.name = name ? ralloc_strdup(ctx.nir, name) : NULL; |
| info.label = label ? ralloc_strdup(ctx.nir, label) : NULL; |
| |
| ctx.nir->info = info; |
| |
| read_var_list(&ctx, &ctx.nir->variables); |
| |
| ctx.nir->num_inputs = blob_read_uint32(blob); |
| ctx.nir->num_uniforms = blob_read_uint32(blob); |
| ctx.nir->num_outputs = blob_read_uint32(blob); |
| ctx.nir->scratch_size = blob_read_uint32(blob); |
| |
| unsigned num_functions = blob_read_uint32(blob); |
| for (unsigned i = 0; i < num_functions; i++) |
| read_function(&ctx); |
| |
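| /* Function impls were written after all of the functions, in declaration |
| * order.  read_function() flagged the ones that have an impl with |
| * NIR_SERIALIZE_FUNC_HAS_IMPL, so replace the sentinel with the real impl |
| * now. |
| */ |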
| nir_foreach_function(fxn, ctx.nir) { |
| if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL) |
| nir_function_set_impl(fxn, read_function_impl(&ctx)); |
| } |
| |
| ctx.nir->constant_data_size = blob_read_uint32(blob); |
| if (ctx.nir->constant_data_size > 0) { |
| ctx.nir->constant_data = |
| ralloc_size(ctx.nir, ctx.nir->constant_data_size); |
| blob_copy_bytes(blob, ctx.nir->constant_data, |
| ctx.nir->constant_data_size); |
| } |
| |
| ctx.nir->xfb_info = read_xfb_info(&ctx); |
| |
| if (ctx.nir->info.uses_printf) { |
| ctx.nir->printf_info = |
| u_printf_deserialize_info(ctx.nir, blob, |
| &ctx.nir->printf_info_count); |
| } |
| |
| free(ctx.idx_table); |
| |
| nir_validate_shader(ctx.nir, "after deserialize"); |
| |
| return ctx.nir; |
| } |
| |
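| /* Round-trip a shader through nir_serialize()/nir_deserialize() in place: |
| * the shader's original contents are freed and replaced with the |
| * deserialized copy.  Typically used to exercise the serialization path |
| * itself (e.g. from debug options). |
| */ |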
| void |
| nir_shader_serialize_deserialize(nir_shader *shader) |
| { |
| const struct nir_shader_compiler_options *options = shader->options; |
| |
| struct blob writer; |
| blob_init(&writer); |
| nir_serialize(&writer, shader, false); |
| |
| /* Delete all of the shader's ralloc children but leave the shader itself alone */ |
| void *dead_ctx = ralloc_context(NULL); |
| ralloc_adopt(dead_ctx, shader); |
| ralloc_free(dead_ctx); |
| |
| dead_ctx = ralloc_context(NULL); |
| |
| struct blob_reader reader; |
| blob_reader_init(&reader, writer.data, writer.size); |
| nir_shader *copy = nir_deserialize(dead_ctx, options, &reader); |
| |
| blob_finish(&writer); |
| |
| nir_shader_replace(shader, copy); |
| ralloc_free(dead_ctx); |
| } |