| /* |
| Copyright (C) Intel Corp. 2006. All Rights Reserved. |
| Intel funded Tungsten Graphics to |
| develop this 3D driver. |
| |
| Permission is hereby granted, free of charge, to any person obtaining |
| a copy of this software and associated documentation files (the |
| "Software"), to deal in the Software without restriction, including |
| without limitation the rights to use, copy, modify, merge, publish, |
| distribute, sublicense, and/or sell copies of the Software, and to |
| permit persons to whom the Software is furnished to do so, subject to |
| the following conditions: |
| |
| The above copyright notice and this permission notice (including the |
| next paragraph) shall be included in all copies or substantial |
| portions of the Software. |
| |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
| LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| |
| **********************************************************************/ |
| /* |
| * Authors: |
| * Keith Whitwell <[email protected]> |
| */ |
| |
| #pragma once |
| |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include "brw_eu_inst.h" |
| #include "brw_compiler.h" |
| #include "brw_eu_defines.h" |
| #include "brw_isa_info.h" |
| #include "brw_reg.h" |
| |
| #include "intel_wa.h" |
| #include "util/bitset.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| struct disasm_info; |
| |
| #define BRW_EU_MAX_INSN_STACK 5 |
| |
| struct brw_insn_state { |
| /* One of BRW_EXECUTE_* */ |
| unsigned exec_size:3; |
| |
| /* Group in units of channels */ |
| unsigned group:5; |
| |
| /* One of BRW_MASK_* */ |
| unsigned mask_control:1; |
| |
| /* Scheduling info for Gfx12+ */ |
| struct tgl_swsb swsb; |
| |
| bool saturate:1; |
| |
| /* One of BRW_ALIGN_* */ |
| unsigned access_mode:1; |
| |
| /* One of BRW_PREDICATE_* */ |
| enum brw_predicate predicate:4; |
| |
| bool pred_inv:1; |
| |
| /* Flag subreg. Bottom bit is subreg, top bit is reg */ |
| unsigned flag_subreg:2; |
| |
| bool acc_wr_control:1; |
| }; |
| |
| |
| /* A helper for accessing the last instruction emitted. This makes it easy |
| * to set various bits on an instruction without having to create temporary |
| * variable and assign the emitted instruction to those. |
| */ |
| #define brw_last_inst (&p->store[p->nr_insn - 1]) |
| |
| struct brw_codegen { |
| brw_eu_inst *store; |
| int store_size; |
| unsigned nr_insn; |
| unsigned int next_insn_offset; |
| |
| void *mem_ctx; |
| |
| /* Allow clients to push/pop instruction state: |
| */ |
| struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK]; |
| struct brw_insn_state *current; |
| |
| const struct brw_isa_info *isa; |
| const struct intel_device_info *devinfo; |
| |
| /* Control flow stacks: |
| * - if_stack contains IF and ELSE instructions which must be patched |
| * (and popped) once the matching ENDIF instruction is encountered. |
| * |
| * Just store the instruction pointer(an index). |
| */ |
| int *if_stack; |
| int if_stack_depth; |
| int if_stack_array_size; |
| |
| /** |
| * loop_stack contains the instruction pointers of the starts of loops which |
| * must be patched (and popped) once the matching WHILE instruction is |
| * encountered. |
| */ |
| int *loop_stack; |
| int loop_stack_depth; |
| int loop_stack_array_size; |
| |
| struct brw_shader_reloc *relocs; |
| int num_relocs; |
| int reloc_array_size; |
| }; |
| |
| struct brw_label { |
| int offset; |
| int number; |
| struct brw_label *next; |
| }; |
| |
| void brw_pop_insn_state( struct brw_codegen *p ); |
| void brw_push_insn_state( struct brw_codegen *p ); |
| unsigned brw_get_default_exec_size(struct brw_codegen *p); |
| unsigned brw_get_default_group(struct brw_codegen *p); |
| unsigned brw_get_default_access_mode(struct brw_codegen *p); |
| struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p); |
| void brw_set_default_exec_size(struct brw_codegen *p, unsigned value); |
| void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ); |
| void brw_set_default_saturate( struct brw_codegen *p, bool enable ); |
| void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ); |
| void brw_eu_inst_set_group(const struct intel_device_info *devinfo, |
| brw_eu_inst *inst, unsigned group); |
| void brw_set_default_group(struct brw_codegen *p, unsigned group); |
| void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc); |
| void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse); |
| void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg); |
| void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value); |
| void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value); |
| |
| void brw_init_codegen(const struct brw_isa_info *isa, |
| struct brw_codegen *p, void *mem_ctx); |
| bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode); |
| bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode); |
| bool brw_has_branch_ctrl(const struct intel_device_info *devinfo, enum opcode opcode); |
| const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p, |
| unsigned *num_relocs); |
| const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz ); |
| |
| bool brw_should_dump_shader_bin(void); |
| void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset, |
| const char *identifier); |
| |
| bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, |
| const char *identifier); |
| |
| void brw_realign(struct brw_codegen *p, unsigned alignment); |
| int brw_append_data(struct brw_codegen *p, void *data, |
| unsigned size, unsigned alignment); |
| brw_eu_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode); |
| void brw_add_reloc(struct brw_codegen *p, uint32_t id, |
| enum brw_shader_reloc_type type, |
| uint32_t offset, uint32_t delta); |
| void brw_set_dest(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg dest); |
| void brw_set_src0(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg); |
| |
| /* Helpers for regular instructions: |
| */ |
| #define ALU1(OP) \ |
| brw_eu_inst *brw_##OP(struct brw_codegen *p, \ |
| struct brw_reg dest, \ |
| struct brw_reg src0); |
| |
| #define ALU2(OP) \ |
| brw_eu_inst *brw_##OP(struct brw_codegen *p, \ |
| struct brw_reg dest, \ |
| struct brw_reg src0, \ |
| struct brw_reg src1); |
| |
| #define ALU3(OP) \ |
| brw_eu_inst *brw_##OP(struct brw_codegen *p, \ |
| struct brw_reg dest, \ |
| struct brw_reg src0, \ |
| struct brw_reg src1, \ |
| struct brw_reg src2); |
| |
| ALU1(MOV) |
| ALU2(SEL) |
| ALU1(NOT) |
| ALU2(AND) |
| ALU2(OR) |
| ALU2(XOR) |
| ALU2(SHR) |
| ALU2(SHL) |
| ALU1(DIM) |
| ALU2(ASR) |
| ALU2(ROL) |
| ALU2(ROR) |
| ALU3(CSEL) |
| ALU1(F32TO16) |
| ALU1(F16TO32) |
| ALU2(ADD) |
| ALU3(ADD3) |
| ALU2(AVG) |
| ALU2(MUL) |
| ALU1(FRC) |
| ALU1(RNDD) |
| ALU1(RNDE) |
| ALU1(RNDU) |
| ALU1(RNDZ) |
| ALU2(MAC) |
| ALU2(MACH) |
| ALU1(LZD) |
| ALU2(DP4) |
| ALU2(DPH) |
| ALU2(DP3) |
| ALU2(DP2) |
| ALU3(DP4A) |
| ALU2(LINE) |
| ALU2(PLN) |
| ALU3(MAD) |
| ALU3(LRP) |
| ALU1(BFREV) |
| ALU3(BFE) |
| ALU2(BFI1) |
| ALU3(BFI2) |
| ALU1(FBH) |
| ALU1(FBL) |
| ALU1(CBIT) |
| ALU2(ADDC) |
| ALU2(SUBB) |
| |
| #undef ALU1 |
| #undef ALU2 |
| #undef ALU3 |
| |
| static inline unsigned |
| reg_unit(const struct intel_device_info *devinfo) |
| { |
| return devinfo->ver >= 20 ? 2 : 1; |
| } |
| |
| |
| /* Helpers for SEND instruction: |
| */ |
| |
| /** |
| * Construct a message descriptor immediate with the specified common |
| * descriptor controls. |
| */ |
| static inline uint32_t |
| brw_message_desc(const struct intel_device_info *devinfo, |
| unsigned msg_length, |
| unsigned response_length, |
| bool header_present) |
| { |
| assert(msg_length % reg_unit(devinfo) == 0); |
| assert(response_length % reg_unit(devinfo) == 0); |
| return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) | |
| SET_BITS(response_length / reg_unit(devinfo), 24, 20) | |
| SET_BITS(header_present, 19, 19)); |
| } |
| |
| static inline unsigned |
| brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| return GET_BITS(desc, 28, 25) * reg_unit(devinfo); |
| } |
| |
| static inline unsigned |
| brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| return GET_BITS(desc, 24, 20) * reg_unit(devinfo); |
| } |
| |
| static inline bool |
| brw_message_desc_header_present(ASSERTED |
| const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 19, 19); |
| } |
| |
| static inline unsigned |
| brw_message_ex_desc(const struct intel_device_info *devinfo, |
| unsigned ex_msg_length) |
| { |
| assert(ex_msg_length % reg_unit(devinfo) == 0); |
| return devinfo->ver >= 20 ? |
| SET_BITS(ex_msg_length / reg_unit(devinfo), 10, 6) : |
| SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6); |
| } |
| |
| static inline unsigned |
| brw_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo, |
| uint32_t ex_desc) |
| { |
| return devinfo->ver >= 20 ? |
| GET_BITS(ex_desc, 10, 6) * reg_unit(devinfo) : |
| GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo); |
| } |
| |
| static inline uint32_t |
| brw_urb_desc(const struct intel_device_info *devinfo, |
| unsigned msg_type, |
| bool per_slot_offset_present, |
| bool channel_mask_present, |
| unsigned global_offset) |
| { |
| return (SET_BITS(per_slot_offset_present, 17, 17) | |
| SET_BITS(channel_mask_present, 15, 15) | |
| SET_BITS(global_offset, 14, 4) | |
| SET_BITS(msg_type, 3, 0)); |
| } |
| |
| static inline uint32_t |
| brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 3, 0); |
| } |
| |
| static inline uint32_t |
| brw_urb_fence_desc(const struct intel_device_info *devinfo) |
| { |
| assert(devinfo->has_lsc); |
| return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0); |
| } |
| |
| /** |
| * Construct a message descriptor immediate with the specified sampler |
| * function controls. |
| */ |
| static inline uint32_t |
| brw_sampler_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned sampler, |
| unsigned msg_type, |
| unsigned simd_mode, |
| unsigned return_format) |
| { |
| const unsigned desc = (SET_BITS(binding_table_index, 7, 0) | |
| SET_BITS(sampler, 11, 8)); |
| |
| /* From GFX20 Bspec: Shared Functions - Message Descriptor - |
| * Sampling Engine: |
| * |
| * Message Type[5] 31 This bit represents the upper bit of message type |
| * 6-bit encoding (c.f. [16:12]). This bit is set |
| * for messages with programmable offsets. |
| */ |
| if (devinfo->ver >= 20) |
| return desc | SET_BITS(msg_type & 0x1F, 16, 12) | |
| SET_BITS(simd_mode & 0x3, 18, 17) | |
| SET_BITS(simd_mode >> 2, 29, 29) | |
| SET_BITS(return_format, 30, 30) | |
| SET_BITS(msg_type >> 5, 31, 31); |
| |
| /* From the CHV Bspec: Shared Functions - Message Descriptor - |
| * Sampling Engine: |
| * |
| * SIMD Mode[2] 29 This field is the upper bit of the 3-bit |
| * SIMD Mode field. |
| */ |
| return desc | SET_BITS(msg_type, 16, 12) | |
| SET_BITS(simd_mode & 0x3, 18, 17) | |
| SET_BITS(simd_mode >> 2, 29, 29) | |
| SET_BITS(return_format, 30, 30); |
| } |
| |
| static inline unsigned |
| brw_sampler_desc_binding_table_index(UNUSED |
| const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 7, 0); |
| } |
| |
| static inline unsigned |
| brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 11, 8); |
| } |
| |
| static inline unsigned |
| brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| if (devinfo->ver >= 20) |
| return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12); |
| else |
| return GET_BITS(desc, 16, 12); |
| } |
| |
| static inline unsigned |
| brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2; |
| } |
| |
| static inline unsigned |
| brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 30, 30); |
| } |
| |
| /** |
| * Construct a message descriptor for the dataport |
| */ |
| static inline uint32_t |
| brw_dp_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned msg_type, |
| unsigned msg_control) |
| { |
| return SET_BITS(binding_table_index, 7, 0) | |
| SET_BITS(msg_control, 13, 8) | |
| SET_BITS(msg_type, 18, 14); |
| } |
| |
| static inline unsigned |
| brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 7, 0); |
| } |
| |
| static inline unsigned |
| brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| return GET_BITS(desc, 18, 14); |
| } |
| |
| static inline unsigned |
| brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| return GET_BITS(desc, 13, 8); |
| } |
| |
| /** |
| * Construct a message descriptor immediate with the specified dataport read |
| * function controls. |
| */ |
| static inline uint32_t |
| brw_dp_read_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned msg_control, |
| unsigned msg_type, |
| unsigned target_cache) |
| { |
| return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control); |
| } |
| |
| static inline unsigned |
| brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return brw_dp_desc_msg_type(devinfo, desc); |
| } |
| |
| static inline unsigned |
| brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return brw_dp_desc_msg_control(devinfo, desc); |
| } |
| |
| /** |
| * Construct a message descriptor immediate with the specified dataport write |
| * function controls. |
| */ |
| static inline uint32_t |
| brw_dp_write_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned msg_control, |
| unsigned msg_type, |
| unsigned send_commit_msg) |
| { |
| assert(!send_commit_msg); |
| return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) | |
| SET_BITS(send_commit_msg, 17, 17); |
| } |
| |
| static inline unsigned |
| brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return brw_dp_desc_msg_type(devinfo, desc); |
| } |
| |
| static inline unsigned |
| brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return brw_dp_desc_msg_control(devinfo, desc); |
| } |
| |
| /** |
| * Construct a message descriptor immediate with the specified dataport |
| * surface function controls. |
| */ |
| static inline uint32_t |
| brw_dp_surface_desc(const struct intel_device_info *devinfo, |
| unsigned msg_type, |
| unsigned msg_control) |
| { |
| /* We'll OR in the binding table index later */ |
| return brw_dp_desc(devinfo, 0, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, /**< 0 for SIMD4x2 */ |
| unsigned atomic_op, |
| bool response_expected) |
| { |
| assert(exec_size <= 8 || exec_size == 16); |
| |
| unsigned msg_type; |
| if (exec_size > 0) { |
| msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; |
| } else { |
| msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2; |
| } |
| |
| const unsigned msg_control = |
| SET_BITS(atomic_op, 3, 0) | |
| SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) | |
| SET_BITS(response_expected, 5, 5); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, |
| unsigned atomic_op, |
| bool response_expected) |
| { |
| assert(exec_size <= 8 || exec_size == 16); |
| |
| assert(exec_size > 0); |
| const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP; |
| |
| const unsigned msg_control = |
| SET_BITS(atomic_op, 1, 0) | |
| SET_BITS(exec_size <= 8, 4, 4) | |
| SET_BITS(response_expected, 5, 5); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline unsigned |
| brw_mdc_cmask(unsigned num_channels) |
| { |
| /* See also MDC_CMASK in the SKL PRM Vol 2d. */ |
| return 0xf & (0xf << num_channels); |
| } |
| |
| static inline uint32_t |
| brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, /**< 0 for SIMD4x2 */ |
| unsigned num_channels, |
| bool write) |
| { |
| assert(exec_size <= 8 || exec_size == 16); |
| |
| const unsigned msg_type = |
| write ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE : |
| HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ; |
| |
| /* See also MDC_SM3 in the SKL PRM Vol 2d. */ |
| const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */ |
| exec_size <= 8 ? 2 : 1; |
| |
| const unsigned msg_control = |
| SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | |
| SET_BITS(simd_mode, 5, 4); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline unsigned |
| brw_mdc_ds(unsigned bit_size) |
| { |
| switch (bit_size) { |
| case 8: |
| return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; |
| case 16: |
| return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD; |
| case 32: |
| return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; |
| default: |
| unreachable("Unsupported bit_size for byte scattered messages"); |
| } |
| } |
| |
| static inline uint32_t |
| brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, |
| unsigned bit_size, |
| bool write) |
| { |
| assert(exec_size <= 8 || exec_size == 16); |
| |
| const unsigned msg_type = |
| write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE : |
| HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ; |
| |
| assert(exec_size > 0); |
| const unsigned msg_control = |
| SET_BITS(exec_size == 16, 0, 0) | |
| SET_BITS(brw_mdc_ds(bit_size), 3, 2); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, |
| bool write) |
| { |
| assert(exec_size == 8 || exec_size == 16); |
| |
| const unsigned msg_type = |
| write ? GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE : |
| GFX7_DATAPORT_DC_DWORD_SCATTERED_READ; |
| |
| const unsigned msg_control = |
| SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */ |
| SET_BITS(exec_size == 16, 0, 0); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo, |
| bool align_16B, |
| unsigned num_dwords, |
| bool write) |
| { |
| /* Writes can only have addresses aligned by OWORDs (16 Bytes). */ |
| assert(!write || align_16B); |
| |
| const unsigned msg_type = |
| write ? GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE : |
| align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ : |
| GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ; |
| |
| const unsigned msg_control = |
| SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, /**< 0 for SIMD4x2 */ |
| unsigned num_channels, |
| bool write) |
| { |
| assert(exec_size <= 8 || exec_size == 16); |
| |
| unsigned msg_type = |
| write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE : |
| GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ; |
| |
| /* See also MDC_SM3 in the SKL PRM Vol 2d. */ |
| const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */ |
| exec_size <= 8 ? 2 : 1; |
| |
| const unsigned msg_control = |
| SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | |
| SET_BITS(simd_mode, 5, 4); |
| |
| return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, |
| msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo, |
| bool align_16B, |
| unsigned num_dwords, |
| bool write) |
| { |
| /* Writes can only have addresses aligned by OWORDs (16 Bytes). */ |
| assert(!write || align_16B); |
| |
| unsigned msg_type = |
| write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE : |
| GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ; |
| |
| unsigned msg_control = |
| SET_BITS(!align_16B, 4, 3) | |
| SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0); |
| |
| return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, |
| msg_type, msg_control); |
| } |
| |
| /** |
| * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the |
| * Skylake PRM). |
| */ |
| static inline uint32_t |
| brw_mdc_a64_ds(unsigned elems) |
| { |
| switch (elems) { |
| case 1: return 0; |
| case 2: return 1; |
| case 4: return 2; |
| case 8: return 3; |
| default: |
| unreachable("Unsupported elmeent count for A64 scattered message"); |
| } |
| } |
| |
| static inline uint32_t |
| brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, /**< 0 for SIMD4x2 */ |
| unsigned bit_size, |
| bool write) |
| { |
| assert(exec_size <= 8 || exec_size == 16); |
| |
| unsigned msg_type = |
| write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE : |
| GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ; |
| |
| const unsigned msg_control = |
| SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) | |
| SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) | |
| SET_BITS(exec_size == 16, 4, 4); |
| |
| return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, |
| msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo, |
| ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */ |
| unsigned bit_size, |
| unsigned atomic_op, |
| bool response_expected) |
| { |
| assert(exec_size == 8); |
| assert(bit_size == 16 || bit_size == 32 || bit_size == 64); |
| assert(devinfo->ver >= 12 || bit_size >= 32); |
| |
| const unsigned msg_type = bit_size == 16 ? |
| GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP : |
| GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP; |
| |
| const unsigned msg_control = |
| SET_BITS(atomic_op, 3, 0) | |
| SET_BITS(bit_size == 64, 4, 4) | |
| SET_BITS(response_expected, 5, 5); |
| |
| return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, |
| msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo, |
| ASSERTED unsigned exec_size, |
| unsigned bit_size, |
| unsigned atomic_op, |
| bool response_expected) |
| { |
| assert(exec_size == 8); |
| assert(bit_size == 16 || bit_size == 32); |
| assert(devinfo->ver >= 12 || bit_size == 32); |
| |
| assert(exec_size > 0); |
| const unsigned msg_type = bit_size == 32 ? |
| GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP : |
| GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP; |
| |
| const unsigned msg_control = |
| SET_BITS(atomic_op, 1, 0) | |
| SET_BITS(response_expected, 5, 5); |
| |
| return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, |
| msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, |
| unsigned exec_group, |
| unsigned atomic_op, |
| bool response_expected) |
| { |
| assert(exec_size > 0 || exec_group == 0); |
| assert(exec_group % 8 == 0); |
| |
| const unsigned msg_type = |
| exec_size == 0 ? HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 : |
| HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP; |
| |
| const bool high_sample_mask = (exec_group / 8) % 2 == 1; |
| |
| const unsigned msg_control = |
| SET_BITS(atomic_op, 3, 0) | |
| SET_BITS(high_sample_mask, 4, 4) | |
| SET_BITS(response_expected, 5, 5); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo, |
| unsigned exec_size, |
| unsigned exec_group, |
| unsigned num_channels, |
| bool write) |
| { |
| assert(exec_size > 0 || exec_group == 0); |
| assert(exec_group % 8 == 0); |
| |
| /* Typed surface reads and writes don't support SIMD16 */ |
| assert(exec_size <= 8); |
| |
| const unsigned msg_type = |
| write ? HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE : |
| HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ; |
| |
| /* See also MDC_SG3 in the SKL PRM Vol 2d. */ |
| const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */ |
| 1 + ((exec_group / 8) % 2); |
| |
| const unsigned msg_control = |
| SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | |
| SET_BITS(slot_group, 5, 4); |
| |
| return brw_dp_surface_desc(devinfo, msg_type, msg_control); |
| } |
| |
| static inline uint32_t |
| brw_fb_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned msg_type, |
| unsigned msg_control) |
| { |
| return SET_BITS(binding_table_index, 7, 0) | |
| SET_BITS(msg_control, 13, 8) | |
| SET_BITS(msg_type, 17, 14); |
| } |
| |
| static inline unsigned |
| brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 7, 0); |
| } |
| |
| static inline uint32_t |
| brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| return GET_BITS(desc, 13, 8); |
| } |
| |
| static inline unsigned |
| brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc) |
| { |
| return GET_BITS(desc, 17, 14); |
| } |
| |
| static inline uint32_t |
| brw_fb_read_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned msg_control, |
| unsigned exec_size, |
| bool per_sample) |
| { |
| assert(exec_size == 8 || exec_size == 16); |
| |
| return brw_fb_desc(devinfo, binding_table_index, |
| GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) | |
| SET_BITS(per_sample, 13, 13) | |
| SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */; |
| } |
| |
| static inline uint32_t |
| brw_fb_write_desc(const struct intel_device_info *devinfo, |
| unsigned binding_table_index, |
| unsigned msg_control, |
| bool last_render_target, |
| bool coarse_write) |
| { |
| const unsigned msg_type = GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
| |
| assert(devinfo->ver >= 10 || !coarse_write); |
| |
| return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) | |
| SET_BITS(last_render_target, 12, 12) | |
| SET_BITS(coarse_write, 18, 18); |
| } |
| |
| static inline bool |
| brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 12, 12); |
| } |
| |
| static inline bool |
| brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->ver >= 10); |
| return GET_BITS(desc, 18, 18); |
| } |
| |
| static inline bool |
| lsc_opcode_has_cmask(enum lsc_opcode opcode) |
| { |
| return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK; |
| } |
| |
| static inline bool |
| lsc_opcode_has_transpose(enum lsc_opcode opcode) |
| { |
| return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE; |
| } |
| |
| static inline bool |
| lsc_opcode_is_store(enum lsc_opcode opcode) |
| { |
| return opcode == LSC_OP_STORE || |
| opcode == LSC_OP_STORE_CMASK; |
| } |
| |
| static inline bool |
| lsc_opcode_is_atomic(enum lsc_opcode opcode) |
| { |
| switch (opcode) { |
| case LSC_OP_ATOMIC_INC: |
| case LSC_OP_ATOMIC_DEC: |
| case LSC_OP_ATOMIC_LOAD: |
| case LSC_OP_ATOMIC_STORE: |
| case LSC_OP_ATOMIC_ADD: |
| case LSC_OP_ATOMIC_SUB: |
| case LSC_OP_ATOMIC_MIN: |
| case LSC_OP_ATOMIC_MAX: |
| case LSC_OP_ATOMIC_UMIN: |
| case LSC_OP_ATOMIC_UMAX: |
| case LSC_OP_ATOMIC_CMPXCHG: |
| case LSC_OP_ATOMIC_FADD: |
| case LSC_OP_ATOMIC_FSUB: |
| case LSC_OP_ATOMIC_FMIN: |
| case LSC_OP_ATOMIC_FMAX: |
| case LSC_OP_ATOMIC_FCMPXCHG: |
| case LSC_OP_ATOMIC_AND: |
| case LSC_OP_ATOMIC_OR: |
| case LSC_OP_ATOMIC_XOR: |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| static inline bool |
| lsc_opcode_is_atomic_float(enum lsc_opcode opcode) |
| { |
| switch (opcode) { |
| case LSC_OP_ATOMIC_FADD: |
| case LSC_OP_ATOMIC_FSUB: |
| case LSC_OP_ATOMIC_FMIN: |
| case LSC_OP_ATOMIC_FMAX: |
| case LSC_OP_ATOMIC_FCMPXCHG: |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| static inline unsigned |
| lsc_op_num_data_values(unsigned _op) |
| { |
| enum lsc_opcode op = (enum lsc_opcode) _op; |
| |
| switch (op) { |
| case LSC_OP_ATOMIC_CMPXCHG: |
| case LSC_OP_ATOMIC_FCMPXCHG: |
| return 2; |
| case LSC_OP_ATOMIC_INC: |
| case LSC_OP_ATOMIC_DEC: |
| case LSC_OP_LOAD: |
| case LSC_OP_LOAD_CMASK: |
| case LSC_OP_FENCE: |
| /* XXX: actually check docs */ |
| return 0; |
| default: |
| return 1; |
| } |
| } |
| |
| static inline unsigned |
| lsc_op_to_legacy_atomic(unsigned _op) |
| { |
| enum lsc_opcode op = (enum lsc_opcode) _op; |
| |
| switch (op) { |
| case LSC_OP_ATOMIC_INC: |
| return BRW_AOP_INC; |
| case LSC_OP_ATOMIC_DEC: |
| return BRW_AOP_DEC; |
| case LSC_OP_ATOMIC_STORE: |
| return BRW_AOP_MOV; |
| case LSC_OP_ATOMIC_ADD: |
| return BRW_AOP_ADD; |
| case LSC_OP_ATOMIC_SUB: |
| return BRW_AOP_SUB; |
| case LSC_OP_ATOMIC_MIN: |
| return BRW_AOP_IMIN; |
| case LSC_OP_ATOMIC_MAX: |
| return BRW_AOP_IMAX; |
| case LSC_OP_ATOMIC_UMIN: |
| return BRW_AOP_UMIN; |
| case LSC_OP_ATOMIC_UMAX: |
| return BRW_AOP_UMAX; |
| case LSC_OP_ATOMIC_CMPXCHG: |
| return BRW_AOP_CMPWR; |
| case LSC_OP_ATOMIC_FADD: |
| return BRW_AOP_FADD; |
| case LSC_OP_ATOMIC_FMIN: |
| return BRW_AOP_FMIN; |
| case LSC_OP_ATOMIC_FMAX: |
| return BRW_AOP_FMAX; |
| case LSC_OP_ATOMIC_FCMPXCHG: |
| return BRW_AOP_FCMPWR; |
| case LSC_OP_ATOMIC_AND: |
| return BRW_AOP_AND; |
| case LSC_OP_ATOMIC_OR: |
| return BRW_AOP_OR; |
| case LSC_OP_ATOMIC_XOR: |
| return BRW_AOP_XOR; |
| /* No LSC op maps to BRW_AOP_PREDEC */ |
| case LSC_OP_ATOMIC_LOAD: |
| case LSC_OP_ATOMIC_FSUB: |
| unreachable("no corresponding legacy atomic operation"); |
| case LSC_OP_LOAD: |
| case LSC_OP_LOAD_CMASK: |
| case LSC_OP_STORE: |
| case LSC_OP_STORE_CMASK: |
| case LSC_OP_FENCE: |
| unreachable("not an atomic op"); |
| } |
| |
| unreachable("invalid LSC op"); |
| } |
| |
| static inline uint32_t |
| lsc_data_size_bytes(enum lsc_data_size data_size) |
| { |
| switch (data_size) { |
| case LSC_DATA_SIZE_D8: |
| return 1; |
| case LSC_DATA_SIZE_D16: |
| return 2; |
| case LSC_DATA_SIZE_D32: |
| case LSC_DATA_SIZE_D8U32: |
| case LSC_DATA_SIZE_D16U32: |
| case LSC_DATA_SIZE_D16BF32: |
| return 4; |
| case LSC_DATA_SIZE_D64: |
| return 8; |
| default: |
| unreachable("Unsupported data payload size."); |
| } |
| } |
| |
| static inline uint32_t |
| lsc_addr_size_bytes(enum lsc_addr_size addr_size) |
| { |
| switch (addr_size) { |
| case LSC_ADDR_SIZE_A16: return 2; |
| case LSC_ADDR_SIZE_A32: return 4; |
| case LSC_ADDR_SIZE_A64: return 8; |
| default: |
| unreachable("Unsupported address size."); |
| } |
| } |
| |
| static inline uint32_t |
| lsc_vector_length(enum lsc_vect_size vect_size) |
| { |
| switch (vect_size) { |
| case LSC_VECT_SIZE_V1: return 1; |
| case LSC_VECT_SIZE_V2: return 2; |
| case LSC_VECT_SIZE_V3: return 3; |
| case LSC_VECT_SIZE_V4: return 4; |
| case LSC_VECT_SIZE_V8: return 8; |
| case LSC_VECT_SIZE_V16: return 16; |
| case LSC_VECT_SIZE_V32: return 32; |
| case LSC_VECT_SIZE_V64: return 64; |
| default: |
| unreachable("Unsupported size of vector"); |
| } |
| } |
| |
| static inline enum lsc_vect_size |
| lsc_vect_size(unsigned vect_size) |
| { |
| switch(vect_size) { |
| case 1: return LSC_VECT_SIZE_V1; |
| case 2: return LSC_VECT_SIZE_V2; |
| case 3: return LSC_VECT_SIZE_V3; |
| case 4: return LSC_VECT_SIZE_V4; |
| case 8: return LSC_VECT_SIZE_V8; |
| case 16: return LSC_VECT_SIZE_V16; |
| case 32: return LSC_VECT_SIZE_V32; |
| case 64: return LSC_VECT_SIZE_V64; |
| default: |
| unreachable("Unsupported vector size for dataport"); |
| } |
| } |
| |
| static inline uint32_t |
| lsc_msg_desc(const struct intel_device_info *devinfo, |
| enum lsc_opcode opcode, |
| enum lsc_addr_surface_type addr_type, |
| enum lsc_addr_size addr_sz, |
| enum lsc_data_size data_sz, unsigned num_channels_or_cmask, |
| bool transpose, unsigned cache_ctrl) |
| { |
| assert(devinfo->has_lsc); |
| assert(!transpose || lsc_opcode_has_transpose(opcode)); |
| |
| unsigned msg_desc = |
| SET_BITS(opcode, 5, 0) | |
| SET_BITS(addr_sz, 8, 7) | |
| SET_BITS(data_sz, 11, 9) | |
| SET_BITS(transpose, 15, 15) | |
| (devinfo->ver >= 20 ? SET_BITS(cache_ctrl, 19, 16) : |
| SET_BITS(cache_ctrl, 19, 17)) | |
| SET_BITS(addr_type, 30, 29); |
| |
| if (lsc_opcode_has_cmask(opcode)) |
| msg_desc |= SET_BITS(num_channels_or_cmask, 15, 12); |
| else |
| msg_desc |= SET_BITS(lsc_vect_size(num_channels_or_cmask), 14, 12); |
| |
| return msg_desc; |
| } |
| |
| static inline enum lsc_opcode |
| lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_opcode) GET_BITS(desc, 5, 0); |
| } |
| |
| static inline enum lsc_addr_size |
| lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_addr_size) GET_BITS(desc, 8, 7); |
| } |
| |
| static inline enum lsc_data_size |
| lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_data_size) GET_BITS(desc, 11, 9); |
| } |
| |
| static inline enum lsc_vect_size |
| lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc))); |
| return (enum lsc_vect_size) GET_BITS(desc, 14, 12); |
| } |
| |
| static inline enum lsc_cmask |
| lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc))); |
| return (enum lsc_cmask) GET_BITS(desc, 15, 12); |
| } |
| |
| static inline bool |
| lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return GET_BITS(desc, 15, 15); |
| } |
| |
| static inline unsigned |
| lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return devinfo->ver >= 20 ? GET_BITS(desc, 19, 16) : GET_BITS(desc, 19, 17); |
| } |
| |
| static inline unsigned |
| lsc_msg_dest_len(const struct intel_device_info *devinfo, |
| enum lsc_data_size data_sz, unsigned n) |
| { |
| return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n, |
| reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); |
| } |
| |
| static inline unsigned |
| lsc_msg_addr_len(const struct intel_device_info *devinfo, |
| enum lsc_addr_size addr_sz, unsigned n) |
| { |
| return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n, |
| reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); |
| } |
| |
| static inline enum lsc_addr_surface_type |
| lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29); |
| } |
| |
| static inline uint32_t |
| lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo, |
| enum lsc_fence_scope scope, |
| enum lsc_flush_type flush_type, |
| bool route_to_lsc) |
| { |
| assert(devinfo->has_lsc); |
| |
| #if INTEL_NEEDS_WA_22017182272 |
| assert(flush_type != LSC_FLUSH_TYPE_DISCARD); |
| #endif |
| |
| return SET_BITS(LSC_OP_FENCE, 5, 0) | |
| SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) | |
| SET_BITS(scope, 11, 9) | |
| SET_BITS(flush_type, 14, 12) | |
| SET_BITS(route_to_lsc, 18, 18) | |
| SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29); |
| } |
| |
| static inline enum lsc_fence_scope |
| lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_fence_scope) GET_BITS(desc, 11, 9); |
| } |
| |
| static inline enum lsc_flush_type |
| lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_flush_type) GET_BITS(desc, 14, 12); |
| } |
| |
| static inline enum lsc_backup_fence_routing |
| lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| assert(devinfo->has_lsc); |
| return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18); |
| } |
| |
| static inline uint32_t |
| lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti) |
| { |
| assert(devinfo->has_lsc); |
| return SET_BITS(bti, 31, 24) | |
| SET_BITS(0, 23, 12); /* base offset */ |
| } |
| |
| static inline unsigned |
| lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo, |
| uint32_t ex_desc) |
| { |
| assert(devinfo->has_lsc); |
| return GET_BITS(ex_desc, 23, 12); |
| } |
| |
| static inline unsigned |
| lsc_bti_ex_desc_index(const struct intel_device_info *devinfo, |
| uint32_t ex_desc) |
| { |
| assert(devinfo->has_lsc); |
| return GET_BITS(ex_desc, 31, 24); |
| } |
| |
| static inline unsigned |
| lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo, |
| uint32_t ex_desc) |
| { |
| assert(devinfo->has_lsc); |
| return GET_BITS(ex_desc, 31, 12); |
| } |
| |
| static inline uint32_t |
| lsc_bss_ex_desc(const struct intel_device_info *devinfo, |
| unsigned surface_state_index) |
| { |
| assert(devinfo->has_lsc); |
| return SET_BITS(surface_state_index, 31, 6); |
| } |
| |
| static inline unsigned |
| lsc_bss_ex_desc_index(const struct intel_device_info *devinfo, |
| uint32_t ex_desc) |
| { |
| assert(devinfo->has_lsc); |
| return GET_BITS(ex_desc, 31, 6); |
| } |
| |
| static inline uint32_t |
| brw_mdc_sm2(unsigned exec_size) |
| { |
| assert(exec_size == 8 || exec_size == 16); |
| return exec_size > 8; |
| } |
| |
| static inline uint32_t |
| brw_mdc_sm2_exec_size(uint32_t sm2) |
| { |
| assert(sm2 <= 1); |
| return 8 << sm2; |
| } |
| |
| static inline uint32_t |
| brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo, |
| unsigned exec_size, unsigned msg_type) |
| { |
| assert(devinfo->has_ray_tracing); |
| assert(devinfo->ver < 20 || exec_size == 16); |
| |
| return SET_BITS(0, 19, 19) | /* No header */ |
| SET_BITS(msg_type, 17, 14) | |
| SET_BITS(brw_mdc_sm2(exec_size), 8, 8); |
| } |
| |
| static inline uint32_t |
| brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return GET_BITS(desc, 17, 14); |
| } |
| |
| static inline uint32_t |
| brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8)); |
| } |
| |
| static inline uint32_t |
| brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo, |
| unsigned exec_size) |
| { |
| assert(devinfo->has_ray_tracing); |
| assert(devinfo->ver < 20 || exec_size == 16); |
| |
| return SET_BITS(0, 19, 19) | /* No header */ |
| SET_BITS(0, 17, 14) | /* Message type */ |
| SET_BITS(brw_mdc_sm2(exec_size), 8, 8); |
| } |
| |
| static inline uint32_t |
| brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo, |
| uint32_t desc) |
| { |
| return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8)); |
| } |
| |
| /** |
| * Construct a message descriptor immediate with the specified pixel |
| * interpolator function controls. |
| */ |
| static inline uint32_t |
| brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo, |
| unsigned msg_type, |
| bool noperspective, |
| bool coarse_pixel_rate, |
| unsigned exec_size, |
| unsigned group) |
| { |
| assert(exec_size == 8 || exec_size == 16); |
| const bool simd_mode = exec_size == 16; |
| const bool slot_group = group >= 16; |
| |
| assert(devinfo->ver >= 10 || !coarse_pixel_rate); |
| return (SET_BITS(slot_group, 11, 11) | |
| SET_BITS(msg_type, 13, 12) | |
| SET_BITS(!!noperspective, 14, 14) | |
| SET_BITS(coarse_pixel_rate, 15, 15) | |
| SET_BITS(simd_mode, 16, 16)); |
| } |
| |
| /** |
| * Send message to shared unit \p sfid with a possibly indirect descriptor \p |
| * desc. If \p desc is not an immediate it will be transparently loaded to an |
| * address register using an OR instruction. |
| */ |
| void |
| brw_send_indirect_message(struct brw_codegen *p, |
| unsigned sfid, |
| struct brw_reg dst, |
| struct brw_reg payload, |
| struct brw_reg desc, |
| bool eot); |
| |
| void |
| brw_send_indirect_split_message(struct brw_codegen *p, |
| unsigned sfid, |
| struct brw_reg dst, |
| struct brw_reg payload0, |
| struct brw_reg payload1, |
| struct brw_reg desc, |
| struct brw_reg ex_desc, |
| unsigned ex_mlen, |
| bool ex_bso, |
| bool eot); |
| |
| void gfx6_math(struct brw_codegen *p, |
| struct brw_reg dest, |
| unsigned function, |
| struct brw_reg src0, |
| struct brw_reg src1); |
| |
| /** |
| * Return the generation-specific jump distance scaling factor. |
| * |
| * Given the number of instructions to jump, we need to scale by |
| * some number to obtain the actual jump distance to program in an |
| * instruction. |
| */ |
| static inline unsigned |
| brw_jump_scale(const struct intel_device_info *devinfo) |
| { |
| /* Broadwell measures jump targets in bytes. */ |
| return 16; |
| } |
| |
| void brw_barrier(struct brw_codegen *p, struct brw_reg src); |
| |
| /* If/else/endif. Works by manipulating the execution flags on each |
| * channel. |
| */ |
| brw_eu_inst *brw_IF(struct brw_codegen *p, unsigned execute_size); |
| |
| void brw_ELSE(struct brw_codegen *p); |
| void brw_ENDIF(struct brw_codegen *p); |
| |
| /* DO/WHILE loops: |
| */ |
| brw_eu_inst *brw_DO(struct brw_codegen *p, unsigned execute_size); |
| |
| brw_eu_inst *brw_WHILE(struct brw_codegen *p); |
| |
| brw_eu_inst *brw_BREAK(struct brw_codegen *p); |
| brw_eu_inst *brw_CONT(struct brw_codegen *p); |
| brw_eu_inst *brw_HALT(struct brw_codegen *p); |
| |
| /* Forward jumps: |
| */ |
| brw_eu_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index, |
| unsigned predicate_control); |
| |
| void brw_NOP(struct brw_codegen *p); |
| |
| void brw_WAIT(struct brw_codegen *p); |
| |
| void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func); |
| |
| /* Special case: there is never a destination, execution size will be |
| * taken from src0: |
| */ |
| void brw_CMP(struct brw_codegen *p, |
| struct brw_reg dest, |
| unsigned conditional, |
| struct brw_reg src0, |
| struct brw_reg src1); |
| |
| void brw_CMPN(struct brw_codegen *p, |
| struct brw_reg dest, |
| unsigned conditional, |
| struct brw_reg src0, |
| struct brw_reg src1); |
| |
| brw_eu_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth, |
| unsigned rcount, struct brw_reg dest, struct brw_reg src0, |
| struct brw_reg src1, struct brw_reg src2); |
| |
| void |
| brw_memory_fence(struct brw_codegen *p, |
| struct brw_reg dst, |
| struct brw_reg src, |
| enum opcode send_op, |
| enum brw_message_target sfid, |
| uint32_t desc, |
| bool commit_enable, |
| unsigned bti); |
| |
| void |
| brw_broadcast(struct brw_codegen *p, |
| struct brw_reg dst, |
| struct brw_reg src, |
| struct brw_reg idx); |
| |
| void |
| brw_float_controls_mode(struct brw_codegen *p, |
| unsigned mode, unsigned mask); |
| |
| void |
| brw_update_reloc_imm(const struct brw_isa_info *isa, |
| brw_eu_inst *inst, |
| uint32_t value); |
| |
| void |
| brw_MOV_reloc_imm(struct brw_codegen *p, |
| struct brw_reg dst, |
| enum brw_reg_type src_type, |
| uint32_t id, uint32_t base); |
| |
| unsigned |
| brw_num_sources_from_inst(const struct brw_isa_info *isa, |
| const brw_eu_inst *inst); |
| |
| void brw_set_src1(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg); |
| |
| void brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *insn, |
| unsigned desc, unsigned ex_desc); |
| |
| static inline void |
| brw_set_desc(struct brw_codegen *p, brw_eu_inst *insn, unsigned desc) |
| { |
| brw_set_desc_ex(p, insn, desc, 0); |
| } |
| |
| void brw_set_uip_jip(struct brw_codegen *p, int start_offset); |
| |
| enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod); |
| enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod); |
| |
| /* brw_eu_compact.c */ |
| void brw_compact_instructions(struct brw_codegen *p, int start_offset, |
| struct disasm_info *disasm); |
| void brw_uncompact_instruction(const struct brw_isa_info *isa, |
| brw_eu_inst *dst, brw_eu_compact_inst *src); |
| bool brw_try_compact_instruction(const struct brw_isa_info *isa, |
| brw_eu_compact_inst *dst, const brw_eu_inst *src); |
| |
| void brw_debug_compact_uncompact(const struct brw_isa_info *isa, |
| brw_eu_inst *orig, brw_eu_inst *uncompacted); |
| |
| /* brw_eu_validate.c */ |
| bool brw_validate_instruction(const struct brw_isa_info *isa, |
| const brw_eu_inst *inst, int offset, |
| unsigned inst_size, |
| struct disasm_info *disasm); |
| bool brw_validate_instructions(const struct brw_isa_info *isa, |
| const void *assembly, int start_offset, int end_offset, |
| struct disasm_info *disasm); |
| |
| static inline int |
| next_offset(const struct intel_device_info *devinfo, void *store, int offset) |
| { |
| brw_eu_inst *insn = (brw_eu_inst *)((char *)store + offset); |
| |
| if (brw_eu_inst_cmpt_control(devinfo, insn)) |
| return offset + 8; |
| else |
| return offset + 16; |
| } |
| |
| /** Maximum SEND message length */ |
| #define BRW_MAX_MSG_LENGTH 15 |
| |
| #ifdef __cplusplus |
| } |
| #endif |