/*
* Copyright © 2021 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef PANVK_SHADER_H
#define PANVK_SHADER_H
#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif
#include "util/pan_ir.h"
#include "pan_desc.h"
#include "panvk_cmd_push_constant.h"
#include "panvk_descriptor_set.h"
#include "panvk_macros.h"
#include "panvk_mempool.h"
#include "vk_pipeline_layout.h"
#include "vk_shader.h"
extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);
#define MAX_VS_ATTRIBS 16
struct nir_shader;
struct pan_blend_state;
struct panvk_device;
enum panvk_varying_buf_id {
PANVK_VARY_BUF_GENERAL,
PANVK_VARY_BUF_POSITION,
PANVK_VARY_BUF_PSIZ,
/* Keep last */
PANVK_VARY_BUF_MAX,
};
#if PAN_ARCH <= 7
enum panvk_desc_table_id {
PANVK_DESC_TABLE_USER = 0,
PANVK_DESC_TABLE_CS_DYN_SSBOS = MAX_SETS,
PANVK_DESC_TABLE_COMPUTE_COUNT = PANVK_DESC_TABLE_CS_DYN_SSBOS + 1,
PANVK_DESC_TABLE_VS_DYN_SSBOS = MAX_SETS,
PANVK_DESC_TABLE_FS_DYN_SSBOS = MAX_SETS + 1,
PANVK_DESC_TABLE_GFX_COUNT = PANVK_DESC_TABLE_FS_DYN_SSBOS + 1,
};
#endif
#define FAU_WORD_SIZE sizeof(uint64_t)
#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
struct panvk_graphics_sysvals {
struct {
float constants[4];
} blend;
struct {
struct {
float x, y, z;
} scale, offset;
} viewport;
struct {
#if PAN_ARCH <= 7
int32_t raw_vertex_offset;
#endif
int32_t first_vertex;
int32_t base_instance;
uint32_t noperspective_varyings;
} vs;
aligned_u64 push_consts;
#if PAN_ARCH <= 7
/* gl_Layer on Bifrost is a bit of a hack: we have to issue one draw per
 * layer and filter primitives at the VS level.
 */
int32_t layer_id;
struct {
aligned_u64 sets[PANVK_DESC_TABLE_GFX_COUNT];
} desc;
#endif
} __attribute__((aligned(FAU_WORD_SIZE)));
static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
"sizeof(struct panvk_graphics_sysvals) must be a multiple of FAU_WORD_SIZE");
static_assert((offsetof(struct panvk_graphics_sysvals, push_consts) %
FAU_WORD_SIZE) == 0,
"panvk_graphics_sysvals::push_consts must be 8-byte aligned");
#if PAN_ARCH <= 7
static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) ==
0,
"panvk_graphics_sysvals::desc must be 8-byte aligned");
#endif
struct panvk_compute_sysvals {
struct {
uint32_t x, y, z;
} base;
struct {
uint32_t x, y, z;
} num_work_groups;
struct {
uint32_t x, y, z;
} local_group_size;
aligned_u64 push_consts;
#if PAN_ARCH <= 7
struct {
aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
} desc;
#endif
} __attribute__((aligned(FAU_WORD_SIZE)));
static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
"sizeof(struct panvk_compute_sysvals) must be a multiple of FAU_WORD_SIZE");
static_assert((offsetof(struct panvk_compute_sysvals, push_consts) %
FAU_WORD_SIZE) == 0,
"panvk_compute_sysvals::push_consts must be 8-byte aligned");
#if PAN_ARCH <= 7
static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
0,
"panvk_compute_sysvals::desc must be 8-byte aligned");
#endif
/* This is not the final offset in the push constant buffer (AKA FAU), but
 * just a magic offset we use before packing push constants so we can easily
 * identify the type of push constant (driver sysvals vs user push constants).
 */
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE
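/* Illustrative sketch only (pre_remap_offset is an assumed name, not part of
 * this header): with the convention above, an FAU offset can be classified
 * before remapping as:
 *
 *    bool is_sysval = pre_remap_offset >= SYSVALS_PUSH_CONST_BASE;
 *    uint32_t rel_offset =
 *       is_sysval ? pre_remap_offset - SYSVALS_PUSH_CONST_BASE
 *                 : pre_remap_offset;
 *
 * User push constants live in [0, MAX_PUSH_CONSTANTS_SIZE) while driver
 * sysvals are shifted by SYSVALS_PUSH_CONST_BASE until the FAUs get packed.
 */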
#define sysval_size(__ptype, __name) \
sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name)
#define sysval_offset(__ptype, __name) \
offsetof(struct panvk_##__ptype##_sysvals, __name)
#define sysval_entry_size(__ptype, __name) \
sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0])
#define sysval_entry_offset(__ptype, __name, __idx) \
(sysval_offset(__ptype, __name) + \
(sysval_entry_size(__ptype, __name) * (__idx)))
#define sysval_fau_start(__ptype, __name) \
(sysval_offset(__ptype, __name) / FAU_WORD_SIZE)
#define sysval_fau_end(__ptype, __name) \
((sysval_offset(__ptype, __name) + sysval_size(__ptype, __name) - 1) / \
FAU_WORD_SIZE)
#define sysval_fau_entry_start(__ptype, __name, __idx) \
(sysval_entry_offset(__ptype, __name, __idx) / FAU_WORD_SIZE)
#define sysval_fau_entry_end(__ptype, __name, __idx) \
((sysval_entry_offset(__ptype, __name, __idx + 1) - 1) / FAU_WORD_SIZE)
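/* Worked example (assuming the panvk_graphics_sysvals layout above with no
 * extra padding): blend.constants occupies bytes 0-15, so viewport starts at
 * byte 16 and spans 24 bytes (two float3s), giving:
 *
 *    sysval_fau_start(graphics, viewport) == 16 / FAU_WORD_SIZE == 2
 *    sysval_fau_end(graphics, viewport) == (16 + 24 - 1) / FAU_WORD_SIZE == 4
 *
 * i.e. the viewport sysvals cover FAU words 2..4 inclusive.
 */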
#define shader_remapped_fau_offset(__shader, __kind, __offset) \
((FAU_WORD_SIZE * BITSET_PREFIX_SUM((__shader)->fau.used_##__kind, \
(__offset) / FAU_WORD_SIZE)) + \
((__offset) % FAU_WORD_SIZE))
#define shader_remapped_sysval_offset(__shader, __offset) \
shader_remapped_fau_offset(__shader, sysvals, __offset)
#define shader_remapped_push_const_offset(__shader, __offset) \
(((__shader)->fau.sysval_count * FAU_WORD_SIZE) + \
shader_remapped_fau_offset(__shader, push_consts, __offset))
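/* Sketch of the remapping, continuing the viewport example above (the bit
 * values are assumptions for illustration): shader_remapped_fau_offset()
 * packs only the FAU words whose bit is set in fau.used_<kind>. If
 * used_sysvals has bits 2, 3 and 4 set, an unpacked offset of 32 bytes
 * (FAU word 4) remaps to:
 *
 *    FAU_WORD_SIZE * BITSET_PREFIX_SUM(used_sysvals, 32 / FAU_WORD_SIZE)
 *       == 8 * 2 == 16
 *
 * since only two used words (2 and 3) precede word 4. Remapped push-constant
 * offsets are then placed after the fau.sysval_count sysval words.
 */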
#define shader_use_sysval(__shader, __ptype, __name) \
BITSET_SET_RANGE((__shader)->fau.used_sysvals, \
sysval_fau_start(__ptype, __name), \
sysval_fau_end(__ptype, __name))
#define shader_uses_sysval(__shader, __ptype, __name) \
BITSET_TEST_RANGE((__shader)->fau.used_sysvals, \
sysval_fau_start(__ptype, __name), \
sysval_fau_end(__ptype, __name))
#define shader_uses_sysval_entry(__shader, __ptype, __name, __idx) \
BITSET_TEST_RANGE((__shader)->fau.used_sysvals, \
sysval_fau_entry_start(__ptype, __name, __idx), \
sysval_fau_entry_end(__ptype, __name, __idx))
#define shader_use_sysval_range(__shader, __base, __range) \
BITSET_SET_RANGE((__shader)->fau.used_sysvals, (__base) / FAU_WORD_SIZE, \
((__base) + (__range) - 1) / FAU_WORD_SIZE)
#define shader_use_push_const_range(__shader, __base, __range) \
BITSET_SET_RANGE((__shader)->fau.used_push_consts, \
(__base) / FAU_WORD_SIZE, \
((__base) + (__range) - 1) / FAU_WORD_SIZE)
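/* Illustrative usage (a sketch, not taken from the lowering code): a pass
 * that loads the viewport sysvals would record the dependency with:
 *
 *    shader_use_sysval(shader, graphics, viewport);
 *
 * which sets bits 2..4 of shader->fau.used_sysvals so that those FAU words
 * survive the packing done by shader_remapped_sysval_offset().
 */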
#define load_sysval(__b, __ptype, __bitsz, __name) \
nir_load_push_constant( \
__b, sysval_size(__ptype, __name) / ((__bitsz) / 8), __bitsz, \
nir_imm_int(__b, sysval_offset(__ptype, __name)), \
.base = SYSVALS_PUSH_CONST_BASE)
#define load_sysval_entry(__b, __ptype, __bitsz, __name, __dyn_idx) \
nir_load_push_constant( \
__b, sysval_entry_size(__ptype, __name) / ((__bitsz) / 8), __bitsz, \
nir_iadd_imm( \
__b, \
nir_imul_imm(__b, __dyn_idx, sysval_entry_size(__ptype, __name)), \
sysval_offset(__ptype, __name)), \
.base = SYSVALS_PUSH_CONST_BASE)
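/* Example NIR usage (illustrative; `b` is an assumed nir_builder pointer):
 * loading the three 32-bit viewport scale components could look like:
 *
 *    nir_def *vp_scale = load_sysval(b, graphics, 32, viewport.scale);
 *
 * which expands to a nir_load_push_constant() of 12 / 4 == 3 components of
 * 32 bits, at offset sysval_offset(graphics, viewport.scale) with
 * .base = SYSVALS_PUSH_CONST_BASE.
 */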
#if PAN_ARCH <= 7
enum panvk_bifrost_desc_table_type {
PANVK_BIFROST_DESC_TABLE_INVALID = -1,
/* UBOs are encoded in 8 bytes. */
PANVK_BIFROST_DESC_TABLE_UBO = 0,
/* Images use a <3DAttributeBuffer,Attribute> pair, with each element
 * stored in a separate table. */
PANVK_BIFROST_DESC_TABLE_IMG,
/* Textures and samplers are each encoded in 32 bytes. */
PANVK_BIFROST_DESC_TABLE_TEXTURE,
PANVK_BIFROST_DESC_TABLE_SAMPLER,
PANVK_BIFROST_DESC_TABLE_COUNT,
};
#endif
#define COPY_DESC_HANDLE(table, idx) (((table) << 28) | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
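/* Example (illustrative, Bifrost-only since the table enum is guarded by
 * PAN_ARCH <= 7): a copy-descriptor handle for entry 5 of the texture table
 * would be encoded and decoded as:
 *
 *    uint32_t handle = COPY_DESC_HANDLE(PANVK_BIFROST_DESC_TABLE_TEXTURE, 5);
 *    COPY_DESC_HANDLE_EXTRACT_TABLE(handle); // 2
 *    COPY_DESC_HANDLE_EXTRACT_INDEX(handle); // 5
 *
 * The 4 upper bits select the table, leaving 28 bits for the index.
 */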
#define MAX_COMPUTE_SYSVAL_FAUS \
(sizeof(struct panvk_compute_sysvals) / FAU_WORD_SIZE)
#define MAX_GFX_SYSVAL_FAUS \
(sizeof(struct panvk_graphics_sysvals) / FAU_WORD_SIZE)
#define MAX_SYSVAL_FAUS MAX2(MAX_COMPUTE_SYSVAL_FAUS, MAX_GFX_SYSVAL_FAUS)
#define MAX_PUSH_CONST_FAUS (MAX_PUSH_CONSTANTS_SIZE / FAU_WORD_SIZE)
struct panvk_shader_fau_info {
BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
uint32_t sysval_count;
uint32_t total_count;
};
struct panvk_shader {
struct vk_shader vk;
struct pan_shader_info info;
struct pan_compute_dim local_size;
struct {
uint32_t used_set_mask;
#if PAN_ARCH <= 7
struct {
uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
uint32_t count;
} dyn_ubos;
struct {
uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
uint32_t count;
} dyn_ssbos;
struct {
struct panvk_priv_mem map;
uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
} others;
#else
struct {
uint32_t map[MAX_DYNAMIC_BUFFERS];
uint32_t count;
} dyn_bufs;
#endif
} desc_info;
struct panvk_shader_fau_info fau;
const void *bin_ptr;
uint32_t bin_size;
struct panvk_priv_mem code_mem;
#if PAN_ARCH <= 7
struct panvk_priv_mem rsd;
#else
union {
struct panvk_priv_mem spd;
struct {
struct panvk_priv_mem pos_points;
struct panvk_priv_mem pos_triangles;
struct panvk_priv_mem var;
} spds;
};
#endif
const char *nir_str;
const char *asm_str;
};
static inline uint64_t
panvk_shader_get_dev_addr(const struct panvk_shader *shader)
{
return shader != NULL ? panvk_priv_mem_dev_addr(shader->code_mem) : 0;
}
#if PAN_ARCH <= 7
struct panvk_shader_link {
struct {
struct panvk_priv_mem attribs;
} vs, fs;
unsigned buf_strides[PANVK_VARY_BUF_MAX];
};
VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
const struct panvk_shader *vs,
const struct panvk_shader *fs,
struct panvk_shader_link *link);
static inline void
panvk_shader_link_cleanup(struct panvk_shader_link *link)
{
panvk_pool_free_mem(&link->vs.attribs);
panvk_pool_free_mem(&link->fs.attribs);
}
#endif
void panvk_per_arch(nir_lower_descriptors)(
nir_shader *nir, struct panvk_device *dev,
const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
struct vk_descriptor_set_layout *const *set_layouts,
struct panvk_shader *shader);
/* This is a stripped-down version of panvk_shader for internal shaders that
 * are managed by vk_meta (blend and preload shaders). Those don't need the
 * complexity inherent to user-provided shaders since they're not exposed. */
struct panvk_internal_shader {
struct vk_shader vk;
struct pan_shader_info info;
struct panvk_priv_mem code_mem;
#if PAN_ARCH <= 7
struct panvk_priv_mem rsd;
#else
struct panvk_priv_mem spd;
#endif
};
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_internal_shader, vk.base, VkShaderEXT,
VK_OBJECT_TYPE_SHADER_EXT)
VkResult panvk_per_arch(create_internal_shader)(
struct panvk_device *dev, nir_shader *nir,
struct panfrost_compile_inputs *compiler_inputs,
struct panvk_internal_shader **shader_out);
#endif