/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "nir.h"
#include "nir_worklist.h"
#include "util/u_vector.h"
/* Default combine callback: merge barrier "b" into barrier "a".
 *
 * The result takes the union of both barriers' memory modes and memory
 * semantics, and the wider of the two memory/execution scopes, so that
 * "a" alone is at least as strong as the original pair.
 */
static bool
combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
{
   const unsigned modes =
      nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b);
   const unsigned semantics =
      nir_intrinsic_memory_semantics(a) | nir_intrinsic_memory_semantics(b);

   nir_intrinsic_set_memory_modes(a, modes);
   nir_intrinsic_set_memory_semantics(a, semantics);
   nir_intrinsic_set_memory_scope(
      a, MAX2(nir_intrinsic_memory_scope(a), nir_intrinsic_memory_scope(b)));
   nir_intrinsic_set_execution_scope(
      a, MAX2(nir_intrinsic_execution_scope(a),
              nir_intrinsic_execution_scope(b)));

   /* This callback can always combine. */
   return true;
}
/* Merge runs of immediately-adjacent barrier intrinsics in one function.
 *
 * Walks every block tracking the previous instruction when it was a
 * barrier; any other instruction type breaks the adjacency.  When two
 * barriers are adjacent and combine_cb() merges them, the second one is
 * removed.
 */
static bool
nir_opt_combine_barriers_impl(nir_function_impl *impl,
                              nir_combine_barrier_cb combine_cb,
                              void *data)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      /* Barrier seen immediately before the current instruction, if any. */
      nir_intrinsic_instr *prev = NULL;

      nir_foreach_instr_safe(instr, block) {
         nir_intrinsic_instr *current =
            instr->type == nir_instr_type_intrinsic
               ? nir_instr_as_intrinsic(instr) : NULL;

         if (current == NULL || current->intrinsic != nir_intrinsic_barrier) {
            /* Any intervening instruction resets the pairing. */
            prev = NULL;
            continue;
         }

         if (prev != NULL && combine_cb(prev, current, data)) {
            /* Merged into prev; the second barrier is now redundant. */
            nir_instr_remove(&current->instr);
            progress = true;
         } else {
            prev = current;
         }
      }
   }

   /* Only instructions were removed, so control flow and live defs of the
    * remaining instructions are untouched.
    */
   nir_metadata_preserve(impl, progress
                                  ? nir_metadata_control_flow |
                                       nir_metadata_live_defs
                                  : nir_metadata_all);

   return progress;
}
/* Combine adjacent scoped barriers.
 *
 * combine_cb may be NULL, in which case every pair of adjacent barriers is
 * merged via combine_all_barriers(); only some backends can do better.
 */
bool
nir_opt_combine_barriers(nir_shader *shader,
                         nir_combine_barrier_cb combine_cb,
                         void *data)
{
   /* Default to combining everything. */
   const nir_combine_barrier_cb cb =
      combine_cb != NULL ? combine_cb : combine_all_barriers;

   bool progress = false;

   nir_foreach_function_impl(impl, shader)
      progress |= nir_opt_combine_barriers_impl(impl, cb, data);

   return progress;
}
/* Return true if instruction "a" executes before "b" on every path that
 * reaches "b": within one block, by instruction index; across blocks, by
 * block dominance.  Relies on nir_metadata_instr_index and
 * nir_metadata_dominance being up to date (required by the caller).
 */
static bool
barrier_happens_before(const nir_instr *a, const nir_instr *b)
{
   return a->block == b->block ? a->index < b->index
                               : nir_block_dominates(a->block, b->block);
}
/* Per-function worker for nir_opt_barrier_modes(): drop memory modes a
 * barrier cannot affect and shrink the scope of shared-only barriers.
 */
static bool
nir_opt_barrier_modes_impl(nir_function_impl *impl)
{
   bool progress = false;

   /* Worklist of every barrier intrinsic in the function. */
   nir_instr_worklist *barriers = nir_instr_worklist_create();
   if (!barriers)
      return false;

   /* Growable array of every deref that may touch a barrier-relevant memory
    * mode (or an atomic), collected up front so the shader is walked once.
    */
   struct u_vector mem_derefs;
   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
      nir_instr_worklist_destroy(barriers);
      return false;
   }

   /* The set of memory modes this pass knows how to analyze and remove. */
   const unsigned all_memory_modes = nir_var_image |
                                     nir_var_mem_ssbo |
                                     nir_var_mem_shared |
                                     nir_var_mem_global;

   /* Pass 1: gather barriers and potentially-relevant memory derefs. */
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_barrier)
               nir_instr_worklist_push_tail(barriers, instr);
         } else if (instr->type == nir_instr_type_deref) {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
                glsl_contains_atomic(deref->type)) {
               /* NOTE(review): u_vector_add() can return NULL on allocation
                * failure; the result is unchecked here — verify.
                */
               nir_deref_instr **tail = u_vector_add(&mem_derefs);
               *tail = deref;
            }
         }
      }
   }

   /* Pass 2: for each barrier, compute the modes it actually needs. */
   nir_foreach_instr_in_worklist(instr, barriers) {
      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);
      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);

      /* Modes outside all_memory_modes are never analyzed, so always kept. */
      unsigned new_modes = barrier_modes & ~all_memory_modes;

      /* If a barrier dominates all memory accesses for a particular mode (or
       * there are none), then the barrier cannot affect those accesses. We
       * can drop that mode from the barrier.
       *
       * For each barrier, we look at the list of memory derefs, and see if
       * the barrier fails to dominate the deref. If so, then there's at
       * least one memory access that may happen before the barrier, so we
       * need to keep the mode. Any modes not kept are discarded.
       */
      nir_deref_instr **p_deref;
      u_vector_foreach(p_deref, &mem_derefs) {
         nir_deref_instr *deref = *p_deref;

         /* Derefs of atomic types are counted as SSBO access — presumably
          * because atomic counters are backed by SSBO memory on the relevant
          * drivers; confirm against the lowering passes.
          */
         const unsigned atomic_mode =
            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
         const unsigned deref_modes =
            (deref->modes | atomic_mode) & barrier_modes;

         if (deref_modes &&
             !barrier_happens_before(&barrier->instr, &deref->instr))
            new_modes |= deref_modes;
      }

      /* If we don't need all the modes, update the barrier. */
      if (barrier_modes != new_modes) {
         nir_intrinsic_set_memory_modes(barrier, new_modes);
         progress = true;
      }

      /* Shared memory only exists within a workgroup, so synchronizing it
       * beyond workgroup scope is nonsense.
       */
      if (nir_intrinsic_execution_scope(barrier) == SCOPE_NONE &&
          new_modes == nir_var_mem_shared) {
         nir_intrinsic_set_memory_scope(barrier,
            MIN2(nir_intrinsic_memory_scope(barrier), SCOPE_WORKGROUP));
         /* NOTE(review): progress is reported even when the scope was
          * already <= SCOPE_WORKGROUP and MIN2 changed nothing — looks
          * conservative rather than wrong; verify.
          */
         progress = true;
      }
   }

   nir_instr_worklist_destroy(barriers);
   u_vector_finish(&mem_derefs);

   return progress;
}
/**
 * Reduce barriers to remove unnecessary modes and scope.
 *
 * This pass must be called before nir_lower_explicit_io lowers derefs!
 *
 * Many shaders issue full memory barriers, which may need to synchronize
 * access to images, SSBOs, shared local memory, or global memory.  However,
 * many of them only use a subset of those memory types - say, only SSBOs.
 *
 * Shaders may also have patterns such as:
 *
 *    1. shared local memory access
 *    2. barrier with full variable modes
 *    3. more shared local memory access
 *    4. image access
 *
 * In this case, the barrier is needed to ensure synchronization between the
 * various shared memory operations.  Image reads and writes do also exist,
 * but they are all on one side of the barrier, so it is a no-op for image
 * access.  We can drop the image mode from the barrier in this case too.
 *
 * In addition, we can reduce the memory scope of shared-only barriers, as
 * shared local memory only exists within a workgroup.
 */
bool
nir_opt_barrier_modes(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      /* barrier_happens_before() needs both of these. */
      nir_metadata_require(impl, nir_metadata_dominance |
                                 nir_metadata_instr_index);

      const bool impl_progress = nir_opt_barrier_modes_impl(impl);

      /* Only intrinsic indices/scopes change; no instructions are added or
       * removed, so control flow and live defs survive.
       */
      nir_metadata_preserve(impl, impl_progress
                                     ? nir_metadata_control_flow |
                                          nir_metadata_live_defs
                                     : nir_metadata_all);

      progress |= impl_progress;
   }

   return progress;
}