src/compiler/glsl/gl_nir_link_varyings.c - platform/external/mesa3d - Git at Google

 /*
  * Copyright © 2012 Intel Corporation
  * Copyright © 2021 Valve Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */

 /**
  * Linker functions related specifically to linking varyings between shader
  * stages.
  */

 #include "main/errors.h"
 #include "main/macros.h"
 #include "main/menums.h"
 #include "main/mtypes.h"
 #include "program/symbol_table.h"
 #include "util/hash_table.h"
 #include "util/u_math.h"
 #include "util/perf/cpu_trace.h"

 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_deref.h"
 #include "gl_nir.h"
 #include "gl_nir_link_varyings.h"
 #include "gl_nir_linker.h"
 #include "linker_util.h"
 #include "string_to_uint_map.h"

 /* Mask with the low `i` bits set; all ones when `i` is 32 or more.
  *
  * Unsigned constants are used on purpose: with a signed 1, an index of 31
  * would shift into the sign bit, which is undefined behaviour in C.
  */
 #define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0u : ((1u << (i)) - 1))

 /* Temporary storage for the set of attributes that need locations assigned. */
 struct temp_attr {
    /* Primary sort key in compare_attr(): entries with more slots sort
     * first.
     */
    unsigned slots;
    /* Tie-breaker in compare_attr(): among entries with the same slot count
     * the lower index sorts first.  Presumably the entry's position before
     * sorting -- confirm against the code that fills this in.
     */
    unsigned original_idx;
    /* Presumably the variable that is waiting for a location -- confirm
     * against the code that fills this in.
     */
    nir_variable *var;
 };

 /**
  * qsort() comparator for struct temp_attr.
  *
  * Orders entries by descending slot count; entries with the same slot count
  * are ordered by ascending original_idx.
  *
  * Both keys are unsigned, so they are compared explicitly rather than
  * subtracted: an unsigned difference converted to int only carries the
  * right sign while the difference fits in an int.
  *
  * \return
  * A negative value, zero, or a positive value if \p a sorts before, equal
  * to, or after \p b respectively.
  */
 static int
 compare_attr(const void *a, const void *b)
 {
    const struct temp_attr *const l = a;
    const struct temp_attr *const r = b;

    /* Reversed because we want a descending order sort. */
    if (r->slots != l->slots)
       return (r->slots > l->slots) ? 1 : -1;

    if (l->original_idx != r->original_idx)
       return (l->original_idx > r->original_idx) ? 1 : -1;

    return 0;
 }

 /**
  * Return the type of a varying with the per-vertex array level removed.
  *
  * When nir_is_arrayed_io() reports the variable as arrayed I/O for \p stage
  * (for example geometry shader inputs, which are indexed by vertex number),
  * the outermost array is peeled off.  Otherwise the declared type is
  * returned unchanged.
  */
 static const struct glsl_type *
 get_varying_type(const nir_variable *var, gl_shader_stage stage)
 {
    if (!nir_is_arrayed_io(var, stage))
       return var->type;

    /* Arrayed I/O must be declared as an array. */
    assert(glsl_type_is_array(var->type));
    return glsl_get_array_element(var->type);
 }

 /**
  * Find a contiguous set of available bits in a bitmask.
  *
  * \param used_mask     Bits representing used (1) and unused (0) locations
  * \param needed_count  Number of contiguous bits needed.
  *
  * \return
  * Base location of the available bits on success or -1 on failure.  A
  * request for zero bits, or for more bits than the mask holds, fails.
  */
 static int
 find_available_slots(unsigned used_mask, unsigned needed_count)
 {
    const unsigned total_bits = 8 * sizeof(used_mask);

    /* Reject impossible requests before building the mask, so the shift
     * below never sees a count outside the width of the type.
     */
    if (needed_count == 0 || needed_count > total_bits)
       return -1;

    /* Shifting by the full width of the type is undefined, so a request for
     * every bit gets the all-ones mask directly.
     */
    unsigned needed_mask = (needed_count == total_bits)
       ? ~0u
       : ((1u << needed_count) - 1);

    /* Highest base position at which the run still fits in the mask. */
    const unsigned max_bit_to_test = total_bits - needed_count;

    for (unsigned i = 0; i <= max_bit_to_test; i++) {
       if ((needed_mask & ~used_mask) == needed_mask)
          return (int)i;

       needed_mask <<= 1;
    }

    return -1;
 }

 /* Report whether any function of the shader contains a variable deref
  * whose variable is called `name`.
  * Note: This function does not support arrays.
  */
 static bool
 find_deref(nir_shader *shader, const char *name)
 {
    nir_foreach_function(func, shader) {
       nir_foreach_block(block, func->impl) {
          nir_foreach_instr(instr, block) {
             /* Only deref instructions are of interest. */
             if (instr->type != nir_instr_type_deref)
                continue;

             /* ...and of those, only the ones naming a variable directly. */
             nir_deref_instr *candidate = nir_instr_as_deref(instr);
             if (candidate->deref_type != nir_deref_type_var)
                continue;

             if (strcmp(candidate->var->name, name) == 0)
                return true;
          }
       }
    }

    return false;
 }

 /**
  * Validate the types and qualifiers of an output from one stage against the
  * matching input to another stage.
  *
  * A mismatch is reported through linker_error(), or through
  * linker_warning() for an interpolation mismatch when
  * consts->AllowGLSLCrossStageInterpolationMismatch is set.  Apart from the
  * struct type check, validation stops at the first error reported.
  *
  * \param consts          Read for AllowGLSLCrossStageInterpolationMismatch.
  * \param prog            Program being linked; supplies GLSL_Version and
  *                        IsES and receives the diagnostics.
  * \param input           Variable declared by the consumer stage.
  * \param output          Variable declared by the producer stage.
  * \param consumer_stage  Stage that declares \p input.
  * \param producer_stage  Stage that declares \p output.
  */
 static void
 cross_validate_types_and_qualifiers(const struct gl_constants *consts,
                                     struct gl_shader_program *prog,
                                     const nir_variable *input,
                                     const nir_variable *output,
                                     gl_shader_stage consumer_stage,
                                     gl_shader_stage producer_stage)
 {
    /* Check that the types match between stages.
     */
    const struct glsl_type *type_to_match = input->type;

    /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS
     *
     * For these stage pairs the input carries one more outer array level
     * than the output, so strip it before comparing.
     */
    const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
                                    consumer_stage != MESA_SHADER_FRAGMENT) ||
                                   consumer_stage == MESA_SHADER_GEOMETRY;
    if (extra_array_level) {
       assert(glsl_type_is_array(type_to_match));
       type_to_match = glsl_get_array_element(type_to_match);
    }

    if (type_to_match != output->type) {
       if (glsl_type_is_struct(output->type)) {
          /* Structures across shader stages can have different names
           * and are considered to match in type if and only if the
           * structure members match in name, type, qualification, and
           * declaration order. The precision doesn't need to match.
           */
          if (!glsl_record_compare(output->type, type_to_match,
                                   false, /* match_name */
                                   true, /* match_locations */
                                   false /* match_precision */)) {
             linker_error(prog,
                   "%s shader output `%s' declared as struct `%s', "
                   "doesn't match in type with %s shader input "
                   "declared as struct `%s'\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   glsl_get_type_name(output->type),
                   _mesa_shader_stage_to_string(consumer_stage),
                   glsl_get_type_name(input->type));
             /* NOTE(review): unlike the non-struct branch below there is no
              * return here, so the qualifier checks still run after this
              * error -- confirm this is intended.
              */
          }
       } else if (!glsl_type_is_array(output->type) ||
                  !is_gl_identifier(output->name)) {
          /* Every non-struct mismatch is an error, except when the output
           * is an array-typed gl_* built-in.  That exception exists
           * because of gl_TexCoord.  This built-in is unsized by default.
           * Applications that access it with a non-constant index must
           * redeclare it with a size.  There is some language in the GLSL
           * spec that implies the fragment shader and vertex shader do not
           * have to agree on this size.  Other drivers behave this way,
           * and one or two applications seem to rely on it.
           *
           * Neither declaration needs to be modified here because the array
           * sizes are fixed later when update_array_sizes is called.
           *
           * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
           *
           *     "Unlike user-defined varying variables, the built-in
           *     varying variables don't have a strict one-to-one
           *     correspondence between the vertex language and the
           *     fragment language."
           */
          linker_error(prog,
                       "%s shader output `%s' declared as type `%s', "
                       "but %s shader input declared as type `%s'\n",
                       _mesa_shader_stage_to_string(producer_stage),
                       output->name,
                       glsl_get_type_name(output->type),
                       _mesa_shader_stage_to_string(consumer_stage),
                       glsl_get_type_name(input->type));
          return;
       }
    }

    /* Check that all of the qualifiers match between stages.
     */

    /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
     * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
     * conformance test suite does not verify that the qualifiers must match.
     * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
     * OpenGLES 3.0 drivers, so we relax the checking in all cases.
     *
     * The leading `false` makes this whole condition constant, so the block
     * below never executes.
     */
    if (false /* always skip the centroid check */ &&
        prog->GLSL_Version < (prog->IsES ? 310 : 430) &&
        input->data.centroid != output->data.centroid) {
       linker_error(prog,
                    "%s shader output `%s' %s centroid qualifier, "
                    "but %s shader input %s centroid qualifier\n",
                    _mesa_shader_stage_to_string(producer_stage),
                    output->name,
                    (output->data.centroid) ? "has" : "lacks",
                    _mesa_shader_stage_to_string(consumer_stage),
                    (input->data.centroid) ? "has" : "lacks");
       return;
    }

    /* The sample qualifier must agree on both sides, for every version. */
    if (input->data.sample != output->data.sample) {
       linker_error(prog,
                    "%s shader output `%s' %s sample qualifier, "
                    "but %s shader input %s sample qualifier\n",
                    _mesa_shader_stage_to_string(producer_stage),
                    output->name,
                    (output->data.sample) ? "has" : "lacks",
                    _mesa_shader_stage_to_string(consumer_stage),
                    (input->data.sample) ? "has" : "lacks");
       return;
    }

    /* The patch qualifier must agree on both sides, for every version. */
    if (input->data.patch != output->data.patch) {
       linker_error(prog,
                    "%s shader output `%s' %s patch qualifier, "
                    "but %s shader input %s patch qualifier\n",
                    _mesa_shader_stage_to_string(producer_stage),
                    output->name,
                    (output->data.patch) ? "has" : "lacks",
                    _mesa_shader_stage_to_string(consumer_stage),
                    (input->data.patch) ? "has" : "lacks");
       return;
    }

    /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
     *
     *    "As only outputs need be declared with invariant, an output from
     *     one shader stage will still match an input of a subsequent stage
     *     without the input being declared as invariant."
     *
     * while GLSL 4.10 says:
     *
     *    "For variables leaving one shader and coming into another shader,
     *     the invariant keyword has to be used in both shaders, or a link
     *     error will result."
     *
     * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
     *
     *    "The invariance of varyings that are declared in both the vertex
     *     and fragment shaders must match."
     *
     * Hence the check only applies below GLSL 4.20 / GLSL ES 3.00.
     */
    if (input->data.explicit_invariant != output->data.explicit_invariant &&
        prog->GLSL_Version < (prog->IsES ? 300 : 420)) {
       linker_error(prog,
                    "%s shader output `%s' %s invariant qualifier, "
                    "but %s shader input %s invariant qualifier\n",
                    _mesa_shader_stage_to_string(producer_stage),
                    output->name,
                    (output->data.explicit_invariant) ? "has" : "lacks",
                    _mesa_shader_stage_to_string(consumer_stage),
                    (input->data.explicit_invariant) ? "has" : "lacks");
       return;
    }

    /* GLSL >= 4.40 removes text requiring interpolation qualifiers
     * to match cross stage, they must only match within the same stage.
     *
     * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
     *
     *     "It is a link-time error if, within the same stage, the interpolation
     *     qualifiers of variables of the same name do not match."
     *
     * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
     *
     *    "When no interpolation qualifier is present, smooth interpolation
     *    is used."
     *
     * So for ES programs we match variables where one is smooth and the
     * other has no explicit qualifier.
     */
    unsigned input_interpolation = input->data.interpolation;
    unsigned output_interpolation = output->data.interpolation;
    if (prog->IsES) {
       if (input_interpolation == INTERP_MODE_NONE)
          input_interpolation = INTERP_MODE_SMOOTH;
       if (output_interpolation == INTERP_MODE_NONE)
          output_interpolation = INTERP_MODE_SMOOTH;
    }
    /* The comparison uses the normalized values above, while the messages
     * print the qualifiers as declared.
     */
    if (input_interpolation != output_interpolation &&
        prog->GLSL_Version < 440) {
       if (!consts->AllowGLSLCrossStageInterpolationMismatch) {
          linker_error(prog,
                       "%s shader output `%s' specifies %s "
                       "interpolation qualifier, "
                       "but %s shader input specifies %s "
                       "interpolation qualifier\n",
                       _mesa_shader_stage_to_string(producer_stage),
                       output->name,
                       interpolation_string(output->data.interpolation),
                       _mesa_shader_stage_to_string(consumer_stage),
                       interpolation_string(input->data.interpolation));
          return;
       } else {
          /* The mismatch is tolerated by configuration: warn only. */
          linker_warning(prog,
                         "%s shader output `%s' specifies %s "
                         "interpolation qualifier, "
                         "but %s shader input specifies %s "
                         "interpolation qualifier\n",
                         _mesa_shader_stage_to_string(producer_stage),
                         output->name,
                         interpolation_string(output->data.interpolation),
                         _mesa_shader_stage_to_string(consumer_stage),
                         interpolation_string(input->data.interpolation));
       }
    }
 }

 /**
  * Validate a single color input against the front and back color outputs.
  *
  * Each of the two outputs that exists and is flagged as assigned is checked
  * against \p input with cross_validate_types_and_qualifiers(); the front
  * color is checked first.
  */
 static void
 cross_validate_front_and_back_color(const struct gl_constants *consts,
                                     struct gl_shader_program *prog,
                                     const nir_variable *input,
                                     const nir_variable *front_color,
                                     const nir_variable *back_color,
                                     gl_shader_stage consumer_stage,
                                     gl_shader_stage producer_stage)
 {
    const nir_variable *const color_outputs[2] = { front_color, back_color };

    for (unsigned i = 0; i < 2; i++) {
       const nir_variable *color = color_outputs[i];

       /* Missing or unassigned outputs have nothing to validate. */
       if (color == NULL || !color->data.assigned)
          continue;

       cross_validate_types_and_qualifiers(consts, prog, input, color,
                                           consumer_stage, producer_stage);
    }
 }

 /**
  * Convert a variable's location into an index relative to the first slot
  * of the location range it belongs to.
  *
  * The base is VERT_ATTRIB_GENERIC0 for vertex shader inputs,
  * VARYING_SLOT_PATCH0 for patch variables in the tessellation stages,
  * FRAG_RESULT_DATA0 for fragment shader outputs and VARYING_SLOT_VAR0 for
  * everything else.
  */
 static unsigned
 compute_variable_location_slot(nir_variable *var, gl_shader_stage stage)
 {
    const bool is_tess_stage = stage == MESA_SHADER_TESS_CTRL ||
                               stage == MESA_SHADER_TESS_EVAL;
    unsigned base_slot;

    if (stage == MESA_SHADER_VERTEX &&
        var->data.mode == nir_var_shader_in) {
       base_slot = VERT_ATTRIB_GENERIC0;
    } else if (is_tess_stage && var->data.patch) {
       base_slot = VARYING_SLOT_PATCH0;
    } else if (stage == MESA_SHADER_FRAGMENT &&
               var->data.mode == nir_var_shader_out) {
       base_slot = FRAG_RESULT_DATA0;
    } else {
       base_slot = VARYING_SLOT_VAR0;
    }

    return var->data.location - base_slot;
 }


 /* What check_location_aliasing() records for one component of one explicit
  * location, so that later variables landing on the same location can be
  * checked for compatibility.
  */
 struct explicit_location_info {
    /* Variable that claimed this component; NULL marks the component as
     * free.
     */
    nir_variable *var;
    /* Whether the occupant's array-stripped base type is an integer type. */
    bool base_type_is_integer;
    /* Bit size of the occupant's base type; recorded as 0 for structs. */
    unsigned base_type_bit_size;
    /* Interpolation qualifier recorded for the occupant. */
    unsigned interpolation;
    /* Auxiliary storage qualifiers recorded for the occupant. */
    bool centroid;
    bool sample;
    bool patch;
 };

 /**
  * Record the components a variable occupies in \p explicit_locations and
  * report a link error if they conflict with what is already recorded.
  *
  * Walks the locations [location, location_limit).  For each component that
  * is already occupied, the new variable must not overlap it and must agree
  * with the occupant on integer vs. non-integer base type, bit size,
  * interpolation and auxiliary qualifiers; a struct may not share a location
  * with anything.  Free components inside the variable's component range
  * are claimed for \p var.
  *
  * \param explicit_locations  Occupancy table indexed by [location][component];
  *                            updated in place.  Entries written before a
  *                            conflict is found are not rolled back.
  * \param var             Variable being placed; stored in claimed entries
  *                        and used for diagnostics.
  * \param location        First location to check.
  * \param component       First component used at \p location.
  * \param location_limit  One past the last location to check.
  * \param type            Type used to derive the component count, base
  *                        type and bit size.
  * \param interpolation   Interpolation qualifier to record and compare.
  * \param centroid        Auxiliary qualifier to record and compare.
  * \param sample          Auxiliary qualifier to record and compare.
  * \param patch           Auxiliary qualifier to record and compare.
  * \param prog            Program that receives the link errors.
  * \param stage           Stage named in the diagnostics.
  *
  * \return
  * true if no conflict was found, false after a linker_error().
  */
 static bool
 check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                         nir_variable *var,
                         unsigned location,
                         unsigned component,
                         unsigned location_limit,
                         const struct glsl_type *type,
                         unsigned interpolation,
                         bool centroid,
                         bool sample,
                         bool patch,
                         struct gl_shader_program *prog,
                         gl_shader_stage stage)
 {
    /* One past the last component the variable uses, counted from component
     * 0 of the current location; may exceed 4 for 64-bit vectors.
     */
    unsigned last_comp;
    unsigned base_type_bit_size;
    const struct glsl_type *type_without_array = glsl_without_array(type);
    const bool base_type_is_integer =
       glsl_base_type_is_integer(glsl_get_base_type(type_without_array));
    const bool is_struct = glsl_type_is_struct(type_without_array);
    if (is_struct) {
       /* structs don't have a defined underlying base type so just treat all
        * component slots as used and set the bit size to 0. If there is
        * location aliasing, we'll fail anyway later.
        */
       last_comp = 4;
       base_type_bit_size = 0;
    } else {
       /* 64-bit types take two components per vector element. */
       unsigned dmul = glsl_type_is_64bit(type_without_array) ? 2 : 1;
       last_comp = component + glsl_get_vector_elements(type_without_array) * dmul;
       base_type_bit_size =
          glsl_base_type_get_bit_size(glsl_get_base_type(type_without_array));
    }

    while (location < location_limit) {
       unsigned comp = 0;
       while (comp < 4) {
          struct explicit_location_info *info =
             &explicit_locations[location][comp];

          if (info->var) {
             /* This component is already occupied by another variable. */
             if (glsl_type_is_struct(glsl_without_array(info->var->type)) ||
                 is_struct) {
                /* Structs cannot share location since they are incompatible
                 * with any other underlying numerical type.
                 */
                linker_error(prog,
                             "%s shader has multiple %sputs sharing the "
                             "same location that don't have the same "
                             "underlying numerical type. Struct variable '%s', "
                             "location %u\n",
                             _mesa_shader_stage_to_string(stage),
                             var->data.mode == nir_var_shader_in ? "in" : "out",
                             is_struct ? var->name : info->var->name,
                             location);
                return false;
             } else if (comp >= component && comp < last_comp) {
                /* Component aliasing is not allowed */
                linker_error(prog,
                             "%s shader has multiple %sputs explicitly "
                             "assigned to location %d and component %d\n",
                             _mesa_shader_stage_to_string(stage),
                             var->data.mode == nir_var_shader_in ? "in" : "out",
                             location, comp);
                return false;
             } else {
                /* The occupant uses a component outside this variable's
                 * range, so the two merely share the location.
                 *
                 * From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                 * Qualifiers, Page 67, (Location aliasing):
                 *
                 *   " Further, when location aliasing, the aliases sharing the
                 *     location must have the same underlying numerical type
                 *     and bit width (floating-point or integer, 32-bit versus
                 *     64-bit, etc.) and the same auxiliary storage and
                 *     interpolation qualification."
                 */

                /* If the underlying numerical type isn't integer, implicitly
                 * it will be float or else we would have failed by now.
                 */
                if (info->base_type_is_integer != base_type_is_integer) {
                   linker_error(prog,
                                "%s shader has multiple %sputs sharing the "
                                "same location that don't have the same "
                                "underlying numerical type. Location %u "
                                "component %u.\n",
                                _mesa_shader_stage_to_string(stage),
                                var->data.mode == nir_var_shader_in ?
                                "in" : "out", location, comp);
                   return false;
                }

                if (info->base_type_bit_size != base_type_bit_size) {
                   linker_error(prog,
                                "%s shader has multiple %sputs sharing the "
                                "same location that don't have the same "
                                "underlying numerical bit size. Location %u "
                                "component %u.\n",
                                _mesa_shader_stage_to_string(stage),
                                var->data.mode == nir_var_shader_in ?
                                "in" : "out", location, comp);
                   return false;
                }

                if (info->interpolation != interpolation) {
                   linker_error(prog,
                                "%s shader has multiple %sputs sharing the "
                                "same location that don't have the same "
                                "interpolation qualification. Location %u "
                                "component %u.\n",
                                _mesa_shader_stage_to_string(stage),
                                var->data.mode == nir_var_shader_in ?
                                "in" : "out", location, comp);
                   return false;
                }

                if (info->centroid != centroid ||
                    info->sample != sample ||
                    info->patch != patch) {
                   linker_error(prog,
                                "%s shader has multiple %sputs sharing the "
                                "same location that don't have the same "
                                "auxiliary storage qualification. Location %u "
                                "component %u.\n",
                                _mesa_shader_stage_to_string(stage),
                                var->data.mode == nir_var_shader_in ?
                                "in" : "out", location, comp);
                   return false;
                }
             }
          } else if (comp >= component && comp < last_comp) {
             /* Free component inside this variable's range: claim it. */
             info->var = var;
             info->base_type_is_integer = base_type_is_integer;
             info->base_type_bit_size = base_type_bit_size;
             info->interpolation = interpolation;
             info->centroid = centroid;
             info->sample = sample;
             info->patch = patch;
          }

          comp++;

          /* We need to do some special handling for doubles as dvec3 and
           * dvec4 consume two consecutive locations. We don't need to
           * worry about components beginning at anything other than 0 as
           * the spec does not allow this for dvec3 and dvec4.
           *
           * NOTE(review): last_comp and component are not restored after
           * this wrap, so any following locations (e.g. later elements of
           * an array of dvec3) are checked against the reduced range --
           * confirm this is intended.
           */
          if (comp == 4 && last_comp > 4) {
             last_comp = last_comp - 4;
             /* Bump location index and reset the component index */
             location++;
             comp = 0;
             component = 0;
          }
       }

       location++;
    }

    return true;
 }

 /**
  * Resize the non-patch input arrays of a shader to \p num_vertices
  * elements.
  *
  * For geometry shaders two link errors are diagnosed first: an explicitly
  * sized array whose declared size differs from \p num_vertices, and a
  * recorded access beyond the last vertex.  Either error stops the walk
  * over the remaining inputs.
  *
  * Deref types are fixed up at the end in every case, including after an
  * error.
  *
  * \param shader        Shader whose inputs are resized.
  * \param prog          Program that receives the link errors.
  * \param stage         Stage of \p shader; selects the geometry-only checks.
  * \param num_vertices  New outer array size.
  */
 static void
 resize_input_array(nir_shader *shader, struct gl_shader_program *prog,
                    unsigned stage, unsigned num_vertices)
 {
    nir_foreach_shader_in_variable(var, shader) {
       if (!glsl_type_is_array(var->type) || var->data.patch)
          continue;

       unsigned size = glsl_array_size(var->type);

       if (stage == MESA_SHADER_GEOMETRY) {
          /* Generate a link error if the shader has declared this array with
           * an incorrect size.  The comparison against -1 is spelled out as
           * unsigned to match the type of `size`.
           */
          if (!var->data.implicit_sized_array &&
              size != (unsigned)-1 && size != num_vertices) {
             linker_error(prog, "size of array %s declared as %u, "
                          "but number of input vertices is %u\n",
                          var->name, size, num_vertices);
             break;
          }

          /* Generate a link error if the shader attempts to access an input
           * array using an index too large for its actual size assigned at
           * link time.  num_vertices is unsigned, so it is printed with %u.
           */
          if (var->data.max_array_access >= (int)num_vertices) {
             linker_error(prog, "%s shader accesses element %i of "
                          "%s, but only %u input vertices\n",
                          _mesa_shader_stage_to_string(stage),
                          var->data.max_array_access, var->name, num_vertices);
             break;
          }
       }

       var->type = glsl_array_type(var->type->fields.array, num_vertices, 0);
       var->data.max_array_access = num_vertices - 1;
    }

    nir_fixup_deref_types(shader);
 }

 /**
  * Resize tessellation evaluation per-vertex inputs to the size of
  * tessellation control per-vertex outputs.
  */
 void
 resize_tes_inputs(const struct gl_constants *consts,
                   struct gl_shader_program *prog)
 {
    if (prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] == NULL)
       return;

    struct gl_linked_shader *tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
    struct gl_linked_shader *tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];

    /* If no control shader is present, then the TES inputs are statically
     * sized to MaxPatchVertices; the actual size of the arrays won't be
     * known until draw time.
     */
    const int num_vertices = tcs
       ? tcs->Program->nir->info.tess.tcs_vertices_out
       : consts->MaxPatchVertices;

    resize_input_array(tes->Program->nir, prog, MESA_SHADER_TESS_EVAL,
                       num_vertices);
    if (tcs) {
       /* Convert the gl_PatchVerticesIn system value into a constant, since
        * the value is known at this point.
        */
       nir_variable *var =
          nir_find_variable_with_location(tes->Program->nir,
                                          nir_var_system_value,
                                          SYSTEM_VALUE_VERTICES_IN);
       if (var) {
          var->data.location = 0;
          var->data.explicit_location = false;
          var->data.mode = nir_var_mem_constant;

          nir_constant *val = rzalloc(var, nir_constant);
          val->values[0].i32 = num_vertices;
          var->constant_initializer = val;

          nir_fixup_deref_modes(tes->Program->nir);
       }
    }
 }

 void
 set_geom_shader_input_array_size(struct gl_shader_program *prog)
 {
    if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] == NULL)
       return;

    /* Set the size of geometry shader input arrays */
    nir_shader *nir = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program->nir;
    unsigned num_vertices =
       mesa_vertices_per_prim(nir->info.gs.input_primitive);
    resize_input_array(nir, prog, MESA_SHADER_GEOMETRY, num_vertices);
 }

 /**
  * Validate one explicitly located variable of a linked shader.
  *
  * The variable must fit within the stage's input or output slot limit and
  * must not conflict with the locations already recorded in
  * \p explicit_locations.  Interface blocks are checked member by member,
  * each at its own location; any other variable is checked as a whole.
  *
  * \return
  * true if the variable is valid, false after a linker_error().
  */
 static bool
 validate_explicit_variable_location(const struct gl_constants *consts,
                                     struct explicit_location_info explicit_locations[][4],
                                     nir_variable *var,
                                     struct gl_shader_program *prog,
                                     struct gl_linked_shader *sh)
 {
    const struct glsl_type *type = get_varying_type(var, sh->Stage);
    const unsigned slot_count = glsl_count_attribute_slots(type, false);
    const unsigned first_slot = compute_variable_location_slot(var, sh->Stage);
    const unsigned end_slot = first_slot + slot_count;

    /* Vertex shader inputs and fragment shader outputs are validated in
     * assign_attribute_or_color_locations(), so they must never reach this
     * function.
     */
    unsigned max_slots;
    if (var->data.mode == nir_var_shader_out) {
       assert(sh->Stage != MESA_SHADER_FRAGMENT);
       max_slots = consts->Program[sh->Stage].MaxOutputComponents / 4;
    } else {
       assert(var->data.mode == nir_var_shader_in);
       assert(sh->Stage != MESA_SHADER_VERTEX);
       max_slots = consts->Program[sh->Stage].MaxInputComponents / 4;
    }

    if (end_slot > max_slots) {
       linker_error(prog,
                    "Invalid location %u in %s shader\n",
                    first_slot, _mesa_shader_stage_to_string(sh->Stage));
       return false;
    }

    const struct glsl_type *bare_type = glsl_without_array(type);

    /* Anything that is not an interface block is checked in one go. */
    if (!glsl_type_is_interface(bare_type)) {
       return check_location_aliasing(explicit_locations, var,
                                      first_slot, var->data.location_frac,
                                      end_slot, type,
                                      var->data.interpolation,
                                      var->data.centroid,
                                      var->data.sample,
                                      var->data.patch,
                                      prog, sh->Stage);
    }

    /* Interface block: every member has its own location and qualifiers. */
    for (unsigned i = 0; i < glsl_get_length(bare_type); i++) {
       const struct glsl_struct_field *member =
          glsl_get_struct_field_data(bare_type, i);
       const unsigned member_first = member->location -
          (member->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
       const unsigned member_end =
          member_first + glsl_count_attribute_slots(member->type, false);

       const bool member_ok =
          check_location_aliasing(explicit_locations, var,
                                  member_first,
                                  0,
                                  member_end,
                                  member->type,
                                  member->interpolation,
                                  member->centroid,
                                  member->sample,
                                  member->patch,
                                  prog, sh->Stage);
       if (!member_ok)
          return false;
    }

    return true;
 }

 /**
  * Validate the explicit locations on the outer interfaces of a program:
  * the inputs of its first stage and the outputs of its last stage.
  *
  * Vertex shader inputs and fragment shader outputs are skipped here
  * because assign_attribute_or_color_locations() validates them.
  * Validation stops at the first variable that fails.
  */
 void
 gl_nir_validate_first_and_last_interface_explicit_locations(const struct gl_constants *consts,
                                                             struct gl_shader_program *prog,
                                                             gl_shader_stage first_stage,
                                                             gl_shader_stage last_stage)
 {
    /* One pass per outer interface: first-stage inputs, last-stage outputs. */
    const struct {
       gl_shader_stage stage;
       nir_variable_mode mode;
       bool enabled;
    } passes[2] = {
       { first_stage, nir_var_shader_in, first_stage != MESA_SHADER_VERTEX },
       { last_stage, nir_var_shader_out, last_stage != MESA_SHADER_FRAGMENT },
    };

    if (!passes[0].enabled && !passes[1].enabled)
       return;

    struct explicit_location_info explicit_locations[MAX_VARYING][4];

    for (unsigned i = 0; i < 2; i++) {
       if (!passes[i].enabled)
          continue;

       struct gl_linked_shader *sh = prog->_LinkedShaders[passes[i].stage];
       assert(sh);

       /* Each interface starts from an empty occupancy table. */
       memset(explicit_locations, 0, sizeof(explicit_locations));

       nir_foreach_variable_with_modes(var, sh->Program->nir, passes[i].mode) {
          /* Only explicitly located variables at VARYING_SLOT_VAR0 or above
           * are validated here.
           */
          const bool needs_check = var->data.explicit_location &&
                                   var->data.location >= VARYING_SLOT_VAR0;
          if (!needs_check)
             continue;

          if (!validate_explicit_variable_location(consts, explicit_locations,
                                                   var, prog, sh)) {
             return;
          }
       }
    }
 }

 /**
  * Decide whether input / output matching between shader interfaces should
  * be enforced for statically used inputs.
  *
  * Section 4.3.4 (Inputs) of the GLSL 4.10 specification only says:
  *
  *   "Only the input variables that are actually read need to be
  *    written by the previous stage; it is allowed to have
  *    superfluous declarations of input variables."
  *
  * That wording neither defines what "actually read" means nor says how an
  * input that the previous stage never writes should be treated.  The
  * GLSL 4.20 specification is much clearer:
  *
  *    "Only the input variables that are statically read need to
  *     be written by the previous stage; it is allowed to have
  *     superfluous declarations of input variables."
  *
  * It also carries a table stating that statically reading an input that is
  * not defined in the previous stage is an error, whereas an output merely
  * has to be defined (it does not have to be statically written).
  *
  * The 4.20 text was intended as a clarification of the earlier revisions,
  * but GLSL 4.10 shaders have been seen in the wild that depend on the less
  * strict interpretation (no error when the input is not actually read in
  * control flow).  The rule is therefore applied only from GLSL 4.20 on for
  * desktop programs; for ES programs the threshold is 0.
  *
  * \return true when prog->GLSL_Version reaches the threshold for its API.
  */
 static bool
 static_input_output_matching(struct gl_shader_program *prog)
 {
    /* Lowest GLSL version for which the strict rule is applied. */
    int min_version = 420;

    if (prog->IsES)
       min_version = 0;

    return prog->GLSL_Version >= min_version;
 }

 /**
  * Cross-validate the outputs of \p producer against the inputs of
  * \p consumer, reporting every mismatch through linker_error().
  *
  * Varyings with an explicit location at or above VARYING_SLOT_VAR0 are
  * paired by location; everything else is paired by name through a
  * temporary symbol table that is destroyed before returning.
  */
 void
 gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
                                         struct gl_shader_program *prog,
                                         struct gl_linked_shader *producer,
                                         struct gl_linked_shader *consumer)
 {
    struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {0};
    struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {0};
    struct _mesa_symbol_table *table = _mesa_symbol_table_ctor();

    /* Pass 1: collect every output of the "producer" stage. */
    nir_foreach_variable_with_modes(var, producer->Program->nir, nir_var_shader_out) {
       const bool by_location = var->data.explicit_location &&
          var->data.location >= VARYING_SLOT_VAR0;

       if (by_location) {
          /* User-defined varyings with explicit locations do not need
           * matching names, so they are recorded by location instead of
           * being entered in the symbol table.
           */
          if (!validate_explicit_variable_location(consts,
                                                   output_explicit_locations,
                                                   var, prog, producer)) {
             goto out;
          }
          continue;
       }

       /* Interface block validation is handled elsewhere */
       if (!var->interface_type || is_gl_identifier(var->name))
          _mesa_symbol_table_add_symbol(table, var->name, var);
    }

    /* Pass 2: walk the inputs of the "consumer" stage.  An input that has a
     * matching output must agree with it in type and qualifiers.
     *
     * Exception: if the consumer is the geometry shader, then the inputs
     * should be arrays and the type of the array element should match the
     * type of the corresponding producer output.
     */
    nir_foreach_variable_with_modes(input, consumer->Program->nir, nir_var_shader_in) {
       /* A used gl_Color / gl_SecondaryColor input is validated against
        * the corresponding front/back colour outputs, looked up by name.
        */
       const char *front_name = NULL;
       const char *back_name = NULL;

       if (input->data.used) {
          if (strcmp(input->name, "gl_Color") == 0) {
             front_name = "gl_FrontColor";
             back_name = "gl_BackColor";
          } else if (strcmp(input->name, "gl_SecondaryColor") == 0) {
             front_name = "gl_FrontSecondaryColor";
             back_name = "gl_BackSecondaryColor";
          }
       }

       if (front_name != NULL) {
          const nir_variable *front_color =
             (nir_variable *) _mesa_symbol_table_find_symbol(table, front_name);

          const nir_variable *back_color =
             (nir_variable *) _mesa_symbol_table_find_symbol(table, back_name);

          cross_validate_front_and_back_color(consts, prog, input,
                                              front_color, back_color,
                                              consumer->Stage, producer->Stage);
          continue;
       }

       /* The rules for connecting inputs and outputs change in the presence
        * of explicit locations: the variable names no longer matter, only
        * the explicitly assigned location does.
        */
       nir_variable *output = NULL;

       if (input->data.explicit_location &&
           input->data.location >= VARYING_SLOT_VAR0) {
          const struct glsl_type *type =
             get_varying_type(input, consumer->Stage);
          const unsigned num_elements =
             glsl_count_attribute_slots(type, false);
          const unsigned first_slot =
             compute_variable_location_slot(input, consumer->Stage);
          const unsigned slot_limit = first_slot + num_elements;

          if (!validate_explicit_variable_location(consts,
                                                   input_explicit_locations,
                                                   input, prog, consumer)) {
             goto out;
          }

          /* Look up the producer output for every slot the input covers. */
          for (unsigned idx = first_slot; idx < slot_limit; idx++) {
             if (idx >= MAX_VARYING) {
                linker_error(prog,
                             "Invalid location %u in %s shader\n", idx,
                             _mesa_shader_stage_to_string(consumer->Stage));
                goto out;
             }

             output = output_explicit_locations[idx][input->data.location_frac].var;

             if (output == NULL) {
                /* A linker failure should only happen when there is no
                 * output declaration and there is Static Use of the
                 * declared input.
                 */
                if (input->data.used && static_input_output_matching(prog)) {
                   linker_error(prog,
                                "%s shader input `%s' with explicit location "
                                "has no matching output\n",
                                _mesa_shader_stage_to_string(consumer->Stage),
                                input->name);
                   break;
                }
                continue;
             }

             if (input->data.location != output->data.location) {
                linker_error(prog,
                             "%s shader input `%s' with explicit location "
                             "has no matching output\n",
                             _mesa_shader_stage_to_string(consumer->Stage),
                             input->name);
                break;
             }
          }
       } else {
          /* Interface block validation is handled elsewhere */
          if (input->interface_type)
             continue;

          output = (nir_variable *)
             _mesa_symbol_table_find_symbol(table, input->name);
       }

       if (output == NULL) {
          /* Check for input vars with unmatched output vars in prev stage
           * taking into account that interface blocks could have a matching
           * output but with different name, so we ignore them.
           */
          assert(!input->data.assigned);
          if (input->data.used && !input->interface_type &&
              !input->data.explicit_location &&
              static_input_output_matching(prog)) {
             linker_error(prog,
                          "%s shader input `%s' "
                          "has no matching output in the previous stage\n",
                          _mesa_shader_stage_to_string(consumer->Stage),
                          input->name);
          }
          continue;
       }

       /* Interface blocks have their own validation elsewhere, so the pair
        * is only checked here when at least one side is not in a block.
        */
       if (!input->interface_type || !output->interface_type) {
          cross_validate_types_and_qualifiers(consts, prog, input, output,
                                              consumer->Stage,
                                              producer->Stage);
       }
    }

  out:
    _mesa_symbol_table_dtor(table);
 }

 /**
  * Assign locations for either VS inputs or FS outputs.
  *
  * \param mem_ctx        Temporary ralloc context used for linking.
  * \param prog           Shader program whose variables need locations
  *                       assigned.
  * \param constants      Driver specific constant values for the program.
  * \param target_index   Selector for the program target to receive location
  *                       assignments.  Must be either \c MESA_SHADER_VERTEX or
  *                       \c MESA_SHADER_FRAGMENT.
  * \param do_assignment  Whether we are actually marking the assignment or we
  *                       are just doing a dry-run checking.
  *
  * Slot masks are built with SAFE_MASK_FROM_INDEX() rather than a bare
  * "(1 << slots) - 1": a variable that needs 32 or more slots would otherwise
  * trigger an undefined shift before the range checks get a chance to reject
  * it.
  *
  * \return
  * If locations are (or can be, in case of dry-running) successfully assigned,
  * true is returned.  Otherwise an error is emitted to the shader link log and
  * false is returned.
  */
 static bool
 assign_attribute_or_color_locations(void *mem_ctx,
                                     struct gl_shader_program *prog,
                                     const struct gl_constants *constants,
                                     unsigned target_index,
                                     bool do_assignment)
 {
    /* Maximum number of generic locations.  This corresponds to either the
     * maximum number of draw buffers or the maximum number of generic
     * attributes.
     */
    unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
       constants->Program[target_index].MaxAttribs :
       MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);

    assert(max_index <= 32);
    struct temp_attr to_assign[32];

    /* Mark invalid locations as being used.
     */
    unsigned used_locations = ~SAFE_MASK_FROM_INDEX(max_index);
    unsigned double_storage_locations = 0;

    assert((target_index == MESA_SHADER_VERTEX)
           || (target_index == MESA_SHADER_FRAGMENT));

    if (prog->_LinkedShaders[target_index] == NULL)
       return true;

    /* Operate in a total of four passes.
     *
     * 1. Invalidate the location assignments for all vertex shader inputs.
     *
     * 2. Assign locations for inputs that have user-defined (via
     *    glBindAttribLocation) locations and outputs that have
     *    user-defined locations (via glBindFragDataLocation).
     *
     * 3. Sort the attributes without assigned locations by number of slots
     *    required in decreasing order.  Fragmentation caused by attribute
     *    locations assigned by the application may prevent large attributes
     *    from having enough contiguous space.
     *
     * 4. Assign locations to any inputs without assigned locations.
     */

    const int generic_base = (target_index == MESA_SHADER_VERTEX)
       ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;

    nir_variable_mode io_mode =
       (target_index == MESA_SHADER_VERTEX)
       ? nir_var_shader_in : nir_var_shader_out;

    /* Temporary array for the set of attributes that have locations assigned,
     * for the purpose of checking overlapping slots/components of (non-ES)
     * fragment shader outputs.
     */
    nir_variable *assigned[FRAG_RESULT_MAX * 4]; /* (max # of FS outputs) * # components */
    unsigned assigned_attr = 0;

    unsigned num_attr = 0;

    nir_shader *shader = prog->_LinkedShaders[target_index]->Program->nir;
    nir_foreach_variable_with_modes(var, shader, io_mode) {

       if (var->data.explicit_location) {
          if ((var->data.location >= (int)(max_index + generic_base))
              || (var->data.location < 0)) {
             linker_error(prog,
                          "invalid explicit location %d specified for `%s'\n",
                          (var->data.location < 0)
                          ? var->data.location
                          : var->data.location - generic_base,
                          var->name);
             return false;
          }
       } else if (target_index == MESA_SHADER_VERTEX) {
          unsigned binding;

          /* Pick up a location bound to this name through the API. */
          if (string_to_uint_map_get(prog->AttributeBindings, &binding, var->name)) {
             assert(binding >= VERT_ATTRIB_GENERIC0);
             var->data.location = binding;
          }
       } else if (target_index == MESA_SHADER_FRAGMENT) {
          unsigned binding;
          unsigned index;
          const char *name = var->name;
          const struct glsl_type *type = var->type;

          while (type) {
             /* Check if there's a binding for the variable name */
             if (string_to_uint_map_get(prog->FragDataBindings, &binding, name)) {
                assert(binding >= FRAG_RESULT_DATA0);
                var->data.location = binding;

                if (string_to_uint_map_get(prog->FragDataIndexBindings, &index, name)) {
                   var->data.index = index;
                }
                break;
             }

             /* If not, but it's an array type, look for name[0] */
             if (glsl_type_is_array(type)) {
                name = ralloc_asprintf(mem_ctx, "%s[0]", name);
                type = glsl_get_array_element(type);
                continue;
             }

             break;
          }
       }

       if (strcmp(var->name, "gl_LastFragData") == 0)
          continue;

       /* From GL4.5 core spec, section 15.2 (Shader Execution):
        *
        *     "Output binding assignments will cause LinkProgram to fail:
        *     ...
        *     If the program has an active output assigned to a location greater
        *     than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
        *     an active output assigned an index greater than or equal to one;"
        */
       if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
           var->data.location - generic_base >=
           (int) constants->MaxDualSourceDrawBuffers) {
          linker_error(prog,
                       "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
                       "with index %u for %s\n",
                       var->data.location - generic_base, var->data.index,
                       var->name);
          return false;
       }

       const unsigned slots =
          glsl_count_attribute_slots(var->type,
                                     target_index == MESA_SHADER_VERTEX);

       /* If the variable is not a built-in and has a location statically
        * assigned in the shader (presumably via a layout qualifier), make sure
        * that it doesn't collide with other assigned locations.  Otherwise,
        * add it to the list of variables that need linker-assigned locations.
        */
       if (var->data.location != -1) {
          if (var->data.location >= generic_base && var->data.index < 1) {
             /* From page 61 of the OpenGL 4.0 spec:
              *
              *     "LinkProgram will fail if the attribute bindings assigned
              *     by BindAttribLocation do not leave not enough space to
              *     assign a location for an active matrix attribute or an
              *     active attribute array, both of which require multiple
              *     contiguous generic attributes."
              *
              * I think above text prohibits the aliasing of explicit and
              * automatic assignments. But, aliasing is allowed in manual
              * assignments of attribute locations. See below comments for
              * the details.
              *
              * From OpenGL 4.0 spec, page 61:
              *
              *     "It is possible for an application to bind more than one
              *     attribute name to the same location. This is referred to as
              *     aliasing. This will only work if only one of the aliased
              *     attributes is active in the executable program, or if no
              *     path through the shader consumes more than one attribute of
              *     a set of attributes aliased to the same location. A link
              *     error can occur if the linker determines that every path
              *     through the shader consumes multiple aliased attributes,
              *     but implementations are not required to generate an error
              *     in this case."
              *
              * From GLSL 4.30 spec, page 54:
              *
              *    "A program will fail to link if any two non-vertex shader
              *     input variables are assigned to the same location. For
              *     vertex shaders, multiple input variables may be assigned
              *     to the same location using either layout qualifiers or via
              *     the OpenGL API. However, such aliasing is intended only to
              *     support vertex shaders where each execution path accesses
              *     at most one input per each location. Implementations are
              *     permitted, but not required, to generate link-time errors
              *     if they detect that every path through the vertex shader
              *     executable accesses multiple inputs assigned to any single
              *     location. For all shader types, a program will fail to link
              *     if explicit location assignments leave the linker unable
              *     to find space for other variables without explicit
              *     assignments."
              *
              * From OpenGL ES 3.0 spec, page 56:
              *
              *    "Binding more than one attribute name to the same location
              *     is referred to as aliasing, and is not permitted in OpenGL
              *     ES Shading Language 3.00 vertex shaders. LinkProgram will
              *     fail when this condition exists. However, aliasing is
              *     possible in OpenGL ES Shading Language 1.00 vertex shaders.
              *     This will only work if only one of the aliased attributes
              *     is active in the executable program, or if no path through
              *     the shader consumes more than one attribute of a set of
              *     attributes aliased to the same location. A link error can
              *     occur if the linker determines that every path through the
              *     shader consumes multiple aliased attributes, but implemen-
              *     tations are not required to generate an error in this case."
              *
              * After looking at above references from OpenGL, OpenGL ES and
              * GLSL specifications, we allow aliasing of vertex input variables
              * in: OpenGL 2.0 (and above) and OpenGL ES 2.0.
              *
              * NOTE: This is not required by the spec but its worth mentioning
              * here that we're not doing anything to make sure that no path
              * through the vertex shader executable accesses multiple inputs
              * assigned to any single location.
              */

             /* Mask representing the contiguous slots that will be used by
              * this attribute.  The slot count has not been range-checked
              * yet, so the mask must be built without shifting by >= 32.
              */
             const unsigned attr = var->data.location - generic_base;
             const unsigned use_mask = SAFE_MASK_FROM_INDEX(slots);
             const char *const string = (target_index == MESA_SHADER_VERTEX)
                ? "vertex shader input" : "fragment shader output";

             /* Generate a link error if the requested locations for this
              * attribute exceed the maximum allowed attribute location.
              */
             if (attr + slots > max_index) {
                linker_error(prog,
                            "insufficient contiguous locations "
                            "available for %s `%s' %d %d %d\n", string,
                            var->name, used_locations, use_mask, attr);
                return false;
             }

             /* Generate a link error if the set of bits requested for this
              * attribute overlaps any previously allocated bits.
              */
             if ((~(use_mask << attr) & used_locations) != used_locations) {
                if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
                   /* From section 4.4.2 (Output Layout Qualifiers) of the GLSL
                    * 4.40 spec:
                    *
                    *    "Additionally, for fragment shader outputs, if two
                    *    variables are placed within the same location, they
                    *    must have the same underlying type (floating-point or
                    *    integer). No component aliasing of output variables or
                    *    members is allowed.
                    */
                   for (unsigned i = 0; i < assigned_attr; i++) {
                      unsigned assigned_slots =
                         glsl_count_attribute_slots(assigned[i]->type, false);
                      unsigned assig_attr =
                         assigned[i]->data.location - generic_base;
                      unsigned assigned_use_mask =
                         SAFE_MASK_FROM_INDEX(assigned_slots);

                      if ((assigned_use_mask << assig_attr) &
                          (use_mask << attr)) {

                         const struct glsl_type *assigned_type =
                            glsl_without_array(assigned[i]->type);
                         const struct glsl_type *type =
                            glsl_without_array(var->type);
                         if (glsl_get_base_type(assigned_type) !=
                             glsl_get_base_type(type)) {
                            linker_error(prog, "types do not match for aliased"
                                         " %ss %s and %s\n", string,
                                         assigned[i]->name, var->name);
                            return false;
                         }

                         unsigned assigned_component_mask =
                            ((1 << glsl_get_vector_elements(assigned_type)) - 1) <<
                            assigned[i]->data.location_frac;
                         unsigned component_mask =
                            ((1 << glsl_get_vector_elements(type)) - 1) <<
                            var->data.location_frac;
                         if (assigned_component_mask & component_mask) {
                            linker_error(prog, "overlapping component is "
                                         "assigned to %ss %s and %s "
                                         "(component=%d)\n",
                                         string, assigned[i]->name, var->name,
                                         var->data.location_frac);
                            return false;
                         }
                      }
                   }
                } else if (target_index == MESA_SHADER_FRAGMENT ||
                           (prog->IsES && prog->GLSL_Version >= 300)) {
                   linker_error(prog, "overlapping location is assigned "
                                "to %s `%s' %d %d %d\n", string, var->name,
                                used_locations, use_mask, attr);
                   return false;
                } else {
                   linker_warning(prog, "overlapping location is assigned "
                                  "to %s `%s' %d %d %d\n", string, var->name,
                                  used_locations, use_mask, attr);
                }
             }

             if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
                /* Only track assigned variables for non-ES fragment shaders
                 * to avoid overflowing the array.
                 *
                 * At most one variable per fragment output component should
                 * reach this.
                 */
                assert(assigned_attr < ARRAY_SIZE(assigned));
                assigned[assigned_attr] = var;
                assigned_attr++;
             }

             used_locations |= (use_mask << attr);

             /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
              *
              * "A program with more than the value of MAX_VERTEX_ATTRIBS
              *  active attribute variables may fail to link, unless
              *  device-dependent optimizations are able to make the program
              *  fit within available hardware resources. For the purposes
              *  of this test, attribute variables of the type dvec3, dvec4,
              *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
              *  count as consuming twice as many attributes as equivalent
              *  single-precision types. While these types use the same number
              *  of generic attributes as their single-precision equivalents,
              *  implementations are permitted to consume two single-precision
              *  vectors of internal storage for each three- or four-component
              *  double-precision vector."
              *
              * Mark this attribute slot as taking up twice as much space
              * so we can count it properly against limits.  According to
              * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
              * is optional behavior, but it seems preferable.
              */
             if (glsl_type_is_dual_slot(glsl_without_array(var->type)))
                double_storage_locations |= (use_mask << attr);
          }

          continue;
       }

       /* The variable still needs a linker-assigned location; queue it.
        * The bound also keeps num_attr within to_assign[].
        */
       if (num_attr >= max_index) {
          linker_error(prog, "too many %s (max %u)",
                       target_index == MESA_SHADER_VERTEX ?
                       "vertex shader inputs" : "fragment shader outputs",
                       max_index);
          return false;
       }
       to_assign[num_attr].slots = slots;
       to_assign[num_attr].var = var;
       to_assign[num_attr].original_idx = num_attr;
       num_attr++;
    }

    /* A dry run stops once the explicit assignments have been checked. */
    if (!do_assignment)
       return true;

    if (target_index == MESA_SHADER_VERTEX) {
       unsigned total_attribs_size =
          util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
          util_bitcount(double_storage_locations);
       if (total_attribs_size > max_index) {
          linker_error(prog,
                       "attempt to use %d vertex attribute slots only %d available ",
                       total_attribs_size, max_index);
          return false;
       }
    }

    /* If all of the attributes were assigned locations by the application (or
     * are built-in attributes with fixed locations), return early.  This should
     * be the common case.
     */
    if (num_attr == 0)
       return true;

    qsort(to_assign, num_attr, sizeof(to_assign[0]), &compare_attr);

    if (target_index == MESA_SHADER_VERTEX) {
       /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS.  It can
        * only be explicitly assigned via glBindAttribLocation.  Mark it as
        * reserved to prevent it from being automatically allocated below.
        */
       if (find_deref(shader, "gl_Vertex"))
          used_locations |= (1 << 0);
    }

    for (unsigned i = 0; i < num_attr; i++) {
       /* Mask representing the contiguous slots that will be used by this
        * attribute.  It is computed before find_available_slots() has had a
        * chance to reject the request, so it must tolerate a slot count of
        * 32 or more.
        */
       const unsigned use_mask = SAFE_MASK_FROM_INDEX(to_assign[i].slots);

       int location = find_available_slots(used_locations, to_assign[i].slots);

       if (location < 0) {
          const char *const string = (target_index == MESA_SHADER_VERTEX)
             ? "vertex shader input" : "fragment shader output";

          linker_error(prog,
                       "insufficient contiguous locations "
                       "available for %s `%s'\n",
                       string, to_assign[i].var->name);
          return false;
       }

       to_assign[i].var->data.location = generic_base + location;
       used_locations |= (use_mask << location);

       if (glsl_type_is_dual_slot(glsl_without_array(to_assign[i].var->type)))
          double_storage_locations |= (use_mask << location);
    }

    /* Now that we have all the locations, from the GL 4.5 core spec, section
     * 11.1.1 (Vertex Attributes), dvec3, dvec4, dmat2x3, dmat2x4, dmat3,
     * dmat3x4, dmat4x3, and dmat4 count as consuming twice as many attributes
     * as equivalent single-precision types.
     */
    if (target_index == MESA_SHADER_VERTEX) {
       unsigned total_attribs_size =
          util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
          util_bitcount(double_storage_locations);
       if (total_attribs_size > max_index) {
          linker_error(prog,
                       "attempt to use %d vertex attribute slots only %d available ",
                       total_attribs_size, max_index);
          return false;
       }
    }

    return true;
 }

 /**
  * Tell whether \p var carries an explicit location that lies at or above
  * VARYING_SLOT_VAR0.
  */
 static bool
 varying_has_user_specified_location(const nir_variable *var)
 {
    if (!var->data.explicit_location)
       return false;

    return var->data.location >= VARYING_SLOT_VAR0;
 }

 /**
  * Recursively expand type \p t into one name per leaf and append a copy of
  * each name to \p varying_names, bumping \p count for every entry stored.
  *
  * \p name is a scratch string that is rewritten in place starting at offset
  * \p name_length: ".member" is appended for an interface block (using
  * \p ifc_member_name / \p ifc_member_t), ".field" for every struct field, and
  * "[i]" for every element of an array of structs, array of interfaces or
  * array of arrays.  Any other type is treated as a leaf.
  */
 static void
 create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                          size_t name_length, unsigned *count,
                          const char *ifc_member_name,
                          const struct glsl_type *ifc_member_t,
                          char ***varying_names)
 {
    /* Interface block: descend into the single member the caller selected. */
    if (glsl_type_is_interface(t)) {
       size_t tail = name_length;

       assert(ifc_member_name && ifc_member_t);
       ralloc_asprintf_rewrite_tail(name, &tail, ".%s", ifc_member_name);

       create_xfb_varying_names(mem_ctx, ifc_member_t, name, tail, count,
                                NULL, NULL, varying_names);
       return;
    }

    /* Struct: descend into every field in declaration order. */
    if (glsl_type_is_struct(t)) {
       const unsigned num_fields = glsl_get_length(t);

       for (unsigned f = 0; f < num_fields; f++) {
          size_t tail = name_length;

          ralloc_asprintf_rewrite_tail(name, &tail, ".%s",
                                       glsl_get_struct_elem_name(t, f));

          create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, f), name,
                                   tail, count, NULL, NULL, varying_names);
       }
       return;
    }

    const struct glsl_type *base = glsl_without_array(t);
    const bool needs_subscripts =
       glsl_type_is_struct(base) ||
       glsl_type_is_interface(base) ||
       (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)));

    /* Leaf: record a copy of the name built so far. */
    if (!needs_subscripts) {
       (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
       return;
    }

    /* Array of aggregates or of arrays: one recursion per element, passing
     * the interface member selection through unchanged.
     */
    const unsigned length = glsl_get_length(t);

    for (unsigned e = 0; e < length; e++) {
       size_t tail = name_length;

       /* Append the subscript to the current variable name */
       ralloc_asprintf_rewrite_tail(name, &tail, "[%u]", e);

       create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                tail, count, ifc_member_name,
                                ifc_member_t, varying_names);
    }
 }

 static bool
 process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
                               struct gl_shader_program *prog,
                               unsigned *num_xfb_decls,
                               char ***varying_names,
                               bool *compact_arrays)
 {
    bool has_xfb_qualifiers = false;

    /* We still need to enable transform feedback mode even if xfb_stride is
     * only applied to a global out. Also we don't bother to propagate
     * xfb_stride to interface block members so this will catch that case also.
     */
    for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
       if (prog->TransformFeedback.BufferStride[j]) {
          has_xfb_qualifiers = true;
          break;
       }
    }

    *compact_arrays = sh->Program->nir->options->compact_arrays;
    nir_foreach_shader_out_variable(var, sh->Program->nir) {
       /* From the ARB_enhanced_layouts spec:
        *
        *    "Any shader making any static use (after preprocessing) of any of
        *     these *xfb_* qualifiers will cause the shader to be in a
        *     transform feedback capturing mode and hence responsible for
        *     describing the transform feedback setup.  This mode will capture
        *     any output selected by *xfb_offset*, directly or indirectly, to
        *     a transform feedback buffer."
        */
       if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
          has_xfb_qualifiers = true;
       }

       if (var->data.explicit_offset) {
          *num_xfb_decls += glsl_varying_count(var->type);
          has_xfb_qualifiers = true;
       }
    }

    if (*num_xfb_decls == 0)
       return has_xfb_qualifiers;


    unsigned i = 0;
    *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
    nir_foreach_shader_out_variable(var, sh->Program->nir) {
       if (var->data.explicit_offset) {
          char *name;
          const struct glsl_type *type, *member_type;

          if (var->data.from_named_ifc_block) {
             type = var->interface_type;

             /* Find the member type before it was altered by lowering */
             const struct glsl_type *type_wa = glsl_without_array(type);
             member_type =
                glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
             name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
          } else {
             type = var->type;
             member_type = NULL;
             name = ralloc_strdup(NULL, var->name);
          }
          create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                   var->name, member_type, varying_names);
          ralloc_free(name);
       }
    }

    assert(i == *num_xfb_decls);
    return has_xfb_qualifiers;
 }

 /**
  * Initialize this struct based on a string that was passed to
  * glTransformFeedbackVaryings.
  *
  * If the input is mal-formed, this call still succeeds, but it sets
  * xfb_decl->var_name to a mal-formed input, so xfb_decl_find_output_var()
  * will fail to find any matching variable.
  *
  * \param xfb_decl        declaration to initialize.
  * \param consts          unused here.
  * \param exts            used to decide whether the gl_NextBuffer and
  *                        gl_SkipComponents[1-4] markers are recognized.
  * \param mem_ctx         ralloc context that owns the copy of the base name.
  * \param input           name string; kept by reference in orig_name, so it
  *                        must outlive the declaration.
  * \param compact_arrays  when false, gl_ClipDistance / gl_CullDistance are
  *                        flagged as lowered built-in arrays.
  */
 static void
 xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
               const struct gl_extensions *exts, const void *mem_ctx,
               const char *input, bool compact_arrays)
 {
    /* We don't have to be pedantic about what is a valid GLSL variable name,
     * because any variable with an invalid name can't exist in the IR anyway.
     */
    xfb_decl->location = -1;
    xfb_decl->orig_name = input;
    xfb_decl->lowered_builtin_array_variable = none;
    xfb_decl->skip_components = 0;
    xfb_decl->next_buffer_separator = false;
    xfb_decl->matched_candidate = NULL;
    xfb_decl->stream_id = 0;
    xfb_decl->buffer = 0;
    xfb_decl->offset = 0;

    /* Give the name-parsing results well-defined values on every path,
     * including the early returns below.  xfb_decl_assign_location() reads
     * array_subscript even for declarations without a subscript, so it must
     * not be left to whatever the allocation happened to contain.
     */
    xfb_decl->var_name = NULL;
    xfb_decl->is_subscripted = false;
    xfb_decl->array_subscript = 0;

    if (exts->ARB_transform_feedback3) {
       /* Parse gl_NextBuffer. */
       if (strcmp(input, "gl_NextBuffer") == 0) {
          xfb_decl->next_buffer_separator = true;
          return;
       }

       /* Parse gl_SkipComponents. */
       if (strcmp(input, "gl_SkipComponents1") == 0)
          xfb_decl->skip_components = 1;
       else if (strcmp(input, "gl_SkipComponents2") == 0)
          xfb_decl->skip_components = 2;
       else if (strcmp(input, "gl_SkipComponents3") == 0)
          xfb_decl->skip_components = 3;
       else if (strcmp(input, "gl_SkipComponents4") == 0)
          xfb_decl->skip_components = 4;

       if (xfb_decl->skip_components)
          return;
    }

    /* Parse a declaration: split "name[subscript]" into its base name and
     * optional array subscript.
     */
    const char *base_name_end;
    long subscript = link_util_parse_program_resource_name(input, strlen(input),
                                                           &base_name_end);
    xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
    if (xfb_decl->var_name == NULL) {
       _mesa_error_no_memory(__func__);
       return;
    }

    /* A negative result means the name carries no subscript. */
    if (subscript >= 0) {
       xfb_decl->array_subscript = subscript;
       xfb_decl->is_subscripted = true;
    }

    /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, the
     * declaration must behave specially to account for the fact that
     * gl_ClipDistance is converted from a float[8] to a vec4[2].
     */
    if (!compact_arrays) {
       if (strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0)
          xfb_decl->lowered_builtin_array_variable = clip_distance;
       else if (strcmp(xfb_decl->var_name, "gl_CullDistance") == 0)
          xfb_decl->lowered_builtin_array_variable = cull_distance;
    }
 }

 /**
  * Tell whether two xfb_decl structs name the same variable and, when they
  * are subscripted, the same array element.
  *
  * Both declarations must be real varyings (not gl_NextBuffer or
  * gl_SkipComponents markers).
  */
 static bool
 xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
 {
    assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));

    /* Different base name, or only one of the two is subscripted. */
    if (strcmp(x->var_name, y->var_name) != 0 ||
        x->is_subscripted != y->is_subscripted)
       return false;

    /* The subscript only matters when one was actually given. */
    return !x->is_subscripted || x->array_subscript == y->array_subscript;
 }

 /**
  * The total number of varying components taken up by this variable.  Only
  * valid if assign_location() has been called.
  *
  * For lowered built-in arrays the count is simply the array size; otherwise
  * it is the element footprint times the array size, doubled for 64-bit
  * types.
  */
 static unsigned
 xfb_decl_num_components(struct xfb_decl *xfb_decl)
 {
    if (xfb_decl->lowered_builtin_array_variable)
       return xfb_decl->size;

    return xfb_decl->vector_elements * xfb_decl->matrix_columns *
       xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
 }

 /**
  * Assign a location and stream ID for this xfb_decl object based on the
  * transform feedback candidate found by find_candidate.
  *
  * If an error occurs, the error is reported through linker_error() and false
  * is returned.
  */
 static bool
 xfb_decl_assign_location(struct xfb_decl *xfb_decl,
                          const struct gl_constants *consts,
                          struct gl_shader_program *prog,
                          bool disable_varying_packing, bool xfb_enabled)
 {
    assert(xfb_decl_is_varying(xfb_decl));

    unsigned fine_location
       = xfb_decl->matched_candidate->toplevel_var->data.location * 4
       + xfb_decl->matched_candidate->toplevel_var->data.location_frac
       + xfb_decl->matched_candidate->struct_offset_floats;
    const unsigned dmul =
       glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;

    if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
       /* Array variable */
       const struct glsl_type *element_type =
          glsl_get_array_element(xfb_decl->matched_candidate->type);
       const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
       const unsigned vector_elements = glsl_get_vector_elements(element_type);
       unsigned actual_array_size;
       switch (xfb_decl->lowered_builtin_array_variable) {
       case clip_distance:
          actual_array_size = prog->last_vert_prog ?
             prog->last_vert_prog->nir->info.clip_distance_array_size : 0;
          break;
       case cull_distance:
          actual_array_size = prog->last_vert_prog ?
             prog->last_vert_prog->nir->info.cull_distance_array_size : 0;
          break;
       case none:
       default:
          actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
          break;
       }

       if (xfb_decl->is_subscripted) {
          /* Check array bounds. */
          if (xfb_decl->array_subscript >= actual_array_size) {
             linker_error(prog, "Transform feedback varying %s has index "
                          "%i, but the array size is %u.",
                          xfb_decl->orig_name, xfb_decl->array_subscript,
                          actual_array_size);
             return false;
          }

          bool array_will_be_lowered =
             lower_packed_varying_needs_lowering(prog->last_vert_prog->nir,
                                                 xfb_decl->matched_candidate->toplevel_var,
                                                 nir_var_shader_out,
                                                 disable_varying_packing,
                                                 xfb_enabled) ||
             strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_ClipDistance") == 0 ||
             strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_CullDistance") == 0 ||
             strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelInner") == 0 ||
             strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelOuter") == 0;

          unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
             1 : (array_will_be_lowered ? vector_elements : 4) * matrix_cols * dmul;
          fine_location += array_elem_size * xfb_decl->array_subscript;
          xfb_decl->size = 1;
       } else {
          xfb_decl->size = actual_array_size;
       }
       xfb_decl->vector_elements = vector_elements;
       xfb_decl->matrix_columns = matrix_cols;
       if (xfb_decl->lowered_builtin_array_variable)
          xfb_decl->type = GL_FLOAT;
       else
          xfb_decl->type = glsl_get_gl_type(element_type);
    } else {
       /* Regular variable (scalar, vector, or matrix) */
       if (xfb_decl->is_subscripted) {
          linker_error(prog, "Transform feedback varying %s requested, "
                       "but %s is not an array.",
                       xfb_decl->orig_name, xfb_decl->var_name);
          return false;
       }
       xfb_decl->size = 1;
       xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
       xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
       xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
    }
    xfb_decl->location = fine_location / 4;
    xfb_decl->location_frac = fine_location % 4;

    /* From GL_EXT_transform_feedback:
     *   A program will fail to link if:
     *
     *   * the total number of components to capture in any varying
     *     variable in <varyings> is greater than the constant
     *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
     *     buffer mode is SEPARATE_ATTRIBS_EXT;
     */
    if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
        xfb_decl_num_components(xfb_decl) >
        consts->MaxTransformFeedbackSeparateComponents) {
       linker_error(prog, "Transform feedback varying %s exceeds "
                    "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                    xfb_decl->orig_name);
       return false;
    }

    /* Only transform feedback varyings can be assigned to non-zero streams,
     * so assign the stream id here.
     */
    xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;

    unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
    unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
    xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
    xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
       array_offset + struct_offset;

    return true;
 }

 /**
  * Number of output entries that storing this declaration will take.
  *
  * Markers that are not real varyings take none.  Varyings with a
  * user-specified location are counted per column of each array element;
  * all others are counted as one contiguous run of components starting at
  * location_frac.
  */
 static unsigned
 xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
 {
    if (!xfb_decl_is_varying(xfb_decl))
       return 0;

    if (!varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
       const unsigned total_components =
          xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac;
       return (total_components + 3) / 4;
    }

    const unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
    const unsigned slots_per_column =
       DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
    return xfb_decl->size * xfb_decl->matrix_columns * slots_per_column;
 }

 /**
  * Tell whether this declaration refers to an output that is flagged as
  * assigned.  gl_NextBuffer and gl_SkipComponents markers never are.
  */
 static bool
 xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
 {
    const bool is_marker =
       xfb_decl->next_buffer_separator || xfb_decl->skip_components;

    /* Markers have no matched candidate, so test them first. */
    return is_marker ?
       false : xfb_decl->matched_candidate->toplevel_var->data.assigned;
 }

 /**
  * Update gl_transform_feedback_info to reflect this xfb_decl.
  *
  * One entry is always appended to info->Varyings.  For real varyings the
  * function additionally validates the component limit, checks for
  * overlapping offsets, appends entries to info->Outputs (only when the
  * varying is written) and updates the stride of the target buffer.
  *
  * \param xfb_decl             declaration to store.
  * \param consts               source of MaxTransformFeedbackInterleavedComponents.
  * \param prog                 program; provides the buffer mode and receives
  *                             link errors.
  * \param info                 structure being filled in.
  * \param buffer               index into info->Buffers and used_components.
  * \param buffer_index         value recorded as the varying's BufferIndex.
  * \param max_outputs          expected capacity of info->Outputs; only used
  *                             in an assertion.
  * \param used_components      per-buffer bitsets of occupied components,
  *                             allocated on first use from mem_ctx.
  * \param explicit_stride      optional (may be NULL) per-buffer flags; when
  *                             set for this buffer the stride is validated
  *                             instead of being updated.
  * \param max_member_alignment optional (may be NULL) per-buffer alignment
  *                             used when deriving the stride.
  * \param has_xfb_qualifiers   selects the declaration's own offset instead
  *                             of the running buffer stride.
  * \param mem_ctx              ralloc context for the used_components bitsets.
  *
  * If an error occurs, the error is reported through linker_error() and false
  * is returned.
  */
 static bool
 xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
                struct gl_shader_program *prog,
                struct gl_transform_feedback_info *info,
                unsigned buffer, unsigned buffer_index,
                const unsigned max_outputs,
                BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
                bool *explicit_stride, unsigned *max_member_alignment,
                bool has_xfb_qualifiers, const void* mem_ctx)
 {
    /* Kept in 4-byte units: the declaration's offset is divided by 4 on the
     * way in and the value is multiplied by 4 when stored or reported.
     */
    unsigned xfb_offset = 0;
    unsigned size = xfb_decl->size;
    /* Handle gl_SkipComponents. */
    if (xfb_decl->skip_components) {
       /* Skipped components only widen the stride; nothing is captured. */
       info->Buffers[buffer].Stride += xfb_decl->skip_components;
       size = xfb_decl->skip_components;
       goto store_varying;
    }

    /* gl_NextBuffer is recorded as a zero-sized varying entry. */
    if (xfb_decl->next_buffer_separator) {
       size = 0;
       goto store_varying;
    }

    /* With layout qualifiers the offset is the declaration's own; otherwise
     * the varying is appended at the current end of the buffer.
     */
    if (has_xfb_qualifiers) {
       xfb_offset = xfb_decl->offset / 4;
    } else {
       xfb_offset = info->Buffers[buffer].Stride;
    }
    info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

    {
       unsigned location = xfb_decl->location;
       unsigned location_frac = xfb_decl->location_frac;
       unsigned num_components = xfb_decl_num_components(xfb_decl);

       /* From GL_EXT_transform_feedback:
        *
        *   " A program will fail to link if:
        *
        *       * the total number of components to capture is greater than the
        *         constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
        *         and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
        *
        * From GL_ARB_enhanced_layouts:
        *
        *   " The resulting stride (implicit or explicit) must be less than or
        *     equal to the implementation-dependent constant
        *     gl_MaxTransformFeedbackInterleavedComponents."
        */
       if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
            has_xfb_qualifiers) &&
           xfb_offset + num_components >
           consts->MaxTransformFeedbackInterleavedComponents) {
          linker_error(prog,
                       "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                       "limit has been exceeded.");
          return false;
       }

       /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
        * Page 76, (Transform Feedback Layout Qualifiers):
        *
        *   " No aliasing in output buffers is allowed: It is a compile-time or
        *     link-time error to specify variables with overlapping transform
        *     feedback offsets."
        */
       const unsigned max_components =
          consts->MaxTransformFeedbackInterleavedComponents;
       const unsigned first_component = xfb_offset;
       const unsigned last_component = xfb_offset + num_components - 1;
       const unsigned start_word = BITSET_BITWORD(first_component);
       const unsigned end_word = BITSET_BITWORD(last_component);
       BITSET_WORD *used;
       assert(last_component < max_components);

       /* The per-buffer bitset is created the first time a buffer is used. */
       if (!used_components[buffer]) {
          used_components[buffer] =
             rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
       }
       used = used_components[buffer];

       /* Walk every bitset word touched by [first_component, last_component],
        * failing if any bit is already set and marking the range otherwise.
        */
       for (unsigned word = start_word; word <= end_word; word++) {
          unsigned start_range = 0;
          unsigned end_range = BITSET_WORDBITS - 1;

          /* Only the first and last word can be partially covered. */
          if (word == start_word)
             start_range = first_component % BITSET_WORDBITS;

          if (word == end_word)
             end_range = last_component % BITSET_WORDBITS;

          if (used[word] & BITSET_RANGE(start_range, end_range)) {
             linker_error(prog,
                          "variable '%s', xfb_offset (%d) is causing aliasing.",
                          xfb_decl->orig_name, xfb_offset * 4);
             return false;
          }
          used[word] |= BITSET_RANGE(start_range, end_range);
       }

       /* Components in one vector of the type, doubled for 64-bit types. */
       const unsigned type_num_components =
          xfb_decl->vector_elements *
          (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
       unsigned current_type_components_left = type_num_components;

       /* Emit one output entry per slot until all components are placed. */
       while (num_components > 0) {
          unsigned output_size = 0;

          /*  From GL_ARB_enhanced_layouts:
           *
           * "When an attribute variable declared using an array type is bound to
           * generic attribute index <i>, the active array elements are assigned to
           * consecutive generic attributes beginning with generic attribute <i>.  The
           * number of attributes and components assigned to each element are
           * determined according to the data type of array elements and "component"
           * layout qualifier (if any) specified in the declaration of the array."
           *
           * "When an attribute variable declared using a matrix type is bound to a
           * generic attribute index <i>, its values are taken from consecutive generic
           * attributes beginning with generic attribute <i>.  Such matrices are
           * treated as an array of column vectors with values taken from the generic
           * attributes."
           *
           * This means there may be gaps in the varyings we are taking values from.
           *
           * Examples:
           *
           * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
           * |                                |                               |
           * |        32b 32b 32b 32b         |        32b 32b 32b 32b        |
           * |      0  X   X   Y   Y          |      4  X   Y   0   0         |
           * |      1  Z   Z   0   0          |      5  X   Y   0   0         |
           * |      2  X   X   Y   Y          |      6  X   Y   0   0         |
           * |      3  Z   Z   0   0          |      7  X   Y   0   0         |
           *
           */
          if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
             /* Never cross a vector boundary within one output entry; start
              * over with a full vector once the current one is exhausted.
              */
             output_size = MIN3(num_components, current_type_components_left, 4);
             current_type_components_left -= output_size;
             if (current_type_components_left == 0) {
                current_type_components_left = type_num_components;
             }
          } else {
             /* Fill up what is left of the current slot. */
             output_size = MIN2(num_components, 4 - location_frac);
          }

          assert((info->NumOutputs == 0 && max_outputs == 0) ||
                 info->NumOutputs < max_outputs);

          /* From the ARB_enhanced_layouts spec:
           *
           *    "If such a block member or variable is not written during a shader
           *    invocation, the buffer contents at the assigned offset will be
           *    undefined.  Even if there are no static writes to a variable or
           *    member that is assigned a transform feedback offset, the space is
           *    still allocated in the buffer and still affects the stride."
           */
          if (xfb_decl_is_varying_written(xfb_decl)) {
             info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
             info->Outputs[info->NumOutputs].OutputRegister = location;
             info->Outputs[info->NumOutputs].NumComponents = output_size;
             info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
             info->Outputs[info->NumOutputs].OutputBuffer = buffer;
             info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
             ++info->NumOutputs;
          }
          info->Buffers[buffer].Stream = xfb_decl->stream_id;
          /* The offset advances even when no output entry was emitted. */
          xfb_offset += output_size;

          num_components -= output_size;
          location++;
          /* Only the first slot can start at a non-zero component. */
          location_frac = 0;
       }
    }

    if (explicit_stride && explicit_stride[buffer]) {
       /* The stride was given explicitly: validate it rather than grow it. */
       if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
           info->Buffers[buffer].Stride % 2) {
          linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                       "multiple of 8 as its applied to a type that is or "
                       "contains a double.",
                       info->Buffers[buffer].Stride * 4);
          return false;
       }

       if (xfb_offset > info->Buffers[buffer].Stride) {
          linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                       "buffer (%d)", xfb_offset * 4,
                       info->Buffers[buffer].Stride * 4, buffer);
          return false;
       }
    } else {
       /* Implicit stride: the end of this varying, aligned to the largest
        * member alignment seen so far when layout qualifiers are in use.
        */
       if (max_member_alignment && has_xfb_qualifiers) {
          max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                              _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
          info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                               max_member_alignment[buffer]);
       } else {
          info->Buffers[buffer].Stride = xfb_offset;
       }
    }

    /* Reached by every successful path, including the two marker cases. */
  store_varying:
    info->Varyings[info->NumVarying].name.string =
       ralloc_strdup(prog, xfb_decl->orig_name);
    resource_name_updated(&info->Varyings[info->NumVarying].name);
    info->Varyings[info->NumVarying].Type = xfb_decl->type;
    info->Varyings[info->NumVarying].Size = size;
    info->Varyings[info->NumVarying].BufferIndex = buffer_index;
    info->NumVarying++;
    info->Buffers[buffer].NumVaryings++;

    return true;
 }

 /**
  * Look up the transform feedback candidate matching this declaration and
  * remember it in xfb_decl->matched_candidate.
  *
  * Lowered clip/cull distance declarations are looked up under the name
  * "gl_ClipDistanceMESA" instead of their own.
  *
  * \return the matched candidate, or NULL after reporting a link error when
  *         no candidate with that name exists.
  */
 static const struct tfeedback_candidate *
 xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
                         struct gl_shader_program *prog,
                         struct hash_table *tfeedback_candidates)
 {
    const char *lookup_name = xfb_decl->var_name;
    if (xfb_decl->lowered_builtin_array_variable == clip_distance ||
        xfb_decl->lowered_builtin_array_variable == cull_distance)
       lookup_name = "gl_ClipDistanceMESA";

    struct hash_entry *found =
       _mesa_hash_table_search(tfeedback_candidates, lookup_name);

    if (found)
       xfb_decl->matched_candidate = (struct tfeedback_candidate *) found->data;
    else
       xfb_decl->matched_candidate = NULL;

    if (xfb_decl->matched_candidate)
       return xfb_decl->matched_candidate;

    /* From GL_EXT_transform_feedback:
     *   A program will fail to link if:
     *
     *   * any variable name specified in the <varyings> array is not
     *     declared as an output in the geometry shader (if present) or
     *     the vertex shader (if no geometry shader is present);
     */
    linker_error(prog, "Transform feedback varying %s undeclared.",
                 xfb_decl->orig_name);

    return xfb_decl->matched_candidate;
 }

 /**
  * Replace the previously matched candidate with \p candidate. This is
  * needed when a new varying has to be created to match the xfb declaration,
  * for example to fulfil an alignment criterion.
  */
 static void
 xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
                                struct tfeedback_candidate *candidate)
 {
    /* The new candidate is addressed directly, so any subscript parsed from
     * the original name no longer applies.
     */
    xfb_decl->array_subscript = 0;
    xfb_decl->is_subscripted = false;

    xfb_decl->matched_candidate = candidate;
 }

 /**
  * Parse all the transform feedback declarations that were passed to
  * glTransformFeedbackVaryings() and store them in xfb_decl objects.
  *
  * decls must have room for num_names entries; entry i is initialized from
  * varying_names[i].
  *
  * If an error occurs, the error is reported through linker_error() and false
  * is returned.
  */
 static bool
 parse_xfb_decls(const struct gl_constants *consts,
                 const struct gl_extensions *exts,
                 struct gl_shader_program *prog,
                 const void *mem_ctx, unsigned num_names,
                 char **varying_names, struct xfb_decl *decls, bool compact_arrays)
 {
    for (unsigned i = 0; i < num_names; ++i) {
       struct xfb_decl *current = &decls[i];

       xfb_decl_init(current, consts, exts, mem_ctx, varying_names[i],
                     compact_arrays);

       /* Markers such as gl_NextBuffer may legitimately repeat. */
       if (!xfb_decl_is_varying(current))
          continue;

       /* From GL_EXT_transform_feedback:
        *   A program will fail to link if:
        *
        *   * any two entries in the <varyings> array specify the same varying
        *     variable;
        *
        * We interpret this to mean "any two entries in the <varyings> array
        * specify the same varying variable and array index", since transform
        * feedback of arrays would be useless otherwise.
        */
       for (unsigned j = 0; j < i; ++j) {
          const struct xfb_decl *earlier = &decls[j];

          if (!xfb_decl_is_varying(earlier) ||
              !xfb_decl_is_same(current, earlier))
             continue;

          linker_error(prog, "Transform feedback varying %s specified "
                       "more than once.", varying_names[i]);
          return false;
       }
    }

    return true;
 }

 static int
 cmp_xfb_offset(const void * x_generic, const void * y_generic)
 {
    struct xfb_decl *x = (struct xfb_decl *) x_generic;
    struct xfb_decl *y = (struct xfb_decl *) y_generic;

    if (x->buffer != y->buffer)
       return x->buffer - y->buffer;
    return x->offset - y->offset;
 }

 /**
  * Store transform feedback location assignments into
  * prog->sh.LinkedTransformFeedback based on the data stored in
  * xfb_decls.
  *
  * Nothing is done (and true is returned) when the program has no
  * last_vert_prog.  When has_xfb_qualifiers is set, xfb_decls is sorted in
  * place by buffer and offset.
  *
  * \param consts              implementation limits.
  * \param prog                program being linked; provides the buffer mode
  *                            and the global xfb_stride values.
  * \param num_xfb_decls       number of entries in xfb_decls.
  * \param xfb_decls           declarations to store.
  * \param has_xfb_qualifiers  whether the setup comes from xfb_* layout
  *                            qualifiers.
  * \param mem_ctx             ralloc context passed on to xfb_decl_store().
  *
  * If an error occurs, the error is reported through linker_error() and false
  * is returned.
  */
 static bool
 store_tfeedback_info(const struct gl_constants *consts,
                      struct gl_shader_program *prog, unsigned num_xfb_decls,
                      struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
                      const void *mem_ctx)
 {
    if (!prog->last_vert_prog)
       return true;

    /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
     * tracking the number of buffers doesn't overflow.
     */
    assert(consts->MaxTransformFeedbackBuffers < 32);

    bool separate_attribs_mode =
       prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

    struct gl_program *xfb_prog = prog->last_vert_prog;
    xfb_prog->sh.LinkedTransformFeedback =
       rzalloc(xfb_prog, struct gl_transform_feedback_info);

    /* The xfb_offset qualifier does not have to be used in increasing order
     * however some drivers expect to receive the list of transform feedback
     * declarations in order so sort it now for convenience.
     */
    if (has_xfb_qualifiers) {
       qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
             cmp_xfb_offset);
    }

    xfb_prog->sh.LinkedTransformFeedback->Varyings =
       rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                     num_xfb_decls);

    /* Size the Outputs array up front; only written varyings get entries. */
    unsigned num_outputs = 0;
    for (unsigned i = 0; i < num_xfb_decls; ++i) {
       if (xfb_decl_is_varying_written(&xfb_decls[i]))
          num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
    }

    xfb_prog->sh.LinkedTransformFeedback->Outputs =
       rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                     num_outputs);

    unsigned num_buffers = 0;
    /* Bitmask of buffers that have at least one varying attached. */
    unsigned buffers = 0;
    BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};

    if (!has_xfb_qualifiers && separate_attribs_mode) {
       /* GL_SEPARATE_ATTRIBS: every declaration goes to a buffer of its own. */
       for (unsigned i = 0; i < num_xfb_decls; ++i) {
          if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                              xfb_prog->sh.LinkedTransformFeedback,
                              num_buffers, num_buffers, num_outputs,
                              used_components, NULL, NULL, has_xfb_qualifiers,
                              mem_ctx))
             return false;

          buffers |= 1 << num_buffers;
          num_buffers++;
       }
    }
    else {
       /* GL_INTERLEAVED_ATTRIBS, or a setup described by xfb_* layout
        * qualifiers.
        */
       /* Stream of the varyings seen so far in the current buffer, or -1 if
        * none has been seen yet.
        */
       int buffer_stream_id = -1;
       unsigned buffer =
          num_xfb_decls ? xfb_decls[0].buffer : 0;
       bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
       unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
       /* Apply any xfb_stride global qualifiers */
       if (has_xfb_qualifiers) {
          for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
             if (prog->TransformFeedback.BufferStride[j]) {
                explicit_stride[j] = true;
                xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                   prog->TransformFeedback.BufferStride[j] / 4;
             }
          }
       }

       for (unsigned i = 0; i < num_xfb_decls; ++i) {
          if (has_xfb_qualifiers &&
              buffer != xfb_decls[i].buffer) {
             /* we have moved to the next buffer so reset stream id */
             buffer_stream_id = -1;
             num_buffers++;
          }

          if (xfb_decls[i].next_buffer_separator) {
             /* The separator is stored against the buffer it terminates;
              * the following declarations start a new one.
              */
             if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                                 xfb_prog->sh.LinkedTransformFeedback,
                                 buffer, num_buffers, num_outputs,
                                 used_components, explicit_stride,
                                 max_member_alignment, has_xfb_qualifiers,
                                 mem_ctx))
                return false;
             num_buffers++;
             buffer_stream_id = -1;
             continue;
          }

          /* The target buffer is explicit with layout qualifiers, otherwise
           * it is the running buffer count.
           */
          if (has_xfb_qualifiers) {
             buffer = xfb_decls[i].buffer;
          } else {
             buffer = num_buffers;
          }

          if (xfb_decl_is_varying(&xfb_decls[i])) {
             if (buffer_stream_id == -1)  {
                /* First varying writing to this buffer: remember its stream */
                buffer_stream_id = (int) xfb_decls[i].stream_id;

                /* Only mark a buffer as active when there is a varying
                 * attached to it. This behaviour is based on a revised version
                 * of section 13.2.2 of the GL 4.6 spec.
                 */
                buffers |= 1 << buffer;
             } else if (buffer_stream_id !=
                        (int) xfb_decls[i].stream_id) {
                /* Varying writes to the same buffer from a different stream */
                linker_error(prog,
                             "Transform feedback can't capture varyings belonging "
                             "to different vertex streams in a single buffer. "
                             "Varying %s writes to buffer from stream %u, other "
                             "varyings in the same buffer write from stream %u.",
                             xfb_decls[i].orig_name,
                             xfb_decls[i].stream_id,
                             buffer_stream_id);
                return false;
             }
          }

          if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                              xfb_prog->sh.LinkedTransformFeedback,
                              buffer, num_buffers, num_outputs, used_components,
                              explicit_stride, max_member_alignment,
                              has_xfb_qualifiers, mem_ctx))
             return false;
       }
    }
    /* Every output counted during sizing must have been emitted. */
    assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

    xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
    return true;
 }

 /**
  * Enum representing the order in which varyings are packed within a
  * packing class.
  *
  * Currently we pack vec4's first, then vec2's, then scalar values, then
  * vec3's.  This order ensures that the only vectors that are at risk of
  * having to be "double parked" (split between two adjacent varying slots)
  * are the vec3's.
  *
  * The enumerators are declared in packing order, so their numeric values
  * can be compared directly; varying_matches_match_comparator() sorts on
  * them.  The value for a variable is chosen by
  * varying_matches_compute_packing_order() from the component-slot count of
  * its array-stripped type, modulo 4.
  */
 enum packing_order_enum {
    PACKING_ORDER_VEC4,   /* component slots % 4 == 0 */
    PACKING_ORDER_VEC2,   /* component slots % 4 == 2 */
    PACKING_ORDER_SCALAR, /* component slots % 4 == 1 */
    PACKING_ORDER_VEC3,   /* component slots % 4 == 3 */
 };

 /**
  * Structure recording the relationship between a single producer output
  * and a single consumer input.
  *
  * Entries are appended by varying_matches_record(); either variable pointer
  * (but not both at record time) may be NULL.
  */
 struct match {
    /**
     * Packing class for this varying, computed by
     * varying_matches_compute_packing_class().
     */
    unsigned packing_class;

    /**
     * Packing order for this varying, computed by
     * varying_matches_compute_packing_order().
     */
    enum packing_order_enum packing_order;

    /**
     * The output variable in the producer stage.
     */
    nir_variable *producer_var;

    /**
     * The input variable in the consumer stage.
     */
    nir_variable *consumer_var;

    /**
     * The location which has been assigned for this varying.  This is
     * expressed in multiples of a float, with the first generic varying
     * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
     * value 0.
     *
     * Written by varying_matches_assign_locations();
     * varying_matches_store_locations() splits it into a slot
     * (generic_location / 4) and a component offset (generic_location % 4).
     */
    unsigned generic_location;

    /**
     * Original index, used as a fallback sorting key to ensure
     * a stable sort.  Assigned by varying_matches_assign_locations() just
     * before the array is sorted.
     */
    unsigned original_index;
 };

 /**
  * Data structure recording the relationship between outputs of one shader
  * stage (the "producer") and inputs of another (the "consumer").
  *
  * Initialised by init_varying_matches() and filled in by
  * varying_matches_record().
  */
 struct varying_matches
 {
    /**
     * If true, this driver disables varying packing, so all varyings need to
     * be aligned on slot boundaries, and take up a number of slots equal to
     * their number of matrix columns times their array size.
     *
     * Packing may also be disabled because our current packing method is not
     * safe in SSO or versions of OpenGL where interpolation qualifiers are not
     * guaranteed to match across stages.
     */
    bool disable_varying_packing;

    /**
     * If true, this driver disables packing for varyings used by transform
     * feedback.
     */
    bool disable_xfb_packing;

    /**
     * If true, this driver has transform feedback enabled. The transform
     * feedback code usually requires at least some packing be done even
     * when varying packing is disabled, fortunately where transform feedback
     * requires packing it's safe to override the disabled setting. See
     * is_varying_packing_safe().
     */
    bool xfb_enabled;

    /**
     * Copy of the ARB_enhanced_layouts extension flag.  When set,
     * varying_matches_store_locations() looks for slots that can use
     * ARB_enhanced_layouts packing instead of lower_packed_varyings().
     */
    bool enhanced_layouts_enabled;

    /**
     * If true, this driver prefers varyings to be aligned to power of two
     * in a slot.
     */
    bool prefer_pot_aligned_varyings;

    /**
     * Array of recorded producer/consumer pairs.  Allocated by
     * init_varying_matches() and grown by varying_matches_record().
     */
    struct match *matches;

    /**
     * The number of elements in the \c matches array that are currently in
     * use.
     */
    unsigned num_matches;

    /**
     * The number of elements that were set aside for the \c matches array when
     * it was allocated.
     */
    unsigned matches_capacity;

    /**
     * Shader stages on the producing and consuming side of the interface.
     * Both init_varying_matches() and varying_matches_record() compare these
     * against MESA_SHADER_NONE, so either side may be unknown.
     */
    gl_shader_stage producer_stage;
    gl_shader_stage consumer_stage;
 };

 /**
  * Comparison function passed to qsort() to sort varyings by packing_class,
  * then by packing_order, and finally by original_index so that the result
  * is deterministic even though qsort() does not guarantee a stable sort.
  *
  * Each key is compared explicitly instead of being subtracted.  The keys
  * are unsigned, so a difference wraps around and only converts to an int
  * of the correct sign while the values happen to be small.
  *
  * \param x_generic  pointer to the first struct match
  * \param y_generic  pointer to the second struct match
  * \return a negative value, zero, or a positive value if x sorts before,
  *         the same as, or after y
  */
 static int
 varying_matches_match_comparator(const void *x_generic, const void *y_generic)
 {
    const struct match *x = (const struct match *) x_generic;
    const struct match *y = (const struct match *) y_generic;

    if (x->packing_class != y->packing_class)
       return x->packing_class < y->packing_class ? -1 : 1;
    if (x->packing_order != y->packing_order)
       return x->packing_order < y->packing_order ? -1 : 1;
    if (x->original_index != y->original_index)
       return x->original_index < y->original_index ? -1 : 1;
    return 0;
 }

 /**
  * Comparison function passed to qsort() to sort varyings used only by
  * transform feedback when packing of other varyings is disabled.
  *
  * Varyings that are not xfb-only keep their original relative order and are
  * placed before the xfb-only ones.  The xfb-only varyings are sorted among
  * themselves with varying_matches_match_comparator(), i.e. by packing class,
  * packing order and finally original index.  Returning 0 for that case
  * would leave their order up to the qsort() implementation and would not
  * group them for packing.
  *
  * \param x_generic  pointer to the first struct match
  * \param y_generic  pointer to the second struct match
  * \return a negative value, zero, or a positive value if x sorts before,
  *         the same as, or after y
  */
 static int
 varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
 {
    const struct match *x = (const struct match *) x_generic;
    const struct match *y = (const struct match *) y_generic;

    const bool x_xfb_only =
       x->producer_var != NULL && x->producer_var->data.is_xfb_only;
    const bool y_xfb_only =
       y->producer_var != NULL && y->producer_var->data.is_xfb_only;

    /* if both varyings are only used by transform feedback, sort them */
    if (x_xfb_only && y_xfb_only)
       return varying_matches_match_comparator(x_generic, y_generic);

    /* if x is xfb-only and y is not, put y first */
    if (x_xfb_only)
       return +1;

    /* if y is xfb-only and x is not, leave x first */
    if (y_xfb_only)
       return -1;

    /* otherwise leave the order alone */
    if (x->original_index != y->original_index)
       return x->original_index < y->original_index ? -1 : 1;
    return 0;
 }

 /**
  * Comparison function passed to qsort() to sort varyings NOT used by
  * transform feedback when packing of xfb varyings is disabled.
  *
  * When both entries have a producer variable that is not captured by
  * transform feedback they are ordered by
  * varying_matches_match_comparator().  Every other pair keeps its original
  * relative order.  The original indices are compared explicitly rather than
  * subtracted, because they are unsigned and a difference would wrap.
  *
  * \param x_generic  pointer to the first struct match
  * \param y_generic  pointer to the second struct match
  * \return a negative value, zero, or a positive value if x sorts before,
  *         the same as, or after y
  */
 static int
 varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
 {
    const struct match *x = (const struct match *) x_generic;
    const struct match *y = (const struct match *) y_generic;

    const bool x_non_xfb =
       x->producer_var != NULL && !x->producer_var->data.is_xfb;
    const bool y_non_xfb =
       y->producer_var != NULL && !y->producer_var->data.is_xfb;

    /* if both are non-xfb, then sort them */
    if (x_non_xfb && y_non_xfb)
       return varying_matches_match_comparator(x_generic, y_generic);

    /* otherwise, leave the order alone */
    if (x->original_index != y->original_index)
       return x->original_index < y->original_index ? -1 : 1;
    return 0;
 }

 /**
  * Report whether the interface between the two stages touches a
  * tessellation stage in a way that rules out packing: the consumer is a
  * tessellation control or evaluation shader, or the producer is a
  * tessellation control shader.
  */
 static bool
 is_unpackable_tess(gl_shader_stage producer_stage,
                    gl_shader_stage consumer_stage)
 {
    switch (consumer_stage) {
    case MESA_SHADER_TESS_EVAL:
    case MESA_SHADER_TESS_CTRL:
       return true;
    default:
       break;
    }

    return producer_stage == MESA_SHADER_TESS_CTRL;
 }

 /**
  * Initialise \c vm for the interface between \c producer_stage and
  * \c consumer_stage: derive the packing policy flags from the driver
  * constants and extensions, and allocate an empty \c matches array out of
  * \c mem_ctx.
  */
 static void
 init_varying_matches(void *mem_ctx, struct varying_matches *vm,
                      const struct gl_constants *consts,
                      const struct gl_extensions *exts,
                      gl_shader_stage producer_stage,
                      gl_shader_stage consumer_stage,
                      bool sso)
 {
    /* Tessellation shaders treat inputs and outputs as shared memory and can
     * access inputs and outputs of other invocations, so they can't be
     * lowered to temps easily (and definitely not efficiently).
     */
    const bool tess_blocks_packing =
       is_unpackable_tess(producer_stage, consumer_stage);

    /* True when one side of the interface is not part of this program. */
    const bool outward_facing = producer_stage == MESA_SHADER_NONE ||
                                consumer_stage == MESA_SHADER_NONE;

    vm->producer_stage = producer_stage;
    vm->consumer_stage = consumer_stage;

    /* Packing is disabled on outward facing interfaces for SSO because in ES
     * the unpacked varying information has to be retained for draw time
     * validation.
     *
     * Individual arrays, structs, and matrices are still packed, as the
     * transform feedback code requires it and it remains safe to do so.
     * Packing is also kept for a varying that is only used for transform
     * feedback when the program is not an SSO.
     */
    vm->disable_varying_packing = consts->DisableVaryingPacking ||
                                  tess_blocks_packing ||
                                  (sso && outward_facing);

    /* Some drivers actually require packing to be explicitly disabled for
     * varyings used by transform feedback.
     */
    vm->disable_xfb_packing = consts->DisableTransformFeedbackPacking;

    /* Transform feedback code assumes varying arrays are packed, so even if
     * the driver has disabled varying packing, the packing required by
     * transform feedback stays enabled - except for the tessellation case
     * above.
     */
    vm->xfb_enabled = exts->EXT_transform_feedback && !tess_blocks_packing;

    vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
    vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;

    /* The initial capacity is rather arbitrarily chosen to be large enough
     * for many cases without wasting an unreasonable amount of space.
     * varying_matches_record() grows the array when more varyings than this
     * are recorded.
     */
    vm->num_matches = 0;
    vm->matches_capacity = 8;
    vm->matches = (struct match *)
       ralloc_array(mem_ctx, struct match, vm->matches_capacity);
 }

 /**
  * Decide whether a varying may still be packed although varying packing is
  * disabled.
  *
  * Never safe across an unpackable tessellation interface or when transform
  * feedback is not enabled.  Otherwise packing is safe on individual arrays,
  * matrices and structures, and on varyings that are only used for transform
  * feedback.
  */
 static bool
 is_varying_packing_safe(struct varying_matches *vm,
                         const struct glsl_type *type, const nir_variable *var)
 {
    if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
       return false;

    if (!vm->xfb_enabled)
       return false;

    if (glsl_type_is_array_or_matrix(type))
       return true;

    if (glsl_type_is_struct(type))
       return true;

    return var->data.is_xfb_only;
 }

 /**
  * Report whether \c var (of type \c type) has to occupy whole slots instead
  * of being packed with other varyings.
  *
  * That is the case when varying packing is disabled and no exception from
  * is_varying_packing_safe() applies, when xfb packing is disabled and the
  * variable is a captured non-aggregate, or when the variable must remain a
  * shader input.
  */
 static bool
 is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
                     const nir_variable *var)
 {
    if (vm->disable_varying_packing &&
        !is_varying_packing_safe(vm, type, var))
       return true;

    if (vm->disable_xfb_packing && var->data.is_xfb) {
       const bool is_aggregate = glsl_type_is_array(type) ||
                                 glsl_type_is_struct(type) ||
                                 glsl_type_is_matrix(type);
       if (!is_aggregate)
          return true;
    }

    return var->data.must_be_shader_input;
 }

 /**
  * Compute the "packing class" of the given varying.  This is an unsigned
  * integer with the property that two variables in the same packing class can
  * be safely packed into the same vec4.
  *
  * Bit layout of the result: bits 0-2 interpolation mode, bit 3 centroid,
  * bit 4 sample, bit 5 patch, bit 6 must_be_shader_input.
  */
 static unsigned
 varying_matches_compute_packing_class(const nir_variable *var)
 {
    /* Variables with different interpolation types cannot be packed together
     * without help from the back-end, since lower_packed_varyings has to pick
     * exactly one interpolation type for every packed varying it creates.
     *
     * Floats, ints and uints can nevertheless share a class:
     *
     * - "int" and "uint" varyings must use "flat" interpolation, which only
     *   exists in GLSL 1.30 and above.
     *
     * - Where GLSL 1.30 and above is supported, lower_packed_varyings can
     *   store flat floats as ints without losing information (using the
     *   ir_unop_bitcast_* opcodes).
     *
     * So the class depends only on the interpolation type, with anything
     * containing integers or doubles treated as flat.
     */
    unsigned interp = var->data.interpolation;

    if (interp == INTERP_MODE_FLAT ||
        glsl_contains_integer(var->type) ||
        glsl_contains_double(var->type))
       interp = (unsigned) INTERP_MODE_FLAT;

    /* The interpolation mode has to fit in the three low bits. */
    assert(interp < (1 << 3));

    unsigned packing_class = interp << 0;
    packing_class |= var->data.centroid << 3;
    packing_class |= var->data.sample << 4;
    packing_class |= var->data.patch << 5;
    packing_class |= var->data.must_be_shader_input << 6;

    return packing_class;
 }

 /**
  * Compute the "packing order" of the given varying.  This is a sort key we
  * use to determine when to attempt to pack the given varying relative to
  * other varyings in the same packing class.
  *
  * The key is derived from the number of component slots of the variable's
  * type with any array dimensions removed, taken modulo 4.
  */
 static enum packing_order_enum
 varying_matches_compute_packing_order(const nir_variable *var)
 {
    const struct glsl_type *element_type = glsl_without_array(var->type);
    const unsigned remainder = glsl_get_component_slots(element_type) % 4;

    if (remainder == 0)
       return PACKING_ORDER_VEC4;
    if (remainder == 1)
       return PACKING_ORDER_SCALAR;
    if (remainder == 2)
       return PACKING_ORDER_VEC2;
    if (remainder == 3)
       return PACKING_ORDER_VEC3;

    assert(!"Unexpected value of vector_elements");
    return PACKING_ORDER_VEC4;
 }

 /**
  * Record the given producer/consumer variable pair in the list of variables
  * that should later be assigned locations.
  *
  * \c consumer_var may be NULL; this happens when a variable is output by the
  * producer and consumed by transform feedback, but not by the consumer
  * stage.  At least one of the two variables must be non-NULL.
  *
  * The call has no effect if either variable already carries a location,
  * i.e. it has an explicit location, has already been paired up, or belongs
  * to fixed pipeline functionality.
  *
  * Note: as a side effect this function may change the interpolation type of
  * the recorded variables, but only when the change couldn't possibly affect
  * rendering.  It may also set must_be_shader_input on \c producer_var when
  * the consumer requires it.
  */
 static void
 varying_matches_record(void *mem_ctx, struct varying_matches *vm,
                        nir_variable *producer_var, nir_variable *consumer_var)
 {
    assert(producer_var != NULL || consumer_var != NULL);

    /* A location already exists for the variable (since it is part of fixed
     * functionality), or it has already been assigned explicitly.
     */
    if (producer_var &&
        (producer_var->data.explicit_location ||
         producer_var->data.location != -1))
       return;

    if (consumer_var &&
        (consumer_var->data.explicit_location ||
         consumer_var->data.location != -1))
       return;

    /* The varyings should not have been matched and assigned previously */
    assert(producer_var == NULL || producer_var->data.location == -1);
    assert(consumer_var == NULL || consumer_var->data.location == -1);

    /* An unconsumed output that is (or contains) an integer or a double. */
    const bool needs_flat_qualifier = consumer_var == NULL &&
       (glsl_contains_integer(producer_var->type) ||
        glsl_contains_double(producer_var->type));

    const bool xfb_allows_change = !vm->disable_xfb_packing ||
                                   producer_var == NULL ||
                                   !producer_var->data.is_xfb;

    const bool consumer_is_known_non_fs =
       vm->consumer_stage != MESA_SHADER_NONE &&
       vm->consumer_stage != MESA_SHADER_FRAGMENT;

    if (!vm->disable_varying_packing && xfb_allows_change &&
        (needs_flat_qualifier || consumer_is_known_non_fs)) {
       /* The varying is not consumed by the fragment shader, so its
        * interpolation type cannot possibly affect rendering; or it is a
        * non-flat variable that is (or contains) an integer or a double.
        * When the consumer stage is unknown the interpolation type is left
        * alone, as it could affect rendering later with separate shaders.
        *
        * lower_packed_varyings requires all integer varyings to be flat,
        * regardless of where they appear, and switching to flat here
        * trivially satisfies that.
        */
       nir_variable *const pair[2] = { producer_var, consumer_var };

       for (unsigned i = 0; i < 2; i++) {
          if (!pair[i])
             continue;

          pair[i]->data.centroid = false;
          pair[i]->data.sample = false;
          pair[i]->data.interpolation = INTERP_MODE_FLAT;
       }
    }

    /* Double the storage once it is full. */
    if (vm->num_matches == vm->matches_capacity) {
       vm->matches_capacity *= 2;
       vm->matches = (struct match *)
          reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
    }

    /* The consumer has to be used to compute the packing class because in
     * GL 4.4+ there is no guarantee that interpolation qualifiers match
     * across stages.
     *
     * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
     *
     *    "The type and presence of interpolation qualifiers of variables with
     *    the same name declared in all linked shaders for the same cross-stage
     *    interface must match, otherwise the link command will fail.
     *
     *    When comparing an output from one stage to an input of a subsequent
     *    stage, the input and output don't match if their interpolation
     *    qualifiers (or lack thereof) are not the same."
     *
     * This text was also in at least revision 7 of the 4.40 spec but is no
     * longer in revision 9 and not in the 4.50 spec.
     */
    const nir_variable *const var = consumer_var ? consumer_var : producer_var;

    if (producer_var && consumer_var &&
        consumer_var->data.must_be_shader_input)
       producer_var->data.must_be_shader_input = 1;

    struct match *const entry = &vm->matches[vm->num_matches];

    entry->packing_class = varying_matches_compute_packing_class(var);
    entry->packing_order = varying_matches_compute_packing_order(var);
    entry->producer_var = producer_var;
    entry->consumer_var = consumer_var;

    vm->num_matches++;
 }

 /**
  * Choose locations for all of the variable matches that were previously
  * passed to varying_matches_record().
  *
  * The matches are first sorted (which comparator is used depends on the
  * packing flags in \c vm) and then walked in order, handing out locations
  * counted in float components.  Patch varyings use a separate counter that
  * starts at MAX_VARYING * 4.  The chosen location is stored in
  * match::generic_location; the variables themselves are not modified here.
  *
  * \param vm          the recorded matches and packing policy
  * \param prog        program being linked; its transform feedback settings
  *                    are consulted and link errors are reported against it
  * \param components  returns array[slot] of number of components used
  *                    per slot (1, 2, 3 or 4)
  * \param reserved_slots  bitmask indicating which varying slots are already
  *                        allocated
  * \return number of slots (4-element vectors) allocated for non-patch
  *         varyings; the patch counter is not included
  */
 static unsigned
 varying_matches_assign_locations(struct varying_matches *vm,
                                  struct gl_shader_program *prog,
                                  uint8_t components[], uint64_t reserved_slots)
 {
    /* Establish the original order of the varying_matches array; our
     * sorts will use this for sorting when the varyings do not have
     * xfb qualifiers
     */
    for (unsigned i = 0; i < vm->num_matches; i++)
       vm->matches[i].original_index = i;

    /* If packing has been disabled then we cannot safely sort the varyings by
     * class as it may mean we are using a version of OpenGL where
     * interpolation qualifiers are not guaranteed to be matching across
     * shaders, sorting in this case could result in mismatching shader
     * interfaces. So we sort only the varyings used by transform feedback.
     *
     * If packing is only disabled for xfb varyings (this is checked only
     * when disable_varying_packing is not set), we then group varyings
     * depending on if they are captured for transform feedback.
     */
    if (vm->disable_varying_packing) {
       /* Only sort varyings that are only used by transform feedback. */
       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
             &varying_matches_xfb_comparator);
    } else if (vm->disable_xfb_packing) {
       /* Only sort varyings that are NOT used by transform feedback. */
       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
             &varying_matches_not_xfb_comparator);
    } else {
       /* Sort varying matches into an order that makes them easy to pack. */
       qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
             &varying_matches_match_comparator);
    }

    /* Next free location for ordinary and for patch varyings, both counted
     * in float components.
     */
    unsigned generic_location = 0;
    unsigned generic_patch_location = MAX_VARYING*4;
    /* State carried over from the previously placed varying. */
    bool previous_var_xfb = false;
    bool previous_var_xfb_only = false;
    unsigned previous_packing_class = ~0u;

    /* For transform feedback separate mode, we know the number of attributes
     * is <= the number of buffers.  So packing isn't critical.  In fact,
     * packing vec3 attributes can cause trouble because splitting a vec3
     * effectively creates an additional transform feedback output.  The
     * extra TFB output may exceed device driver limits.
     *
     * Also don't pack vec3 if the driver prefers power of two aligned
     * varyings. Packing order guarantees that vec4, vec2 and vec1 will be
     * pot-aligned, we only need to take care of vec3s
     */
    const bool dont_pack_vec3 =
       (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
        prog->TransformFeedback.NumVarying > 0) ||
       vm->prefer_pot_aligned_varyings;

    for (unsigned i = 0; i < vm->num_matches; i++) {
       unsigned *location = &generic_location;
       const nir_variable *var;
       const struct glsl_type *type;
       bool is_vertex_input = false;

       /* Prefer the consumer's view of the variable; fall back to the
        * producer when there is no consumer.
        */
       if (vm->matches[i].consumer_var) {
          var = vm->matches[i].consumer_var;
          type = get_varying_type(var, vm->consumer_stage);
          if (vm->consumer_stage == MESA_SHADER_VERTEX)
             is_vertex_input = true;
       } else {
          if (!vm->matches[i].producer_var)
             continue; /* The varying was optimised away */

          var = vm->matches[i].producer_var;
          type = get_varying_type(var, vm->producer_stage);
       }

       /* Patch varyings are allocated from their own counter. */
       if (var->data.patch)
          location = &generic_patch_location;

       /* Advance to the next slot if this varying has a different packing
        * class than the previous one, and we're not already on a slot
        * boundary.
        *
        * Also advance if varying packing is disabled for transform feedback,
        * and previous or current varying is used for transform feedback.
        *
        * Also advance to the next slot if packing is disabled. This makes sure
        * we don't assign varyings the same locations which is possible
        * because we still pack individual arrays, records and matrices even
        * when packing is disabled. Note we don't advance to the next slot if
        * we can pack varyings together that are only used for transform
        * feedback.
        */
       if (var->data.must_be_shader_input ||
           (vm->disable_xfb_packing &&
            (previous_var_xfb || var->data.is_xfb)) ||
           (vm->disable_varying_packing &&
            !(previous_var_xfb_only && var->data.is_xfb_only)) ||
           (previous_packing_class != vm->matches[i].packing_class) ||
           (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
            dont_pack_vec3)) {
          *location = ALIGN(*location, 4);
       }

       previous_var_xfb = var->data.is_xfb;
       previous_var_xfb_only = var->data.is_xfb_only;
       previous_packing_class = vm->matches[i].packing_class;

       /* The number of components taken up by this variable. For vertex shader
        * inputs, we use the number of slots * 4, as they have different
        * counting rules.
        */
       unsigned num_components = 0;
       if (is_vertex_input) {
          num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
       } else {
          if (is_packing_disabled(vm, type, var)) {
             /* Unpacked varyings always take whole slots. */
             num_components = glsl_count_attribute_slots(type, false) * 4;
          } else {
             num_components = glsl_get_component_slots_aligned(type, *location);
          }
       }

       /* The last slot for this variable, inclusive. */
       unsigned slot_end = *location + num_components - 1;

       /* Skip forward, one slot at a time, until the variable no longer
        * overlaps a reserved slot or the end of the generic range is reached.
        *
        * FIXME: We could be smarter in the below code and loop back over
        * trying to fill any locations that we skipped because we couldn't pack
        * the varying between an explicit location. For now just let the user
        * hit the linking error if we run out of room and suggest they use
        * explicit locations.
        */
       while (slot_end < MAX_VARYING * 4u) {
          const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
          const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);

          assert(slots > 0);

          if ((reserved_slots & slot_mask) == 0) {
             break;
          }

          *location = ALIGN(*location + 1, 4);
          slot_end = *location + num_components - 1;
       }

       /* The error is only recorded on the program; the loop carries on with
        * the remaining matches.
        */
       if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
          linker_error(prog, "insufficient contiguous locations available for "
                       "%s it is possible an array or struct could not be "
                       "packed between varyings with explicit locations. Try "
                       "using an explicit location for arrays and structs.",
                       var->name);
       }

       /* Record the per-slot component usage: every slot before the last is
        * full, the last one is used up to the final component.
        */
       if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
          for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
             components[j] = 4;
          components[slot_end / 4u] = (slot_end & 3) + 1;
       }

       vm->matches[i].generic_location = *location;

       *location = slot_end + 1;
    }

    /* Round the non-patch component count up to whole slots. */
    return (generic_location + 3) / 4;
 }

 /**
  * Give every recorded match a provisional location of its own, counted in
  * whole slots from VARYING_SLOT_VAR0 and skipping any slot whose bit is set
  * in \c reserved_slots.  Both variables of a match receive the same
  * location.
  */
 static void
 varying_matches_assign_temp_locations(struct varying_matches *vm,
                                       struct gl_shader_program *prog,
                                       uint64_t reserved_slots)
 {
    unsigned next_slot = 0;

    for (unsigned i = 0; i < vm->num_matches; i++) {
       nir_variable *const pair[2] = {
          vm->matches[i].producer_var,
          vm->matches[i].consumer_var,
       };

       /* Step over slots that are already taken. */
       while (next_slot < MAX_VARYINGS_INCL_PATCH &&
              (reserved_slots & (UINT64_C(1) << next_slot)))
          next_slot++;

       for (unsigned v = 0; v < 2; v++) {
          if (!pair[v])
             continue;

          assert(pair[v]->data.location == -1);
          pair[v]->data.location = VARYING_SLOT_VAR0 + next_slot;
       }

       next_slot++;
    }
 }

 /**
  * Update the producer and consumer shaders to reflect the locations
  * assignments that were made by varying_matches_assign_locations().
  *
  * When ARB_enhanced_layouts is enabled, matched pairs that sit in a slot
  * which needs no lower_packed_varyings() handling, and whose slot-mates all
  * share the same base type, are additionally flagged with
  * explicit_location.
  */
 static void
 varying_matches_store_locations(struct varying_matches *vm)
 {
    /* pack_loc[slot] is set when the slot has to be packed with
     * lower_packed_varyings(); loc_type[slot][component] remembers the type
     * of a varying that could use ARB_enhanced_layouts packing instead.
     */
    bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
    const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };

    for (unsigned i = 0; i < vm->num_matches; i++) {
       nir_variable *producer = vm->matches[i].producer_var;
       nir_variable *consumer = vm->matches[i].consumer_var;
       const unsigned slot = vm->matches[i].generic_location / 4;
       const unsigned offset = vm->matches[i].generic_location % 4;

       if (producer) {
          producer->data.location = VARYING_SLOT_VAR0 + slot;
          producer->data.location_frac = offset;
       }

       if (consumer) {
          consumer->data.location = VARYING_SLOT_VAR0 + slot;
          consumer->data.location_frac = offset;
       }

       if (!vm->enhanced_layouts_enabled)
          continue;

       /* Find locations suitable for native packing via
        * ARB_enhanced_layouts.
        */
       nir_variable *var = producer ? producer : consumer;
       unsigned stage = producer ? vm->producer_stage : vm->consumer_stage;
       const struct glsl_type *type = get_varying_type(var, stage);

       /* Number of slots touched, counted from the start of the first one. */
       const unsigned comp_slots = glsl_get_component_slots(type) + offset;
       const unsigned slots = comp_slots / 4 + (comp_slots % 4 ? 1 : 0);

       /* Unmatched varyings and aggregate or 64-bit types always go through
        * lower_packed_varyings() for every slot they cover.
        */
       const bool pack_all_slots = !(producer && consumer) ||
                                   glsl_type_is_array_or_matrix(type) ||
                                   glsl_type_is_struct(type) ||
                                   glsl_type_is_64bit(type);

       if (pack_all_slots) {
          for (unsigned j = 0; j < slots; j++)
             pack_loc[slot + j] = true;
       } else if (offset + glsl_get_vector_elements(type) > 4) {
          /* The vector straddles two slots. */
          pack_loc[slot] = true;
          pack_loc[slot + 1] = true;
       } else {
          loc_type[slot][offset] = type;
       }
    }

    /* Attempt to use ARB_enhanced_layouts for more efficient packing if
     * suitable.
     */
    if (!vm->enhanced_layouts_enabled)
       return;

    for (unsigned i = 0; i < vm->num_matches; i++) {
       nir_variable *producer = vm->matches[i].producer_var;
       nir_variable *consumer = vm->matches[i].consumer_var;

       if (!producer || !consumer)
          continue;

       const unsigned slot = vm->matches[i].generic_location / 4;
       if (pack_loc[slot])
          continue;

       const struct glsl_type *type =
          get_varying_type(producer, vm->producer_stage);

       /* Every varying recorded for this slot must share the base type. */
       bool type_match = true;
       for (unsigned j = 0; j < 4 && type_match; j++) {
          if (loc_type[slot][j] &&
              glsl_get_base_type(type) != glsl_get_base_type(loc_type[slot][j]))
             type_match = false;
       }

       if (!type_match)
          continue;

       producer->data.explicit_location = 1;
       consumer->data.explicit_location = 1;
    }
 }

 /**
  * Is the given variable a varying variable to be counted against the
  * limit in ctx->Const.MaxVarying?
  * This includes variables such as texcoords, colors and generic
  * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
  *
  * Only inputs of the fragment stage are counted; anything else yields
  * false.
  */
 static bool
 var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
 {
    /* Only fragment shaders will take a varying variable as an input */
    if (stage != MESA_SHADER_FRAGMENT ||
        var->data.mode != nir_var_shader_in)
       return false;

    /* These built-in locations are not counted. */
    return var->data.location != VARYING_SLOT_POS &&
           var->data.location != VARYING_SLOT_FACE &&
           var->data.location != VARYING_SLOT_PNTC;
 }

 /**
  * Traversal state threaded through tfeedback_candidate_generator() while it
  * walks the type of one toplevel variable.
  */
 struct tfeedback_candidate_generator_state {
    /**
     * Memory context used to allocate hash table keys and values.
     */
    void *mem_ctx;

    /**
     * Hash table in which tfeedback_candidate objects should be stored.
     */
    struct hash_table *tfeedback_candidates;

    /**
     * Shader stage associated with the traversal (presumably the stage that
     * owns toplevel_var -- confirm against the caller).
     */
    gl_shader_stage stage;

    /**
     * Pointer to the toplevel variable that is being traversed.
     */
    nir_variable *toplevel_var;

    /**
     * Total number of varying floats that have been visited so far.  This is
     * used to determine the offset to each varying within the toplevel
     * variable.  Rounded up to a multiple of 2 before a 64-bit candidate.
     */
    unsigned varying_floats;

    /**
     * Offset within the xfb. Counted in floats.  Rounded up to a multiple
     * of 2 before a 64-bit candidate.
     */
    unsigned xfb_offset_floats;
 };

 /**
  * Generates tfeedback_candidate structs describing all possible targets of
  * transform feedback.
  *
  * tfeedback_candidate structs are stored in the hash table
  * tfeedback_candidates.  This hash table maps varying names to instances of the
  * tfeedback_candidate struct.
  */
 static void
 tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
                               char **name, size_t name_length,
                               const struct glsl_type *type,
                               const struct glsl_struct_field *named_ifc_member)
 {
    switch (glsl_get_base_type(type)) {
    case GLSL_TYPE_INTERFACE:
       if (named_ifc_member) {
          ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
                                       named_ifc_member->name);
          tfeedback_candidate_generator(state, name, name_length,
                                        named_ifc_member->type, NULL);
          return;
       }
       FALLTHROUGH;
    case GLSL_TYPE_STRUCT:
       for (unsigned i = 0; i < glsl_get_length(type); i++) {
          size_t new_length = name_length;

          /* Append '.field' to the current variable name. */
          if (name) {
             ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
                                          glsl_get_struct_elem_name(type, i));
          }

          tfeedback_candidate_generator(state, name, new_length,
                                        glsl_get_struct_field(type, i), NULL);
       }

       return;
    case GLSL_TYPE_ARRAY:
       if (glsl_type_is_struct(glsl_without_array(type)) ||
           glsl_type_is_interface(glsl_without_array(type)) ||
           glsl_type_is_array(glsl_get_array_element(type))) {

          for (unsigned i = 0; i < glsl_get_length(type); i++) {
             size_t new_length = name_length;

             /* Append the subscript to the current variable name */
             ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

             tfeedback_candidate_generator(state, name, new_length,
                                           glsl_get_array_element(type),
                                           named_ifc_member);
          }

          return;
       }
       FALLTHROUGH;
    default:
       assert(!glsl_type_is_struct(glsl_without_array(type)));
       assert(!glsl_type_is_interface(glsl_without_array(type)));

       struct tfeedback_candidate *candidate
          = rzalloc(state->mem_ctx, struct tfeedback_candidate);
       candidate->toplevel_var = state->toplevel_var;
       candidate->type = type;

       if (glsl_type_is_64bit(glsl_without_array(type))) {
          /*  From ARB_gpu_shader_fp64:
           *
           * If any variable captured in transform feedback has double-precision
           * components, the practical requirements for defined behavior are:
           *     ...
           * (c) each double-precision variable captured must be aligned to a
           *     multiple of eight bytes relative to the beginning of a vertex.
           */
          state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
          /* 64-bit members of structs are also aligned. */
          state->varying_floats = ALIGN(state->varying_floats, 2);
       }

       candidate->xfb_offset_floats = state->xfb_offset_floats;
       candidate->struct_offset_floats = state->varying_floats;

       _mesa_hash_table_insert(state->tfeedback_candidates,
                               ralloc_strdup(state->mem_ctx, *name),
                               candidate);

       const unsigned component_slots = glsl_get_component_slots(type);

       if (varying_has_user_specified_location(state->toplevel_var)) {
          state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
       } else {
          state->varying_floats += component_slots;
       }

       state->xfb_offset_floats += component_slots;
    }
 }

 /**
  * Sorts the consumer's inputs into the three lookup structures used to match
  * them against producer outputs.
  *
  * Inputs with an explicit location are stored in
  * \p consumer_inputs_with_locations at index data.location, inputs that have
  * an interface type go into \p consumer_interface_inputs keyed by
  * "<interface type name>.<variable name>", and all remaining inputs go into
  * \p consumer_inputs keyed by variable name.  Hash keys are allocated from
  * \p mem_ctx.
  */
 static void
 populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
                              struct hash_table *consumer_inputs,
                              struct hash_table *consumer_interface_inputs,
                              nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
 {
    /* Start from an empty location table. */
    memset(consumer_inputs_with_locations, 0,
           VARYING_SLOT_TESS_MAX * sizeof(*consumer_inputs_with_locations));

    nir_foreach_shader_in_variable(input_var, nir) {
       /* All interface blocks should have been lowered by this point */
       assert(!glsl_type_is_interface(input_var->type));

       if (input_var->data.explicit_location) {
          /* assign_varying_locations only cares about finding the
           * nir_variable at the start of a contiguous location block.
           *
           *     - For !producer, consumer_inputs_with_locations isn't used.
           *
           *     - For !consumer, consumer_inputs_with_locations is empty.
           *
           * For consumer && producer, if you were trying to set some
           * nir_variable to the middle of a location block on the other side
           * of producer/consumer, cross_validate_outputs_to_inputs() should
           * be link-erroring due to either type mismatch or location
           * overlaps.  If the variables do match up, then they've got a
           * matching data.location and you only looked at
           * consumer_inputs_with_locations[var->data.location], not any
           * following entries for the array/structure.
           */
          consumer_inputs_with_locations[input_var->data.location] =
             input_var;
          continue;
       }

       if (input_var->interface_type == NULL) {
          /* Plain varying: keyed by its own name. */
          _mesa_hash_table_insert(consumer_inputs,
                                  ralloc_strdup(mem_ctx, input_var->name),
                                  input_var);
          continue;
       }

       /* Interface member: keyed by "<interface type name>.<name>". */
       const struct glsl_type *ifc_type =
          glsl_without_array(input_var->interface_type);
       char *const key = ralloc_asprintf(mem_ctx, "%s.%s",
                                         glsl_get_type_name(ifc_type),
                                         input_var->name);
       _mesa_hash_table_insert(consumer_interface_inputs, key, input_var);
    }
 }

 /**
  * Looks up the consumer input that corresponds to a producer output.
  *
  * Outputs with an explicit location are matched through
  * \p consumer_inputs_with_locations; outputs with an interface type are
  * matched by "<interface type name>.<variable name>" in
  * \p consumer_interface_inputs; everything else is matched by name in
  * \p consumer_inputs.  No validation of type compatibility etc. happens here.
  *
  * \return the matching variable if one was found and its mode is
  *         nir_var_shader_in, NULL otherwise.
  */
 static nir_variable *
 get_matching_input(void *mem_ctx,
                    const nir_variable *output_var,
                    struct hash_table *consumer_inputs,
                    struct hash_table *consumer_interface_inputs,
                    nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
 {
    nir_variable *match = NULL;

    if (output_var->data.explicit_location) {
       match = consumer_inputs_with_locations[output_var->data.location];
    } else {
       struct hash_entry *found;

       if (output_var->interface_type != NULL) {
          const struct glsl_type *ifc_type =
             glsl_without_array(output_var->interface_type);
          char *const key = ralloc_asprintf(mem_ctx, "%s.%s",
                                            glsl_get_type_name(ifc_type),
                                            output_var->name);
          found = _mesa_hash_table_search(consumer_interface_inputs, key);
       } else {
          found = _mesa_hash_table_search(consumer_inputs, output_var->name);
       }

       if (found)
          match = (nir_variable *) found->data;
    }

    /* Only variables that are still shader inputs count as a match. */
    if (match != NULL && match->data.mode == nir_var_shader_in)
       return match;

    return NULL;
 }

 /**
  * qsort() comparator over arrays of nir_variable pointers.
  *
  * Produces the reverse of the canonical order: variables without an explicit
  * location sort before those with one, explicit locations sort in descending
  * location order, and the remaining variables sort in descending name order.
  */
 static int
 io_variable_cmp(const void *_a, const void *_b)
 {
    const nir_variable *const lhs = *(const nir_variable **) _a;
    const nir_variable *const rhs = *(const nir_variable **) _b;
    const bool lhs_explicit = lhs->data.explicit_location;
    const bool rhs_explicit = rhs->data.explicit_location;

    /* Exactly one side has an explicit location. */
    if (lhs_explicit != rhs_explicit)
       return lhs_explicit ? 1 : -1;

    if (lhs_explicit)
       return rhs->data.location - lhs->data.location;

    return -strcmp(lhs->name, rhs->name);
 }

 /**
  * Sort the shader IO variables of the given mode into canonical order.
  *
  * The variables are collected, sorted with io_variable_cmp() and re-inserted
  * at the head of the shader's variable list.  Nothing is changed when there
  * are no such variables or when there are more than the local table holds.
  */
 static void
 canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
 {
    nir_variable *sorted[MAX_PROGRAM_OUTPUTS * 4];
    unsigned count = 0;

    nir_foreach_variable_with_modes(var, nir, io_mode) {
       /* If we have already encountered more I/O variables that could
        * successfully link, bail.
        */
       if (count == ARRAY_SIZE(sorted))
          return;

       sorted[count] = var;
       count++;
    }

    if (!count)
       return;

    /* Sort the list in reverse order (io_variable_cmp handles this).  Later
     * we're going to push the variables on to the IR list as a stack, so we
     * want the last variable (in canonical order) to be first in the list.
     */
    qsort(sorted, count, sizeof(sorted[0]), io_variable_cmp);

    /* Remove each variable from its current location in the variable list,
     * and put it at the front.
     */
    for (unsigned idx = 0; idx < count; idx++) {
       nir_variable *const var = sorted[idx];

       exec_node_remove(&var->node);
       exec_list_push_head(&nir->variables, &var->node);
    }
 }

 /**
  * Generate a bitfield map of the explicit locations for shader varyings.
  *
  * Bit N of the result corresponds to slot VARYING_SLOT_VAR0 + N.  Only
  * variables of \p io_mode that have an explicit location at or above
  * VARYING_SLOT_VAR0 contribute.  A NULL \p sh yields an empty map.
  *
  * Note: For Tessellation shaders we are sitting right on the limits of the
  * 64 bit map. Per-vertex and per-patch both have separate location domains
  * with a max of MAX_VARYING.
  */
 static uint64_t
 reserved_varying_slot(struct gl_linked_shader *sh,
                       nir_variable_mode io_mode)
 {
    assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
    /* Avoid an overflow of the returned value */
    assert(MAX_VARYINGS_INCL_PATCH <= 64);

    uint64_t reserved = 0;

    if (!sh)
       return reserved;

    const bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
                                    sh->Stage == MESA_SHADER_VERTEX;

    nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
       if (!var->data.explicit_location)
          continue;

       if (var->data.location < VARYING_SLOT_VAR0)
          continue;

       const int first_slot = var->data.location - VARYING_SLOT_VAR0;
       const unsigned num_slots =
          glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
                                     is_gl_vertex_input);

       for (unsigned i = 0; i < num_slots; i++) {
          const int slot = first_slot + (int) i;

          /* Slots that do not fit in the 64-bit map are ignored. */
          if (slot < 0 || slot >= MAX_VARYINGS_INCL_PATCH)
             continue;

          reserved |= UINT64_C(1) << slot;
       }
    }

    return reserved;
 }

 /**
  * Marks every slot occupied by \p var in \p bits, which is the inputs_read
  * or outputs_written style bitfield being built.
  *
  * Bit indices are relative to VARYING_SLOT_VAR0.  For arrayed I/O the
  * outermost array level is stripped before the slots are counted.
  */
 static void
 set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
 {
    assert(var->data.mode == nir_var_shader_in ||
           var->data.mode == nir_var_shader_out);
    assert(var->data.location >= VARYING_SLOT_VAR0);

    const struct glsl_type *slot_type = var->type;
    if (nir_is_arrayed_io(var, stage)) {
       assert(glsl_type_is_array(slot_type));
       slot_type = glsl_get_array_element(slot_type);
    }

    const unsigned first_bit = var->data.location - VARYING_SLOT_VAR0;
    const unsigned num_bits = glsl_count_attribute_slots(slot_type, false);

    for (unsigned offset = 0; offset < num_bits; offset++)
       BITSET_SET(bits, first_bit + offset);
 }

 /**
  * Returns the number of components tracked for \p var: 4 when its type (with
  * arrays stripped) is a struct or interface, the vector element count of that
  * type otherwise.
  */
 static uint8_t
 get_num_components(nir_variable *var)
 {
    const struct glsl_type *base_type = glsl_without_array(var->type);

    return glsl_type_is_struct_or_ifc(base_type)
       ? 4 : glsl_get_vector_elements(base_type);
 }

 /**
  * Scans \p shader for load_deref intrinsics on shader outputs and marks the
  * slots of every such output at or above VARYING_SLOT_VAR0 in \p read.
  *
  * \p read is indexed by component; one bitset is updated for each component
  * the variable covers, starting at its location_frac.
  */
 static void
 tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
 {
    nir_foreach_function_impl(impl, shader) {
       nir_foreach_block(block, impl) {
          nir_foreach_instr(instr, block) {
             if (instr->type != nir_instr_type_intrinsic)
                continue;

             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
             if (load->intrinsic != nir_intrinsic_load_deref)
                continue;

             nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
             if (!nir_deref_mode_is(deref, nir_var_shader_out))
                continue;

             nir_variable *var = nir_deref_instr_get_variable(deref);

             /* Outputs below VARYING_SLOT_VAR0 are not tracked. */
             if (var->data.location < VARYING_SLOT_VAR0)
                continue;

             const unsigned first_comp = var->data.location_frac;
             const unsigned num_comps = get_num_components(var);

             for (unsigned c = 0; c < num_comps; c++) {
                set_variable_io_mask(read[first_comp + c], var,
                                     shader->info.stage);
             }
          }
       }
    }
 }

 /* Instruction callback: interp_deref_at_{centroid,sample,offset} intrinsics
  * whose variable has become a shader_temp are replaced with an undef of the
  * same size, as no further NIR pass expects to see them.
  *
  * Returns true when the instruction was replaced.
  */
 static bool
 replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
                                          void *data)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;

    nir_intrinsic_instr *interp = nir_instr_as_intrinsic(instr);

    switch (interp->intrinsic) {
    case nir_intrinsic_interp_deref_at_centroid:
    case nir_intrinsic_interp_deref_at_sample:
    case nir_intrinsic_interp_deref_at_offset:
       break;
    default:
       return false;
    }

    nir_variable *var = nir_intrinsic_get_var(interp, 0);
    if (var->data.mode != nir_var_shader_temp)
       return false;

    /* Create undef and rewrite the interp uses */
    nir_def *undef = nir_undef(b, interp->def.num_components,
                               interp->def.bit_size);
    nir_def_replace(&interp->def, undef);

    return true;
 }

 /**
  * Cleans up \p shader after some of its I/O variables of \p mode were turned
  * into shader_temp variables.
  *
  * For fragment shader inputs the interpolate intrinsics on those variables
  * are replaced with undef first; then global variables are lowered to locals
  * and the deref modes are fixed up.
  */
 static void
 fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
 {
    const bool is_fs_input = mode == nir_var_shader_in &&
                             shader->info.stage == MESA_SHADER_FRAGMENT;

    /* Remove all interpolate uses of the unset varying and replace with undef. */
    if (is_fs_input) {
       (void) nir_shader_instructions_pass(shader,
                                           replace_unused_interpolate_at_with_undef,
                                           nir_metadata_control_flow,
                                           NULL);
    }

    nir_lower_global_vars_to_local(shader);
    nir_fixup_deref_modes(shader);
 }

 /**
  * Helper for removing unused shader I/O variables, by demoting them to global
  * variables (which may then be dead code eliminated).
  *
  * Example usage is:
  *
  * progress = remove_unused_io_vars(producer, consumer, prog,
  *                                  nir_var_shader_out, read) || progress;
  *
  * \param producer             shader on the writing side of the interface
  * \param consumer             shader on the reading side of the interface
  * \param prog                 program that receives link errors and warnings
  * \param mode                 nir_var_shader_out to examine the outputs of
  *                             \p producer, nir_var_shader_in to examine the
  *                             inputs of \p consumer
  * \param used_by_other_stage  array indexed by .location_frac; each entry is
  *                             a bitset, indexed by location relative to
  *                             VARYING_SLOT_VAR0, of the slots used on the
  *                             other side of the interface
  *
  * Note that for vector variables, only the first channel (.location_frac) is
  * examined for deciding if the variable is used!
  *
  * \return true if at least one variable was demoted.
  */
 static bool
 remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
                       struct gl_shader_program *prog,
                       nir_variable_mode mode,
                       BITSET_WORD **used_by_other_stage)
 {
    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

    bool progress = false;
    nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;

    nir_foreach_variable_with_modes_safe(var, shader, mode) {
       /* Skip builtins; dead builtins are removed elsewhere */
       if (is_gl_identifier(var->name))
          continue;

       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
          continue;

       /* Skip xfb varyings and any other type we cannot remove */
       if (var->data.always_active_io)
          continue;

       if (var->data.explicit_xfb_buffer)
          continue;

       BITSET_WORD *other_stage = used_by_other_stage[var->data.location_frac];

       /* If location == -1 lower the varying to a global as it has no match
        * and is not a xfb varying. This must be done after skipping builtins
        * as builtins could be assigned a location of -1.
        * We also lower unused varyings with explicit locations.
        */
       bool use_found = false;
       if (var->data.location >= 0) {
          unsigned location = var->data.location - VARYING_SLOT_VAR0;

          const struct glsl_type *type = var->type;
          if (nir_is_arrayed_io(var, shader->info.stage)) {
             assert(glsl_type_is_array(type));
             type = glsl_get_array_element(type);
          }

          /* The variable is used if any slot it covers is used. */
          unsigned slots = glsl_count_attribute_slots(type, false);
          for (unsigned i = 0; i < slots; i++) {
             if (BITSET_TEST(other_stage, location + i)) {
                use_found = true;
                break;
             }
          }
       }

       if (use_found)
          continue;

       /* This one is invalid, make it a global variable instead */
       var->data.location = 0;
       var->data.mode = nir_var_shader_temp;

       progress = true;

       /* Only inputs without a matching write are reported to the user. */
       if (mode != nir_var_shader_in)
          continue;

       if (!prog->IsES && prog->GLSL_Version <= 120) {
          /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
           *
           *     Only those varying variables used (i.e. read) in
           *     the fragment shader executable must be written to
           *     by the vertex shader executable; declaring
           *     superfluous varying variables in a vertex shader is
           *     permissible.
           *
           * We interpret this text as meaning that the VS must
           * write the variable for the FS to read it.  See
           * "glsl1-varying read but not written" in piglit.
           */
          linker_error(prog, "%s shader varying %s not written "
                       "by %s shader.\n",
                       _mesa_shader_stage_to_string(consumer->info.stage),
                       var->name,
                       _mesa_shader_stage_to_string(producer->info.stage));
       } else {
          linker_warning(prog, "%s shader varying %s not written "
                         "by %s shader.\n",
                         _mesa_shader_stage_to_string(consumer->info.stage),
                         var->name,
                         _mesa_shader_stage_to_string(producer->info.stage));
       }
    }

    if (progress)
       fixup_vars_lowered_to_temp(shader, mode);

    return progress;
 }

 static bool
 remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
                        struct gl_shader_program *prog, void *mem_ctx)
 {
    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
    assert(consumer->info.stage != MESA_SHADER_VERTEX);

    int max_loc_out = 0;
    nir_foreach_shader_out_variable(var, producer) {
       if (var->data.location < VARYING_SLOT_VAR0)
          continue;

       const struct glsl_type *type = var->type;
       if (nir_is_arrayed_io(var, producer->info.stage)) {
          assert(glsl_type_is_array(type));
          type = glsl_get_array_element(type);
       }
       unsigned slots = glsl_count_attribute_slots(type, false);

       max_loc_out = max_loc_out < (var->data.location - VARYING_SLOT_VAR0) + slots ?
          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_out;
    }

    int max_loc_in = 0;
    nir_foreach_shader_in_variable(var, consumer) {
       if (var->data.location < VARYING_SLOT_VAR0)
          continue;

       const struct glsl_type *type = var->type;
       if (nir_is_arrayed_io(var, consumer->info.stage)) {
          assert(glsl_type_is_array(type));
          type = glsl_get_array_element(type);
       }
       unsigned slots = glsl_count_attribute_slots(type, false);

       max_loc_in = max_loc_in < (var->data.location - VARYING_SLOT_VAR0) + slots ?
          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_in;
    }

    /* Old glsl shaders that don't use explicit locations can contain greater
     * than 64 varyings before unused varyings are removed so we must count them
     * and make use of the BITSET macros to keep track of used slots. Once we
     * have removed these excess varyings we can make use of further nir varying
     * linking optimimisation passes.
     */
    BITSET_WORD *read[4];
    BITSET_WORD *written[4];
    int max_loc = MAX2(max_loc_in, max_loc_out);
    for (unsigned i = 0; i < 4; i++) {
       read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
       written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
    }

    nir_foreach_shader_out_variable(var, producer) {
       if (var->data.location < VARYING_SLOT_VAR0)
          continue;

       for (unsigned i = 0; i < get_num_components(var); i++) {
          unsigned comp = var->data.location_frac;
          set_variable_io_mask(written[comp + i], var, producer->info.stage);
       }
    }

    nir_foreach_shader_in_variable(var, consumer) {
       if (var->data.location < VARYING_SLOT_VAR0)
          continue;

       for (unsigned i = 0; i < get_num_components(var); i++) {
          unsigned comp = var->data.location_frac;
          set_variable_io_mask(read[comp + i], var, consumer->info.stage);
       }
    }

    /* Each TCS invocation can read data written by other TCS invocations,
     * so even if the outputs are not used by the TES we must also make
     * sure they are not read by the TCS before demoting them to globals.
     */
    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
       tcs_add_output_reads(producer, read);

    bool progress = false;
    progress =
       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
    progress =
       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;

    return progress;
 }

 /**
  * Decides whether a producer output should be recorded in the varying
  * matches.
  *
  * \param input_var  matching consumer input, or NULL if none was found
  * \param prog       program being linked
  * \param producer   producer stage
  * \param consumer   consumer stage, or NULL if there is none
  */
 static bool
 should_add_varying_match_record(nir_variable *const input_var,
                                 struct gl_shader_program *prog,
                                 struct gl_linked_shader *producer,
                                 struct gl_linked_shader *consumer) {

    /* If a matching input variable was found, add this output (and the input)
     * to the set.
     */
    if (input_var)
       return true;

    /* If this is a separable program and there is no consumer stage, add the
     * output.
     */
    if (prog->SeparateShader && consumer == NULL)
       return true;

    /* Always add TCS outputs. They are shared by all invocations
     * within a patch and can be used as shared memory.
     */
    return producer->Stage == MESA_SHADER_TESS_CTRL;
 }

 /**
  * Assigns some initial unoptimised varying locations so that our NIR
  * optimisations can perform some initial optimisations, and also does the
  * initial processing of the transform feedback declarations in \p xfb_decls.
  *
  * Producer outputs are paired with consumer inputs and recorded in \p vm.
  * Every varying xfb declaration is resolved to a tfeedback_candidate whose
  * toplevel variable is flagged is_xfb and always_active_io.  Temporary
  * locations are then assigned through varying_matches_assign_temp_locations()
  * and the resulting location of each matched candidate's toplevel variable is
  * remembered in the candidate.
  *
  * \param consts         context constants (used for the per-stage
  *                       LowerBuiltinVariablesXfb mask)
  * \param exts           extensions, forwarded to init_varying_matches()
  * \param mem_ctx        ralloc context for hash tables, keys and candidates
  * \param prog           program being linked; receives link errors
  * \param producer       producer stage, or NULL if there is none
  * \param consumer       consumer stage, or NULL if there is none
  * \param num_xfb_decls  number of entries in \p xfb_decls
  * \param xfb_decls      transform feedback declarations
  * \param vm             varying matches, initialised and filled in here
  *
  * \return false if an output on a non-zero stream is linked to an input, if
  *         a varying xfb declaration has no matching candidate, or if
  *         gl_nir_lower_xfb_varying() returns NULL; true otherwise.
  */
 static bool
 assign_initial_varying_locations(const struct gl_constants *consts,
                                  const struct gl_extensions *exts,
                                  void *mem_ctx,
                                  struct gl_shader_program *prog,
                                  struct gl_linked_shader *producer,
                                  struct gl_linked_shader *consumer,
                                  unsigned num_xfb_decls,
                                  struct xfb_decl *xfb_decls,
                                  struct varying_matches *vm)
 {
    init_varying_matches(mem_ctx, vm, consts, exts,
                         producer ? producer->Stage : MESA_SHADER_NONE,
                         consumer ? consumer->Stage : MESA_SHADER_NONE,
                         prog->SeparateShader);

    /* String-keyed lookup tables: xfb candidates by varying name, and the
     * consumer's inputs by name / by interface-qualified name.
     */
    struct hash_table *tfeedback_candidates =
          _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                  _mesa_key_string_equal);
    struct hash_table *consumer_inputs =
          _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                  _mesa_key_string_equal);
    struct hash_table *consumer_interface_inputs =
          _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                  _mesa_key_string_equal);
    /* Consumer inputs with explicit locations, indexed by location. */
    nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
       NULL,
    };

    if (consumer)
       populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
                                    consumer_inputs, consumer_interface_inputs,
                                    consumer_inputs_with_locations);

    if (producer) {
       nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
          /* Only geometry shaders can use non-zero streams */
          assert(output_var->data.stream == 0 ||
                 (output_var->data.stream < MAX_VERTEX_STREAMS &&
                  producer->Stage == MESA_SHADER_GEOMETRY));

          /* Candidates are only generated when there is something to
           * capture.
           */
          if (num_xfb_decls > 0) {
             /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
              * ("Vertex Shader Variables / Output Variables")
              *
              * "Each program object can specify a set of output variables from
              * one shader to be recorded in transform feedback mode (see
              * section 13.3). The variables that can be recorded are those
              * emitted by the first active shader, in order, from the
              * following list:
              *
              *  * geometry shader
              *  * tessellation evaluation shader
              *  * tessellation control shader
              *  * vertex shader"
              *
              * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
              * Variables / Output Variables") tessellation control shader is
              * not included in the stages list.
              */
             if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {

                /* Members of named interface blocks are walked starting
                 * from the interface type rather than the member's own type.
                 */
                const struct glsl_type *type = output_var->data.from_named_ifc_block ?
                   output_var->interface_type : output_var->type;
                /* Non-patch TCS outputs have an outer array level that is
                 * stripped here.
                 */
                if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
                   assert(glsl_type_is_array(type));
                   type = glsl_get_array_element(type);
                }

                const struct glsl_struct_field *ifc_member = NULL;
                if (output_var->data.from_named_ifc_block) {
                   ifc_member =
                      glsl_get_struct_field_data(glsl_without_array(type),
                         glsl_get_field_index(glsl_without_array(type), output_var->name));
                }

                /* Pick the name the candidate names are built from.  It is
                 * allocated without a parent context and freed right after
                 * the generator has run.
                 */
                char *name;
                if (glsl_type_is_struct(glsl_without_array(type)) ||
                    (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
                   type = output_var->type;
                   name = ralloc_strdup(NULL, output_var->name);
                } else if (glsl_type_is_interface(glsl_without_array(type))) {
                   name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
                } else  {
                   name = ralloc_strdup(NULL, output_var->name);
                }

                /* Offsets start at zero for every toplevel variable. */
                struct tfeedback_candidate_generator_state state;
                state.mem_ctx = mem_ctx;
                state.tfeedback_candidates = tfeedback_candidates;
                state.stage = producer->Stage;
                state.toplevel_var = output_var;
                state.varying_floats = 0;
                state.xfb_offset_floats = 0;

                tfeedback_candidate_generator(&state, &name, strlen(name), type,
                                              ifc_member);
                ralloc_free(name);
             }
          }

          nir_variable *const input_var =
             get_matching_input(mem_ctx, output_var, consumer_inputs,
                                consumer_interface_inputs,
                                consumer_inputs_with_locations);

          if (should_add_varying_match_record(input_var, prog, producer,
                                              consumer)) {
             varying_matches_record(mem_ctx, vm, output_var, input_var);
          }

          /* Only stream 0 outputs can be consumed in the next stage */
          if (input_var && output_var->data.stream != 0) {
             linker_error(prog, "output %s is assigned to stream=%d but "
                          "is linked to an input, which requires stream=0",
                          output_var->name, output_var->data.stream);
             return false;
          }
       }
    } else {
       /* If there's no producer stage, then this must be a separable program.
        * For example, we may have a program that has just a fragment shader.
        * Later this program will be used with some arbitrary vertex (or
        * geometry) shader program.  This means that locations must be assigned
        * for all the inputs.
        */
       nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
          varying_matches_record(mem_ctx, vm, NULL, input_var);
       }
    }

    /* Resolve every varying xfb declaration to a candidate and flag the
     * variables involved.
     */
    for (unsigned i = 0; i < num_xfb_decls; ++i) {
       if (!xfb_decl_is_varying(&xfb_decls[i]))
          continue;

       const struct tfeedback_candidate *matched_candidate
          = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);

       if (matched_candidate == NULL)
          return false;

       /* There are two situations where a new output varying is needed:
        *
        *  - If varying packing is disabled for xfb and the current declaration
        *    is subscripting an array, whether the subscript is aligned or not.
        *    to preserve the rest of the array for the consumer.
        *
        *  - If a builtin variable needs to be copied to a new variable
        *    before its content is modified by another lowering pass (e.g.
        *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
        */
       const bool lowered =
          (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
          (matched_candidate->toplevel_var->data.explicit_location &&
           matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
           (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
           (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
               BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));

       if (lowered) {
          nir_variable *new_var;
          struct tfeedback_candidate *new_candidate = NULL;

          new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
                                             xfb_decls[i].orig_name,
                                             matched_candidate->toplevel_var);
          if (new_var == NULL)
             return false;

          /* Create new candidate and replace matched_candidate */
          new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
          new_candidate->toplevel_var = new_var;
          new_candidate->type = new_var->type;
          new_candidate->struct_offset_floats = 0;
          new_candidate->xfb_offset_floats = 0;
          _mesa_hash_table_insert(tfeedback_candidates,
                                  ralloc_strdup(mem_ctx, new_var->name),
                                  new_candidate);

          xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
          matched_candidate = new_candidate;
       }

       /* Mark as xfb varying */
       matched_candidate->toplevel_var->data.is_xfb = 1;

       /* Mark xfb varyings as always active */
       matched_candidate->toplevel_var->data.always_active_io = 1;

       /* Mark any corresponding inputs as always active also. We must do this
        * because we have a NIR pass that lowers vectors to scalars and another
        * that removes unused varyings.
        * We don't split varyings marked as always active because there is no
        * point in doing so. This means we need to mark both sides of the
        * interface as always active otherwise we will have a mismatch and
        * start removing things we shouldn't.
        */
       nir_variable *const input_var =
          get_matching_input(mem_ctx, matched_candidate->toplevel_var,
                             consumer_inputs, consumer_interface_inputs,
                             consumer_inputs_with_locations);
       if (input_var) {
          input_var->data.is_xfb = 1;
          input_var->data.always_active_io = 1;
       }

       /* Add the xfb varying to varying matches if it wasn't already added */
       if ((!should_add_varying_match_record(input_var, prog, producer,
                                             consumer) &&
            !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
          matched_candidate->toplevel_var->data.is_xfb_only = 1;
          varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
                                 NULL);
       }
    }

    /* Collect the slots already taken by explicit locations on either side
     * of the interface.
     */
    uint64_t reserved_out_slots = 0;
    if (producer)
       reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);

    uint64_t reserved_in_slots = 0;
    if (consumer)
       reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);

    /* Assign temporary user varying locations. This is required for our NIR
     * varying optimisations to do their matching.
     */
    const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
    varying_matches_assign_temp_locations(vm, prog, reserved_slots);

    /* Remember the location each xfb varying's toplevel variable has now
     * that temporary locations were assigned.
     */
    for (unsigned i = 0; i < num_xfb_decls; ++i) {
       if (!xfb_decl_is_varying(&xfb_decls[i]))
          continue;

       xfb_decls[i].matched_candidate->initial_location =
          xfb_decls[i].matched_candidate->toplevel_var->data.location;
       xfb_decls[i].matched_candidate->initial_location_frac =
          xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
    }

    return true;
 }

 /**
  * Run the cross-stage optimisation passes on one producer/consumer pair.
  *
  * \param vm        varying matches for the pair; only its
  *                  disable_varying_packing and disable_xfb_packing flags are
  *                  read here.
  * \param producer  NIR shader of the stage that writes the varyings.
  * \param consumer  NIR shader of the stage that reads the varyings.
  * \param prog      program being linked, forwarded to
  *                  remove_unused_varyings().
  * \param mem_ctx   ralloc context forwarded to remove_unused_varyings().
  */
 static void
 link_shader_opts(struct varying_matches *vm,
                  nir_shader *producer, nir_shader *consumer,
                  struct gl_shader_program *prog, void *mem_ctx)
 {
    /* Lowering io to scalar is only done here when this pass is also allowed
     * to pack the stage. If it is not, scalarising is left to a later NIR
     * linking pass that uses ARB_enhanced_layout style packing to pack things
     * further.
     *
     * Doing it regardless could cause linking errors and perf regressions,
     * since each new scalar would get a slot of its own and the available
     * slots could overflow.
     */
    const bool scalarize_io =
       producer->options->lower_to_scalar &&
       !(vm->disable_varying_packing || vm->disable_xfb_packing);

    if (scalarize_io) {
       NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
       NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
    }

    gl_nir_opts(producer);
    gl_nir_opts(consumer);

    /* Re-optimise the consumer only when the varying optimisation reported
     * progress.
     */
    const bool varyings_optimised = nir_link_opt_varyings(producer, consumer);
    if (varyings_optimised)
       gl_nir_opts(consumer);

    NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
    NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);

    const bool varyings_removed =
       remove_unused_varyings(producer, consumer, prog, mem_ctx);
    if (varyings_removed) {
       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
       NIR_PASS(_, consumer, nir_lower_global_vars_to_local);

       gl_nir_opts(producer);
       gl_nir_opts(consumer);

       /* The optimisation round above can leave further varyings unused.
        * nir_compact_varyings() depends on all dead varyings being removed,
        * so nir_remove_dead_variables() has to run once more here.
        */
       NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
                  NULL);
       NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
                  NULL);
    }

    nir_link_varying_precision(producer, consumer);
 }

 /**
  * Assign locations for all variables that are produced in one pipeline stage
  * (the "producer") and consumed in the next stage (the "consumer").
  *
  * Variables produced by the producer may also be consumed by transform
  * feedback.
  *
  * \param num_xfb_decls is the number of declarations indicating
  *        variables that may be consumed by transform feedback.
  *
  * \param xfb_decls is a pointer to an array of xfb_decl objects
  *        representing the result of parsing the strings passed to
  *        glTransformFeedbackVaryings().  assign_location() will be called for
  *        each of these objects that matches one of the outputs of the
  *        producer.
  *
  * When num_xfb_decls is nonzero, it is permissible for the consumer to
  * be NULL.  In this case, varying locations are assigned solely based on the
  * requirements of transform feedback.
  */
 static bool
 assign_final_varying_locations(const struct gl_constants *consts,
                                const struct gl_extensions *exts,
                                void *mem_ctx,
                                struct gl_shader_program *prog,
                                struct gl_linked_shader *producer,
                                struct gl_linked_shader *consumer,
                                unsigned num_xfb_decls,
                                struct xfb_decl *xfb_decls,
                                const uint64_t reserved_slots,
                                struct varying_matches *vm)
 {
    init_varying_matches(mem_ctx, vm, consts, exts,
                         producer ? producer->Stage : MESA_SHADER_NONE,
                         consumer ? consumer->Stage : MESA_SHADER_NONE,
                         prog->SeparateShader);

    /* Regather varying matches as we ran optimisations and the previous pointers
     * are no longer valid.
     */
    if (producer) {
       nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
          if (var_out->data.location < VARYING_SLOT_VAR0 ||
              var_out->data.explicit_location)
             continue;

          if (vm->num_matches == vm->matches_capacity) {
             vm->matches_capacity *= 2;
             vm->matches = (struct match *)
                reralloc(mem_ctx, vm->matches, struct match,
                         vm->matches_capacity);
          }

          vm->matches[vm->num_matches].packing_class
             = varying_matches_compute_packing_class(var_out);
          vm->matches[vm->num_matches].packing_order
             = varying_matches_compute_packing_order(var_out);

          vm->matches[vm->num_matches].producer_var = var_out;
          vm->matches[vm->num_matches].consumer_var = NULL;
          vm->num_matches++;
       }

       /* Regather xfb varyings too */
       for (unsigned i = 0; i < num_xfb_decls; i++) {
          if (!xfb_decl_is_varying(&xfb_decls[i]))
             continue;

          /* Varying pointer was already reset */
          if (xfb_decls[i].matched_candidate->initial_location == -1)
             continue;

          bool UNUSED is_reset = false;
          bool UNUSED no_outputs = true;
          nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
             no_outputs = false;
             assert(var_out->data.location != -1);
             if (var_out->data.location ==
                 xfb_decls[i].matched_candidate->initial_location &&
                 var_out->data.location_frac ==
                 xfb_decls[i].matched_candidate->initial_location_frac) {
                xfb_decls[i].matched_candidate->toplevel_var = var_out;
                xfb_decls[i].matched_candidate->initial_location = -1;
                is_reset = true;
                break;
             }
          }
          assert(is_reset || no_outputs);
       }
    }

    bool found_match = false;
    if (consumer) {
       nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
          if (var_in->data.location < VARYING_SLOT_VAR0 ||
              var_in->data.explicit_location)
             continue;

          found_match = false;
          for (unsigned i = 0; i < vm->num_matches; i++) {
             if (vm->matches[i].producer_var &&
                 (vm->matches[i].producer_var->data.location == var_in->data.location &&
                  vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {

                vm->matches[i].consumer_var = var_in;
                found_match = true;
                break;
             }
          }
          if (!found_match) {
             if (vm->num_matches == vm->matches_capacity) {
                vm->matches_capacity *= 2;
                vm->matches = (struct match *)
                   reralloc(mem_ctx, vm->matches, struct match,
                            vm->matches_capacity);
             }

             vm->matches[vm->num_matches].packing_class
                = varying_matches_compute_packing_class(var_in);
             vm->matches[vm->num_matches].packing_order
                = varying_matches_compute_packing_order(var_in);

             vm->matches[vm->num_matches].producer_var = NULL;
             vm->matches[vm->num_matches].consumer_var = var_in;
             vm->num_matches++;
          }
       }
    }

    uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
    const unsigned slots_used =
       varying_matches_assign_locations(vm, prog, components, reserved_slots);
    varying_matches_store_locations(vm);

    for (unsigned i = 0; i < num_xfb_decls; ++i) {
       if (xfb_decl_is_varying(&xfb_decls[i])) {
          if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog,
              vm->disable_varying_packing, vm->xfb_enabled))
             return false;
       }
    }

    if (producer) {
       gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
                                    nir_var_shader_out, 0, producer,
                                    vm->disable_varying_packing,
                                    vm->disable_xfb_packing, vm->xfb_enabled);
       nir_lower_pack(producer->Program->nir);
    }

    if (consumer) {
       unsigned consumer_vertices = 0;
       if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
          consumer_vertices = consumer->Program->nir->info.gs.vertices_in;

       gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
                                    nir_var_shader_in, consumer_vertices,
                                    consumer, vm->disable_varying_packing,
                                    vm->disable_xfb_packing, vm->xfb_enabled);
       nir_lower_pack(consumer->Program->nir);
    }

    return true;
 }

 /**
  * Verify that the outputs of \p producer fit within the stage's
  * MaxOutputComponents limit.
  *
  * \param num_explicit_locations  starting vector count; outputs with an
  *        explicit location are not counted again by the loop below.
  *
  * \return true when the limit is respected.  Otherwise a linker error is
  *         recorded on \p prog and false is returned.
  */
 static bool
 check_against_output_limit(const struct gl_constants *consts, gl_api api,
                            struct gl_shader_program *prog,
                            struct gl_linked_shader *producer,
                            unsigned num_explicit_locations)
 {
    unsigned output_vectors = num_explicit_locations;

    nir_foreach_shader_out_variable(var, producer->Program->nir) {
       if (var->data.explicit_location)
          continue;

       if (!var_counts_against_varying_limit(producer->Stage, var))
          continue;

       /* outputs for fragment shader can't be doubles */
       output_vectors += glsl_count_attribute_slots(var->type, false);
    }

    assert(producer->Stage != MESA_SHADER_FRAGMENT);
    const unsigned max_output_components =
       consts->Program[producer->Stage].MaxOutputComponents;

    /* The limit is expressed in components; each counted slot is 4 of them. */
    const unsigned output_components = output_vectors * 4;
    if (output_components <= max_output_components)
       return true;

    const char *const stage_name =
       _mesa_shader_stage_to_string(producer->Stage);

    /* ES programs get the message in vectors, everything else in
     * components.
     */
    if (api == API_OPENGLES2 || prog->IsES) {
       linker_error(prog, "%s shader uses too many output vectors "
                    "(%u > %u)\n",
                    stage_name,
                    output_vectors,
                    max_output_components / 4);
    } else {
       linker_error(prog, "%s shader uses too many output components "
                    "(%u > %u)\n",
                    stage_name,
                    output_components,
                    max_output_components);
    }

    return false;
 }

 /**
  * Verify that the inputs of \p consumer fit within the stage's
  * MaxInputComponents limit.
  *
  * \param num_explicit_locations  starting vector count; inputs with an
  *        explicit location are not counted again by the loop below.
  *
  * \return true when the limit is respected.  Otherwise a linker error is
  *         recorded on \p prog and false is returned.
  */
 static bool
 check_against_input_limit(const struct gl_constants *consts, gl_api api,
                           struct gl_shader_program *prog,
                           struct gl_linked_shader *consumer,
                           unsigned num_explicit_locations)
 {
    unsigned input_vectors = num_explicit_locations;

    nir_foreach_shader_in_variable(var, consumer->Program->nir) {
       if (var->data.explicit_location)
          continue;

       if (!var_counts_against_varying_limit(consumer->Stage, var))
          continue;

       /* vertex inputs aren't varying counted */
       input_vectors += glsl_count_attribute_slots(var->type, false);
    }

    assert(consumer->Stage != MESA_SHADER_VERTEX);
    const unsigned max_input_components =
       consts->Program[consumer->Stage].MaxInputComponents;

    /* The limit is expressed in components; each counted slot is 4 of them. */
    const unsigned input_components = input_vectors * 4;
    if (input_components <= max_input_components)
       return true;

    const char *const stage_name =
       _mesa_shader_stage_to_string(consumer->Stage);

    /* ES programs get the message in vectors, everything else in
     * components.
     */
    if (api == API_OPENGLES2 || prog->IsES) {
       linker_error(prog, "%s shader uses too many input vectors "
                    "(%u > %u)\n",
                    stage_name,
                    input_vectors,
                    max_input_components / 4);
    } else {
       linker_error(prog, "%s shader uses too many input components "
                    "(%u > %u)\n",
                    stage_name,
                    input_components,
                    max_input_components);
    }

    return false;
 }

 /**
  * Lower unset/unused inputs/outputs of one stage.
  *
  * Every variable of the requested \p mode whose location is still -1, and
  * which is not flagged is_xfb_only, is turned into a nir_var_shader_temp at
  * location 0.  If at least one variable was converted,
  * fixup_vars_lowered_to_temp() is run on the shader afterwards.
  *
  * \param prog   program whose linked shader for \p stage is processed.
  * \param stage  index into prog->_LinkedShaders; the entry is dereferenced
  *               without a NULL check.
  * \param mode   variable mode(s) to visit.
  */
 static void
 remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
                                         unsigned stage, nir_variable_mode mode)
 {
    nir_shader *const nir = prog->_LinkedShaders[stage]->Program->nir;
    bool demoted_any = false;

    nir_foreach_variable_with_modes_safe(var, nir, mode) {
       /* Keep xfb-only variables and anything that has a location. */
       if (var->data.is_xfb_only || var->data.location != -1)
          continue;

       var->data.location = 0;
       var->data.mode = nir_var_shader_temp;
       demoted_any = true;
    }

    if (demoted_any)
       fixup_vars_lowered_to_temp(nir, mode);
 }

 /**
  * Link the varyings between all linked stages of \p prog and resolve its
  * transform feedback declarations.
  *
  * The function works in this order:
  *  - collect the transform feedback declarations, either from xfb layout
  *    qualifiers of the last linked stage before the fragment shader or from
  *    prog->TransformFeedback, and parse them;
  *  - assign initial (temporary) varying locations and run link_shader_opts()
  *    on every adjacent pair of linked stages, walking from the last pair to
  *    the first;
  *  - assign final varying locations and check the per-stage input/output
  *    limits;
  *  - store the resulting transform feedback info.
  *
  * \param prog     program being linked.
  * \param first    index of the first linked stage.
  * \param last     index of the last linked stage; when it is greater than
  *                 MESA_SHADER_FRAGMENT the function returns true without
  *                 doing anything.
  * \param consts   context constants.
  * \param exts     enabled extensions.
  * \param api      API the program is linked for.
  * \param mem_ctx  ralloc context used for the temporary allocations.
  *
  * \return false if any step fails; otherwise whether prog->data->LinkStatus
  *         differs from LINKING_FAILURE.
  */
 static bool
 link_varyings(struct gl_shader_program *prog, unsigned first,
               unsigned last, const struct gl_constants *consts,
               const struct gl_extensions *exts, gl_api api, void *mem_ctx)
 {
    bool has_xfb_qualifiers = false;
    unsigned num_xfb_decls = 0;
    char **varying_names = NULL;
    bool compact_arrays = false;
    struct xfb_decl *xfb_decls = NULL;

    /* Nothing to link for stages past the fragment shader. */
    if (last > MESA_SHADER_FRAGMENT)
       return true;

    /* From the ARB_enhanced_layouts spec:
     *
     *    "If the shader used to record output variables for transform feedback
     *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
     *    qualifiers, the values specified by TransformFeedbackVaryings are
     *    ignored, and the set of variables captured for transform feedback is
     *    instead derived from the specified layout qualifiers."
     */
    for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
       /* Find last stage before fragment shader */
       if (prog->_LinkedShaders[i]) {
          has_xfb_qualifiers =
             process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
                                           prog, &num_xfb_decls,
                                           &varying_names,
                                           &compact_arrays);
          break;
       }
    }

    /* Without xfb layout qualifiers, fall back to the varyings stored in
     * prog->TransformFeedback.
     */
    if (!has_xfb_qualifiers) {
       num_xfb_decls = prog->TransformFeedback.NumVarying;
       varying_names = prog->TransformFeedback.VaryingNames;
    }

    if (num_xfb_decls != 0) {
       /* From GL_EXT_transform_feedback:
        *   A program will fail to link if:
        *
        *   * the <count> specified by TransformFeedbackVaryingsEXT is
        *     non-zero, but the program object has no vertex or geometry
        *     shader;
        */
       if (first >= MESA_SHADER_FRAGMENT) {
          linker_error(prog, "Transform feedback varyings specified, but "
                       "no vertex, tessellation, or geometry shader is "
                       "present.\n");
          return false;
       }

       xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl,
                                       num_xfb_decls);
       if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
                            varying_names, xfb_decls, compact_arrays))
          return false;
    }

    /* Build a dense array of the linked stages, in stage order, so adjacent
     * entries form the producer/consumer pairs walked below.
     */
    struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
    unsigned num_shaders = 0;

    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       if (prog->_LinkedShaders[i])
          linked_shader[num_shaders++] = prog->_LinkedShaders[i];
    }

    /* The same varying_matches object is passed to every
     * assign_initial/final_varying_locations() call below.
     */
    struct varying_matches vm;
    /* Outputs of the last stage when there is no fragment shader consumer:
     * needed for transform feedback and for separable programs.
     */
    if (last < MESA_SHADER_FRAGMENT &&
        (num_xfb_decls != 0 || prog->SeparateShader)) {
          struct gl_linked_shader *producer = prog->_LinkedShaders[last];
          if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                                producer, NULL, num_xfb_decls,
                                                xfb_decls, &vm))
             return false;
    }

    /* For non-separable programs, lower the unused inputs of the first stage
     * and the unused outputs of the last stage.
     */
    if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
       remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
       remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
    }

    /* Separable programs: inputs of the first linked stage, with no
     * producer.
     */
    if (prog->SeparateShader) {
       struct gl_linked_shader *consumer = linked_shader[0];
       if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
                                             consumer, 0, NULL, &vm))
          return false;
    }

    if (num_shaders == 1) {
       /* Linking shaders also optimizes them. Separate shaders, compute shaders
        * and shaders with a fixed-func VS or FS that don't need linking are
        * optimized here.
        */
       gl_nir_opts(linked_shader[0]->Program->nir);
    } else {
       /* Linking the stages in the opposite order (from fragment to vertex)
        * ensures that inter-shader outputs written to in an earlier stage
        * are eliminated if they are (transitively) not used in a later
        * stage.
        */
       for (int i = num_shaders - 2; i >= 0; i--) {
          /* The xfb declarations are only handed to the pair whose consumer
           * is the fragment shader.
           */
          unsigned stage_num_xfb_decls =
             linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
             num_xfb_decls : 0;

          if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                                linked_shader[i],
                                                linked_shader[i + 1],
                                                stage_num_xfb_decls, xfb_decls,
                                                &vm))
             return false;

          /* Now that validation is done it's safe to remove unused varyings.
           * As we have both a producer and consumer it's safe to remove unused
           * varyings even if the program is a SSO because the stages are being
           * linked together i.e. we have a multi-stage SSO.
           */
          link_shader_opts(&vm, linked_shader[i]->Program->nir,
                           linked_shader[i + 1]->Program->nir,
                           prog, mem_ctx);

          remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
                                                  nir_var_shader_out);
          remove_unused_shader_inputs_and_outputs(prog,
                                                  linked_shader[i + 1]->Stage,
                                                  nir_var_shader_in);
       }
    }

    if (!prog->SeparateShader) {
       /* If not SSO remove unused varyings from the first/last stage */
       NIR_PASS(_, prog->_LinkedShaders[first]->Program->nir,
                  nir_remove_dead_variables, nir_var_shader_in, NULL);
       NIR_PASS(_, prog->_LinkedShaders[last]->Program->nir,
                  nir_remove_dead_variables, nir_var_shader_out, NULL);
    } else {
       /* Sort inputs / outputs into a canonical order.  This is necessary so
        * that inputs / outputs of separable shaders will be assigned
        * predictable locations regardless of the order in which declarations
        * appeared in the shader source.
        */
       if (first != MESA_SHADER_VERTEX) {
          canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
                                 nir_var_shader_in);
       }

       if (last != MESA_SHADER_FRAGMENT) {
          canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
                                 nir_var_shader_out);
       }
    }

    /* If there is no fragment shader we need to set transform feedback.
     *
     * For SSO we also need to assign output locations.  We assign them here
     * because we need to do it for both single stage programs and multi stage
     * programs.
     */
    if (last < MESA_SHADER_FRAGMENT &&
        (num_xfb_decls != 0 || prog->SeparateShader)) {
       const uint64_t reserved_out_slots =
          reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
       if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                           prog->_LinkedShaders[last], NULL,
                                           num_xfb_decls, xfb_decls,
                                           reserved_out_slots, &vm))
          return false;
    }

    if (prog->SeparateShader) {
       struct gl_linked_shader *const sh = prog->_LinkedShaders[first];

       const uint64_t reserved_slots =
          reserved_varying_slot(sh, nir_var_shader_in);

       /* Assign input locations for SSO, output locations are already
        * assigned.
        */
       if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                           NULL /* producer */,
                                           sh /* consumer */,
                                           0 /* num_xfb_decls */,
                                           NULL /* xfb_decls */,
                                           reserved_slots, &vm))
          return false;
    }

    if (num_shaders == 1) {
       /* The single stage is handled once as a consumer without a producer
        * and once as a producer without a consumer; only the latter call
        * receives the xfb declarations.
        */
       gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
                                        0, NULL);
       gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
                                        num_xfb_decls, xfb_decls);
    } else {
       /* Linking the stages in the opposite order (from fragment to vertex)
        * ensures that inter-shader outputs written to in an earlier stage
        * are eliminated if they are (transitively) not used in a later
        * stage.
        */
       int next = last;
       for (int i = next - 1; i >= 0; i--) {
          /* Skip stages without a linked shader, except stage 0: it is always
           * processed, so sh_i may be NULL below.
           */
          if (prog->_LinkedShaders[i] == NULL && i != 0)
             continue;

          struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
          struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];

          gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
                                           next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                                           xfb_decls);

          const uint64_t reserved_out_slots =
             reserved_varying_slot(sh_i, nir_var_shader_out);
          const uint64_t reserved_in_slots =
             reserved_varying_slot(sh_next, nir_var_shader_in);

          if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
                    sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                    xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
             return false;

          /* This must be done after all dead varyings are eliminated. */
          if (sh_i != NULL) {
             /* The number of set bits in the reserved mask is passed as the
              * explicit-location count.
              */
             unsigned slots_used = util_bitcount64(reserved_out_slots);
             if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
                return false;
          }

          unsigned slots_used = util_bitcount64(reserved_in_slots);
          if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
             return false;

          /* The stage just handled as producer becomes the next consumer. */
          next = i;
       }
    }

    if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
                              has_xfb_qualifiers, mem_ctx))
       return false;

    return prog->data->LinkStatus != LINKING_FAILURE;
 }

 /**
  * Run assign_attribute_or_color_locations() for the vertex stage and then
  * for the fragment stage, using a temporary ralloc context that is released
  * before returning.
  *
  * The fragment stage is only attempted when the vertex stage succeeded.
  *
  * \return true if both assignments succeeded, false otherwise.
  */
 bool
 gl_assign_attribute_or_color_locations(const struct gl_constants *consts,
                                        struct gl_shader_program *prog)
 {
    void *mem_ctx = ralloc_context(NULL);

    /* && short-circuits, so a vertex failure skips the fragment call. */
    const bool assigned =
       assign_attribute_or_color_locations(mem_ctx, prog, consts,
                                           MESA_SHADER_VERTEX, true) &&
       assign_attribute_or_color_locations(mem_ctx, prog, consts,
                                           MESA_SHADER_FRAGMENT, true);

    ralloc_free(mem_ctx);
    return assigned;
 }

 /**
  * Entry point for linking the varyings of \p prog.
  *
  * Determines the first and last linked stage, runs link_varyings() and, on
  * success, records the transform feedback state on the NIR shaders before
  * handing over to gl_nir_lower_optimize_varyings().  All temporary
  * allocations live in a ralloc context that is freed before returning.
  *
  * \return the result of link_varyings().
  */
 bool
 gl_nir_link_varyings(const struct gl_constants *consts,
                      const struct gl_extensions *exts,
                      gl_api api, struct gl_shader_program *prog)
 {
    void *mem_ctx = ralloc_context(NULL);

    MESA_TRACE_FUNC();

    /* The program resource list has to be initialised up front because the
     * varying packing pass may start inserting varyings onto the list.
     */
    init_program_resource_list(prog);

    /* Locate the first and the last stage that has a linked shader. */
    unsigned first = MESA_SHADER_STAGES;
    unsigned last = 0;
    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
       if (prog->_LinkedShaders[stage]) {
          if (first == MESA_SHADER_STAGES)
             first = stage;
          last = stage;
       }
    }

    const bool linked =
       link_varyings(prog, first, last, consts, exts, api, mem_ctx);
    if (!linked) {
       ralloc_free(mem_ctx);
       return false;
    }

    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
       struct gl_linked_shader *const sh = prog->_LinkedShaders[stage];
       if (!sh)
          continue;

       /* Transform feedback varyings specified via the API ... */
       bool has_xfb_varyings = prog->TransformFeedback.NumVarying > 0;

       /* ... or specified in the shader itself. */
       if (prog->last_vert_prog) {
          has_xfb_varyings |=
             prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
       }

       sh->Program->nir->info.has_transform_feedback_varyings =
          has_xfb_varyings;
    }

    /* Attach the NIR XFB info to the last stage before the fragment shader,
     * never picking the tessellation control stage.
     */
    for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
       struct gl_linked_shader *const sh = prog->_LinkedShaders[stage];
       if (!sh || stage == MESA_SHADER_TESS_CTRL)
          continue;

       sh->Program->nir->xfb_info =
          gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
                             sh->Program->nir);
       break;
    }

    /* Lower IO and thoroughly optimize and compact varyings. */
    gl_nir_lower_optimize_varyings(consts, prog, false);

    ralloc_free(mem_ctx);
    return true;
 }