Merge remote-tracking branch 'public/master' into vulkan

author Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
diff --cc configure.ac
Simple merge
diff --cc src/compiler/Makefile.sources
Simple merge
diff --cc src/compiler/glsl/Makefile.sources
Simple merge
diff --cc src/compiler/glsl/glsl_parser_extras.cpp
Simple merge
diff --cc src/compiler/nir/glsl_to_nir.cpp
Simple merge
diff --cc src/compiler/nir/nir.h
Simple merge
diff --cc src/compiler/nir/nir_builder.h
Simple merge
diff --cc src/compiler/nir/nir_lower_io.c
Simple merge
diff --cc src/compiler/nir/nir_lower_system_values.c

index 79f6bed,2bd787d..c1cd139
--- 1/src/compiler/nir/nir_lower_system_values.c
--- 2/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@@ -55,77 -55,9 +55,77 @@@ convert_block(nir_block *block, void *v
   
         b->cursor = nir_after_instr(&load_var->instr);
   
- -      nir_intrinsic_op sysval_op =
- -         nir_intrinsic_from_system_value(var->data.location);
- -      nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0);
+ +      nir_ssa_def *sysval;
+ +      switch (var->data.location) {
+ +      case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
+ +         /* From the GLSL man page for gl_GlobalInvocationID:
+ +          *
+ +          *    "The value of gl_GlobalInvocationID is equal to
+ +          *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
+ +          */
+ +
+ +         nir_const_value local_size;
-          local_size.u[0] = b->shader->info.cs.local_size[0];
-          local_size.u[1] = b->shader->info.cs.local_size[1];
-          local_size.u[2] = b->shader->info.cs.local_size[2];
++         local_size.u32[0] = b->shader->info.cs.local_size[0];
++         local_size.u32[1] = b->shader->info.cs.local_size[1];
++         local_size.u32[2] = b->shader->info.cs.local_size[2];
+ +
+ +         nir_ssa_def *group_id =
+ +            nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
+ +         nir_ssa_def *local_id =
+ +            nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
+ +
+ +         sysval = nir_iadd(b, nir_imul(b, group_id,
+ +                                          nir_build_imm(b, 3, local_size)),
+ +                              local_id);
+ +         break;
+ +      }
+ +
+ +      case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
+ +         /* From the GLSL man page for gl_LocalInvocationIndex:
+ +          *
+ +          *    "The value of gl_LocalInvocationIndex is equal to
+ +          *    gl_LocalInvocationID.z * gl_WorkGroupSize.x *
+ +          *    gl_WorkGroupSize.y + gl_LocalInvocationID.y *
+ +          *    gl_WorkGroupSize.x + gl_LocalInvocationID.x"
+ +          */
+ +         nir_ssa_def *local_id =
+ +            nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
+ +
+ +         unsigned stride_y = b->shader->info.cs.local_size[0];
+ +         unsigned stride_z = b->shader->info.cs.local_size[0] *
+ +                             b->shader->info.cs.local_size[1];
+ +
+ +         sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2),
+ +                                          nir_imm_int(b, stride_z)),
+ +                              nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1),
+ +                                                      nir_imm_int(b, stride_y)),
+ +                                          nir_channel(b, local_id, 0)));
+ +         break;
+ +      }
+ +
+ +      case SYSTEM_VALUE_VERTEX_ID:
+ +         if (b->shader->options->vertex_id_zero_based) {
+ +            sysval = nir_iadd(b,
+ +               nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0),
+ +               nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0));
+ +         } else {
+ +            sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0);
+ +         }
+ +         break;
+ +
+ +      case SYSTEM_VALUE_INSTANCE_INDEX:
+ +         sysval = nir_iadd(b,
+ +            nir_load_system_value(b, nir_intrinsic_load_instance_id, 0),
+ +            nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
+ +         break;
+ +
+ +      default: {
+ +         nir_intrinsic_op sysval_op =
+ +            nir_intrinsic_from_system_value(var->data.location);
+ +         sysval = nir_load_system_value(b, sysval_op, 0);
+ +         break;
+ +      } /* default */
+ +      }
   
         nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
         nir_instr_remove(&load_var->instr);
diff --cc src/compiler/nir/nir_opcodes.py

index 60ade4a,553f924..d6b658d
--- 1/src/compiler/nir/nir_opcodes.py
--- 2/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@@ -170,13 -174,12 +174,13 @@@ unop_convert("u2f", tfloat32, tuint32, 
   # Unary floating-point rounding operations.
   
   
- unop("ftrunc", tfloat, "truncf(src0)")
- unop("fceil", tfloat, "ceilf(src0)")
- unop("ffloor", tfloat, "floorf(src0)")
- unop("ffract", tfloat, "src0 - floorf(src0)")
- unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+ unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
+ unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
+ unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
+ unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
+ unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
   
+ +unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
   
   # Trigonometric operations.
   
diff --cc src/compiler/nir/nir_opt_algebraic.py

index 54f7d86,5363323..ed21c5d
--- 1/src/compiler/nir/nir_opt_algebraic.py
--- 2/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@@ -75,27 -81,29 +82,30 @@@ optimizations = 
      (('imul', a, 1), a),
      (('fmul', a, -1.0), ('fneg', a)),
      (('imul', a, -1), ('ineg', a)),
-    (('ffma', 0.0, a, b), b),
-    (('ffma', a, 0.0, b), b),
-    (('ffma', a, b, 0.0), ('fmul', a, b)),
+ +   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+    (('~ffma', 0.0, a, b), b),
+    (('~ffma', a, 0.0, b), b),
+    (('~ffma', a, b, 0.0), ('fmul', a, b)),
      (('ffma', a, 1.0, b), ('fadd', a, b)),
      (('ffma', 1.0, a, b), ('fadd', a, b)),
-    (('flrp', a, b, 0.0), a),
-    (('flrp', a, b, 1.0), b),
-    (('flrp', a, a, b), a),
-    (('flrp', 0.0, a, b), ('fmul', a, b)),
+    (('~flrp', a, b, 0.0), a),
+    (('~flrp', a, b, 1.0), b),
+    (('~flrp', a, a, b), a),
+    (('~flrp', 0.0, a, b), ('fmul', a, b)),
+    (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
      (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
      (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
-    (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
-    (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+    (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
+    (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp'),
+    (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
+    (('~fadd', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
      (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
-    (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+    (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
      # Comparison simplifications
-    (('inot', ('flt', a, b)), ('fge', a, b)),
-    (('inot', ('fge', a, b)), ('flt', a, b)),
-    (('inot', ('feq', a, b)), ('fne', a, b)),
-    (('inot', ('fne', a, b)), ('feq', a, b)),
+    (('~inot', ('flt', a, b)), ('fge', a, b)),
+    (('~inot', ('fge', a, b)), ('flt', a, b)),
+    (('~inot', ('feq', a, b)), ('fne', a, b)),
+    (('~inot', ('fne', a, b)), ('feq', a, b)),
      (('inot', ('ilt', a, b)), ('ige', a, b)),
      (('inot', ('ige', a, b)), ('ilt', a, b)),
      (('inot', ('ieq', a, b)), ('ine', a, b)),
diff --cc src/compiler/nir/nir_print.c
Simple merge
diff --cc src/compiler/nir/spirv/spirv_to_nir.c

index 5a7184a,0000000..42a1f95

mode 100644,000000..100644
--- 1/src/compiler/nir/spirv/spirv_to_nir.c
--- /dev/null
+++ b/src/compiler/nir/spirv/spirv_to_nir.c
@@@ -1,2704 -1,0 +1,2712 @@@
-             load->value.u[i] = constant->value.u[i];
+ +/*
+ + * Copyright © 2015 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + *
+ + * Authors:
+ + *    Jason Ekstrand (jason@jlekstrand.net)
+ + *
+ + */
+ +
+ +#include "vtn_private.h"
+ +#include "nir/nir_vla.h"
+ +#include "nir/nir_control_flow.h"
+ +#include "nir/nir_constant_expressions.h"
+ +
+ +static struct vtn_ssa_value *
+ +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
+ +{
+ +   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+ +   val->type = type;
+ +
+ +   if (glsl_type_is_vector_or_scalar(type)) {
+ +      unsigned num_components = glsl_get_vector_elements(val->type);
+ +      nir_ssa_undef_instr *undef =
+ +         nir_ssa_undef_instr_create(b->shader, num_components);
+ +
+ +      nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr);
+ +      val->def = &undef->def;
+ +   } else {
+ +      unsigned elems = glsl_get_length(val->type);
+ +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ +      if (glsl_type_is_matrix(type)) {
+ +         const struct glsl_type *elem_type =
+ +            glsl_vector_type(glsl_get_base_type(type),
+ +                             glsl_get_vector_elements(type));
+ +
+ +         for (unsigned i = 0; i < elems; i++)
+ +            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
+ +      } else if (glsl_type_is_array(type)) {
+ +         const struct glsl_type *elem_type = glsl_get_array_element(type);
+ +         for (unsigned i = 0; i < elems; i++)
+ +            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
+ +      } else {
+ +         for (unsigned i = 0; i < elems; i++) {
+ +            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
+ +            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
+ +         }
+ +      }
+ +   }
+ +
+ +   return val;
+ +}
+ +
+ +static struct vtn_ssa_value *
+ +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
+ +                    const struct glsl_type *type)
+ +{
+ +   struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
+ +
+ +   if (entry)
+ +      return entry->data;
+ +
+ +   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+ +   val->type = type;
+ +
+ +   switch (glsl_get_base_type(type)) {
+ +   case GLSL_TYPE_INT:
+ +   case GLSL_TYPE_UINT:
+ +   case GLSL_TYPE_BOOL:
+ +   case GLSL_TYPE_FLOAT:
+ +   case GLSL_TYPE_DOUBLE:
+ +      if (glsl_type_is_vector_or_scalar(type)) {
+ +         unsigned num_components = glsl_get_vector_elements(val->type);
+ +         nir_load_const_instr *load =
+ +            nir_load_const_instr_create(b->shader, num_components);
+ +
+ +         for (unsigned i = 0; i < num_components; i++)
-                load->value.u[j] = constant->value.u[rows * i + j];
++            load->value.u32[i] = constant->value.u[i];
+ +
+ +         nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
+ +         val->def = &load->def;
+ +      } else {
+ +         assert(glsl_type_is_matrix(type));
+ +         unsigned rows = glsl_get_vector_elements(val->type);
+ +         unsigned columns = glsl_get_matrix_columns(val->type);
+ +         val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
+ +
+ +         for (unsigned i = 0; i < columns; i++) {
+ +            struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
+ +            col_val->type = glsl_get_column_type(val->type);
+ +            nir_load_const_instr *load =
+ +               nir_load_const_instr_create(b->shader, rows);
+ +
+ +            for (unsigned j = 0; j < rows; j++)
-                src[j].u[k] = c->value.u[k];
++               load->value.u32[j] = constant->value.u[rows * i + j];
+ +
+ +            nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
+ +            col_val->def = &load->def;
+ +
+ +            val->elems[i] = col_val;
+ +         }
+ +      }
+ +      break;
+ +
+ +   case GLSL_TYPE_ARRAY: {
+ +      unsigned elems = glsl_get_length(val->type);
+ +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ +      const struct glsl_type *elem_type = glsl_get_array_element(val->type);
+ +      for (unsigned i = 0; i < elems; i++)
+ +         val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
+ +                                             elem_type);
+ +      break;
+ +   }
+ +
+ +   case GLSL_TYPE_STRUCT: {
+ +      unsigned elems = glsl_get_length(val->type);
+ +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         const struct glsl_type *elem_type =
+ +            glsl_get_struct_field(val->type, i);
+ +         val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
+ +                                             elem_type);
+ +      }
+ +      break;
+ +   }
+ +
+ +   default:
+ +      unreachable("bad constant type");
+ +   }
+ +
+ +   return val;
+ +}
+ +
+ +struct vtn_ssa_value *
+ +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
+ +{
+ +   struct vtn_value *val = vtn_untyped_value(b, value_id);
+ +   switch (val->value_type) {
+ +   case vtn_value_type_undef:
+ +      return vtn_undef_ssa_value(b, val->type->type);
+ +
+ +   case vtn_value_type_constant:
+ +      return vtn_const_ssa_value(b, val->constant, val->const_type);
+ +
+ +   case vtn_value_type_ssa:
+ +      return val->ssa;
+ +
+ +   case vtn_value_type_access_chain:
+ +      /* This is needed for function parameters */
+ +      return vtn_variable_load(b, val->access_chain);
+ +
+ +   default:
+ +      unreachable("Invalid type for an SSA value");
+ +   }
+ +}
+ +
+ +static char *
+ +vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
+ +                   unsigned word_count, unsigned *words_used)
+ +{
+ +   char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words));
+ +   if (words_used) {
+ +      /* Amount of space taken by the string (including the null terminator) */
+ +      unsigned len = strlen(dup) + 1;
+ +      *words_used = DIV_ROUND_UP(len, sizeof(*words));
+ +   }
+ +   return dup;
+ +}
+ +
+ +const uint32_t *
+ +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
+ +                        const uint32_t *end, vtn_instruction_handler handler)
+ +{
+ +   b->file = NULL;
+ +   b->line = -1;
+ +   b->col = -1;
+ +
+ +   const uint32_t *w = start;
+ +   while (w < end) {
+ +      SpvOp opcode = w[0] & SpvOpCodeMask;
+ +      unsigned count = w[0] >> SpvWordCountShift;
+ +      assert(count >= 1 && w + count <= end);
+ +
+ +      switch (opcode) {
+ +      case SpvOpNop:
+ +         break; /* Do nothing */
+ +
+ +      case SpvOpLine:
+ +         b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
+ +         b->line = w[2];
+ +         b->col = w[3];
+ +         break;
+ +
+ +      case SpvOpNoLine:
+ +         b->file = NULL;
+ +         b->line = -1;
+ +         b->col = -1;
+ +         break;
+ +
+ +      default:
+ +         if (!handler(b, opcode, w, count))
+ +            return w;
+ +         break;
+ +      }
+ +
+ +      w += count;
+ +   }
+ +   assert(w == end);
+ +   return w;
+ +}
+ +
+ +static void
+ +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
+ +                     const uint32_t *w, unsigned count)
+ +{
+ +   switch (opcode) {
+ +   case SpvOpExtInstImport: {
+ +      struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
+ +      if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
+ +         val->ext_handler = vtn_handle_glsl450_instruction;
+ +      } else {
+ +         assert(!"Unsupported extension");
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpExtInst: {
+ +      struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
+ +      bool handled = val->ext_handler(b, w[4], w, count);
+ +      (void)handled;
+ +      assert(handled);
+ +      break;
+ +   }
+ +
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +}
+ +
+ +static void
+ +_foreach_decoration_helper(struct vtn_builder *b,
+ +                           struct vtn_value *base_value,
+ +                           int parent_member,
+ +                           struct vtn_value *value,
+ +                           vtn_decoration_foreach_cb cb, void *data)
+ +{
+ +   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
+ +      int member;
+ +      if (dec->scope == VTN_DEC_DECORATION) {
+ +         member = parent_member;
+ +      } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
+ +         assert(parent_member == -1);
+ +         member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
+ +      } else {
+ +         /* Not a decoration */
+ +         continue;
+ +      }
+ +
+ +      if (dec->group) {
+ +         assert(dec->group->value_type == vtn_value_type_decoration_group);
+ +         _foreach_decoration_helper(b, base_value, member, dec->group,
+ +                                    cb, data);
+ +      } else {
+ +         cb(b, base_value, member, dec, data);
+ +      }
+ +   }
+ +}
+ +
+ +/** Iterates (recursively if needed) over all of the decorations on a value
+ + *
+ + * This function iterates over all of the decorations applied to a given
+ + * value.  If it encounters a decoration group, it recurses into the group
+ + * and iterates over all of those decorations as well.
+ + */
+ +void
+ +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
+ +                       vtn_decoration_foreach_cb cb, void *data)
+ +{
+ +   _foreach_decoration_helper(b, value, -1, value, cb, data);
+ +}
+ +
+ +void
+ +vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
+ +                           vtn_execution_mode_foreach_cb cb, void *data)
+ +{
+ +   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
+ +      if (dec->scope != VTN_DEC_EXECUTION_MODE)
+ +         continue;
+ +
+ +      assert(dec->group == NULL);
+ +      cb(b, value, dec, data);
+ +   }
+ +}
+ +
+ +static void
+ +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
+ +                      const uint32_t *w, unsigned count)
+ +{
+ +   const uint32_t *w_end = w + count;
+ +   const uint32_t target = w[1];
+ +   w += 2;
+ +
+ +   switch (opcode) {
+ +   case SpvOpDecorationGroup:
+ +      vtn_push_value(b, target, vtn_value_type_decoration_group);
+ +      break;
+ +
+ +   case SpvOpDecorate:
+ +   case SpvOpMemberDecorate:
+ +   case SpvOpExecutionMode: {
+ +      struct vtn_value *val = &b->values[target];
+ +
+ +      struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
+ +      switch (opcode) {
+ +      case SpvOpDecorate:
+ +         dec->scope = VTN_DEC_DECORATION;
+ +         break;
+ +      case SpvOpMemberDecorate:
+ +         dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
+ +         break;
+ +      case SpvOpExecutionMode:
+ +         dec->scope = VTN_DEC_EXECUTION_MODE;
+ +         break;
+ +      default:
+ +         unreachable("Invalid decoration opcode");
+ +      }
+ +      dec->decoration = *(w++);
+ +      dec->literals = w;
+ +
+ +      /* Link into the list */
+ +      dec->next = val->decoration;
+ +      val->decoration = dec;
+ +      break;
+ +   }
+ +
+ +   case SpvOpGroupMemberDecorate:
+ +   case SpvOpGroupDecorate: {
+ +      struct vtn_value *group =
+ +         vtn_value(b, target, vtn_value_type_decoration_group);
+ +
+ +      for (; w < w_end; w++) {
+ +         struct vtn_value *val = vtn_untyped_value(b, *w);
+ +         struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
+ +
+ +         dec->group = group;
+ +         if (opcode == SpvOpGroupDecorate) {
+ +            dec->scope = VTN_DEC_DECORATION;
+ +         } else {
+ +            dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
+ +         }
+ +
+ +         /* Link into the list */
+ +         dec->next = val->decoration;
+ +         val->decoration = dec;
+ +      }
+ +      break;
+ +   }
+ +
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +}
+ +
+ +struct member_decoration_ctx {
+ +   unsigned num_fields;
+ +   struct glsl_struct_field *fields;
+ +   struct vtn_type *type;
+ +};
+ +
+ +/* does a shallow copy of a vtn_type */
+ +
+ +static struct vtn_type *
+ +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
+ +{
+ +   struct vtn_type *dest = ralloc(b, struct vtn_type);
+ +   dest->type = src->type;
+ +   dest->is_builtin = src->is_builtin;
+ +   if (src->is_builtin)
+ +      dest->builtin = src->builtin;
+ +
+ +   if (!glsl_type_is_scalar(src->type)) {
+ +      switch (glsl_get_base_type(src->type)) {
+ +      case GLSL_TYPE_INT:
+ +      case GLSL_TYPE_UINT:
+ +      case GLSL_TYPE_BOOL:
+ +      case GLSL_TYPE_FLOAT:
+ +      case GLSL_TYPE_DOUBLE:
+ +      case GLSL_TYPE_ARRAY:
+ +         dest->row_major = src->row_major;
+ +         dest->stride = src->stride;
+ +         dest->array_element = src->array_element;
+ +         break;
+ +
+ +      case GLSL_TYPE_STRUCT: {
+ +         unsigned elems = glsl_get_length(src->type);
+ +
+ +         dest->members = ralloc_array(b, struct vtn_type *, elems);
+ +         memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *));
+ +
+ +         dest->offsets = ralloc_array(b, unsigned, elems);
+ +         memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned));
+ +         break;
+ +      }
+ +
+ +      default:
+ +         unreachable("unhandled type");
+ +      }
+ +   }
+ +
+ +   return dest;
+ +}
+ +
+ +static struct vtn_type *
+ +mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
+ +{
+ +   type->members[member] = vtn_type_copy(b, type->members[member]);
+ +   type = type->members[member];
+ +
+ +   /* We may have an array of matrices.... Oh, joy! */
+ +   while (glsl_type_is_array(type->type)) {
+ +      type->array_element = vtn_type_copy(b, type->array_element);
+ +      type = type->array_element;
+ +   }
+ +
+ +   assert(glsl_type_is_matrix(type->type));
+ +
+ +   return type;
+ +}
+ +
+ +static void
+ +struct_member_decoration_cb(struct vtn_builder *b,
+ +                            struct vtn_value *val, int member,
+ +                            const struct vtn_decoration *dec, void *void_ctx)
+ +{
+ +   struct member_decoration_ctx *ctx = void_ctx;
+ +
+ +   if (member < 0)
+ +      return;
+ +
+ +   assert(member < ctx->num_fields);
+ +
+ +   switch (dec->decoration) {
+ +   case SpvDecorationRelaxedPrecision:
+ +      break; /* FIXME: Do nothing with this for now. */
+ +   case SpvDecorationNoPerspective:
+ +      ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ +      break;
+ +   case SpvDecorationFlat:
+ +      ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT;
+ +      break;
+ +   case SpvDecorationCentroid:
+ +      ctx->fields[member].centroid = true;
+ +      break;
+ +   case SpvDecorationSample:
+ +      ctx->fields[member].sample = true;
+ +      break;
+ +   case SpvDecorationLocation:
+ +      ctx->fields[member].location = dec->literals[0];
+ +      break;
+ +   case SpvDecorationBuiltIn:
+ +      ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
+ +      ctx->type->members[member]->is_builtin = true;
+ +      ctx->type->members[member]->builtin = dec->literals[0];
+ +      ctx->type->builtin_block = true;
+ +      break;
+ +   case SpvDecorationOffset:
+ +      ctx->type->offsets[member] = dec->literals[0];
+ +      break;
+ +   case SpvDecorationMatrixStride:
+ +      mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0];
+ +      break;
+ +   case SpvDecorationColMajor:
+ +      break; /* Nothing to do here.  Column-major is the default. */
+ +   case SpvDecorationRowMajor:
+ +      mutable_matrix_member(b, ctx->type, member)->row_major = true;
+ +      break;
+ +   default:
+ +      unreachable("Unhandled member decoration");
+ +   }
+ +}
+ +
+ +static void
+ +type_decoration_cb(struct vtn_builder *b,
+ +                   struct vtn_value *val, int member,
+ +                    const struct vtn_decoration *dec, void *ctx)
+ +{
+ +   struct vtn_type *type = val->type;
+ +
+ +   if (member != -1)
+ +      return;
+ +
+ +   switch (dec->decoration) {
+ +   case SpvDecorationArrayStride:
+ +      type->stride = dec->literals[0];
+ +      break;
+ +   case SpvDecorationBlock:
+ +      type->block = true;
+ +      break;
+ +   case SpvDecorationBufferBlock:
+ +      type->buffer_block = true;
+ +      break;
+ +   case SpvDecorationGLSLShared:
+ +   case SpvDecorationGLSLPacked:
+ +      /* Ignore these, since we get explicit offsets anyways */
+ +      break;
+ +
+ +   case SpvDecorationStream:
+ +      assert(dec->literals[0] == 0);
+ +      break;
+ +
+ +   default:
+ +      unreachable("Unhandled type decoration");
+ +   }
+ +}
+ +
+ +static unsigned
+ +translate_image_format(SpvImageFormat format)
+ +{
+ +   switch (format) {
+ +   case SpvImageFormatUnknown:      return 0;      /* GL_NONE */
+ +   case SpvImageFormatRgba32f:      return 0x8814; /* GL_RGBA32F */
+ +   case SpvImageFormatRgba16f:      return 0x881A; /* GL_RGBA16F */
+ +   case SpvImageFormatR32f:         return 0x822E; /* GL_R32F */
+ +   case SpvImageFormatRgba8:        return 0x8058; /* GL_RGBA8 */
+ +   case SpvImageFormatRgba8Snorm:   return 0x8F97; /* GL_RGBA8_SNORM */
+ +   case SpvImageFormatRg32f:        return 0x8230; /* GL_RG32F */
+ +   case SpvImageFormatRg16f:        return 0x822F; /* GL_RG16F */
+ +   case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */
+ +   case SpvImageFormatR16f:         return 0x822D; /* GL_R16F */
+ +   case SpvImageFormatRgba16:       return 0x805B; /* GL_RGBA16 */
+ +   case SpvImageFormatRgb10A2:      return 0x8059; /* GL_RGB10_A2 */
+ +   case SpvImageFormatRg16:         return 0x822C; /* GL_RG16 */
+ +   case SpvImageFormatRg8:          return 0x822B; /* GL_RG8 */
+ +   case SpvImageFormatR16:          return 0x822A; /* GL_R16 */
+ +   case SpvImageFormatR8:           return 0x8229; /* GL_R8 */
+ +   case SpvImageFormatRgba16Snorm:  return 0x8F9B; /* GL_RGBA16_SNORM */
+ +   case SpvImageFormatRg16Snorm:    return 0x8F99; /* GL_RG16_SNORM */
+ +   case SpvImageFormatRg8Snorm:     return 0x8F95; /* GL_RG8_SNORM */
+ +   case SpvImageFormatR16Snorm:     return 0x8F98; /* GL_R16_SNORM */
+ +   case SpvImageFormatR8Snorm:      return 0x8F94; /* GL_R8_SNORM */
+ +   case SpvImageFormatRgba32i:      return 0x8D82; /* GL_RGBA32I */
+ +   case SpvImageFormatRgba16i:      return 0x8D88; /* GL_RGBA16I */
+ +   case SpvImageFormatRgba8i:       return 0x8D8E; /* GL_RGBA8I */
+ +   case SpvImageFormatR32i:         return 0x8235; /* GL_R32I */
+ +   case SpvImageFormatRg32i:        return 0x823B; /* GL_RG32I */
+ +   case SpvImageFormatRg16i:        return 0x8239; /* GL_RG16I */
+ +   case SpvImageFormatRg8i:         return 0x8237; /* GL_RG8I */
+ +   case SpvImageFormatR16i:         return 0x8233; /* GL_R16I */
+ +   case SpvImageFormatR8i:          return 0x8231; /* GL_R8I */
+ +   case SpvImageFormatRgba32ui:     return 0x8D70; /* GL_RGBA32UI */
+ +   case SpvImageFormatRgba16ui:     return 0x8D76; /* GL_RGBA16UI */
+ +   case SpvImageFormatRgba8ui:      return 0x8D7C; /* GL_RGBA8UI */
+ +   case SpvImageFormatR32ui:        return 0x8236; /* GL_R32UI */
+ +   case SpvImageFormatRgb10a2ui:    return 0x906F; /* GL_RGB10_A2UI */
+ +   case SpvImageFormatRg32ui:       return 0x823C; /* GL_RG32UI */
+ +   case SpvImageFormatRg16ui:       return 0x823A; /* GL_RG16UI */
+ +   case SpvImageFormatRg8ui:        return 0x8238; /* GL_RG8UI */
+ +   case SpvImageFormatR16ui:        return 0x8234; /* GL_R16UI */
+ +   case SpvImageFormatR8ui:         return 0x8232; /* GL_R8UI */
+ +   default:
+ +      assert(!"Invalid image format");
+ +      return 0;
+ +   }
+ +}
+ +
+ +/* Handles the SPIR-V OpType* instructions.
+ + *
+ + * A value of kind vtn_value_type_type is pushed for the result id in w[1]
+ + * and given a freshly allocated vtn_type whose glsl_type is built from the
+ + * remaining operand words.  OpTypePointer and OpTypeSampledImage are the
+ + * exceptions: they replace that vtn_type with the one belonging to the
+ + * type they refer to.  Type decorations are applied once the switch below
+ + * has finished.
+ + *
+ + * w is the instruction's word array and count its length in words.
+ + */
+ +static void
+ +vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
+ +                const uint32_t *w, unsigned count)
+ +{
+ +   struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type);
+ +
+ +   struct vtn_type *vtype = rzalloc(b, struct vtn_type);
+ +   val->type = vtype;
+ +   vtype->is_builtin = false;
+ +   vtype->val = val;
+ +
+ +   switch (opcode) {
+ +   case SpvOpTypeVoid:
+ +      vtype->type = glsl_void_type();
+ +      break;
+ +
+ +   case SpvOpTypeBool:
+ +      vtype->type = glsl_bool_type();
+ +      break;
+ +
+ +   case SpvOpTypeInt:
+ +      /* w[3] is the signedness operand: non-zero selects a signed int */
+ +      if (w[3])
+ +         vtype->type = glsl_int_type();
+ +      else
+ +         vtype->type = glsl_uint_type();
+ +      break;
+ +
+ +   case SpvOpTypeFloat:
+ +      vtype->type = glsl_float_type();
+ +      break;
+ +
+ +   case SpvOpTypeVector: {
+ +      struct vtn_type *scalar = vtn_value(b, w[2], vtn_value_type_type)->type;
+ +      const unsigned num_comps = w[3];
+ +
+ +      assert(glsl_type_is_scalar(scalar->type));
+ +      vtype->type =
+ +         glsl_vector_type(glsl_get_base_type(scalar->type), num_comps);
+ +
+ +      /* The stride of a vector is implicitly sizeof(base_type), which is
+ +       * always 4 bytes for now.  Supporting doubles or half-floats will
+ +       * require revisiting this.
+ +       */
+ +      vtype->array_element = scalar;
+ +      vtype->stride = 4;
+ +      break;
+ +   }
+ +
+ +   case SpvOpTypeMatrix: {
+ +      struct vtn_type *column = vtn_value(b, w[2], vtn_value_type_type)->type;
+ +      const unsigned num_columns = w[3];
+ +
+ +      assert(glsl_type_is_vector(column->type));
+ +      vtype->type =
+ +         glsl_matrix_type(glsl_get_base_type(column->type),
+ +                          glsl_get_vector_elements(column->type),
+ +                          num_columns);
+ +      assert(!glsl_type_is_error(vtype->type));
+ +
+ +      /* Layout defaults; stride and majorness start out cleared here */
+ +      vtype->stride = 0;
+ +      vtype->row_major = false;
+ +      vtype->array_element = column;
+ +      break;
+ +   }
+ +
+ +   case SpvOpTypeRuntimeArray:
+ +   case SpvOpTypeArray: {
+ +      struct vtn_type *elem_type =
+ +         vtn_value(b, w[2], vtn_value_type_type)->type;
+ +
+ +      /* Unsized (runtime) arrays are denoted by a length of 0; sized
+ +       * arrays read their length from the constant whose id is w[3].
+ +       */
+ +      unsigned length = 0;
+ +      if (opcode != SpvOpTypeRuntimeArray) {
+ +         struct vtn_value *len_val =
+ +            vtn_value(b, w[3], vtn_value_type_constant);
+ +         length = len_val->constant->value.u[0];
+ +      }
+ +
+ +      vtype->type = glsl_array_type(elem_type->type, length);
+ +      vtype->array_element = elem_type;
+ +      vtype->stride = 0;
+ +      break;
+ +   }
+ +
+ +   case SpvOpTypeStruct: {
+ +      /* Every word after the opcode word and the result id is a member */
+ +      const unsigned num_fields = count - 2;
+ +      vtype->members = ralloc_array(b, struct vtn_type *, num_fields);
+ +      vtype->offsets = ralloc_array(b, unsigned, num_fields);
+ +
+ +      NIR_VLA(struct glsl_struct_field, fields, count);
+ +      for (unsigned f = 0; f < num_fields; f++) {
+ +         struct vtn_type *member =
+ +            vtn_value(b, w[f + 2], vtn_value_type_type)->type;
+ +
+ +         vtype->members[f] = member;
+ +         fields[f] = (struct glsl_struct_field) {
+ +            .type = member->type,
+ +            .name = ralloc_asprintf(b, "field%d", f),
+ +            .location = -1,
+ +         };
+ +      }
+ +
+ +      /* Member decorations are handed both the field array and the
+ +       * vtn_type, and run before the glsl struct type is created.
+ +       */
+ +      struct member_decoration_ctx ctx = {
+ +         .num_fields = num_fields,
+ +         .fields = fields,
+ +         .type = vtype
+ +      };
+ +      vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
+ +
+ +      vtype->type = glsl_struct_type(fields, num_fields,
+ +                                     val->name ? val->name : "struct");
+ +      break;
+ +   }
+ +
+ +   case SpvOpTypeFunction: {
+ +      /* w[2] is the return type; every following word is a parameter */
+ +      const unsigned num_params = count - 3;
+ +      const struct glsl_type *ret_type =
+ +         vtn_value(b, w[2], vtn_value_type_type)->type->type;
+ +
+ +      NIR_VLA(struct glsl_function_param, params, num_params);
+ +      for (unsigned p = 0; p < num_params; p++) {
+ +         struct vtn_value *param = vtn_value(b, w[p + 3], vtn_value_type_type);
+ +         params[p].type = param->type->type;
+ +
+ +         /* FIXME: */
+ +         params[p].in = true;
+ +         params[p].out = true;
+ +      }
+ +      vtype->type = glsl_function_type(ret_type, params, num_params);
+ +      break;
+ +   }
+ +
+ +   case SpvOpTypePointer:
+ +      /* FIXME:  For now a pointer type is simply the type it points to
+ +       * (w[3]).  The validator should ensure that the proper number of
+ +       * dereferences happen.
+ +       */
+ +      val->type = vtn_value(b, w[3], vtn_value_type_type)->type;
+ +      break;
+ +
+ +   case SpvOpTypeImage: {
+ +      const struct glsl_type *sampled_type =
+ +         vtn_value(b, w[2], vtn_value_type_type)->type->type;
+ +
+ +      assert(glsl_type_is_vector_or_scalar(sampled_type));
+ +
+ +      enum glsl_sampler_dim dim;
+ +      switch ((SpvDim)w[3]) {
+ +      case SpvDim1D:       dim = GLSL_SAMPLER_DIM_1D;    break;
+ +      case SpvDim2D:       dim = GLSL_SAMPLER_DIM_2D;    break;
+ +      case SpvDim3D:       dim = GLSL_SAMPLER_DIM_3D;    break;
+ +      case SpvDimCube:     dim = GLSL_SAMPLER_DIM_CUBE;  break;
+ +      case SpvDimRect:     dim = GLSL_SAMPLER_DIM_RECT;  break;
+ +      case SpvDimBuffer:   dim = GLSL_SAMPLER_DIM_BUF;   break;
+ +      default:
+ +         unreachable("Invalid SPIR-V Sampler dimension");
+ +      }
+ +
+ +      const bool is_shadow = w[4];
+ +      const bool is_array = w[5];
+ +      const bool multisampled = w[6];
+ +      const unsigned sampled = w[7];
+ +      const SpvImageFormat format = w[8];
+ +
+ +      /* The access qualifier word is optional; default to read-write */
+ +      vtype->access_qualifier =
+ +         count > 9 ? w[9] : SpvAccessQualifierReadWrite;
+ +
+ +      if (multisampled) {
+ +         assert(dim == GLSL_SAMPLER_DIM_2D);
+ +         dim = GLSL_SAMPLER_DIM_MS;
+ +      }
+ +
+ +      vtype->image_format = translate_image_format(format);
+ +
+ +      switch (sampled) {
+ +      case 1:
+ +         vtype->type = glsl_sampler_type(dim, is_shadow, is_array,
+ +                                         glsl_get_base_type(sampled_type));
+ +         break;
+ +      case 2:
+ +         assert(format);
+ +         assert(!is_shadow);
+ +         vtype->type = glsl_image_type(dim, is_array,
+ +                                       glsl_get_base_type(sampled_type));
+ +         break;
+ +      default:
+ +         assert(!"We need to know if the image will be sampled");
+ +         break;
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpTypeSampledImage:
+ +      /* A sampled image re-uses the vtn_type of its image type (w[2]) */
+ +      val->type = vtn_value(b, w[2], vtn_value_type_type)->type;
+ +      break;
+ +
+ +   case SpvOpTypeSampler:
+ +      /* Which sampler type is picked is unimportant since it is discarded
+ +       * as soon as the sampler is combined with an image.  It only has to
+ +       * be a sampler type rather than an integer type so the backend
+ +       * knows what to do.
+ +       */
+ +      vtype->type = glsl_bare_sampler_type();
+ +      break;
+ +
+ +   case SpvOpTypeOpaque:
+ +   case SpvOpTypeEvent:
+ +   case SpvOpTypeDeviceEvent:
+ +   case SpvOpTypeReserveId:
+ +   case SpvOpTypeQueue:
+ +   case SpvOpTypePipe:
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +
+ +   vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
+ +}
+ +
+ +/* Builds the null (all-zero) nir_constant for the given type.
+ + *
+ + * Numeric and boolean base types need nothing beyond the zeroed
+ + * allocation.  Arrays and structs get an elements array whose entries are
+ + * created recursively; all elements of an array share one child constant.
+ + */
+ +static nir_constant *
+ +vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type)
+ +{
+ +   nir_constant *null_const = rzalloc(b, nir_constant);
+ +
+ +   switch (glsl_get_base_type(type)) {
+ +   case GLSL_TYPE_INT:
+ +   case GLSL_TYPE_UINT:
+ +   case GLSL_TYPE_BOOL:
+ +   case GLSL_TYPE_FLOAT:
+ +   case GLSL_TYPE_DOUBLE:
+ +      /* The zeroing allocation above already produced the right value */
+ +      break;
+ +
+ +   case GLSL_TYPE_ARRAY: {
+ +      const unsigned length = glsl_get_length(type);
+ +      assert(length > 0);
+ +
+ +      null_const->num_elements = length;
+ +      null_const->elements = ralloc_array(b, nir_constant *, length);
+ +
+ +      /* Every slot holds the same null value, so build it once and point
+ +       * the remaining slots at it.
+ +       */
+ +      nir_constant *shared =
+ +         vtn_null_constant(b, glsl_get_array_element(type));
+ +      null_const->elements[0] = shared;
+ +      for (unsigned e = 1; e < length; e++)
+ +         null_const->elements[e] = shared;
+ +      break;
+ +   }
+ +
+ +   case GLSL_TYPE_STRUCT: {
+ +      const unsigned num_fields = glsl_get_length(type);
+ +
+ +      null_const->num_elements = num_fields;
+ +      null_const->elements = ralloc_array(b, nir_constant *, num_fields);
+ +
+ +      /* Fields may differ in type, so each one gets its own constant */
+ +      for (unsigned f = 0; f < num_fields; f++) {
+ +         const struct glsl_type *field_type = glsl_get_struct_field(type, f);
+ +         null_const->elements[f] = vtn_null_constant(b, field_type);
+ +      }
+ +      break;
+ +   }
+ +
+ +   default:
+ +      unreachable("Invalid type for null constant");
+ +   }
+ +
+ +   return null_const;
+ +}
+ +
+ +/* Decoration callback used to resolve a specialization constant.
+ + *
+ + * Only SpecId decorations are of interest.  For one of those, the
+ + * builder's specialization table is searched for an entry whose id equals
+ + * the decoration's first literal; on a match the uint32_t that data
+ + * points to is overwritten with that entry's data.  When nothing matches,
+ + * the pointed-to value is left untouched.
+ + */
+ +static void
+ +spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v,
+ +                             int member, const struct vtn_decoration *dec,
+ +                             void *data)
+ +{
+ +   assert(member == -1);
+ +
+ +   if (dec->decoration == SpvDecorationSpecId) {
+ +      uint32_t *out_value = data;
+ +
+ +      for (unsigned s = 0; s < b->num_specializations; s++) {
+ +         if (b->specializations[s].id != dec->literals[0])
+ +            continue;
+ +
+ +         /* First matching entry wins */
+ +         *out_value = b->specializations[s].data;
+ +         break;
+ +      }
+ +   }
+ +}
+ +
+ +/* Returns the value a specialization constant should take.
+ + *
+ + * const_value is the default; it is returned unchanged unless the
+ + * decoration callback finds a specialization entry for val and replaces
+ + * it.
+ + */
+ +static uint32_t
+ +get_specialization(struct vtn_builder *b, struct vtn_value *val,
+ +                   uint32_t const_value)
+ +{
+ +   uint32_t specialized = const_value;
+ +
+ +   vtn_foreach_decoration(b, val, spec_constant_deocoration_cb, &specialized);
+ +
+ +   return specialized;
+ +}
+ +
+ +/* Handles the SPIR-V constant-producing instructions (OpConstant*,
+ + * OpSpecConstant* and OpConstantNull).
+ + *
+ + * A value of kind vtn_value_type_constant is pushed for the result id in
+ + * w[2]; its const_type comes from the type id in w[1] and its constant is
+ + * a zero-initialized nir_constant that the per-opcode code below fills in
+ + * (or replaces).
+ + *
+ + * w is the instruction's word array and count its length in words.
+ + */
+ +static void
+ +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
+ +                    const uint32_t *w, unsigned count)
+ +{
+ +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
+ +   val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ +   val->constant = rzalloc(b, nir_constant);
+ +   switch (opcode) {
+ +   case SpvOpConstantTrue:
+ +      assert(val->const_type == glsl_bool_type());
+ +      val->constant->value.u[0] = NIR_TRUE;
+ +      break;
+ +   case SpvOpConstantFalse:
+ +      assert(val->const_type == glsl_bool_type());
+ +      val->constant->value.u[0] = NIR_FALSE;
+ +      break;
+ +
+ +   case SpvOpSpecConstantTrue:
+ +   case SpvOpSpecConstantFalse: {
+ +      assert(val->const_type == glsl_bool_type());
+ +      /* The opcode supplies the default; a specialization may override it */
+ +      uint32_t int_val =
+ +         get_specialization(b, val, (opcode == SpvOpSpecConstantTrue));
+ +      val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE;
+ +      break;
+ +   }
+ +
+ +   case SpvOpConstant:
+ +      assert(glsl_type_is_scalar(val->const_type));
+ +      val->constant->value.u[0] = w[3];
+ +      break;
+ +   case SpvOpSpecConstant:
+ +      assert(glsl_type_is_scalar(val->const_type));
+ +      val->constant->value.u[0] = get_specialization(b, val, w[3]);
+ +      break;
+ +   case SpvOpSpecConstantComposite:
+ +   case SpvOpConstantComposite: {
+ +      /* Words from w[3] onwards are the ids of the constituent constants */
+ +      unsigned elem_count = count - 3;
+ +      nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
+ +      for (unsigned i = 0; i < elem_count; i++)
+ +         elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant;
+ +
+ +      switch (glsl_get_base_type(val->const_type)) {
+ +      case GLSL_TYPE_UINT:
+ +      case GLSL_TYPE_INT:
+ +      case GLSL_TYPE_FLOAT:
+ +      case GLSL_TYPE_BOOL:
+ +         /* Vectors and matrices are flattened into the value array; a
+ +          * matrix is stored one column after another.
+ +          */
+ +         if (glsl_type_is_matrix(val->const_type)) {
+ +            unsigned rows = glsl_get_vector_elements(val->const_type);
+ +            assert(glsl_get_matrix_columns(val->const_type) == elem_count);
+ +            for (unsigned i = 0; i < elem_count; i++)
+ +               for (unsigned j = 0; j < rows; j++)
+ +                  val->constant->value.u[rows * i + j] = elems[i]->value.u[j];
+ +         } else {
+ +            assert(glsl_type_is_vector(val->const_type));
+ +            assert(glsl_get_vector_elements(val->const_type) == elem_count);
+ +            for (unsigned i = 0; i < elem_count; i++)
+ +               val->constant->value.u[i] = elems[i]->value.u[0];
+ +         }
+ +         ralloc_free(elems);
+ +         break;
+ +
+ +      case GLSL_TYPE_STRUCT:
+ +      case GLSL_TYPE_ARRAY:
+ +         /* Aggregates keep the element pointer array, re-parented to the
+ +          * constant.
+ +          */
+ +         ralloc_steal(val->constant, elems);
+ +         val->constant->num_elements = elem_count;
+ +         val->constant->elements = elems;
+ +         break;
+ +
+ +      default:
+ +         unreachable("Unsupported type for constants");
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpSpecConstantOp: {
+ +      /* The operation to evaluate; named spec_op so that it does not
+ +       * shadow this function's opcode parameter.  Every branch of the
+ +       * switch below returns, so control never falls through into the
+ +       * OpConstantNull case.
+ +       */
+ +      SpvOp spec_op = get_specialization(b, val, w[3]);
+ +      switch (spec_op) {
+ +      case SpvOpVectorShuffle: {
+ +         struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
+ +         struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
+ +         unsigned len0 = glsl_get_vector_elements(v0->const_type);
+ +         unsigned len1 = glsl_get_vector_elements(v1->const_type);
+ +
+ +         /* Concatenate both source vectors, then pick components out of
+ +          * the concatenation.  The bounds are asserted because both the
+ +          * lengths and the component indices come from the module.
+ +          */
+ +         uint32_t u[8];
+ +         assert(len0 + len1 <= 8);
+ +         for (unsigned i = 0; i < len0; i++)
+ +            u[i] = v0->constant->value.u[i];
+ +         for (unsigned i = 0; i < len1; i++)
+ +            u[len0 + i] = v1->constant->value.u[i];
+ +
+ +         for (unsigned i = 0; i < count - 6; i++) {
+ +            uint32_t comp = w[i + 6];
+ +            if (comp == (uint32_t)-1) {
+ +               /* All-ones component literal: store a recognizable
+ +                * placeholder instead of reading a source component.
+ +                */
+ +               val->constant->value.u[i] = 0xdeadbeef;
+ +            } else {
+ +               assert(comp < len0 + len1);
+ +               val->constant->value.u[i] = u[comp];
+ +            }
+ +         }
+ +         return;
+ +      }
+ +
+ +      case SpvOpCompositeExtract:
+ +      case SpvOpCompositeInsert: {
+ +         struct vtn_value *comp;
+ +         unsigned deref_start;
+ +         struct nir_constant **c;
+ +         if (spec_op == SpvOpCompositeExtract) {
+ +            comp = vtn_value(b, w[4], vtn_value_type_constant);
+ +            deref_start = 5;
+ +            c = &comp->constant;
+ +         } else {
+ +            /* Insert works on a clone so the source composite is left
+ +             * unmodified.  NOTE(review): the builder is cast to
+ +             * nir_variable * here, presumably only to serve as the
+ +             * allocation parent -- confirm against nir_constant_clone.
+ +             */
+ +            comp = vtn_value(b, w[5], vtn_value_type_constant);
+ +            deref_start = 6;
+ +            val->constant = nir_constant_clone(comp->constant,
+ +                                               (nir_variable *)b);
+ +            c = &val->constant;
+ +         }
+ +
+ +         /* Walk the index chain.  c follows aggregate (array/struct)
+ +          * levels; once a vector or matrix is reached, elem accumulates
+ +          * the offset into the flattened value array instead.  elem stays
+ +          * at -1 while only aggregate levels have been traversed.
+ +          */
+ +         int elem = -1;
+ +         const struct glsl_type *type = comp->const_type;
+ +         for (unsigned i = deref_start; i < count; i++) {
+ +            switch (glsl_get_base_type(type)) {
+ +            case GLSL_TYPE_UINT:
+ +            case GLSL_TYPE_INT:
+ +            case GLSL_TYPE_FLOAT:
+ +            case GLSL_TYPE_BOOL:
+ +               /* If we hit this granularity, we're picking off an element */
+ +               if (elem < 0)
+ +                  elem = 0;
+ +
+ +               if (glsl_type_is_matrix(type)) {
+ +                  elem += w[i] * glsl_get_vector_elements(type);
+ +                  type = glsl_get_column_type(type);
+ +               } else {
+ +                  assert(glsl_type_is_vector(type));
+ +                  elem += w[i];
+ +                  type = glsl_scalar_type(glsl_get_base_type(type));
+ +               }
+ +               continue;
+ +
+ +            case GLSL_TYPE_ARRAY:
+ +               c = &(*c)->elements[w[i]];
+ +               type = glsl_get_array_element(type);
+ +               continue;
+ +
+ +            case GLSL_TYPE_STRUCT:
+ +               c = &(*c)->elements[w[i]];
+ +               type = glsl_get_struct_field(type, w[i]);
+ +               continue;
+ +
+ +            default:
+ +               unreachable("Invalid constant type");
+ +            }
+ +         }
+ +
+ +         if (spec_op == SpvOpCompositeExtract) {
+ +            if (elem == -1) {
+ +               /* A whole sub-constant was selected; share it */
+ +               val->constant = *c;
+ +            } else {
+ +               unsigned num_components = glsl_get_vector_elements(type);
+ +               for (unsigned i = 0; i < num_components; i++)
+ +                  val->constant->value.u[i] = (*c)->value.u[elem + i];
+ +            }
+ +         } else {
+ +            struct vtn_value *insert =
+ +               vtn_value(b, w[4], vtn_value_type_constant);
+ +            assert(insert->const_type == type);
+ +            if (elem == -1) {
+ +               *c = insert->constant;
+ +            } else {
+ +               unsigned num_components = glsl_get_vector_elements(type);
+ +               for (unsigned i = 0; i < num_components; i++)
+ +                  (*c)->value.u[elem + i] = insert->constant->value.u[i];
+ +            }
+ +         }
+ +         return;
+ +      }
+ +
+ +      default: {
+ +         /* Any other operation is constant-folded through the matching
+ +          * NIR ALU opcode.
+ +          */
+ +         bool swap;
+ +         nir_op op = vtn_nir_alu_op_for_spirv_opcode(spec_op, &swap);
+ +
+ +         unsigned num_components = glsl_get_vector_elements(val->const_type);
++         unsigned bit_size =
++            glsl_get_bit_size(glsl_get_base_type(val->const_type));
+ +
+ +         nir_const_value src[3];
+ +         assert(count <= 7);
+ +         for (unsigned i = 0; i < count - 4; i++) {
+ +            nir_constant *c =
+ +               vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
+ +
+ +            /* With swap set the first two operands trade places.  The
+ +             * unsigned 1 - i wraps for a third operand, so make sure the
+ +             * slot is in range before writing to it.
+ +             */
+ +            unsigned j = swap ? 1 - i : i;
+ +            assert(j < sizeof(src) / sizeof(src[0]));
++            assert(bit_size == 32);
+ +            for (unsigned k = 0; k < num_components; k++)
-          nir_const_value res = nir_eval_const_opcode(op, num_components, src);
++               src[j].u32[k] = c->value.u[k];
+ +         }
+ +
-             val->constant->value.u[k] = res.u[k];
++         nir_const_value res = nir_eval_const_opcode(op, num_components,
++                                                     bit_size, src);
+ +
+ +         for (unsigned k = 0; k < num_components; k++)
-                      nir_tex_instr_dest_size(instr), NULL);
++            val->constant->value.u[k] = res.u32[k];
+ +
+ +         return;
+ +      } /* default */
+ +      }
+ +   }
+ +
+ +   case SpvOpConstantNull:
+ +      val->constant = vtn_null_constant(b, val->const_type);
+ +      break;
+ +
+ +   case SpvOpConstantSampler:
+ +      assert(!"OpConstantSampler requires Kernel Capability");
+ +      break;
+ +
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +}
+ +
+ +/* Emits a NIR call instruction for a SPIR-V function call.
+ + *
+ + * w[3] is the id of the callee, w[4] onwards are the argument ids and
+ + * w[2] is the result id.  Arguments that are access chains are passed as
+ + * copied derefs; anything else is first stored to a new local temporary.
+ + * For a non-void callee the return value is loaded back from a local
+ + * temporary after the call; a void callee gets an undef value pushed for
+ + * its result id.
+ + */
+ +static void
+ +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
+ +                         const uint32_t *w, unsigned count)
+ +{
+ +   struct vtn_value *func_val = vtn_value(b, w[3], vtn_value_type_function);
+ +   struct nir_function *callee = func_val->func->impl->function;
+ +   const bool returns_void = glsl_type_is_void(callee->return_type);
+ +
+ +   nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
+ +
+ +   for (unsigned p = 0; p < call->num_params; p++) {
+ +      const unsigned arg_id = w[4 + p];
+ +      struct vtn_value *arg = vtn_untyped_value(b, arg_id);
+ +
+ +      if (arg->value_type != vtn_value_type_access_chain) {
+ +         /* Not addressable: spill the SSA value into a temporary and
+ +          * pass that instead.
+ +          */
+ +         struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
+ +         nir_variable *tmp =
+ +            nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp");
+ +
+ +         call->params[p] = nir_deref_var_create(call, tmp);
+ +         vtn_local_store(b, arg_ssa, call->params[p]);
+ +         continue;
+ +      }
+ +
+ +      /* The call instruction gets its own copy of the deref chain */
+ +      nir_deref_var *chain = vtn_access_chain_to_deref(b, arg->access_chain);
+ +      call->params[p] = nir_deref_as_var(nir_copy_deref(call, &chain->deref));
+ +   }
+ +
+ +   if (!returns_void) {
+ +      nir_variable *out_tmp =
+ +         nir_local_variable_create(b->impl, callee->return_type, "out_tmp");
+ +      call->return_deref = nir_deref_var_create(call, out_tmp);
+ +   }
+ +
+ +   nir_builder_instr_insert(&b->nb, &call->instr);
+ +
+ +   if (returns_void) {
+ +      vtn_push_value(b, w[2], vtn_value_type_undef);
+ +      return;
+ +   }
+ +
+ +   struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +   retval->ssa = vtn_local_load(b, call->return_deref);
+ +}
+ +
+ +/* Allocates a vtn_ssa_value tree mirroring the shape of type.
+ + *
+ + * Vector and scalar types produce a single leaf.  Any other type gets an
+ + * elems array with one recursively created child per element: the column
+ + * type for numeric base types, the element type for arrays and the
+ + * field type for structs.  No SSA defs are assigned here.
+ + */
+ +struct vtn_ssa_value *
+ +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
+ +{
+ +   struct vtn_ssa_value *node = rzalloc(b, struct vtn_ssa_value);
+ +   node->type = type;
+ +
+ +   /* Leaves have no children to allocate */
+ +   if (glsl_type_is_vector_or_scalar(type))
+ +      return node;
+ +
+ +   const unsigned num_children = glsl_get_length(type);
+ +   node->elems = ralloc_array(b, struct vtn_ssa_value *, num_children);
+ +
+ +   for (unsigned c = 0; c < num_children; c++) {
+ +      const struct glsl_type *child_type;
+ +
+ +      switch (glsl_get_base_type(type)) {
+ +      case GLSL_TYPE_INT:
+ +      case GLSL_TYPE_UINT:
+ +      case GLSL_TYPE_BOOL:
+ +      case GLSL_TYPE_FLOAT:
+ +      case GLSL_TYPE_DOUBLE:
+ +         /* Numeric but not vector/scalar: split into columns */
+ +         child_type = glsl_get_column_type(type);
+ +         break;
+ +      case GLSL_TYPE_ARRAY:
+ +         child_type = glsl_get_array_element(type);
+ +         break;
+ +      case GLSL_TYPE_STRUCT:
+ +         child_type = glsl_get_struct_field(type, c);
+ +         break;
+ +      default:
+ +         unreachable("unkown base type");
+ +      }
+ +
+ +      node->elems[c] = vtn_create_ssa_value(b, child_type);
+ +   }
+ +
+ +   return node;
+ +}
+ +
+ +/* Builds a texture instruction source of the given kind from the SSA
+ + * def of the value whose id is index.
+ + */
+ +static nir_tex_src
+ +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
+ +{
+ +   nir_ssa_def *def = vtn_ssa_value(b, index)->def;
+ +
+ +   nir_tex_src tex_src;
+ +   tex_src.src_type = type;
+ +   tex_src.src = nir_src_for_ssa(def);
+ +
+ +   return tex_src;
+ +}
+ +
+ +/* Handles the SPIR-V sampled-image instructions.
+ + *
+ + * OpSampledImage and OpImage only record which access chains make up the
+ + * (image, sampler) pair and return early.  Every other opcode handled
+ + * here is turned into a nir_tex_instr: the sources are gathered in the
+ + * order the operands appear in the instruction, the base texture op is
+ + * picked from the opcode and refined by the optional image operands, and
+ + * the result is pushed as an SSA value for the result id in w[2].
+ + *
+ + * w is the instruction's word array and count its length in words.
+ + */
+ +static void
+ +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
+ +                   const uint32_t *w, unsigned count)
+ +{
+ +   if (opcode == SpvOpSampledImage) {
+ +      /* Pair up an image (w[3]) and a sampler (w[4]) access chain */
+ +      struct vtn_value *val =
+ +         vtn_push_value(b, w[2], vtn_value_type_sampled_image);
+ +      val->sampled_image = ralloc(b, struct vtn_sampled_image);
+ +      val->sampled_image->image =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +      val->sampled_image->sampler =
+ +         vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain;
+ +      return;
+ +   } else if (opcode == SpvOpImage) {
+ +      /* Recover the image access chain from a sampled image, or pass a
+ +       * plain access chain straight through.
+ +       */
+ +      struct vtn_value *val =
+ +         vtn_push_value(b, w[2], vtn_value_type_access_chain);
+ +      struct vtn_value *src_val = vtn_untyped_value(b, w[3]);
+ +      if (src_val->value_type == vtn_value_type_sampled_image) {
+ +         val->access_chain = src_val->sampled_image->image;
+ +      } else {
+ +         assert(src_val->value_type == vtn_value_type_access_chain);
+ +         val->access_chain = src_val->access_chain;
+ +      }
+ +      return;
+ +   }
+ +
+ +   struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +
+ +   /* w[3] is either a sampled image (separate image and sampler chains)
+ +    * or a single access chain, which is then treated as the sampler with
+ +    * no separate image.
+ +    */
+ +   struct vtn_sampled_image sampled;
+ +   struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
+ +   if (sampled_val->value_type == vtn_value_type_sampled_image) {
+ +      sampled = *sampled_val->sampled_image;
+ +   } else {
+ +      assert(sampled_val->value_type == vtn_value_type_access_chain);
+ +      sampled.image = NULL;
+ +      sampled.sampler = sampled_val->access_chain;
+ +   }
+ +
+ +   const struct glsl_type *image_type;
+ +   if (sampled.image) {
+ +      image_type = sampled.image->var->var->interface_type;
+ +   } else {
+ +      image_type = sampled.sampler->var->var->interface_type;
+ +   }
+ +
+ +   nir_tex_src srcs[8]; /* 8 should be enough */
+ +   nir_tex_src *p = srcs;
+ +
+ +   /* Index of the next unconsumed operand word */
+ +   unsigned idx = 4;
+ +
+ +   bool has_coord = false;
+ +   switch (opcode) {
+ +   case SpvOpImageSampleImplicitLod:
+ +   case SpvOpImageSampleExplicitLod:
+ +   case SpvOpImageSampleDrefImplicitLod:
+ +   case SpvOpImageSampleDrefExplicitLod:
+ +   case SpvOpImageSampleProjImplicitLod:
+ +   case SpvOpImageSampleProjExplicitLod:
+ +   case SpvOpImageSampleProjDrefImplicitLod:
+ +   case SpvOpImageSampleProjDrefExplicitLod:
+ +   case SpvOpImageFetch:
+ +   case SpvOpImageGather:
+ +   case SpvOpImageDrefGather:
+ +   case SpvOpImageQueryLod: {
+ +      /* All these types have the coordinate as their first real argument */
+ +      struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
+ +      has_coord = true;
+ +      p->src = nir_src_for_ssa(coord->def);
+ +      p->src_type = nir_tex_src_coord;
+ +      p++;
+ +      break;
+ +   }
+ +
+ +   default:
+ +      break;
+ +   }
+ +
+ +   /* These all have an explicit depth value as their next source */
+ +   switch (opcode) {
+ +   case SpvOpImageSampleDrefImplicitLod:
+ +   case SpvOpImageSampleDrefExplicitLod:
+ +   case SpvOpImageSampleProjDrefImplicitLod:
+ +   case SpvOpImageSampleProjDrefExplicitLod:
+ +      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
+ +      break;
+ +   default:
+ +      break;
+ +   }
+ +
+ +   /* For OpImageQuerySizeLod, we always have an LOD */
+ +   if (opcode == SpvOpImageQuerySizeLod)
+ +      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
+ +
+ +   /* Figure out the base texture operation */
+ +   nir_texop texop;
+ +   switch (opcode) {
+ +   case SpvOpImageSampleImplicitLod:
+ +   case SpvOpImageSampleDrefImplicitLod:
+ +   case SpvOpImageSampleProjImplicitLod:
+ +   case SpvOpImageSampleProjDrefImplicitLod:
+ +      texop = nir_texop_tex;
+ +      break;
+ +
+ +   case SpvOpImageSampleExplicitLod:
+ +   case SpvOpImageSampleDrefExplicitLod:
+ +   case SpvOpImageSampleProjExplicitLod:
+ +   case SpvOpImageSampleProjDrefExplicitLod:
+ +      texop = nir_texop_txl;
+ +      break;
+ +
+ +   case SpvOpImageFetch:
+ +      if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {
+ +         texop = nir_texop_txf_ms;
+ +      } else {
+ +         texop = nir_texop_txf;
+ +      }
+ +      break;
+ +
+ +   case SpvOpImageGather:
+ +   case SpvOpImageDrefGather:
+ +      texop = nir_texop_tg4;
+ +      break;
+ +
+ +   case SpvOpImageQuerySizeLod:
+ +   case SpvOpImageQuerySize:
+ +      texop = nir_texop_txs;
+ +      break;
+ +
+ +   case SpvOpImageQueryLod:
+ +      texop = nir_texop_lod;
+ +      break;
+ +
+ +   case SpvOpImageQueryLevels:
+ +      texop = nir_texop_query_levels;
+ +      break;
+ +
+ +   case SpvOpImageQuerySamples:
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +
+ +   /* Now we need to handle some number of optional arguments.  The next
+ +    * word, if any, is a bitmask of image operands; the value for each set
+ +    * bit follows, and they are consumed here in the order tested below.
+ +    */
+ +   if (idx < count) {
+ +      uint32_t operands = w[idx++];
+ +
+ +      if (operands & SpvImageOperandsBiasMask) {
+ +         assert(texop == nir_texop_tex);
+ +         texop = nir_texop_txb;
+ +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias);
+ +      }
+ +
+ +      if (operands & SpvImageOperandsLodMask) {
+ +         assert(texop == nir_texop_txl || texop == nir_texop_txf ||
+ +                texop == nir_texop_txf_ms || texop == nir_texop_txs);
+ +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
+ +      }
+ +
+ +      if (operands & SpvImageOperandsGradMask) {
+ +         assert(texop == nir_texop_tex);
+ +         texop = nir_texop_txd;
+ +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
+ +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
+ +      }
+ +
+ +      if (operands & SpvImageOperandsOffsetMask ||
+ +          operands & SpvImageOperandsConstOffsetMask)
+ +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset);
+ +
+ +      if (operands & SpvImageOperandsConstOffsetsMask)
+ +         assert(!"Constant offsets to texture gather not yet implemented");
+ +
+ +      if (operands & SpvImageOperandsSampleMask) {
+ +         assert(texop == nir_texop_txf_ms);
+ +         texop = nir_texop_txf_ms;
+ +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
+ +      }
+ +   }
+ +   /* We should have now consumed exactly all of the arguments */
+ +   assert(idx == count);
+ +
+ +   nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
+ +   instr->op = texop;
+ +
+ +   memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
+ +
+ +   instr->sampler_dim = glsl_get_sampler_dim(image_type);
+ +   instr->is_array = glsl_sampler_type_is_array(image_type);
+ +   instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
+ +   instr->is_new_style_shadow = instr->is_shadow;
+ +
+ +   if (has_coord) {
+ +      switch (instr->sampler_dim) {
+ +      case GLSL_SAMPLER_DIM_1D:
+ +      case GLSL_SAMPLER_DIM_BUF:
+ +         instr->coord_components = 1;
+ +         break;
+ +      case GLSL_SAMPLER_DIM_2D:
+ +      case GLSL_SAMPLER_DIM_RECT:
+ +      case GLSL_SAMPLER_DIM_MS:
+ +         instr->coord_components = 2;
+ +         break;
+ +      case GLSL_SAMPLER_DIM_3D:
+ +      case GLSL_SAMPLER_DIM_CUBE:
+ +         instr->coord_components = 3;
+ +         break;
+ +      default:
+ +         /* The message must be negated: a bare string literal is a
+ +          * non-null pointer, so asserting it could never fail.
+ +          */
+ +         assert(!"Invalid sampler type");
+ +      }
+ +
+ +      /* Array textures carry the layer index as one more component */
+ +      if (instr->is_array)
+ +         instr->coord_components++;
+ +   } else {
+ +      instr->coord_components = 0;
+ +   }
+ +
+ +   switch (glsl_get_sampler_result_type(image_type)) {
+ +   case GLSL_TYPE_FLOAT:   instr->dest_type = nir_type_float;     break;
+ +   case GLSL_TYPE_INT:     instr->dest_type = nir_type_int;       break;
+ +   case GLSL_TYPE_UINT:    instr->dest_type = nir_type_uint;  break;
+ +   case GLSL_TYPE_BOOL:    instr->dest_type = nir_type_bool;      break;
+ +   default:
+ +      unreachable("Invalid base type for sampler result");
+ +   }
+ +
+ +   /* The texture deref comes from the separate image chain when there is
+ +    * one and from the sampler chain otherwise.
+ +    */
+ +   nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler);
+ +   if (sampled.image) {
+ +      nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image);
+ +      instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref));
+ +   } else {
+ +      instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
+ +   }
+ +
+ +   switch (instr->op) {
+ +   case nir_texop_tex:
+ +   case nir_texop_txb:
+ +   case nir_texop_txl:
+ +   case nir_texop_txd:
+ +      /* These operations require a sampler */
+ +      instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
+ +      break;
+ +   case nir_texop_txf:
+ +   case nir_texop_txf_ms:
+ +   case nir_texop_txs:
+ +   case nir_texop_lod:
+ +   case nir_texop_tg4:
+ +   case nir_texop_query_levels:
+ +   case nir_texop_texture_samples:
+ +   case nir_texop_samples_identical:
+ +      /* These don't */
+ +      instr->sampler = NULL;
+ +      break;
+ +   }
+ +
+ +   nir_ssa_dest_init(&instr->instr, &instr->dest,
-       nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
++                     nir_tex_instr_dest_size(instr), 32, NULL);
+ +
+ +   assert(glsl_get_vector_elements(ret_type->type) ==
+ +          nir_tex_instr_dest_size(instr));
+ +
+ +   val->ssa = vtn_create_ssa_value(b, ret_type->type);
+ +   val->ssa->def = &instr->dest.ssa;
+ +
+ +   nir_builder_instr_insert(&b->nb, &instr->instr);
+ +}
+ +
+ +/* Returns the coordinate value with id value widened to 4 components.
+ + *
+ + * The image_load_store intrinsics assume a 4-dim coordinate, so lanes
+ + * beyond the coordinate's own size repeat its last component.
+ + */
+ +static nir_ssa_def *
+ +get_image_coord(struct vtn_builder *b, uint32_t value)
+ +{
+ +   struct vtn_ssa_value *coord = vtn_ssa_value(b, value);
+ +   const unsigned last = glsl_get_vector_elements(coord->type) - 1;
+ +
+ +   unsigned swizzle[4];
+ +   for (unsigned lane = 0; lane < 4; lane++)
+ +      swizzle[lane] = lane < last ? lane : last;
+ +
+ +   return nir_swizzle(&b->nb, coord->def, swizzle, 4, false);
+ +}
+ +
+ +static void
+ +vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
+ +                 const uint32_t *w, unsigned count)
+ +{
+ +   /* Just get this one out of the way */
+ +   if (opcode == SpvOpImageTexelPointer) {
+ +      struct vtn_value *val =
+ +         vtn_push_value(b, w[2], vtn_value_type_image_pointer);
+ +      val->image = ralloc(b, struct vtn_image_pointer);
+ +
+ +      val->image->image =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +      val->image->coord = get_image_coord(b, w[4]);
+ +      val->image->sample = vtn_ssa_value(b, w[5])->def;
+ +      return;
+ +   }
+ +
+ +   struct vtn_image_pointer image;
+ +
+ +   switch (opcode) {
+ +   case SpvOpAtomicExchange:
+ +   case SpvOpAtomicCompareExchange:
+ +   case SpvOpAtomicCompareExchangeWeak:
+ +   case SpvOpAtomicIIncrement:
+ +   case SpvOpAtomicIDecrement:
+ +   case SpvOpAtomicIAdd:
+ +   case SpvOpAtomicISub:
+ +   case SpvOpAtomicSMin:
+ +   case SpvOpAtomicUMin:
+ +   case SpvOpAtomicSMax:
+ +   case SpvOpAtomicUMax:
+ +   case SpvOpAtomicAnd:
+ +   case SpvOpAtomicOr:
+ +   case SpvOpAtomicXor:
+ +      image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
+ +      break;
+ +
+ +   case SpvOpImageQuerySize:
+ +      image.image =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +      image.coord = NULL;
+ +      image.sample = NULL;
+ +      break;
+ +
+ +   case SpvOpImageRead:
+ +      image.image =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +      image.coord = get_image_coord(b, w[4]);
+ +
+ +      if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) {
+ +         assert(w[5] == SpvImageOperandsSampleMask);
+ +         image.sample = vtn_ssa_value(b, w[6])->def;
+ +      } else {
+ +         image.sample = nir_ssa_undef(&b->nb, 1);
+ +      }
+ +      break;
+ +
+ +   case SpvOpImageWrite:
+ +      image.image =
+ +         vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
+ +      image.coord = get_image_coord(b, w[2]);
+ +
+ +      /* texel = w[3] */
+ +
+ +      if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) {
+ +         assert(w[4] == SpvImageOperandsSampleMask);
+ +         image.sample = vtn_ssa_value(b, w[5])->def;
+ +      } else {
+ +         image.sample = nir_ssa_undef(&b->nb, 1);
+ +      }
+ +      break;
+ +
+ +   default:
+ +      unreachable("Invalid image opcode");
+ +   }
+ +
+ +   nir_intrinsic_op op;
+ +   switch (opcode) {
+ +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break;
+ +   OP(ImageQuerySize,         size)
+ +   OP(ImageRead,              load)
+ +   OP(ImageWrite,             store)
+ +   OP(AtomicExchange,         atomic_exchange)
+ +   OP(AtomicCompareExchange,  atomic_comp_swap)
+ +   OP(AtomicIIncrement,       atomic_add)
+ +   OP(AtomicIDecrement,       atomic_add)
+ +   OP(AtomicIAdd,             atomic_add)
+ +   OP(AtomicISub,             atomic_add)
+ +   OP(AtomicSMin,             atomic_min)
+ +   OP(AtomicUMin,             atomic_min)
+ +   OP(AtomicSMax,             atomic_max)
+ +   OP(AtomicUMax,             atomic_max)
+ +   OP(AtomicAnd,              atomic_and)
+ +   OP(AtomicOr,               atomic_or)
+ +   OP(AtomicXor,              atomic_xor)
+ +#undef OP
+ +   default:
+ +      unreachable("Invalid image opcode");
+ +   }
+ +
+ +   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
+ +
+ +   nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image);
+ +   intrin->variables[0] =
+ +      nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref));
+ +
+ +   /* ImageQuerySize doesn't take any extra parameters */
+ +   if (opcode != SpvOpImageQuerySize) {
+ +      /* The image coordinate is always 4 components but we may not have that
+ +       * many.  Swizzle to compensate.
+ +       */
+ +      unsigned swiz[4];
+ +      for (unsigned i = 0; i < 4; i++)
+ +         swiz[i] = i < image.coord->num_components ? i : 0;
+ +      intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord,
+ +                                                   swiz, 4, false));
+ +      intrin->src[1] = nir_src_for_ssa(image.sample);
+ +   }
+ +
+ +   switch (opcode) {
+ +   case SpvOpImageQuerySize:
+ +   case SpvOpImageRead:
+ +      break;
+ +   case SpvOpImageWrite:
+ +      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
+ +      break;
+ +   case SpvOpAtomicIIncrement:
+ +      intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
+ +      break;
+ +   case SpvOpAtomicIDecrement:
+ +      intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
+ +      break;
+ +
+ +   case SpvOpAtomicExchange:
+ +   case SpvOpAtomicIAdd:
+ +   case SpvOpAtomicSMin:
+ +   case SpvOpAtomicUMin:
+ +   case SpvOpAtomicSMax:
+ +   case SpvOpAtomicUMax:
+ +   case SpvOpAtomicAnd:
+ +   case SpvOpAtomicOr:
+ +   case SpvOpAtomicXor:
+ +      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ +      break;
+ +
+ +   case SpvOpAtomicCompareExchange:
+ +      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
+ +      intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ +      break;
+ +
+ +   case SpvOpAtomicISub:
+ +      intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
+ +      break;
+ +
+ +   default:
+ +      unreachable("Invalid image opcode");
+ +   }
+ +
+ +   if (opcode != SpvOpImageWrite) {
+ +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +      struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
-    nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
++      nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL);
+ +
+ +      nir_builder_instr_insert(&b->nb, &intrin->instr);
+ +
+ +      /* The image intrinsics always return 4 channels but we may not want
+ +       * that many.  Emit a mov to trim it down.
+ +       */
+ +      unsigned swiz[4] = {0, 1, 2, 3};
+ +      val->ssa = vtn_create_ssa_value(b, type->type);
+ +      val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz,
+ +                                  glsl_get_vector_elements(type->type), false);
+ +   } else {
+ +      nir_builder_instr_insert(&b->nb, &intrin->instr);
+ +   }
+ +}
+ +
+ +/*
+ + * Translate a SPIR-V atomic opcode into the nir_intrinsic_ssbo_atomic_*
+ + * intrinsic that implements it.  Opcodes that are not listed end up in
+ + * unreachable().
+ + */
+ +static nir_intrinsic_op
+ +get_ssbo_nir_atomic_op(SpvOp opcode)
+ +{
+ +   switch (opcode) {
+ +   case SpvOpAtomicExchange:
+ +      return nir_intrinsic_ssbo_atomic_exchange;
+ +
+ +   case SpvOpAtomicCompareExchange:
+ +      return nir_intrinsic_ssbo_atomic_comp_swap;
+ +
+ +   /* Increment, decrement, add and subtract all map onto an atomic add. */
+ +   case SpvOpAtomicIIncrement:
+ +   case SpvOpAtomicIDecrement:
+ +   case SpvOpAtomicIAdd:
+ +   case SpvOpAtomicISub:
+ +      return nir_intrinsic_ssbo_atomic_add;
+ +
+ +   case SpvOpAtomicSMin:
+ +      return nir_intrinsic_ssbo_atomic_imin;
+ +
+ +   case SpvOpAtomicUMin:
+ +      return nir_intrinsic_ssbo_atomic_umin;
+ +
+ +   case SpvOpAtomicSMax:
+ +      return nir_intrinsic_ssbo_atomic_imax;
+ +
+ +   case SpvOpAtomicUMax:
+ +      return nir_intrinsic_ssbo_atomic_umax;
+ +
+ +   case SpvOpAtomicAnd:
+ +      return nir_intrinsic_ssbo_atomic_and;
+ +
+ +   case SpvOpAtomicOr:
+ +      return nir_intrinsic_ssbo_atomic_or;
+ +
+ +   case SpvOpAtomicXor:
+ +      return nir_intrinsic_ssbo_atomic_xor;
+ +
+ +   default:
+ +      unreachable("Invalid SSBO atomic");
+ +   }
+ +}
+ +
+ +/*
+ + * Translate a SPIR-V atomic opcode into the nir_intrinsic_var_atomic_*
+ + * intrinsic that implements it.  Opcodes that are not listed end up in
+ + * unreachable().
+ + */
+ +static nir_intrinsic_op
+ +get_shared_nir_atomic_op(SpvOp opcode)
+ +{
+ +   switch (opcode) {
+ +   case SpvOpAtomicExchange:
+ +      return nir_intrinsic_var_atomic_exchange;
+ +
+ +   case SpvOpAtomicCompareExchange:
+ +      return nir_intrinsic_var_atomic_comp_swap;
+ +
+ +   /* Increment, decrement, add and subtract all map onto an atomic add. */
+ +   case SpvOpAtomicIIncrement:
+ +   case SpvOpAtomicIDecrement:
+ +   case SpvOpAtomicIAdd:
+ +   case SpvOpAtomicISub:
+ +      return nir_intrinsic_var_atomic_add;
+ +
+ +   case SpvOpAtomicSMin:
+ +      return nir_intrinsic_var_atomic_imin;
+ +
+ +   case SpvOpAtomicUMin:
+ +      return nir_intrinsic_var_atomic_umin;
+ +
+ +   case SpvOpAtomicSMax:
+ +      return nir_intrinsic_var_atomic_imax;
+ +
+ +   case SpvOpAtomicUMax:
+ +      return nir_intrinsic_var_atomic_umax;
+ +
+ +   case SpvOpAtomicAnd:
+ +      return nir_intrinsic_var_atomic_and;
+ +
+ +   case SpvOpAtomicOr:
+ +      return nir_intrinsic_var_atomic_or;
+ +
+ +   case SpvOpAtomicXor:
+ +      return nir_intrinsic_var_atomic_xor;
+ +
+ +   default:
+ +      unreachable("Invalid shared atomic");
+ +   }
+ +}
+ +
+ +/*
+ + * Fill in the data sources of an atomic intrinsic.
+ + *
+ + * b       builder used to emit any immediates / negations that are needed
+ + * opcode  the SPIR-V atomic opcode being translated
+ + * w       the instruction words of that opcode
+ + * src     first data-source slot of the intrinsic; src[0] is always
+ + *         written and src[1] is written for compare-exchange only
+ + *
+ + * Any opcode that is not an atomic handled here hits unreachable().
+ + */
+ +static void
+ +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
+ +                           const uint32_t *w, nir_src *src)
+ +{
+ +   switch (opcode) {
+ +   case SpvOpAtomicIIncrement:
+ +      /* Increment is an atomic add of +1. */
+ +      src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
+ +      break;
+ +
+ +   case SpvOpAtomicIDecrement:
+ +      /* Decrement is an atomic add of -1. */
+ +      src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
+ +      break;
+ +
+ +   case SpvOpAtomicISub:
+ +      /* Subtract is an atomic add of the negated operand. */
+ +      src[0] =
+ +         nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
+ +      break;
+ +
+ +   case SpvOpAtomicCompareExchange:
+ +      /* SPIR-V orders the operands as Value (w[7]) followed by Comparator
+ +       * (w[8]), whereas the NIR comp_swap intrinsics take the comparator
+ +       * first and the replacement value second.  Swap them here.
+ +       */
+ +      src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+ +      src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
+ +      break;
+ +
+ +   case SpvOpAtomicExchange:
+ +   case SpvOpAtomicIAdd:
+ +   case SpvOpAtomicSMin:
+ +   case SpvOpAtomicUMin:
+ +   case SpvOpAtomicSMax:
+ +   case SpvOpAtomicUMax:
+ +   case SpvOpAtomicAnd:
+ +   case SpvOpAtomicOr:
+ +   case SpvOpAtomicXor:
+ +      /* Single-operand atomics: the value lives in w[6]. */
+ +      src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ +      break;
+ +
+ +   default:
+ +      unreachable("Invalid SPIR-V atomic");
+ +   }
+ +}
+ +
+ +/*
+ + * Emit a NIR atomic intrinsic for a SPIR-V atomic whose pointer operand
+ + * (w[3]) is an access chain.
+ + *
+ + * Workgroup variables get a nir_intrinsic_var_atomic_* intrinsic operating
+ + * on a variable deref; everything else is asserted to be an SSBO and gets a
+ + * nir_intrinsic_ssbo_atomic_* intrinsic taking a buffer index and offset.
+ + * The result is pushed as the SSA value for id w[2] with the type named by
+ + * w[1].
+ + */
+ +static void
+ +vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
+ +                                 const uint32_t *w, unsigned count)
+ +{
+ +   struct vtn_access_chain *chain =
+ +      vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +   nir_intrinsic_instr *atomic;
+ +
+ +   /* The scope (w[4]) and memory semantics (w[5]) operands are not used: */
+ +   /*
+ +   SpvScope scope = w[4];
+ +   SpvMemorySemanticsMask semantics = w[5];
+ +   */
+ +
+ +   if (chain->var->mode == vtn_variable_mode_workgroup) {
+ +      /* Shared memory: the only non-data operand is the variable deref, so
+ +       * the data sources start at src[0].
+ +       */
+ +      nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref;
+ +      nir_intrinsic_op op = get_shared_nir_atomic_op(opcode);
+ +      atomic = nir_intrinsic_instr_create(b->nb.shader, op);
+ +      atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref));
+ +      fill_common_atomic_sources(b, opcode, w, &atomic->src[0]);
+ +   } else {
+ +      /* SSBO: src[0] is the buffer index and src[1] the offset, so the
+ +       * data sources start at src[2].
+ +       */
+ +      assert(chain->var->mode == vtn_variable_mode_ssbo);
+ +      struct vtn_type *type;
+ +      nir_ssa_def *offset, *index;
+ +      offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false);
+ +
+ +      nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
+ +
+ +      atomic = nir_intrinsic_instr_create(b->nb.shader, op);
+ +      atomic->src[0] = nir_src_for_ssa(index);
+ +      atomic->src[1] = nir_src_for_ssa(offset);
+ +      fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
+ +   }
+ +
- create_vec(nir_shader *shader, unsigned num_components)
++   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
+ +
+ +   /* Wrap the intrinsic's destination in a vtn_ssa_value for id w[2]. */
+ +   struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +   val->ssa = rzalloc(b, struct vtn_ssa_value);
+ +   val->ssa->def = &atomic->dest.ssa;
+ +   val->ssa->type = type->type;
+ +
+ +   nir_builder_instr_insert(&b->nb, &atomic->instr);
+ +}
+ +
+ +/*
+ + * Create (but do not insert) an ALU instruction that builds a vector of
+ + * num_components components with the given bit size.  The caller fills in
+ + * the sources and inserts the instruction.  A single component uses
+ + * nir_op_fmov; 2-4 components use nir_op_vec2..vec4; anything else hits
+ + * unreachable().
+ + */
+ +static nir_alu_instr *
-    nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
++create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size)
+ +{
+ +   nir_op op;
+ +   switch (num_components) {
+ +   case 1: op = nir_op_fmov; break;
+ +   case 2: op = nir_op_vec2; break;
+ +   case 3: op = nir_op_vec3; break;
+ +   case 4: op = nir_op_vec4; break;
+ +   default: unreachable("bad vector size");
+ +   }
+ +
+ +   nir_alu_instr *vec = nir_alu_instr_create(shader, op);
-                                       glsl_get_matrix_columns(src->type));
++   nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
++                     bit_size, NULL);
+ +   /* Enable one write-mask bit per destination component. */
+ +   vec->dest.write_mask = (1 << num_components) - 1;
+ +
+ +   return vec;
+ +}
+ +
+ +/*
+ + * Return the transpose of src.
+ + *
+ + * If src already carries a transposed value, that one is returned.
+ + * Otherwise a new value of the transposed GLSL type is built one column at
+ + * a time, and its `transposed` field is pointed back at src.
+ + */
+ +struct vtn_ssa_value *
+ +vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
+ +{
+ +   if (src->transposed)
+ +      return src->transposed;
+ +
+ +   struct vtn_ssa_value *dest =
+ +      vtn_create_ssa_value(b, glsl_transposed_type(src->type));
+ +
+ +   /* Column i of the result gathers component i of every source column. */
+ +   for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
+ +      nir_alu_instr *vec = create_vec(b->shader,
-    nir_alu_instr *vec = create_vec(b->shader, src->num_components);
++                                      glsl_get_matrix_columns(src->type),
++                                      glsl_get_bit_size(glsl_get_base_type(src->type)));
+ +      if (glsl_type_is_vector_or_scalar(src->type)) {
+ +          vec->src[0].src = nir_src_for_ssa(src->def);
+ +          vec->src[0].swizzle[0] = i;
+ +      } else {
+ +         for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
+ +            vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
+ +            vec->src[j].swizzle[0] = i;
+ +         }
+ +      }
+ +      nir_builder_instr_insert(&b->nb, &vec->instr);
+ +      dest->elems[i]->def = &vec->dest.dest.ssa;
+ +   }
+ +
+ +   /* Remember the source so that transposing dest again returns it. */
+ +   dest->transposed = src;
+ +
+ +   return dest;
+ +}
+ +
+ +/* Extract the single component `index` of `src` as a one-component value. */
+ +nir_ssa_def *
+ +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index)
+ +{
+ +   /* Only the first swizzle entry is consumed; the rest are zero. */
+ +   unsigned swizzle[4] = { index, 0, 0, 0 };
+ +
+ +   return nir_swizzle(&b->nb, src, swizzle, 1, true);
+ +}
+ +
+ +/*
+ + * Build a copy of vector `src` in which component `index` is replaced by
+ + * `insert`.  The result has the same component count and bit size as src.
+ + */
+ +nir_ssa_def *
+ +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
+ +                  unsigned index)
+ +{
-    nir_alu_instr *vec = create_vec(b->shader, num_components);
++   nir_alu_instr *vec = create_vec(b->shader, src->num_components,
++                                   src->bit_size);
+ +
+ +   for (unsigned i = 0; i < src->num_components; i++) {
+ +      if (i == index) {
+ +         /* The replaced slot reads the inserted value. */
+ +         vec->src[i].src = nir_src_for_ssa(insert);
+ +      } else {
+ +         /* Every other slot passes through component i of src. */
+ +         vec->src[i].src = nir_src_for_ssa(src);
+ +         vec->src[i].swizzle[0] = i;
+ +      }
+ +   }
+ +
+ +   nir_builder_instr_insert(&b->nb, &vec->instr);
+ +
+ +   return &vec->dest.dest.ssa;
+ +}
+ +
+ +/*
+ + * Extract a component of `src` selected by the run-time value `index`.
+ + *
+ + * Starts from component 0 and, for every further component, emits a select
+ + * that picks that component when `index` equals its position.
+ + */
+ +nir_ssa_def *
+ +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
+ +                           nir_ssa_def *index)
+ +{
+ +   nir_ssa_def *result = vtn_vector_extract(b, src, 0);
+ +   unsigned comp = 1;
+ +
+ +   while (comp < src->num_components) {
+ +      result = nir_bcsel(&b->nb,
+ +                         nir_ieq(&b->nb, index, nir_imm_int(&b->nb, comp)),
+ +                         vtn_vector_extract(b, src, comp),
+ +                         result);
+ +      comp++;
+ +   }
+ +
+ +   return result;
+ +}
+ +
+ +/*
+ + * Insert `insert` into `src` at the position given by the run-time value
+ + * `index`.
+ + *
+ + * Starts from the vector with slot 0 replaced and, for every further slot,
+ + * emits a select that picks the vector with that slot replaced when `index`
+ + * equals the slot number.
+ + */
+ +nir_ssa_def *
+ +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
+ +                          nir_ssa_def *insert, nir_ssa_def *index)
+ +{
+ +   nir_ssa_def *result = vtn_vector_insert(b, src, insert, 0);
+ +   unsigned slot = 1;
+ +
+ +   while (slot < src->num_components) {
+ +      result = nir_bcsel(&b->nb,
+ +                         nir_ieq(&b->nb, index, nir_imm_int(&b->nb, slot)),
+ +                         vtn_vector_insert(b, src, insert, slot),
+ +                         result);
+ +      slot++;
+ +   }
+ +
+ +   return result;
+ +}
+ +
+ +/*
+ + * Build a num_components-wide vector whose components are picked from the
+ + * concatenation of src0 and src1 according to `indices`.  An index of
+ + * 0xffffffff selects an undefined value.  The result uses src0's bit size.
+ + */
+ +static nir_ssa_def *
+ +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
+ +                   nir_ssa_def *src0, nir_ssa_def *src1,
+ +                   const uint32_t *indices)
+ +{
-    nir_alu_instr *vec = create_vec(b->shader, num_components);
++   nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size);
+ +
+ +   /* One shared undef is emitted up front, whether or not it gets used. */
+ +   nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
+ +   nir_builder_instr_insert(&b->nb, &undef->instr);
+ +
+ +   for (unsigned i = 0; i < num_components; i++) {
+ +      uint32_t index = indices[i];
+ +      if (index == 0xffffffff) {
+ +         vec->src[i].src = nir_src_for_ssa(&undef->def);
+ +      } else if (index < src0->num_components) {
+ +         vec->src[i].src = nir_src_for_ssa(src0);
+ +         vec->src[i].swizzle[0] = index;
+ +      } else {
+ +         /* Indices past the end of src0 address src1. */
+ +         vec->src[i].src = nir_src_for_ssa(src1);
+ +         vec->src[i].swizzle[0] = index - src0->num_components;
+ +      }
+ +   }
+ +
+ +   nir_builder_instr_insert(&b->nb, &vec->instr);
+ +
+ +   return &vec->dest.dest.ssa;
+ +}
+ +
+ +/*
+ + * Concatenates a number of vectors/scalars together to produce a vector.
+ + *
+ + * The components of srcs[0..num_srcs-1] are appended in order into a
+ + * num_components-wide result, which takes its bit size from srcs[0].
+ + */
+ +static nir_ssa_def *
+ +vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
+ +                     unsigned num_srcs, nir_ssa_def **srcs)
+ +{
++   nir_alu_instr *vec = create_vec(b->shader, num_components,
++                                   srcs[0]->bit_size);
+ +
+ +   /* dest_idx is the next result slot to fill. */
+ +   unsigned dest_idx = 0;
+ +   for (unsigned i = 0; i < num_srcs; i++) {
+ +      nir_ssa_def *src = srcs[i];
+ +      for (unsigned j = 0; j < src->num_components; j++) {
+ +         vec->src[dest_idx].src = nir_src_for_ssa(src);
+ +         vec->src[dest_idx].swizzle[0] = j;
+ +         dest_idx++;
+ +      }
+ +   }
+ +
+ +   nir_builder_instr_insert(&b->nb, &vec->instr);
+ +
+ +   return &vec->dest.dest.ssa;
+ +}
+ +
+ +/*
+ + * Recursively duplicate the vtn_ssa_value tree rooted at `src`, allocating
+ + * the new nodes out of mem_ctx.  Vector/scalar leaves share the source's
+ + * SSA def; only the tree structure is copied.
+ + */
+ +static struct vtn_ssa_value *
+ +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
+ +{
+ +   struct vtn_ssa_value *copy = rzalloc(mem_ctx, struct vtn_ssa_value);
+ +   copy->type = src->type;
+ +
+ +   /* Leaf: nothing below this node to duplicate. */
+ +   if (glsl_type_is_vector_or_scalar(src->type)) {
+ +      copy->def = src->def;
+ +      return copy;
+ +   }
+ +
+ +   unsigned num_children = glsl_get_length(src->type);
+ +   copy->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, num_children);
+ +
+ +   for (unsigned child = 0; child < num_children; child++) {
+ +      copy->elems[child] = vtn_composite_copy(mem_ctx, src->elems[child]);
+ +   }
+ +
+ +   return copy;
+ +}
+ +
+ +/*
+ + * Return a copy of composite `src` in which the member addressed by the
+ + * index list is replaced by `insert`.  `src` itself is left untouched.
+ + */
+ +static struct vtn_ssa_value *
+ +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
+ +                     struct vtn_ssa_value *insert, const uint32_t *indices,
+ +                     unsigned num_indices)
+ +{
+ +   struct vtn_ssa_value *result = vtn_composite_copy(b, src);
+ +
+ +   /* Walk down to the parent of the member being replaced; every index but
+ +    * the last one is consumed here.
+ +    */
+ +   struct vtn_ssa_value *parent = result;
+ +   unsigned depth = 0;
+ +   while (depth < num_indices - 1) {
+ +      parent = parent->elems[indices[depth]];
+ +      depth++;
+ +   }
+ +
+ +   if (!glsl_type_is_vector_or_scalar(parent->type)) {
+ +      parent->elems[indices[depth]] = insert;
+ +      return result;
+ +   }
+ +
+ +   /* According to the SPIR-V spec, OpCompositeInsert may work down to the
+ +    * component granularity.  In that case the last index selects the vector
+ +    * component that receives the scalar.
+ +    */
+ +   parent->def = vtn_vector_insert(b, parent->def, insert->def,
+ +                                   indices[depth]);
+ +
+ +   return result;
+ +}
+ +
+ +/*
+ + * Follow the index list into composite `src` and return the addressed
+ + * member.  If the walk reaches a vector, the remaining (last) index picks a
+ + * single component, which is returned as a freshly allocated scalar value.
+ + */
+ +static struct vtn_ssa_value *
+ +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
+ +                      const uint32_t *indices, unsigned num_indices)
+ +{
+ +   struct vtn_ssa_value *node = src;
+ +
+ +   for (unsigned level = 0; level < num_indices; level++) {
+ +      if (!glsl_type_is_vector_or_scalar(node->type)) {
+ +         node = node->elems[indices[level]];
+ +         continue;
+ +      }
+ +
+ +      /* According to the SPIR-V spec, OpCompositeExtract may work down to
+ +       * the component granularity.  The last index is then the component
+ +       * of the vector to extract.
+ +       */
+ +      assert(level == num_indices - 1);
+ +
+ +      struct vtn_ssa_value *scalar = rzalloc(b, struct vtn_ssa_value);
+ +      scalar->type = glsl_scalar_type(glsl_get_base_type(node->type));
+ +      scalar->def = vtn_vector_extract(b, node->def, indices[level]);
+ +      return scalar;
+ +   }
+ +
+ +   return node;
+ +}
+ +
+ +/*
+ + * Handle the SPIR-V vector/composite opcodes (dynamic extract/insert,
+ + * shuffle, composite construct/extract/insert and copy-object).
+ + *
+ + * The result is pushed as the SSA value for id w[2]; its type is the one
+ + * named by w[1].  Any other opcode hits unreachable().
+ + */
+ +static void
+ +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
+ +                     const uint32_t *w, unsigned count)
+ +{
+ +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +   const struct glsl_type *type =
+ +      vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ +   val->ssa = vtn_create_ssa_value(b, type);
+ +
+ +   switch (opcode) {
+ +   case SpvOpVectorExtractDynamic:
+ +      val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
+ +                                                 vtn_ssa_value(b, w[4])->def);
+ +      break;
+ +
+ +   case SpvOpVectorInsertDynamic:
+ +      val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
+ +                                                vtn_ssa_value(b, w[4])->def,
+ +                                                vtn_ssa_value(b, w[5])->def);
+ +      break;
+ +
+ +   case SpvOpVectorShuffle:
+ +      /* The shuffle indices are the literals starting at w[5]. */
+ +      val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type),
+ +                                         vtn_ssa_value(b, w[3])->def,
+ +                                         vtn_ssa_value(b, w[4])->def,
+ +                                         w + 5);
+ +      break;
+ +
+ +   case SpvOpCompositeConstruct: {
+ +      /* Everything after the result type and result id is a constituent. */
+ +      unsigned elems = count - 3;
+ +      if (glsl_type_is_vector_or_scalar(type)) {
+ +         nir_ssa_def *srcs[4];
+ +         /* Guard the fixed-size stack array against an instruction that
+ +          * carries more constituents than a vector can hold.
+ +          */
+ +         assert(elems <= sizeof(srcs) / sizeof(srcs[0]));
+ +         for (unsigned i = 0; i < elems; i++)
+ +            srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
+ +         val->ssa->def =
+ +            vtn_vector_construct(b, glsl_get_vector_elements(type),
+ +                                 elems, srcs);
+ +      } else {
+ +         val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ +         for (unsigned i = 0; i < elems; i++)
+ +            val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
+ +      }
+ +      break;
+ +   }
+ +   case SpvOpCompositeExtract:
+ +      /* w[3] is the composite; the index literals start at w[4]. */
+ +      val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
+ +                                       w + 4, count - 4);
+ +      break;
+ +
+ +   case SpvOpCompositeInsert:
+ +      /* w[3] is the object to insert, w[4] the composite it goes into and
+ +       * the index literals start at w[5].
+ +       */
+ +      val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
+ +                                      vtn_ssa_value(b, w[3]),
+ +                                      w + 5, count - 5);
+ +      break;
+ +
+ +   case SpvOpCopyObject:
+ +      val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
+ +      break;
+ +
+ +   default:
+ +      unreachable("unknown composite operation");
+ +   }
+ +}
+ +
+ +/*
+ + * Emit the NIR intrinsic for the vertex-emission and barrier opcodes:
+ + * emit_vertex, end_primitive, memory_barrier or barrier.  For the
+ + * stream variants the stream id is taken from w[1].  Any other opcode hits
+ + * unreachable().
+ + */
+ +static void
+ +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
+ +                   const uint32_t *w, unsigned count)
+ +{
+ +   nir_intrinsic_op intrinsic_op;
+ +   switch (opcode) {
+ +   case SpvOpEmitVertex:
+ +   case SpvOpEmitStreamVertex:
+ +      intrinsic_op = nir_intrinsic_emit_vertex;
+ +      break;
+ +   case SpvOpEndPrimitive:
+ +   case SpvOpEndStreamPrimitive:
+ +      intrinsic_op = nir_intrinsic_end_primitive;
+ +      break;
+ +   case SpvOpMemoryBarrier:
+ +      intrinsic_op = nir_intrinsic_memory_barrier;
+ +      break;
+ +   case SpvOpControlBarrier:
+ +      intrinsic_op = nir_intrinsic_barrier;
+ +      break;
+ +   default:
+ +      unreachable("unknown barrier instruction");
+ +   }
+ +
+ +   nir_intrinsic_instr *intrin =
+ +      nir_intrinsic_instr_create(b->shader, intrinsic_op);
+ +
+ +   /* Only the *Stream* variants carry a stream operand. */
+ +   if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
+ +      nir_intrinsic_set_stream_id(intrin, w[1]);
+ +
+ +   nir_builder_instr_insert(&b->nb, &intrin->instr);
+ +}
+ +
+ +/*
+ + * Map a SPIR-V primitive execution mode onto the numeric value of the
+ + * corresponding GL primitive enum.  An unknown mode asserts and, when
+ + * asserts are compiled out, yields 4.
+ + */
+ +static unsigned
+ +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
+ +{
+ +   switch (mode) {
+ +   case SpvExecutionModeInputPoints:
+ +   case SpvExecutionModeOutputPoints:
+ +      return 0; /* GL_POINTS */
+ +   case SpvExecutionModeInputLines:
+ +      return 1; /* GL_LINES */
+ +   case SpvExecutionModeInputLinesAdjacency:
+ +      return 0x000A; /* GL_LINES_ADJACENCY */
+ +   case SpvExecutionModeTriangles:
+ +      return 4; /* GL_TRIANGLES */
+ +   case SpvExecutionModeInputTrianglesAdjacency:
+ +      return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */
+ +   case SpvExecutionModeQuads:
+ +      return 7; /* GL_QUADS */
+ +   case SpvExecutionModeIsolines:
+ +      return 0x8E7A; /* GL_ISOLINES */
+ +   case SpvExecutionModeOutputLineStrip:
+ +      return 3; /* GL_LINE_STRIP */
+ +   case SpvExecutionModeOutputTriangleStrip:
+ +      return 5; /* GL_TRIANGLE_STRIP */
+ +   default:
+ +      assert(!"Invalid primitive type");
+ +      return 4;
+ +   }
+ +}
+ +
+ +/*
+ + * Return the number of vertices per input primitive for a geometry-shader
+ + * input execution mode.  An unknown mode asserts and, when asserts are
+ + * compiled out, yields 0.
+ + */
+ +static unsigned
+ +vertices_in_from_spv_execution_mode(SpvExecutionMode mode)
+ +{
+ +   switch (mode) {
+ +   case SpvExecutionModeInputPoints:
+ +      return 1;
+ +   case SpvExecutionModeInputLines:
+ +      return 2;
+ +   case SpvExecutionModeInputLinesAdjacency:
+ +      return 4;
+ +   case SpvExecutionModeTriangles:
+ +      return 3;
+ +   case SpvExecutionModeInputTrianglesAdjacency:
+ +      return 6;
+ +   default:
+ +      assert(!"Invalid GS input mode");
+ +      return 0;
+ +   }
+ +}
+ +
+ +/*
+ + * Map a SPIR-V execution model onto the corresponding Mesa shader stage.
+ + * Models other than the six listed hit unreachable().
+ + */
+ +static gl_shader_stage
+ +stage_for_execution_model(SpvExecutionModel model)
+ +{
+ +   switch (model) {
+ +   case SpvExecutionModelVertex:
+ +      return MESA_SHADER_VERTEX;
+ +   case SpvExecutionModelTessellationControl:
+ +      return MESA_SHADER_TESS_CTRL;
+ +   case SpvExecutionModelTessellationEvaluation:
+ +      return MESA_SHADER_TESS_EVAL;
+ +   case SpvExecutionModelGeometry:
+ +      return MESA_SHADER_GEOMETRY;
+ +   case SpvExecutionModelFragment:
+ +      return MESA_SHADER_FRAGMENT;
+ +   case SpvExecutionModelGLCompute:
+ +      return MESA_SHADER_COMPUTE;
+ +   default:
+ +      unreachable("Unsupported execution model");
+ +   }
+ +}
+ +
+ +/*
+ + * Handle one instruction of the module preamble (debug info, capabilities,
+ + * extension imports, memory model, entry points, names and decorations).
+ + *
+ + * Returns true when the opcode was a preamble instruction, and false for
+ + * the first opcode that is not one.
+ + */
+ +static bool
+ +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
+ +                                const uint32_t *w, unsigned count)
+ +{
+ +   switch (opcode) {
+ +   case SpvOpSource:
+ +   case SpvOpSourceExtension:
+ +   case SpvOpSourceContinued:
+ +   case SpvOpExtension:
+ +      /* Unhandled, but these are for debug so that's ok. */
+ +      break;
+ +
+ +   case SpvOpCapability: {
+ +      SpvCapability cap = w[1];
+ +      switch (cap) {
+ +      /* Capabilities that are accepted without comment. */
+ +      case SpvCapabilityMatrix:
+ +      case SpvCapabilityShader:
+ +      case SpvCapabilityGeometry:
+ +      case SpvCapabilityTessellationPointSize:
+ +      case SpvCapabilityGeometryPointSize:
+ +      case SpvCapabilityUniformBufferArrayDynamicIndexing:
+ +      case SpvCapabilitySampledImageArrayDynamicIndexing:
+ +      case SpvCapabilityStorageBufferArrayDynamicIndexing:
+ +      case SpvCapabilityStorageImageArrayDynamicIndexing:
+ +      case SpvCapabilityImageRect:
+ +      case SpvCapabilitySampledRect:
+ +      case SpvCapabilitySampled1D:
+ +      case SpvCapabilityImage1D:
+ +      case SpvCapabilitySampledCubeArray:
+ +      case SpvCapabilitySampledBuffer:
+ +      case SpvCapabilityImageBuffer:
+ +      case SpvCapabilityImageQuery:
+ +         break;
+ +      /* Capabilities that only produce a warning on stderr. */
+ +      case SpvCapabilityClipDistance:
+ +      case SpvCapabilityCullDistance:
+ +      case SpvCapabilityGeometryStreams:
+ +         fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n");
+ +         break;
+ +      default:
+ +         assert(!"Unsupported capability");
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpExtInstImport:
+ +      vtn_handle_extension(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpMemoryModel:
+ +      /* Only logical addressing with the GLSL450 memory model is accepted. */
+ +      assert(w[1] == SpvAddressingModelLogical);
+ +      assert(w[2] == SpvMemoryModelGLSL450);
+ +      break;
+ +
+ +   case SpvOpEntryPoint: {
+ +      struct vtn_value *entry_point = &b->values[w[2]];
+ +      /* Let this be a name label regardless */
+ +      unsigned name_words;
+ +      entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
+ +
+ +      /* Skip entry points whose name or stage is not the requested one. */
+ +      if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
+ +          stage_for_execution_model(w[1]) != b->entry_point_stage)
+ +         break;
+ +
+ +      /* At most one entry point may match. */
+ +      assert(b->entry_point == NULL);
+ +      b->entry_point = entry_point;
+ +      break;
+ +   }
+ +
+ +   case SpvOpString:
+ +      vtn_push_value(b, w[1], vtn_value_type_string)->str =
+ +         vtn_string_literal(b, &w[2], count - 2, NULL);
+ +      break;
+ +
+ +   case SpvOpName:
+ +      b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
+ +      break;
+ +
+ +   case SpvOpMemberName:
+ +      /* TODO */
+ +      break;
+ +
+ +   case SpvOpExecutionMode:
+ +   case SpvOpDecorationGroup:
+ +   case SpvOpDecorate:
+ +   case SpvOpMemberDecorate:
+ +   case SpvOpGroupDecorate:
+ +   case SpvOpGroupMemberDecorate:
+ +      vtn_handle_decoration(b, opcode, w, count);
+ +      break;
+ +
+ +   default:
+ +      return false; /* End of preamble */
+ +   }
+ +
+ +   return true;
+ +}
+ +
+ +/*
+ + * Apply one execution mode of the selected entry point to the shader being
+ + * built (b->shader->info and builder state).  The signature carries an
+ + * unused `data` pointer; entry_point is asserted to be b->entry_point.
+ + * Modes that are not supported yet trip an assert.
+ + */
+ +static void
+ +vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
+ +                          const struct vtn_decoration *mode, void *data)
+ +{
+ +   assert(b->entry_point == entry_point);
+ +
+ +   switch(mode->exec_mode) {
+ +   case SpvExecutionModeOriginUpperLeft:
+ +   case SpvExecutionModeOriginLowerLeft:
+ +      b->origin_upper_left =
+ +         (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
+ +      break;
+ +
+ +   case SpvExecutionModeEarlyFragmentTests:
+ +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ +      b->shader->info.fs.early_fragment_tests = true;
+ +      break;
+ +
+ +   case SpvExecutionModeInvocations:
+ +      assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ +      /* Clamp to at least one invocation. */
+ +      b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
+ +      break;
+ +
+ +   case SpvExecutionModeDepthReplacing:
+ +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+ +      break;
+ +   case SpvExecutionModeDepthGreater:
+ +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
+ +      break;
+ +   case SpvExecutionModeDepthLess:
+ +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
+ +      break;
+ +   case SpvExecutionModeDepthUnchanged:
+ +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+ +      break;
+ +
+ +   case SpvExecutionModeLocalSize:
+ +      assert(b->shader->stage == MESA_SHADER_COMPUTE);
+ +      /* The three literals are the x, y and z workgroup dimensions. */
+ +      b->shader->info.cs.local_size[0] = mode->literals[0];
+ +      b->shader->info.cs.local_size[1] = mode->literals[1];
+ +      b->shader->info.cs.local_size[2] = mode->literals[2];
+ +      break;
+ +   case SpvExecutionModeLocalSizeHint:
+ +      break; /* Nothing to do with this */
+ +
+ +   case SpvExecutionModeOutputVertices:
+ +      assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ +      b->shader->info.gs.vertices_out = mode->literals[0];
+ +      break;
+ +
+ +   case SpvExecutionModeInputPoints:
+ +   case SpvExecutionModeInputLines:
+ +   case SpvExecutionModeInputLinesAdjacency:
+ +   case SpvExecutionModeTriangles:
+ +   case SpvExecutionModeInputTrianglesAdjacency:
+ +   case SpvExecutionModeQuads:
+ +   case SpvExecutionModeIsolines:
+ +      /* Only the geometry-shader meaning of these modes is handled. */
+ +      if (b->shader->stage == MESA_SHADER_GEOMETRY) {
+ +         b->shader->info.gs.vertices_in =
+ +            vertices_in_from_spv_execution_mode(mode->exec_mode);
+ +      } else {
+ +         assert(!"Tesselation shaders not yet supported");
+ +      }
+ +      break;
+ +
+ +   case SpvExecutionModeOutputPoints:
+ +   case SpvExecutionModeOutputLineStrip:
+ +   case SpvExecutionModeOutputTriangleStrip:
+ +      assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ +      b->shader->info.gs.output_primitive =
+ +         gl_primitive_from_spv_execution_mode(mode->exec_mode);
+ +      break;
+ +
+ +   case SpvExecutionModeSpacingEqual:
+ +   case SpvExecutionModeSpacingFractionalEven:
+ +   case SpvExecutionModeSpacingFractionalOdd:
+ +   case SpvExecutionModeVertexOrderCw:
+ +   case SpvExecutionModeVertexOrderCcw:
+ +   case SpvExecutionModePointMode:
+ +      assert(!"TODO: Add tessellation metadata");
+ +      break;
+ +
+ +   case SpvExecutionModePixelCenterInteger:
+ +   case SpvExecutionModeXfb:
+ +      assert(!"Unhandled execution mode");
+ +      break;
+ +
+ +   case SpvExecutionModeVecTypeHint:
+ +   case SpvExecutionModeContractionOff:
+ +      break; /* OpenCL */
+ +   }
+ +}
+ +
+ +/*
+ + * Handle one instruction of the types/constants/variables section.
+ + *
+ + * Type opcodes go to vtn_handle_type(), constant opcodes to
+ + * vtn_handle_constant() and OpVariable to vtn_handle_variables().  Preamble
+ + * opcodes are invalid here and trip an assert.  Returns true when the
+ + * opcode was recognised, and false for the first opcode that is not part of
+ + * this section.
+ + */
+ +static bool
+ +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
+ +                                        const uint32_t *w, unsigned count)
+ +{
+ +   switch (opcode) {
+ +   /* Preamble opcodes must not show up again in this section. */
+ +   case SpvOpSource:
+ +   case SpvOpSourceContinued:
+ +   case SpvOpSourceExtension:
+ +   case SpvOpExtension:
+ +   case SpvOpCapability:
+ +   case SpvOpExtInstImport:
+ +   case SpvOpMemoryModel:
+ +   case SpvOpEntryPoint:
+ +   case SpvOpExecutionMode:
+ +   case SpvOpString:
+ +   case SpvOpName:
+ +   case SpvOpMemberName:
+ +   case SpvOpDecorationGroup:
+ +   case SpvOpDecorate:
+ +   case SpvOpMemberDecorate:
+ +   case SpvOpGroupDecorate:
+ +   case SpvOpGroupMemberDecorate:
+ +      assert(!"Invalid opcode types and variables section");
+ +      break;
+ +
+ +   case SpvOpTypeVoid:
+ +   case SpvOpTypeBool:
+ +   case SpvOpTypeInt:
+ +   case SpvOpTypeFloat:
+ +   case SpvOpTypeVector:
+ +   case SpvOpTypeMatrix:
+ +   case SpvOpTypeImage:
+ +   case SpvOpTypeSampler:
+ +   case SpvOpTypeSampledImage:
+ +   case SpvOpTypeArray:
+ +   case SpvOpTypeRuntimeArray:
+ +   case SpvOpTypeStruct:
+ +   case SpvOpTypeOpaque:
+ +   case SpvOpTypePointer:
+ +   case SpvOpTypeFunction:
+ +   case SpvOpTypeEvent:
+ +   case SpvOpTypeDeviceEvent:
+ +   case SpvOpTypeReserveId:
+ +   case SpvOpTypeQueue:
+ +   case SpvOpTypePipe:
+ +      vtn_handle_type(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpConstantTrue:
+ +   case SpvOpConstantFalse:
+ +   case SpvOpConstant:
+ +   case SpvOpConstantComposite:
+ +   case SpvOpConstantSampler:
+ +   case SpvOpConstantNull:
+ +   case SpvOpSpecConstantTrue:
+ +   case SpvOpSpecConstantFalse:
+ +   case SpvOpSpecConstant:
+ +   case SpvOpSpecConstantComposite:
+ +   case SpvOpSpecConstantOp:
+ +      vtn_handle_constant(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpVariable:
+ +      vtn_handle_variables(b, opcode, w, count);
+ +      break;
+ +
+ +   default:
+ +      return false; /* End of the types and variables section */
+ +   }
+ +
+ +   return true;
+ +}
+ +
+ +static bool
+ +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
+ +                            const uint32_t *w, unsigned count)
+ +{ /* Dispatches one function-body instruction to the handler for its opcode group. */
+ +   switch (opcode) {
+ +   case SpvOpLabel: /* nothing is emitted for a label here */
+ +      break;
+ +
+ +   case SpvOpLoopMerge:
+ +   case SpvOpSelectionMerge:
+ +      /* This is handled by cfg pre-pass and walk_blocks */
+ +      break;
+ +
+ +   case SpvOpUndef: { /* registers w[2] as an undef value whose type comes from w[1] */
+ +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
+ +      val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ +      break;
+ +   }
+ +
+ +   case SpvOpExtInst:
+ +      vtn_handle_extension(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpVariable:
+ +   case SpvOpLoad:
+ +   case SpvOpStore:
+ +   case SpvOpCopyMemory:
+ +   case SpvOpCopyMemorySized:
+ +   case SpvOpAccessChain:
+ +   case SpvOpInBoundsAccessChain:
+ +   case SpvOpArrayLength:
+ +      vtn_handle_variables(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpFunctionCall:
+ +      vtn_handle_function_call(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpSampledImage:
+ +   case SpvOpImage:
+ +   case SpvOpImageSampleImplicitLod:
+ +   case SpvOpImageSampleExplicitLod:
+ +   case SpvOpImageSampleDrefImplicitLod:
+ +   case SpvOpImageSampleDrefExplicitLod:
+ +   case SpvOpImageSampleProjImplicitLod:
+ +   case SpvOpImageSampleProjExplicitLod:
+ +   case SpvOpImageSampleProjDrefImplicitLod:
+ +   case SpvOpImageSampleProjDrefExplicitLod:
+ +   case SpvOpImageFetch:
+ +   case SpvOpImageGather:
+ +   case SpvOpImageDrefGather:
+ +   case SpvOpImageQuerySizeLod:
+ +   case SpvOpImageQueryLod:
+ +   case SpvOpImageQueryLevels:
+ +   case SpvOpImageQuerySamples:
+ +      vtn_handle_texture(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpImageRead:
+ +   case SpvOpImageWrite:
+ +   case SpvOpImageTexelPointer:
+ +      vtn_handle_image(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpImageQuerySize: { /* image vs. texture path, chosen by the variable's interface type */
+ +      struct vtn_access_chain *image =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +      if (glsl_type_is_image(image->var->var->interface_type)) {
+ +         vtn_handle_image(b, opcode, w, count);
+ +      } else {
+ +         vtn_handle_texture(b, opcode, w, count);
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpAtomicExchange:
+ +   case SpvOpAtomicCompareExchange:
+ +   case SpvOpAtomicCompareExchangeWeak:
+ +   case SpvOpAtomicIIncrement:
+ +   case SpvOpAtomicIDecrement:
+ +   case SpvOpAtomicIAdd:
+ +   case SpvOpAtomicISub:
+ +   case SpvOpAtomicSMin:
+ +   case SpvOpAtomicUMin:
+ +   case SpvOpAtomicSMax:
+ +   case SpvOpAtomicUMax:
+ +   case SpvOpAtomicAnd:
+ +   case SpvOpAtomicOr:
+ +   case SpvOpAtomicXor: { /* the kind of value at w[3] picks the image or the access-chain path */
+ +      struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
+ +      if (pointer->value_type == vtn_value_type_image_pointer) {
+ +         vtn_handle_image(b, opcode, w, count);
+ +      } else {
+ +         assert(pointer->value_type == vtn_value_type_access_chain);
+ +         vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count);
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpSNegate:
+ +   case SpvOpFNegate:
+ +   case SpvOpNot:
+ +   case SpvOpAny:
+ +   case SpvOpAll:
+ +   case SpvOpConvertFToU:
+ +   case SpvOpConvertFToS:
+ +   case SpvOpConvertSToF:
+ +   case SpvOpConvertUToF:
+ +   case SpvOpUConvert:
+ +   case SpvOpSConvert:
+ +   case SpvOpFConvert:
+ +   case SpvOpQuantizeToF16:
+ +   case SpvOpConvertPtrToU:
+ +   case SpvOpConvertUToPtr:
+ +   case SpvOpPtrCastToGeneric:
+ +   case SpvOpGenericCastToPtr:
+ +   case SpvOpBitcast:
+ +   case SpvOpIsNan:
+ +   case SpvOpIsInf:
+ +   case SpvOpIsFinite:
+ +   case SpvOpIsNormal:
+ +   case SpvOpSignBitSet:
+ +   case SpvOpLessOrGreater:
+ +   case SpvOpOrdered:
+ +   case SpvOpUnordered:
+ +   case SpvOpIAdd:
+ +   case SpvOpFAdd:
+ +   case SpvOpISub:
+ +   case SpvOpFSub:
+ +   case SpvOpIMul:
+ +   case SpvOpFMul:
+ +   case SpvOpUDiv:
+ +   case SpvOpSDiv:
+ +   case SpvOpFDiv:
+ +   case SpvOpUMod:
+ +   case SpvOpSRem:
+ +   case SpvOpSMod:
+ +   case SpvOpFRem:
+ +   case SpvOpFMod:
+ +   case SpvOpVectorTimesScalar:
+ +   case SpvOpDot:
+ +   case SpvOpIAddCarry:
+ +   case SpvOpISubBorrow:
+ +   case SpvOpUMulExtended:
+ +   case SpvOpSMulExtended:
+ +   case SpvOpShiftRightLogical:
+ +   case SpvOpShiftRightArithmetic:
+ +   case SpvOpShiftLeftLogical:
+ +   case SpvOpLogicalEqual:
+ +   case SpvOpLogicalNotEqual:
+ +   case SpvOpLogicalOr:
+ +   case SpvOpLogicalAnd:
+ +   case SpvOpLogicalNot:
+ +   case SpvOpBitwiseOr:
+ +   case SpvOpBitwiseXor:
+ +   case SpvOpBitwiseAnd:
+ +   case SpvOpSelect:
+ +   case SpvOpIEqual:
+ +   case SpvOpFOrdEqual:
+ +   case SpvOpFUnordEqual:
+ +   case SpvOpINotEqual:
+ +   case SpvOpFOrdNotEqual:
+ +   case SpvOpFUnordNotEqual:
+ +   case SpvOpULessThan:
+ +   case SpvOpSLessThan:
+ +   case SpvOpFOrdLessThan:
+ +   case SpvOpFUnordLessThan:
+ +   case SpvOpUGreaterThan:
+ +   case SpvOpSGreaterThan:
+ +   case SpvOpFOrdGreaterThan:
+ +   case SpvOpFUnordGreaterThan:
+ +   case SpvOpULessThanEqual:
+ +   case SpvOpSLessThanEqual:
+ +   case SpvOpFOrdLessThanEqual:
+ +   case SpvOpFUnordLessThanEqual:
+ +   case SpvOpUGreaterThanEqual:
+ +   case SpvOpSGreaterThanEqual:
+ +   case SpvOpFOrdGreaterThanEqual:
+ +   case SpvOpFUnordGreaterThanEqual:
+ +   case SpvOpDPdx:
+ +   case SpvOpDPdy:
+ +   case SpvOpFwidth:
+ +   case SpvOpDPdxFine:
+ +   case SpvOpDPdyFine:
+ +   case SpvOpFwidthFine:
+ +   case SpvOpDPdxCoarse:
+ +   case SpvOpDPdyCoarse:
+ +   case SpvOpFwidthCoarse:
+ +   case SpvOpBitFieldInsert:
+ +   case SpvOpBitFieldSExtract:
+ +   case SpvOpBitFieldUExtract:
+ +   case SpvOpBitReverse:
+ +   case SpvOpBitCount:
+ +   case SpvOpTranspose:
+ +   case SpvOpOuterProduct:
+ +   case SpvOpMatrixTimesScalar:
+ +   case SpvOpVectorTimesMatrix:
+ +   case SpvOpMatrixTimesVector:
+ +   case SpvOpMatrixTimesMatrix:
+ +      vtn_handle_alu(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpVectorExtractDynamic:
+ +   case SpvOpVectorInsertDynamic:
+ +   case SpvOpVectorShuffle:
+ +   case SpvOpCompositeConstruct:
+ +   case SpvOpCompositeExtract:
+ +   case SpvOpCompositeInsert:
+ +   case SpvOpCopyObject:
+ +      vtn_handle_composite(b, opcode, w, count);
+ +      break;
+ +
+ +   case SpvOpEmitVertex:
+ +   case SpvOpEndPrimitive:
+ +   case SpvOpEmitStreamVertex:
+ +   case SpvOpEndStreamPrimitive:
+ +   case SpvOpControlBarrier:
+ +   case SpvOpMemoryBarrier:
+ +      vtn_handle_barrier(b, opcode, w, count);
+ +      break;
+ +
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +
+ +   return true; /* every opcode that gets past the switch was handled */
+ +}
+ +
+ +nir_function *
+ +spirv_to_nir(const uint32_t *words, size_t word_count,
+ +             struct nir_spirv_specialization *spec, unsigned num_spec,
+ +             gl_shader_stage stage, const char *entry_point_name,
+ +             const nir_shader_compiler_options *options)
+ +{ /* Translates the SPIR-V module in words[0 .. word_count) and returns the entry point's nir_function. */
+ +   const uint32_t *word_end = words + word_count;
+ +
+ +   /* Handle the SPIR-V header (first 5 dwords) */
+ +   assert(word_count > 5);
+ +
+ +   assert(words[0] == SpvMagicNumber);
+ +   assert(words[1] >= 0x10000);
+ +   /* words[2] == generator magic */
+ +   unsigned value_id_bound = words[3]; /* sizes the b->values array below */
+ +   assert(words[4] == 0);
+ +
+ +   words+= 5; /* step past the header words checked above */
+ +
+ +   /* Initialize the vtn_builder object */
+ +   struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
+ +   b->value_id_bound = value_id_bound;
+ +   b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
+ +   exec_list_make_empty(&b->functions);
+ +   b->entry_point_stage = stage;
+ +   b->entry_point_name = entry_point_name;
+ +
+ +   /* Handle all the preamble instructions */
+ +   words = vtn_foreach_instruction(b, words, word_end,
+ +                                   vtn_handle_preamble_instruction);
+ +
+ +   if (b->entry_point == NULL) { /* asserts; if asserts are compiled out, frees b and returns NULL */
+ +      assert(!"Entry point not found");
+ +      ralloc_free(b);
+ +      return NULL;
+ +   }
+ +
+ +   b->shader = nir_shader_create(NULL, stage, options);
+ +
+ +   /* Parse execution modes */
+ +   vtn_foreach_execution_mode(b, b->entry_point,
+ +                              vtn_handle_execution_mode, NULL);
+ +
+ +   b->specializations = spec;
+ +   b->num_specializations = num_spec;
+ +
+ +   /* Handle all variable, type, and constant instructions */
+ +   words = vtn_foreach_instruction(b, words, word_end,
+ +                                   vtn_handle_variable_or_type_instruction);
+ +
+ +   vtn_build_cfg(b, words, word_end);
+ +
+ +   foreach_list_typed(struct vtn_function, func, node, &b->functions) { /* emit each function in b->functions */
+ +      b->impl = func->impl;
+ +      b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, /* a new table for every function */
+ +                                               _mesa_key_pointer_equal);
+ +
+ +      vtn_function_emit(b, func, vtn_handle_body_instruction);
+ +   }
+ +
+ +   assert(b->entry_point->value_type == vtn_value_type_function);
+ +   nir_function *entry_point = b->entry_point->func->impl->function; /* fetched before b is freed */
+ +   assert(entry_point);
+ +
+ +   ralloc_free(b); /* NOTE(review): presumably safe because the shader was created with a NULL parent, not under b — confirm */
+ +
+ +   return entry_point;
+ +}
diff --cc src/compiler/nir/spirv/vtn_glsl450.c

index 6b649fd,0000000..3360fda

mode 100644,000000..100644
--- 1/src/compiler/nir/spirv/vtn_glsl450.c
--- /dev/null
+++ b/src/compiler/nir/spirv/vtn_glsl450.c
@@@ -1,669 -1,0 +1,671 @@@
-                      glsl_get_vector_elements(val->ssa->type), val->name);
+ +/*
+ + * Copyright © 2015 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + *
+ + * Authors:
+ + *    Jason Ekstrand (jason@jlekstrand.net)
+ + *
+ + */
+ +
+ +#include "vtn_private.h"
+ +#include "GLSL.std.450.h"
+ +
+ +#define M_PIf   ((float) M_PI)
+ +#define M_PI_2f ((float) M_PI_2)
+ +#define M_PI_4f ((float) M_PI_4)
+ +
+ +static nir_ssa_def *
+ +build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
+ +{ /* Determinant of a 2x2 matrix given as its two column vectors. */
+ +   unsigned swiz[4] = {1, 0, 0, 0}; /* the 2-component swizzle below uses {1, 0} */
+ +   nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
+ +   return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1)); /* p[0] - p[1] */
+ +}
+ +
+ +static nir_ssa_def *
+ +build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
+ +{ /* Determinant of a 3x3 matrix given as its three column vectors. */
+ +   unsigned yzx[4] = {1, 2, 0, 0}; /* the 3-component swizzles below use the first 3 entries */
+ +   unsigned zxy[4] = {2, 0, 1, 0};
+ +
+ +   nir_ssa_def *prod0 = /* col[0] * (col[1].yzx * col[2].zxy) */
+ +      nir_fmul(b, col[0],
+ +               nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
+ +                           nir_swizzle(b, col[2], zxy, 3, true)));
+ +   nir_ssa_def *prod1 = /* col[0] * (col[1].zxy * col[2].yzx) */
+ +      nir_fmul(b, col[0],
+ +               nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
+ +                           nir_swizzle(b, col[2], yzx, 3, true)));
+ +
+ +   nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
+ +
+ +   return nir_fadd(b, nir_channel(b, diff, 0),
+ +                      nir_fadd(b, nir_channel(b, diff, 1),
+ +                                  nir_channel(b, diff, 2))); /* diff[0] + (diff[1] + diff[2]) */
+ +}
+ +
+ +static nir_ssa_def *
+ +build_mat4_det(nir_builder *b, nir_ssa_def **col)
+ +{ /* Determinant of a 4x4 matrix: col[0] times 3x3 sub-determinants, with alternating signs. */
+ +   nir_ssa_def *subdet[4];
+ +   for (unsigned i = 0; i < 4; i++) {
+ +      unsigned swiz[3];
+ +      for (unsigned j = 0; j < 3; j++)
+ +         swiz[j] = j + (j >= i); /* the indices 0..3 with i skipped */
+ +
+ +      nir_ssa_def *subcol[3];
+ +      subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
+ +      subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
+ +      subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
+ +
+ +      subdet[i] = build_mat3_det(b, subcol); /* columns 1..3 with component i dropped */
+ +   }
+ +
+ +   nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
+ +
+ +   return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
+ +                                  nir_channel(b, prod, 1)),
+ +                      nir_fsub(b, nir_channel(b, prod, 2),
+ +                                  nir_channel(b, prod, 3))); /* (prod[0] - prod[1]) + (prod[2] - prod[3]) */
+ +}
+ +
+ +static nir_ssa_def *
+ +build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
+ +{ /* Determinant of the matrix value src, dispatching on its size (2, 3 or 4). */
+ +   unsigned size = glsl_get_vector_elements(src->type); /* also used as the number of columns read below */
+ +
+ +   nir_ssa_def *cols[4];
+ +   for (unsigned i = 0; i < size; i++)
+ +      cols[i] = src->elems[i]->def; /* one SSA def per column */
+ +
+ +   switch(size) {
+ +   case 2: return build_mat2_det(&b->nb, cols);
+ +   case 3: return build_mat3_det(&b->nb, cols);
+ +   case 4: return build_mat4_det(&b->nb, cols);
+ +   default:
+ +      unreachable("Invalid matrix size");
+ +   }
+ +}
+ +
+ +/* Computes the determinant of the submatrix given by taking src and
+ + * removing the specified row and column.
+ + *
+ + * size is the dimension of src.  For size == 2 the result is the single
+ + * remaining element; for size 3 and 4 it is the determinant of the
+ + * remaining 2x2 or 3x3 matrix.
+ + */
+ +static nir_ssa_def *
+ +build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
+ +                 unsigned size, unsigned row, unsigned col)
+ +{
+ +   assert(row < size && col < size);
+ +   if (size == 2) {
+ +      return nir_channel(b, src->elems[1 - col]->def, 1 - row); /* the element in the other row and other column */
+ +   } else {
+ +      /* Swizzle to get all but the specified row */
+ +      unsigned swiz[3];
+ +      for (unsigned j = 0; j < 3; j++)
+ +         swiz[j] = j + (j >= row);
+ +
+ +      /* Grab all but the specified column */
+ +      nir_ssa_def *subcol[3];
+ +      for (unsigned j = 0; j < size; j++) {
+ +         if (j != col) {
+ +            subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, /* columns after col shift down by one */
+ +                                                swiz, size - 1, true);
+ +         }
+ +      }
+ +
+ +      if (size == 3) {
+ +         return build_mat2_det(b, subcol);
+ +      } else {
+ +         assert(size == 4);
+ +         return build_mat3_det(b, subcol);
+ +      }
+ +   }
+ +}
+ +
+ +static struct vtn_ssa_value *
+ +matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
+ +{ /* Inverse of src, built as its adjugate scaled by 1 / det(src). */
+ +   nir_ssa_def *adj_col[4];
+ +   unsigned size = glsl_get_vector_elements(src->type);
+ +
+ +   /* Build up an adjugate matrix */
+ +   for (unsigned c = 0; c < size; c++) {
+ +      nir_ssa_def *elem[4];
+ +      for (unsigned r = 0; r < size; r++) {
+ +         elem[r] = build_mat_subdet(&b->nb, src, size, c, r); /* sub-determinant with row c and column r removed */
+ +
+ +         if ((r + c) % 2) /* negate when r + c is odd */
+ +            elem[r] = nir_fneg(&b->nb, elem[r]);
+ +      }
+ +
+ +      adj_col[c] = nir_vec(&b->nb, elem, size);
+ +   }
+ +
+ +   nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); /* 1 / det(src) */
+ +
+ +   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
+ +   for (unsigned i = 0; i < size; i++)
+ +      val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); /* scale each adjugate column */
+ +
+ +   return val;
+ +}
+ +
+ +static nir_ssa_def*
+ +build_length(nir_builder *b, nir_ssa_def *vec)
+ +{ /* Square root of vec dotted with itself; the multiply/dot op is picked by component count. */
+ +   switch (vec->num_components) {
+ +   case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); /* scalar: plain multiply instead of a dot */
+ +   case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
+ +   case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
+ +   case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
+ +   default:
+ +      unreachable("Invalid number of components");
+ +   }
+ +}
+ +
+ +static inline nir_ssa_def *
+ +build_fclamp(nir_builder *b,
+ +             nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
+ +{ /* Float clamp: the lower bound is applied first, then the upper bound. */
+ +   return nir_fmin(b, nir_fmax(b, x, min_val), max_val); /* min(max(x, min_val), max_val) */
+ +}
+ +
+ +/**
+ + * Return e^x.
+ + *
+ + * Emitted as exp2(x * M_LOG2E).
+ + */
+ +static nir_ssa_def *
+ +build_exp(nir_builder *b, nir_ssa_def *x)
+ +{
+ +   return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
+ +}
+ +
+ +/**
+ + * Return ln(x) - the natural logarithm of x.
+ + *
+ + * Emitted as log2(x) * (1.0 / M_LOG2E).
+ + */
+ +static nir_ssa_def *
+ +build_log(nir_builder *b, nir_ssa_def *x)
+ +{
+ +   return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
+ +}
+ +
+ +/**
+ + * Approximate asin(x) by the formula:
+ + *    asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
+ + *
+ + * which is correct to first order at x=0 and x=±1 regardless of the p
+ + * coefficients but can be made second-order correct at both ends by selecting
+ + * the fit coefficients appropriately.  Different p coefficients can be used
+ + * in the asin and acos implementation to minimize some relative error metric
+ + * in each case.
+ + *
+ + * x enters the expression only through |x| and sign(x); p0 and p1 are
+ + * emitted as float immediates.
+ + */
+ +static nir_ssa_def *
+ +build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
+ +{
+ +   nir_ssa_def *abs_x = nir_fabs(b, x);
+ +   return nir_fmul(b, nir_fsign(b, x),
+ +                   nir_fsub(b, nir_imm_float(b, M_PI_2f),
+ +                            nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
+ +                                     nir_fadd(b, nir_imm_float(b, M_PI_2f),
+ +                                              nir_fmul(b, abs_x,
+ +                                                       nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
+ +                                                                nir_fmul(b, abs_x,
+ +                                                                         nir_fadd(b, nir_imm_float(b, p0),
+ +                                                                                  nir_fmul(b, abs_x,
+ +                                                                                           nir_imm_float(b, p1))))))))));
+ +}
+ +
+ +/**
+ + * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
+ + *
+ + * The additions are chained left to right over the first `terms` entries.
+ + * xs[0] is read unconditionally, so xs must hold at least one entry.
+ + */
+ +static nir_ssa_def *
+ +build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
+ +{
+ +   nir_ssa_def *accum = xs[0];
+ +
+ +   for (int i = 1; i < terms; i++)
+ +      accum = nir_fadd(b, accum, xs[i]);
+ +
+ +   return accum;
+ +}
+ +
+ +static nir_ssa_def *
+ +build_atan(nir_builder *b, nir_ssa_def *y_over_x)
+ +{ /* Polynomial approximation of atan(y_over_x), following the steps commented below. */
+ +   nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
+ +   nir_ssa_def *one = nir_imm_float(b, 1.0f);
+ +
+ +   /*
+ +    * range-reduction, first step:
+ +    *
+ +    *      / y_over_x         if |y_over_x| <= 1.0;
+ +    * x = <
+ +    *      \ 1.0 / y_over_x   otherwise
+ +    *
+ +    * The code works on |y_over_x|, as min(|y_over_x|, 1) / max(|y_over_x|, 1);
+ +    * the sign is put back by the sign fixup at the end.
+ +    */
+ +   nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
+ +                                nir_fmax(b, abs_y_over_x, one));
+ +
+ +   /*
+ +    * approximate atan by evaluating polynomial:
+ +    *
+ +    * x   * 0.9999793128310355 - x^3  * 0.3326756418091246 +
+ +    * x^5 * 0.1938924977115610 - x^7  * 0.1173503194786851 +
+ +    * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
+ +    */
+ +   nir_ssa_def *x_2  = nir_fmul(b, x,   x);
+ +   nir_ssa_def *x_3  = nir_fmul(b, x_2, x);
+ +   nir_ssa_def *x_5  = nir_fmul(b, x_3, x_2);
+ +   nir_ssa_def *x_7  = nir_fmul(b, x_5, x_2);
+ +   nir_ssa_def *x_9  = nir_fmul(b, x_7, x_2);
+ +   nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
+ +
+ +   nir_ssa_def *polynomial_terms[] = {
+ +      nir_fmul(b, x,    nir_imm_float(b,  0.9999793128310355f)),
+ +      nir_fmul(b, x_3,  nir_imm_float(b, -0.3326756418091246f)),
+ +      nir_fmul(b, x_5,  nir_imm_float(b,  0.1938924977115610f)),
+ +      nir_fmul(b, x_7,  nir_imm_float(b, -0.1173503194786851f)),
+ +      nir_fmul(b, x_9,  nir_imm_float(b,  0.0536813784310406f)),
+ +      nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
+ +   };
+ +
+ +   nir_ssa_def *tmp =
+ +      build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
+ +
+ +   /* range-reduction fixup: adds (pi/2 - 2 * tmp) scaled by b2f(1 < |y_over_x|) */
+ +   tmp = nir_fadd(b, tmp,
+ +                  nir_fmul(b,
+ +                           nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
+ +                           nir_fadd(b, nir_fmul(b, tmp,
+ +                                                nir_imm_float(b, -2.0f)),
+ +                                       nir_imm_float(b, M_PI_2f))));
+ +
+ +   /* sign fixup */
+ +   return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
+ +}
+ +
+ +static nir_ssa_def *
+ +build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
+ +{ /* atan2(y, x), built on build_atan(y / x) with a separate result when x is negligible. */
+ +   nir_ssa_def *zero = nir_imm_float(b, 0.0f);
+ +
+ +   /* If |x| >= 1.0e-8 * |y|: */
+ +   nir_ssa_def *condition =
+ +      nir_fge(b, nir_fabs(b, x),
+ +              nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
+ +
+ +   /* Then...call atan(y/x) and fix it up: when x < 0, add pi if y >= 0
+ +    * and -pi otherwise.
+ +    */
+ +   nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
+ +   nir_ssa_def *r_then =
+ +      nir_bcsel(b, nir_flt(b, x, zero),
+ +                   nir_fadd(b, atan1,
+ +                               nir_bcsel(b, nir_fge(b, y, zero),
+ +                                            nir_imm_float(b, M_PIf),
+ +                                            nir_imm_float(b, -M_PIf))),
+ +                   atan1);
+ +
+ +   /* Else... the result is sign(y) * pi/2 */
+ +   nir_ssa_def *r_else =
+ +      nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
+ +
+ +   return nir_bcsel(b, condition, r_then, r_else); /* both results are built; bcsel picks one */
+ +}
+ +
+ +static nir_ssa_def *
+ +build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
+ +{ /* Splits x: returns the significand part and writes the exponent part to *exponent. */
+ +   nir_ssa_def *abs_x = nir_fabs(b, x);
+ +   nir_ssa_def *zero = nir_imm_float(b, 0.0f); /* also the "else" operand of both bcsels below */
+ +
+ +   /* Single-precision floating-point values are stored as
+ +    *   1 sign bit;
+ +    *   8 exponent bits;
+ +    *   23 mantissa bits.
+ +    *
+ +    * An exponent shift of 23 will shift the mantissa out, leaving only the
+ +    * exponent and sign bit (which itself may be zero, if the absolute value
+ +    * was taken before the bitcast and shift).
+ +    */
+ +   nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
+ +   nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
+ +
+ +   nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
+ +
+ +   /* Exponent of floating-point values in the range [0.5, 1.0). */
+ +   nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
+ +
+ +   nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
+ +
+ +   *exponent = /* (|x| >> 23) + (x != 0 ? -126 : 0) */
+ +      nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
+ +                  nir_bcsel(b, is_not_zero, exponent_bias, zero));
+ +
+ +   return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), /* keep sign and mantissa bits of x ... */
+ +                     nir_bcsel(b, is_not_zero, exponent_value, zero)); /* ... and OR in the [0.5, 1.0) exponent unless x == 0 */
+ +}
+ +
+ +static void
+ +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
+ +                   const uint32_t *w, unsigned count)
+ +{ /* Emits NIR for one GLSL.std.450 instruction; the result is registered as SSA value w[2] with type w[1]. */
+ +   struct nir_builder *nb = &b->nb;
+ +   const struct glsl_type *dest_type =
+ +      vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ +
+ +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +   val->ssa = vtn_create_ssa_value(b, dest_type);
+ +
+ +   /* Collect the various SSA sources */
+ +   unsigned num_inputs = count - 5; /* operands start at w[5] */
+ +   nir_ssa_def *src[3]; /* NOTE(review): nothing here checks num_inputs <= 3 (or count >= 5) — confirm callers guarantee it */
+ +   for (unsigned i = 0; i < num_inputs; i++)
+ +      src[i] = vtn_ssa_value(b, w[i + 5])->def;
+ +
+ +   nir_op op; /* set by the "op = ...; break;" cases; all other cases return directly */
+ +   switch (entrypoint) {
+ +   case GLSLstd450Round:       op = nir_op_fround_even;   break; /* TODO */
+ +   case GLSLstd450RoundEven:   op = nir_op_fround_even;   break;
+ +   case GLSLstd450Trunc:       op = nir_op_ftrunc;        break;
+ +   case GLSLstd450FAbs:        op = nir_op_fabs;          break;
+ +   case GLSLstd450SAbs:        op = nir_op_iabs;          break;
+ +   case GLSLstd450FSign:       op = nir_op_fsign;         break;
+ +   case GLSLstd450SSign:       op = nir_op_isign;         break;
+ +   case GLSLstd450Floor:       op = nir_op_ffloor;        break;
+ +   case GLSLstd450Ceil:        op = nir_op_fceil;         break;
+ +   case GLSLstd450Fract:       op = nir_op_ffract;        break;
+ +   case GLSLstd450Radians:
+ +      val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); /* pi / 180 */
+ +      return;
+ +   case GLSLstd450Degrees:
+ +      val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); /* 180 / pi */
+ +      return;
+ +   case GLSLstd450Sin:         op = nir_op_fsin;       break;
+ +   case GLSLstd450Cos:         op = nir_op_fcos;       break;
+ +   case GLSLstd450Tan: /* sin(x) / cos(x) */
+ +      val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
+ +                               nir_fcos(nb, src[0]));
+ +      return;
+ +   case GLSLstd450Pow:         op = nir_op_fpow;       break;
+ +   case GLSLstd450Exp2:        op = nir_op_fexp2;      break;
+ +   case GLSLstd450Log2:        op = nir_op_flog2;      break;
+ +   case GLSLstd450Sqrt:        op = nir_op_fsqrt;      break;
+ +   case GLSLstd450InverseSqrt: op = nir_op_frsq;       break;
+ +
+ +   case GLSLstd450Modf: { /* result is sign(x) * fract(|x|); sign(x) * floor(|x|) is stored through w[6] */
+ +      nir_ssa_def *sign = nir_fsign(nb, src[0]);
+ +      nir_ssa_def *abs = nir_fabs(nb, src[0]);
+ +      val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
+ +      nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
+ +                          nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450ModfStruct: { /* same two parts, written to struct members 0 and 1 of the result */
+ +      nir_ssa_def *sign = nir_fsign(nb, src[0]);
+ +      nir_ssa_def *abs = nir_fabs(nb, src[0]);
+ +      assert(glsl_type_is_struct(val->ssa->type));
+ +      val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
+ +      val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450FMin:        op = nir_op_fmin;       break;
+ +   case GLSLstd450UMin:        op = nir_op_umin;       break;
+ +   case GLSLstd450SMin:        op = nir_op_imin;       break;
+ +   case GLSLstd450FMax:        op = nir_op_fmax;       break;
+ +   case GLSLstd450UMax:        op = nir_op_umax;       break;
+ +   case GLSLstd450SMax:        op = nir_op_imax;       break;
+ +   case GLSLstd450FMix:        op = nir_op_flrp;       break;
+ +   case GLSLstd450Step: /* compares src[1] >= src[0] */
+ +      val->ssa->def = nir_sge(nb, src[1], src[0]);
+ +      return;
+ +
+ +   case GLSLstd450Fma:         op = nir_op_ffma;       break;
+ +   case GLSLstd450Ldexp:       op = nir_op_ldexp;      break;
+ +
+ +   /* Packing/Unpacking functions */
+ +   case GLSLstd450PackSnorm4x8:      op = nir_op_pack_snorm_4x8;      break;
+ +   case GLSLstd450PackUnorm4x8:      op = nir_op_pack_unorm_4x8;      break;
+ +   case GLSLstd450PackSnorm2x16:     op = nir_op_pack_snorm_2x16;     break;
+ +   case GLSLstd450PackUnorm2x16:     op = nir_op_pack_unorm_2x16;     break;
+ +   case GLSLstd450PackHalf2x16:      op = nir_op_pack_half_2x16;      break;
+ +   case GLSLstd450UnpackSnorm4x8:    op = nir_op_unpack_snorm_4x8;    break;
+ +   case GLSLstd450UnpackUnorm4x8:    op = nir_op_unpack_unorm_4x8;    break;
+ +   case GLSLstd450UnpackSnorm2x16:   op = nir_op_unpack_snorm_2x16;   break;
+ +   case GLSLstd450UnpackUnorm2x16:   op = nir_op_unpack_unorm_2x16;   break;
+ +   case GLSLstd450UnpackHalf2x16:    op = nir_op_unpack_half_2x16;    break;
+ +
+ +   case GLSLstd450Length:
+ +      val->ssa->def = build_length(nb, src[0]);
+ +      return;
+ +   case GLSLstd450Distance: /* length(src[0] - src[1]) */
+ +      val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
+ +      return;
+ +   case GLSLstd450Normalize: /* src[0] / length(src[0]) */
+ +      val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
+ +      return;
+ +
+ +   case GLSLstd450Exp:
+ +      val->ssa->def = build_exp(nb, src[0]);
+ +      return;
+ +
+ +   case GLSLstd450Log:
+ +      val->ssa->def = build_log(nb, src[0]);
+ +      return;
+ +
+ +   case GLSLstd450FClamp:
+ +      val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
+ +      return;
+ +   case GLSLstd450UClamp: /* unsigned min(max(x, lo), hi) */
+ +      val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
+ +      return;
+ +   case GLSLstd450SClamp: /* signed min(max(x, lo), hi) */
+ +      val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
+ +      return;
+ +
+ +   case GLSLstd450Cross: { /* a.yzx * b.zxy - a.zxy * b.yzx with a = src[0], b = src[1] */
+ +      unsigned yzx[4] = { 1, 2, 0, 0 };
+ +      unsigned zxy[4] = { 2, 0, 1, 0 };
+ +      val->ssa->def =
+ +         nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
+ +                                   nir_swizzle(nb, src[1], zxy, 3, true)),
+ +                      nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
+ +                                   nir_swizzle(nb, src[1], yzx, 3, true)));
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450SmoothStep: { /* src[0] = edge0, src[1] = edge1, src[2] = x */
+ +      /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
+ +      nir_ssa_def *t =
+ +         build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
+ +                                       nir_fsub(nb, src[1], src[0])),
+ +                          nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
+ +      /* result = t * t * (3 - 2 * t) */
+ +      val->ssa->def =
+ +         nir_fmul(nb, t, nir_fmul(nb, t,
+ +            nir_fsub(nb, nir_imm_float(nb, 3.0),
+ +                         nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450FaceForward: /* dot(src[2], src[1]) < 0 ? src[0] : -src[0] */
+ +      val->ssa->def =
+ +         nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
+ +                                   nir_imm_float(nb, 0.0)),
+ +                       src[0], nir_fneg(nb, src[0]));
+ +      return;
+ +
+ +   case GLSLstd450Reflect:
+ +      /* I - 2 * dot(N, I) * N, with I = src[0] and N = src[1] */
+ +      val->ssa->def =
+ +         nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
+ +                              nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
+ +                                           src[1])));
+ +      return;
+ +
+ +   case GLSLstd450Refract: {
+ +      nir_ssa_def *I = src[0];
+ +      nir_ssa_def *N = src[1];
+ +      nir_ssa_def *eta = src[2];
+ +      nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
+ +      nir_ssa_def *one = nir_imm_float(nb, 1.0);
+ +      nir_ssa_def *zero = nir_imm_float(nb, 0.0);
+ +      /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
+ +      nir_ssa_def *k =
+ +         nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
+ +                      nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
+ +      nir_ssa_def *result = /* eta * I - (eta * dot(N, I) + sqrt(k)) * N */
+ +         nir_fsub(nb, nir_fmul(nb, eta, I),
+ +                      nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
+ +                                                nir_fsqrt(nb, k)), N));
+ +      /* XXX: bcsel, or if statement? */
+ +      val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); /* zero when k < 0 */
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450Sinh:
+ +      /* 0.5 * (e^x - e^(-x)) */
+ +      val->ssa->def =
+ +         nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ +                      nir_fsub(nb, build_exp(nb, src[0]),
+ +                                   build_exp(nb, nir_fneg(nb, src[0]))));
+ +      return;
+ +
+ +   case GLSLstd450Cosh:
+ +      /* 0.5 * (e^x + e^(-x)) */
+ +      val->ssa->def =
+ +         nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ +                      nir_fadd(nb, build_exp(nb, src[0]),
+ +                                   build_exp(nb, nir_fneg(nb, src[0]))));
+ +      return;
+ +
+ +   case GLSLstd450Tanh:
+ +      /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
+ +      val->ssa->def =
+ +         nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ +                                   nir_fsub(nb, build_exp(nb, src[0]),
+ +                                                build_exp(nb, nir_fneg(nb, src[0])))),
+ +                      nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ +                                   nir_fadd(nb, build_exp(nb, src[0]),
+ +                                                build_exp(nb, nir_fneg(nb, src[0])))));
+ +      return;
+ +
+ +   case GLSLstd450Asinh: /* sign(x) * ln(|x| + sqrt(x * x + 1)) */
+ +      val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
+ +         build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
+ +                       nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
+ +                                                  nir_imm_float(nb, 1.0f))))));
+ +      return;
+ +   case GLSLstd450Acosh: /* ln(x + sqrt(x * x - 1)) */
+ +      val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
+ +         nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
+ +                                    nir_imm_float(nb, 1.0f)))));
+ +      return;
+ +   case GLSLstd450Atanh: { /* 0.5 * ln((1 + x) / (1 - x)) */
+ +      nir_ssa_def *one = nir_imm_float(nb, 1.0);
+ +      val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ +         build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
+ +                                    nir_fsub(nb, one, src[0]))));
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450FindILsb:   op = nir_op_find_lsb;   break;
+ +   case GLSLstd450FindSMsb:   op = nir_op_ifind_msb;  break;
+ +   case GLSLstd450FindUMsb:   op = nir_op_ufind_msb;  break;
+ +
+ +   case GLSLstd450Asin:
+ +      val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
+ +      return;
+ +
+ +   case GLSLstd450Acos: /* pi/2 minus the asin approximation, with its own p0/p1 */
+ +      val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f),
+ +                               build_asin(nb, src[0], 0.08132463, -0.02363318));
+ +      return;
+ +
+ +   case GLSLstd450Atan:
+ +      val->ssa->def = build_atan(nb, src[0]);
+ +      return;
+ +
+ +   case GLSLstd450Atan2:
+ +      val->ssa->def = build_atan2(nb, src[0], src[1]);
+ +      return;
+ +
+ +   case GLSLstd450Frexp: { /* the exponent is stored through w[6] */
+ +      nir_ssa_def *exponent;
+ +      val->ssa->def = build_frexp(nb, src[0], &exponent);
+ +      nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450FrexpStruct: { /* significand in member 0, exponent in member 1 */
+ +      assert(glsl_type_is_struct(val->ssa->type));
+ +      val->ssa->elems[0]->def = build_frexp(nb, src[0],
+ +                                            &val->ssa->elems[1]->def);
+ +      return;
+ +   }
+ +
+ +   case GLSLstd450PackDouble2x32: /* no implementation: shares the unreachable default */
+ +   case GLSLstd450UnpackDouble2x32:
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +
+ +   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); /* only the cases that picked an op get here */
+ +   nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
++                     glsl_get_vector_elements(val->ssa->type),
++                     glsl_get_bit_size(glsl_get_base_type(val->ssa->type)),
++                     val->name);
+ +   instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; /* one bit per destination component */
+ +   val->ssa->def = &instr->dest.dest.ssa;
+ +
+ +   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) /* as many sources as the op declares */
+ +      instr->src[i].src = nir_src_for_ssa(src[i]);
+ +
+ +   nir_builder_instr_insert(nb, &instr->instr);
+ +}
+ +
+ +bool
+ +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
+ +                               const uint32_t *w, unsigned count)
+ +{ /* Handles one GLSL.std.450 opcode: matrix ops here, everything else via handle_glsl450_alu(). */
+ +   switch ((enum GLSLstd450)ext_opcode) {
+ +   case GLSLstd450Determinant: { /* the result value is filled in by hand: type from w[1], def from build_mat_det */
+ +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +      val->ssa = rzalloc(b, struct vtn_ssa_value);
+ +      val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ +      val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
+ +      break;
+ +   }
+ +
+ +   case GLSLstd450MatrixInverse: { /* w[5] is the matrix operand */
+ +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +      val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
+ +      break;
+ +   }
+ +
+ +   case GLSLstd450InterpolateAtCentroid: /* the three interpolation opcodes have no implementation here */
+ +   case GLSLstd450InterpolateAtSample:
+ +   case GLSLstd450InterpolateAtOffset:
+ +      unreachable("Unhandled opcode");
+ +
+ +   default:
+ +      handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
+ +   }
+ +
+ +   return true; /* always reports the instruction as handled */
+ +}
diff --cc src/compiler/nir/spirv/vtn_variables.c

index 31bf416,0000000..3cbac1e

mode 100644,000000..100644
--- 1/src/compiler/nir/spirv/vtn_variables.c
--- /dev/null
+++ b/src/compiler/nir/spirv/vtn_variables.c
@@@ -1,1412 -1,0 +1,1415 @@@
-                            intrin->num_components, NULL);
+ +/*
+ + * Copyright © 2015 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + *
+ + * Authors:
+ + *    Jason Ekstrand (jason@jlekstrand.net)
+ + *
+ + */
+ +
+ +#include "vtn_private.h"
+ +
+ +static struct vtn_access_chain *
+ +vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old,
+ +                        unsigned new_ids)
+ +{
+ +   struct vtn_access_chain *chain;
+ +
+ +   unsigned new_len = old->length + new_ids;
+ +   chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0]));
+ +
+ +   chain->var = old->var;
+ +   chain->length = new_len;
+ +
+ +   for (unsigned i = 0; i < old->length; i++)
+ +      chain->link[i] = old->link[i];
+ +
+ +   return chain;
+ +}
+ +
+ +static nir_ssa_def *
+ +vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
+ +                       unsigned stride)
+ +{
+ +   assert(stride > 0);
+ +   if (link.mode == vtn_access_mode_literal) {
+ +      return nir_imm_int(&b->nb, link.id * stride);
+ +   } else if (stride == 1) {
+ +      return vtn_ssa_value(b, link.id)->def;
+ +   } else {
+ +      return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def,
+ +                              nir_imm_int(&b->nb, stride));
+ +   }
+ +}
+ +
+ +static struct vtn_type *
+ +vtn_access_chain_tail_type(struct vtn_builder *b,
+ +                           struct vtn_access_chain *chain)
+ +{
+ +   struct vtn_type *type = chain->var->type;
+ +   for (unsigned i = 0; i < chain->length; i++) {
+ +      if (glsl_type_is_struct(type->type)) {
+ +         assert(chain->link[i].mode == vtn_access_mode_literal);
+ +         type = type->members[chain->link[i].id];
+ +      } else {
+ +         type = type->array_element;
+ +      }
+ +   }
+ +   return type;
+ +}
+ +
+ +/* Crawls a chain of array derefs and rewrites the types so that the
+ + * lengths stay the same but the terminal type is the innermost element
+ + * type of the given type.  This is useful for split structures.
+ + */
+ +static void
+ +rewrite_deref_types(nir_deref *deref, const struct glsl_type *type)
+ +{
+ +   deref->type = type;
+ +   if (deref->child) {
+ +      assert(deref->child->deref_type == nir_deref_type_array);
+ +      assert(glsl_type_is_array(deref->type));
+ +      rewrite_deref_types(deref->child, glsl_get_array_element(type));
+ +   }
+ +}
+ +
+ +nir_deref_var *
+ +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
+ +{
+ +   nir_deref_var *deref_var;
+ +   if (chain->var->var) {
+ +      deref_var = nir_deref_var_create(b, chain->var->var);
+ +   } else {
+ +      assert(chain->var->members);
+ +      /* Create the deref_var manually.  It will get filled out later. */
+ +      deref_var = rzalloc(b, nir_deref_var);
+ +      deref_var->deref.deref_type = nir_deref_type_var;
+ +   }
+ +
+ +   struct vtn_type *deref_type = chain->var->type;
+ +   nir_deref *tail = &deref_var->deref;
+ +   nir_variable **members = chain->var->members;
+ +
+ +   for (unsigned i = 0; i < chain->length; i++) {
+ +      enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
+ +      switch (base_type) {
+ +      case GLSL_TYPE_UINT:
+ +      case GLSL_TYPE_INT:
+ +      case GLSL_TYPE_FLOAT:
+ +      case GLSL_TYPE_DOUBLE:
+ +      case GLSL_TYPE_BOOL:
+ +      case GLSL_TYPE_ARRAY: {
+ +         deref_type = deref_type->array_element;
+ +
+ +         nir_deref_array *deref_arr = nir_deref_array_create(b);
+ +         deref_arr->deref.type = deref_type->type;
+ +
+ +         if (chain->link[i].mode == vtn_access_mode_literal) {
+ +            deref_arr->deref_array_type = nir_deref_array_type_direct;
+ +            deref_arr->base_offset = chain->link[i].id;
+ +         } else {
+ +            assert(chain->link[i].mode == vtn_access_mode_id);
+ +            deref_arr->deref_array_type = nir_deref_array_type_indirect;
+ +            deref_arr->base_offset = 0;
+ +            deref_arr->indirect =
+ +               nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def);
+ +         }
+ +         tail->child = &deref_arr->deref;
+ +         tail = tail->child;
+ +         break;
+ +      }
+ +
+ +      case GLSL_TYPE_STRUCT: {
+ +         assert(chain->link[i].mode == vtn_access_mode_literal);
+ +         unsigned idx = chain->link[i].id;
+ +         deref_type = deref_type->members[idx];
+ +         if (members) {
+ +            /* This is a pre-split structure. */
+ +            deref_var->var = members[idx];
+ +            rewrite_deref_types(&deref_var->deref, members[idx]->type);
+ +            assert(tail->type == deref_type->type);
+ +            members = NULL;
+ +         } else {
+ +            nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx);
+ +            deref_struct->deref.type = deref_type->type;
+ +            tail->child = &deref_struct->deref;
+ +            tail = tail->child;
+ +         }
+ +         break;
+ +      }
+ +      default:
+ +         unreachable("Invalid type for deref");
+ +      }
+ +   }
+ +
+ +   assert(members == NULL);
+ +   return deref_var;
+ +}
+ +
+ +static void
+ +_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
+ +                      nir_deref *tail, struct vtn_ssa_value *inout)
+ +{
+ +   /* The deref tail may contain a deref to select a component of a vector (in
+ +    * other words, it might not be an actual tail) so we have to save it away
+ +    * here since we overwrite it later.
+ +    */
+ +   nir_deref *old_child = tail->child;
+ +
+ +   if (glsl_type_is_vector_or_scalar(tail->type)) {
+ +      /* Terminate the deref chain in case there is one more link to pick
+ +       * off a component of the vector.
+ +       */
+ +      tail->child = NULL;
+ +
+ +      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
+ +                                   nir_intrinsic_store_var;
+ +
+ +      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
+ +      intrin->variables[0] =
+ +         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
+ +      intrin->num_components = glsl_get_vector_elements(tail->type);
+ +
+ +      if (load) {
+ +         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
-    nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++                           intrin->num_components,
++                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
++                           NULL);
+ +         inout->def = &intrin->dest.ssa;
+ +      } else {
+ +         nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
+ +         intrin->src[0] = nir_src_for_ssa(inout->def);
+ +      }
+ +
+ +      nir_builder_instr_insert(&b->nb, &intrin->instr);
+ +   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
+ +              glsl_type_is_matrix(tail->type)) {
+ +      unsigned elems = glsl_get_length(tail->type);
+ +      nir_deref_array *deref_arr = nir_deref_array_create(b);
+ +      deref_arr->deref_array_type = nir_deref_array_type_direct;
+ +      deref_arr->deref.type = glsl_get_array_element(tail->type);
+ +      tail->child = &deref_arr->deref;
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         deref_arr->base_offset = i;
+ +         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
+ +      }
+ +   } else {
+ +      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
+ +      unsigned elems = glsl_get_length(tail->type);
+ +      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
+ +      tail->child = &deref_struct->deref;
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         deref_struct->index = i;
+ +         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
+ +         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
+ +      }
+ +   }
+ +
+ +   tail->child = old_child;
+ +}
+ +
+ +nir_deref_var *
+ +vtn_nir_deref(struct vtn_builder *b, uint32_t id)
+ +{
+ +   struct vtn_access_chain *chain =
+ +      vtn_value(b, id, vtn_value_type_access_chain)->access_chain;
+ +
+ +   return vtn_access_chain_to_deref(b, chain);
+ +}
+ +
+ +/*
+ + * Gets the NIR-level deref tail, which may have as a child an array deref
+ + * selecting which component due to OpAccessChain supporting per-component
+ + * indexing in SPIR-V.
+ + */
+ +static nir_deref *
+ +get_deref_tail(nir_deref_var *deref)
+ +{
+ +   nir_deref *cur = &deref->deref;
+ +   while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child)
+ +      cur = cur->child;
+ +
+ +   return cur;
+ +}
+ +
+ +struct vtn_ssa_value *
+ +vtn_local_load(struct vtn_builder *b, nir_deref_var *src)
+ +{
+ +   nir_deref *src_tail = get_deref_tail(src);
+ +   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type);
+ +   _vtn_local_load_store(b, true, src, src_tail, val);
+ +
+ +   if (src_tail->child) {
+ +      nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child);
+ +      assert(vec_deref->deref.child == NULL);
+ +      val->type = vec_deref->deref.type;
+ +      if (vec_deref->deref_array_type == nir_deref_array_type_direct)
+ +         val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset);
+ +      else
+ +         val->def = vtn_vector_extract_dynamic(b, val->def,
+ +                                               vec_deref->indirect.ssa);
+ +   }
+ +
+ +   return val;
+ +}
+ +
+ +void
+ +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+ +                nir_deref_var *dest)
+ +{
+ +   nir_deref *dest_tail = get_deref_tail(dest);
+ +
+ +   if (dest_tail->child) {
+ +      struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
+ +      _vtn_local_load_store(b, true, dest, dest_tail, val);
+ +      nir_deref_array *deref = nir_deref_as_array(dest_tail->child);
+ +      assert(deref->deref.child == NULL);
+ +      if (deref->deref_array_type == nir_deref_array_type_direct)
+ +         val->def = vtn_vector_insert(b, val->def, src->def,
+ +                                      deref->base_offset);
+ +      else
+ +         val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
+ +                                              deref->indirect.ssa);
+ +      _vtn_local_load_store(b, false, dest, dest_tail, val);
+ +   } else {
+ +      _vtn_local_load_store(b, false, dest, dest_tail, src);
+ +   }
+ +}
+ +
+ +static nir_ssa_def *
+ +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
+ +                          struct vtn_type **type, unsigned *chain_idx)
+ +{
+ +   /* Push constants have no explicit binding */
+ +   if (chain->var->mode == vtn_variable_mode_push_constant) {
+ +      *chain_idx = 0;
+ +      *type = chain->var->type;
+ +      return NULL;
+ +   }
+ +
+ +   nir_ssa_def *array_index;
+ +   if (glsl_type_is_array(chain->var->type->type)) {
+ +      assert(chain->length > 0);
+ +      array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
+ +      *chain_idx = 1;
+ +      *type = chain->var->type->array_element;
+ +   } else {
+ +      array_index = nir_imm_int(&b->nb, 0);
+ +      *chain_idx = 0;
+ +      *type = chain->var->type;
+ +   }
+ +
+ +   nir_intrinsic_instr *instr =
+ +      nir_intrinsic_instr_create(b->nb.shader,
+ +                                 nir_intrinsic_vulkan_resource_index);
+ +   instr->src[0] = nir_src_for_ssa(array_index);
+ +   nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
+ +   nir_intrinsic_set_binding(instr, chain->var->binding);
+ +
-                         instr->num_components, NULL);
++   nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
+ +   nir_builder_instr_insert(&b->nb, &instr->instr);
+ +
+ +   return &instr->dest.ssa;
+ +}
+ +
+ +nir_ssa_def *
+ +vtn_access_chain_to_offset(struct vtn_builder *b,
+ +                           struct vtn_access_chain *chain,
+ +                           nir_ssa_def **index_out, struct vtn_type **type_out,
+ +                           unsigned *end_idx_out, bool stop_at_matrix)
+ +{
+ +   unsigned idx = 0;
+ +   struct vtn_type *type;
+ +   *index_out = get_vulkan_resource_index(b, chain, &type, &idx);
+ +
+ +   nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
+ +   for (; idx < chain->length; idx++) {
+ +      enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ +      switch (base_type) {
+ +      case GLSL_TYPE_UINT:
+ +      case GLSL_TYPE_INT:
+ +      case GLSL_TYPE_FLOAT:
+ +      case GLSL_TYPE_DOUBLE:
+ +      case GLSL_TYPE_BOOL:
+ +         /* Some users may not want matrix or vector derefs */
+ +         if (stop_at_matrix)
+ +            goto end;
+ +         /* Fall through */
+ +
+ +      case GLSL_TYPE_ARRAY:
+ +         offset = nir_iadd(&b->nb, offset,
+ +                           vtn_access_link_as_ssa(b, chain->link[idx],
+ +                                                  type->stride));
+ +
+ +         type = type->array_element;
+ +         break;
+ +
+ +      case GLSL_TYPE_STRUCT: {
+ +         assert(chain->link[idx].mode == vtn_access_mode_literal);
+ +         unsigned member = chain->link[idx].id;
+ +         offset = nir_iadd(&b->nb, offset,
+ +                           nir_imm_int(&b->nb, type->offsets[member]));
+ +         type = type->members[member];
+ +         break;
+ +      }
+ +
+ +      default:
+ +         unreachable("Invalid type for deref");
+ +      }
+ +   }
+ +
+ +end:
+ +   *type_out = type;
+ +   if (end_idx_out)
+ +      *end_idx_out = idx;
+ +
+ +   return offset;
+ +}
+ +
+ +static void
+ +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
+ +                     nir_ssa_def *index, nir_ssa_def *offset,
+ +                     struct vtn_ssa_value **inout, const struct glsl_type *type)
+ +{
+ +   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
+ +   instr->num_components = glsl_get_vector_elements(type);
+ +
+ +   int src = 0;
+ +   if (!load) {
+ +      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
+ +      instr->src[src++] = nir_src_for_ssa((*inout)->def);
+ +   }
+ +
+ +   /* We set the base and size for push constant load to the entire push
+ +    * constant block for now.
+ +    */
+ +   if (op == nir_intrinsic_load_push_constant) {
+ +      nir_intrinsic_set_base(instr, 0);
+ +      nir_intrinsic_set_range(instr, 128);
+ +   }
+ +
+ +   if (index)
+ +      instr->src[src++] = nir_src_for_ssa(index);
+ +
+ +   instr->src[src++] = nir_src_for_ssa(offset);
+ +
+ +   if (load) {
+ +      nir_ssa_dest_init(&instr->instr, &instr->dest,
-       nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++                        instr->num_components,
++                        glsl_get_bit_size(glsl_get_base_type(type)), NULL);
+ +      (*inout)->def = &instr->dest.ssa;
+ +   }
+ +
+ +   nir_builder_instr_insert(&b->nb, &instr->instr);
+ +
+ +   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
+ +      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
+ +}
+ +
+ +static void
+ +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
+ +                      nir_ssa_def *index, nir_ssa_def *offset,
+ +                      struct vtn_access_chain *chain, unsigned chain_idx,
+ +                      struct vtn_type *type, struct vtn_ssa_value **inout)
+ +{
+ +   if (chain && chain_idx >= chain->length)
+ +      chain = NULL;
+ +
+ +   if (load && chain == NULL && *inout == NULL)
+ +      *inout = vtn_create_ssa_value(b, type->type);
+ +
+ +   enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ +   switch (base_type) {
+ +   case GLSL_TYPE_UINT:
+ +   case GLSL_TYPE_INT:
+ +   case GLSL_TYPE_FLOAT:
+ +   case GLSL_TYPE_BOOL:
+ +      /* This is where things get interesting.  At this point, we've hit
+ +       * a vector, a scalar, or a matrix.
+ +       */
+ +      if (glsl_type_is_matrix(type->type)) {
+ +         if (chain == NULL) {
+ +            /* Loading or storing the whole matrix */
+ +            struct vtn_ssa_value *transpose;
+ +            unsigned num_ops, vec_width;
+ +            if (type->row_major) {
+ +               num_ops = glsl_get_vector_elements(type->type);
+ +               vec_width = glsl_get_matrix_columns(type->type);
+ +               if (load) {
+ +                  const struct glsl_type *transpose_type =
+ +                     glsl_matrix_type(base_type, vec_width, num_ops);
+ +                  *inout = vtn_create_ssa_value(b, transpose_type);
+ +               } else {
+ +                  transpose = vtn_ssa_transpose(b, *inout);
+ +                  inout = &transpose;
+ +               }
+ +            } else {
+ +               num_ops = glsl_get_matrix_columns(type->type);
+ +               vec_width = glsl_get_vector_elements(type->type);
+ +            }
+ +
+ +            for (unsigned i = 0; i < num_ops; i++) {
+ +               nir_ssa_def *elem_offset =
+ +                  nir_iadd(&b->nb, offset,
+ +                           nir_imm_int(&b->nb, i * type->stride));
+ +               _vtn_load_store_tail(b, op, load, index, elem_offset,
+ +                                    &(*inout)->elems[i],
+ +                                    glsl_vector_type(base_type, vec_width));
+ +            }
+ +
+ +            if (load && type->row_major)
+ +               *inout = vtn_ssa_transpose(b, *inout);
+ +         } else if (type->row_major) {
+ +            /* Row-major but with an access chain. */
+ +            nir_ssa_def *col_offset =
+ +               vtn_access_link_as_ssa(b, chain->link[chain_idx],
+ +                                      type->array_element->stride);
+ +            offset = nir_iadd(&b->nb, offset, col_offset);
+ +
+ +            if (chain_idx + 1 < chain->length) {
+ +               /* Picking off a single element */
+ +               nir_ssa_def *row_offset =
+ +                  vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
+ +                                         type->stride);
+ +               offset = nir_iadd(&b->nb, offset, row_offset);
+ +               if (load)
+ +                  *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
+ +               _vtn_load_store_tail(b, op, load, index, offset, inout,
+ +                                    glsl_scalar_type(base_type));
+ +            } else {
+ +               /* Grabbing a column; picking one element off each row */
+ +               unsigned num_comps = glsl_get_vector_elements(type->type);
+ +               const struct glsl_type *column_type =
+ +                  glsl_get_column_type(type->type);
+ +
+ +               nir_ssa_def *comps[4];
+ +               for (unsigned i = 0; i < num_comps; i++) {
+ +                  nir_ssa_def *elem_offset =
+ +                     nir_iadd(&b->nb, offset,
+ +                              nir_imm_int(&b->nb, i * type->stride));
+ +
+ +                  struct vtn_ssa_value *comp, temp_val;
+ +                  if (!load) {
+ +                     temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
+ +                     temp_val.type = glsl_scalar_type(base_type);
+ +                  }
+ +                  comp = &temp_val;
+ +                  _vtn_load_store_tail(b, op, load, index, elem_offset,
+ +                                       &comp, glsl_scalar_type(base_type));
+ +                  comps[i] = comp->def;
+ +               }
+ +
+ +               if (load) {
+ +                  if (*inout == NULL)
+ +                     *inout = vtn_create_ssa_value(b, column_type);
+ +
+ +                  (*inout)->def = nir_vec(&b->nb, comps, num_comps);
+ +               }
+ +            }
+ +         } else {
+ +            /* Column-major with a deref. Recurse on the element type. */
+ +            nir_ssa_def *col_offset =
+ +               vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
+ +            offset = nir_iadd(&b->nb, offset, col_offset);
+ +
+ +            _vtn_block_load_store(b, op, load, index, offset,
+ +                                  chain, chain_idx + 1,
+ +                                  type->array_element, inout);
+ +         }
+ +      } else if (chain == NULL) {
+ +         /* Single whole vector */
+ +         assert(glsl_type_is_vector_or_scalar(type->type));
+ +         _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
+ +      } else {
+ +         /* Single component of a vector. Recurse on the element type. */
+ +         nir_ssa_def *elem_offset =
+ +            vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
+ +         offset = nir_iadd(&b->nb, offset, elem_offset);
+ +
+ +         _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
+ +                               type->array_element, inout);
+ +      }
+ +      return;
+ +
+ +   case GLSL_TYPE_ARRAY: {
+ +      unsigned elems = glsl_get_length(type->type);
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         nir_ssa_def *elem_off =
+ +            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
+ +         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
+ +                               type->array_element, &(*inout)->elems[i]);
+ +      }
+ +      return;
+ +   }
+ +
+ +   case GLSL_TYPE_STRUCT: {
+ +      unsigned elems = glsl_get_length(type->type);
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         nir_ssa_def *elem_off =
+ +            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
+ +         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
+ +                               type->members[i], &(*inout)->elems[i]);
+ +      }
+ +      return;
+ +   }
+ +
+ +   default:
+ +      unreachable("Invalid block member type");
+ +   }
+ +}
+ +
+ +static struct vtn_ssa_value *
+ +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src)
+ +{
+ +   nir_intrinsic_op op;
+ +   switch (src->var->mode) {
+ +   case vtn_variable_mode_ubo:
+ +      op = nir_intrinsic_load_ubo;
+ +      break;
+ +   case vtn_variable_mode_ssbo:
+ +      op = nir_intrinsic_load_ssbo;
+ +      break;
+ +   case vtn_variable_mode_push_constant:
+ +      op = nir_intrinsic_load_push_constant;
+ +      break;
+ +   default:
+ +      assert(!"Invalid block variable mode");
+ +   }
+ +
+ +   nir_ssa_def *offset, *index = NULL;
+ +   struct vtn_type *type;
+ +   unsigned chain_idx;
+ +   offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true);
+ +
+ +   struct vtn_ssa_value *value = NULL;
+ +   _vtn_block_load_store(b, op, true, index, offset,
+ +                         src, chain_idx, type, &value);
+ +   return value;
+ +}
+ +
+ +static void
+ +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+ +                struct vtn_access_chain *dst)
+ +{
+ +   nir_ssa_def *offset, *index = NULL;
+ +   struct vtn_type *type;
+ +   unsigned chain_idx;
+ +   offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true);
+ +
+ +   _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset,
+ +                         dst, chain_idx, type, &src);
+ +}
+ +
+ +static bool
+ +vtn_variable_is_external_block(struct vtn_variable *var)
+ +{
+ +   return var->mode == vtn_variable_mode_ssbo ||
+ +          var->mode == vtn_variable_mode_ubo ||
+ +          var->mode == vtn_variable_mode_push_constant;
+ +}
+ +
+ +static void
+ +_vtn_variable_load_store(struct vtn_builder *b, bool load,
+ +                         struct vtn_access_chain *chain,
+ +                         struct vtn_type *tail_type,
+ +                         struct vtn_ssa_value **inout)
+ +{
+ +   enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
+ +   switch (base_type) {
+ +   case GLSL_TYPE_UINT:
+ +   case GLSL_TYPE_INT:
+ +   case GLSL_TYPE_FLOAT:
+ +   case GLSL_TYPE_BOOL:
+ +      /* At this point, we have a scalar, vector, or matrix so we know that
+ +       * there cannot be any structure splitting still in the way.  By
+ +       * stopping at the matrix level rather than the vector level, we
+ +       * ensure that matrices get loaded in the optimal way even if they
+ +       * are stored row-major in a UBO.
+ +       */
+ +      if (load) {
+ +         *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain));
+ +      } else {
+ +         vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain));
+ +      }
+ +      return;
+ +
+ +   case GLSL_TYPE_ARRAY:
+ +   case GLSL_TYPE_STRUCT: {
+ +      struct vtn_access_chain *new_chain =
+ +         vtn_access_chain_extend(b, chain, 1);
+ +      new_chain->link[chain->length].mode = vtn_access_mode_literal;
+ +      unsigned elems = glsl_get_length(tail_type->type);
+ +      if (load) {
+ +         assert(*inout == NULL);
+ +         *inout = rzalloc(b, struct vtn_ssa_value);
+ +         (*inout)->type = tail_type->type;
+ +         (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems);
+ +      }
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         new_chain->link[chain->length].id = i;
+ +         struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
+ +            tail_type->array_element : tail_type->members[i];
+ +         _vtn_variable_load_store(b, load, new_chain, elem_type,
+ +                                  &(*inout)->elems[i]);
+ +      }
+ +      return;
+ +   }
+ +
+ +   default:
+ +      unreachable("Invalid access chain type");
+ +   }
+ +}
+ +
+ +struct vtn_ssa_value *
+ +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src)
+ +{
+ +   if (vtn_variable_is_external_block(src->var)) {
+ +      return vtn_block_load(b, src);
+ +   } else {
+ +      struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
+ +      struct vtn_ssa_value *val = NULL;
+ +      _vtn_variable_load_store(b, true, src, tail_type, &val);
+ +      return val;
+ +   }
+ +}
+ +
+ +void
+ +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+ +                   struct vtn_access_chain *dest)
+ +{
+ +   if (vtn_variable_is_external_block(dest->var)) {
+ +      assert(dest->var->mode == vtn_variable_mode_ssbo);
+ +      vtn_block_store(b, src, dest);
+ +   } else {
+ +      struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest);
+ +      _vtn_variable_load_store(b, false, dest, tail_type, &src);
+ +   }
+ +}
+ +
+ +static void
+ +_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
+ +                   struct vtn_access_chain *src, struct vtn_type *tail_type)
+ +{
+ +   enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
+ +   switch (base_type) {
+ +   case GLSL_TYPE_UINT:
+ +   case GLSL_TYPE_INT:
+ +   case GLSL_TYPE_FLOAT:
+ +   case GLSL_TYPE_BOOL:
+ +      /* At this point, we have a scalar, vector, or matrix so we know that
+ +       * there cannot be any structure splitting still in the way.  By
+ +       * stopping at the matrix level rather than the vector level, we
+ +       * ensure that matrices get loaded in the optimal way even if they
+ +       * are stored row-major in a UBO.
+ +       */
+ +      vtn_variable_store(b, vtn_variable_load(b, src), dest);
+ +      return;
+ +
+ +   case GLSL_TYPE_ARRAY:
+ +   case GLSL_TYPE_STRUCT: {
+ +      struct vtn_access_chain *new_src, *new_dest;
+ +      new_src = vtn_access_chain_extend(b, src, 1);
+ +      new_dest = vtn_access_chain_extend(b, dest, 1);
+ +      new_src->link[src->length].mode = vtn_access_mode_literal;
+ +      new_dest->link[dest->length].mode = vtn_access_mode_literal;
+ +      unsigned elems = glsl_get_length(tail_type->type);
+ +      for (unsigned i = 0; i < elems; i++) {
+ +         new_src->link[src->length].id = i;
+ +         new_dest->link[dest->length].id = i;
+ +         struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
+ +            tail_type->array_element : tail_type->members[i];
+ +         _vtn_variable_copy(b, new_dest, new_src, elem_type);
+ +      }
+ +      return;
+ +   }
+ +
+ +   default:
+ +      unreachable("Invalid access chain type");
+ +   }
+ +}
+ +
+ +static void
+ +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
+ +                  struct vtn_access_chain *src)
+ +{
+ +   struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
+ +   assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type);
+ +
+ +   /* TODO: At some point, we should add a special-case for when we can
+ +    * just emit a copy_var intrinsic.
+ +    */
+ +   _vtn_variable_copy(b, dest, src, tail_type);
+ +}
+ +
+ +static void
+ +set_mode_system_value(nir_variable_mode *mode)
+ +{
+ +   assert(*mode == nir_var_system_value || *mode == nir_var_shader_in);
+ +   *mode = nir_var_system_value;
+ +}
+ +
+ +static void
+ +vtn_get_builtin_location(struct vtn_builder *b,
+ +                         SpvBuiltIn builtin, int *location,
+ +                         nir_variable_mode *mode)
+ +{
+ +   switch (builtin) {
+ +   case SpvBuiltInPosition:
+ +      *location = VARYING_SLOT_POS;
+ +      break;
+ +   case SpvBuiltInPointSize:
+ +      *location = VARYING_SLOT_PSIZ;
+ +      break;
+ +   case SpvBuiltInClipDistance:
+ +      *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */
+ +      break;
+ +   case SpvBuiltInCullDistance:
+ +      /* XXX figure this out */
+ +      break;
+ +   case SpvBuiltInVertexIndex:
+ +      *location = SYSTEM_VALUE_VERTEX_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInVertexId:
+ +      /* Vulkan defines VertexID to be zero-based and reserves the new
+ +       * builtin keyword VertexIndex to indicate the non-zero-based value.
+ +       */
+ +      *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInInstanceIndex:
+ +      *location = SYSTEM_VALUE_INSTANCE_INDEX;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInInstanceId:
+ +      *location = SYSTEM_VALUE_INSTANCE_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInPrimitiveId:
+ +      *location = VARYING_SLOT_PRIMITIVE_ID;
+ +      *mode = nir_var_shader_out;
+ +      break;
+ +   case SpvBuiltInInvocationId:
+ +      *location = SYSTEM_VALUE_INVOCATION_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInLayer:
+ +      *location = VARYING_SLOT_LAYER;
+ +      *mode = nir_var_shader_out;
+ +      break;
+ +   case SpvBuiltInViewportIndex:
+ +      *location = VARYING_SLOT_VIEWPORT;
+ +      if (b->shader->stage == MESA_SHADER_GEOMETRY)
+ +         *mode = nir_var_shader_out;
+ +      else if (b->shader->stage == MESA_SHADER_FRAGMENT)
+ +         *mode = nir_var_shader_in;
+ +      else
+ +         unreachable("invalid stage for SpvBuiltInViewportIndex");
+ +      break;
+ +   case SpvBuiltInTessLevelOuter:
+ +   case SpvBuiltInTessLevelInner:
+ +   case SpvBuiltInTessCoord:
+ +   case SpvBuiltInPatchVertices:
+ +      unreachable("no tessellation support");
+ +   case SpvBuiltInFragCoord:
+ +      *location = VARYING_SLOT_POS;
+ +      assert(*mode == nir_var_shader_in);
+ +      break;
+ +   case SpvBuiltInPointCoord:
+ +      *location = VARYING_SLOT_PNTC;
+ +      assert(*mode == nir_var_shader_in);
+ +      break;
+ +   case SpvBuiltInFrontFacing:
+ +      *location = VARYING_SLOT_FACE;
+ +      assert(*mode == nir_var_shader_in);
+ +      break;
+ +   case SpvBuiltInSampleId:
+ +      *location = SYSTEM_VALUE_SAMPLE_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInSamplePosition:
+ +      *location = SYSTEM_VALUE_SAMPLE_POS;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInSampleMask:
+ +      *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInFragDepth:
+ +      *location = FRAG_RESULT_DEPTH;
+ +      assert(*mode == nir_var_shader_out);
+ +      break;
+ +   case SpvBuiltInNumWorkgroups:
+ +      *location = SYSTEM_VALUE_NUM_WORK_GROUPS;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInWorkgroupSize:
+ +      /* This should already be handled */
+ +      unreachable("unsupported builtin");
+ +      break;
+ +   case SpvBuiltInWorkgroupId:
+ +      *location = SYSTEM_VALUE_WORK_GROUP_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInLocalInvocationId:
+ +      *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInLocalInvocationIndex:
+ +      *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInGlobalInvocationId:
+ +      *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
+ +      set_mode_system_value(mode);
+ +      break;
+ +   case SpvBuiltInHelperInvocation:
+ +   default:
+ +      unreachable("unsupported builtin");
+ +   }
+ +}
+ +
+ +/* Decoration callback that applies a single decoration to a vtn_variable.
+ + *
+ + * void_var is the struct vtn_variable being decorated.  Binding,
+ + * DescriptorSet and Location are recorded on the vtn_variable itself (or,
+ + * for Location, on its nir_variable / each of its split members).  Every
+ + * other decoration is applied to one nir_variable: vtn_var->var, or
+ + * vtn_var->members[member] when the decoration comes from a type value and
+ + * member != -1.
+ + */
+ +static void
+ +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
+ +                  const struct vtn_decoration *dec, void *void_var)
+ +{
+ +   struct vtn_variable *vtn_var = void_var;
+ +
+ +   /* Handle decorations that apply to a vtn_variable as a whole */
+ +   switch (dec->decoration) {
+ +   case SpvDecorationBinding:
+ +      vtn_var->binding = dec->literals[0];
+ +      return;
+ +   case SpvDecorationDescriptorSet:
+ +      vtn_var->descriptor_set = dec->literals[0];
+ +      return;
+ +
+ +   case SpvDecorationLocation: {
+ +      /* The literal is rebased onto the slot range that matches the shader
+ +       * stage and the variable's direction.
+ +       */
+ +      unsigned location = dec->literals[0];
+ +      bool is_vertex_input;
+ +      if (b->shader->stage == MESA_SHADER_FRAGMENT &&
+ +          vtn_var->mode == vtn_variable_mode_output) {
+ +         is_vertex_input = false;
+ +         location += FRAG_RESULT_DATA0;
+ +      } else if (b->shader->stage == MESA_SHADER_VERTEX &&
+ +                 vtn_var->mode == vtn_variable_mode_input) {
+ +         is_vertex_input = true;
+ +         location += VERT_ATTRIB_GENERIC0;
+ +      } else if (vtn_var->mode == vtn_variable_mode_input ||
+ +                 vtn_var->mode == vtn_variable_mode_output) {
+ +         is_vertex_input = false;
+ +         location += VARYING_SLOT_VAR0;
+ +      } else {
+ +         /* NOTE(review): if this assert is compiled out, is_vertex_input
+ +          * stays uninitialized and location keeps the raw literal.
+ +          */
+ +         assert(!"Location must be on input or output variable");
+ +      }
+ +
+ +      if (vtn_var->var) {
+ +         vtn_var->var->data.location = location;
+ +         vtn_var->var->data.explicit_location = true;
+ +      } else {
+ +         /* The variable was split into per-member nir_variables: each
+ +          * member starts where the previous one ended, advancing by the
+ +          * slot count reported for the member's interface_type.
+ +          */
+ +         assert(vtn_var->members);
+ +         unsigned length = glsl_get_length(vtn_var->type->type);
+ +         for (unsigned i = 0; i < length; i++) {
+ +            vtn_var->members[i]->data.location = location;
+ +            vtn_var->members[i]->data.explicit_location = true;
+ +            location +=
+ +               glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
+ +                                          is_vertex_input);
+ +         }
+ +      }
+ +      return;
+ +   }
+ +
+ +   default:
+ +      break;
+ +   }
+ +
+ +   /* Now we handle decorations that apply to a particular nir_variable */
+ +   nir_variable *nir_var = vtn_var->var;
+ +   if (val->value_type == vtn_value_type_access_chain) {
+ +      assert(val->access_chain->length == 0);
+ +      assert(val->access_chain->var == void_var);
+ +      assert(member == -1);
+ +   } else {
+ +      assert(val->value_type == vtn_value_type_type);
+ +      if (member != -1)
+ +         nir_var = vtn_var->members[member];
+ +   }
+ +
+ +   /* No nir_variable to apply the decoration to. */
+ +   if (nir_var == NULL)
+ +      return;
+ +
+ +   switch (dec->decoration) {
+ +   case SpvDecorationRelaxedPrecision:
+ +      break; /* FIXME: Do nothing with this for now. */
+ +   case SpvDecorationNoPerspective:
+ +      nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ +      break;
+ +   case SpvDecorationFlat:
+ +      nir_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ +      break;
+ +   case SpvDecorationCentroid:
+ +      nir_var->data.centroid = true;
+ +      break;
+ +   case SpvDecorationSample:
+ +      nir_var->data.sample = true;
+ +      break;
+ +   case SpvDecorationInvariant:
+ +      nir_var->data.invariant = true;
+ +      break;
+ +   case SpvDecorationConstant:
+ +      assert(nir_var->constant_initializer != NULL);
+ +      nir_var->data.read_only = true;
+ +      break;
+ +   case SpvDecorationNonWritable:
+ +      nir_var->data.read_only = true;
+ +      break;
+ +   case SpvDecorationComponent:
+ +      nir_var->data.location_frac = dec->literals[0];
+ +      break;
+ +   case SpvDecorationIndex:
+ +      nir_var->data.explicit_index = true;
+ +      nir_var->data.index = dec->literals[0];
+ +      break;
+ +   case SpvDecorationBuiltIn: {
+ +      SpvBuiltIn builtin = dec->literals[0];
+ +
+ +      if (builtin == SpvBuiltInWorkgroupSize) {
+ +         /* This shouldn't be a builtin.  It's actually a constant. */
+ +         nir_var->data.mode = nir_var_global;
+ +         nir_var->data.read_only = true;
+ +
+ +         /* The constant is filled from the shader's compute local size. */
+ +         nir_constant *c = rzalloc(nir_var, nir_constant);
+ +         c->value.u[0] = b->shader->info.cs.local_size[0];
+ +         c->value.u[1] = b->shader->info.cs.local_size[1];
+ +         c->value.u[2] = b->shader->info.cs.local_size[2];
+ +         nir_var->constant_initializer = c;
+ +         break;
+ +      }
+ +
+ +      /* Work on a local copy of the mode: vtn_get_builtin_location may
+ +       * rewrite it, and the result is stored back afterwards.
+ +       */
+ +      nir_variable_mode mode = nir_var->data.mode;
+ +      vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode);
+ +      nir_var->data.explicit_location = true;
+ +      nir_var->data.mode = mode;
+ +
+ +      if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition)
+ +         nir_var->data.origin_upper_left = b->origin_upper_left;
+ +      break;
+ +   }
+ +   case SpvDecorationRowMajor:
+ +   case SpvDecorationColMajor:
+ +   case SpvDecorationGLSLShared:
+ +   case SpvDecorationPatch:
+ +   case SpvDecorationRestrict:
+ +   case SpvDecorationAliased:
+ +   case SpvDecorationVolatile:
+ +   case SpvDecorationCoherent:
+ +   case SpvDecorationNonReadable:
+ +   case SpvDecorationUniform:
+ +      /* This is really nice but we have no use for it right now. */
+ +   case SpvDecorationCPacked:
+ +   case SpvDecorationSaturatedConversion:
+ +   case SpvDecorationStream:
+ +   case SpvDecorationOffset:
+ +   case SpvDecorationXfbBuffer:
+ +   case SpvDecorationFuncParamAttr:
+ +   case SpvDecorationFPRoundingMode:
+ +   case SpvDecorationFPFastMathMode:
+ +   case SpvDecorationLinkageAttributes:
+ +   case SpvDecorationSpecId:
+ +      /* All of the decorations listed above are deliberately ignored. */
+ +      break;
+ +   default:
+ +      unreachable("Unhandled variable decoration");
+ +   }
+ +}
+ +
+ +/* Tries to compute the size of an interface block based on the strides and
+ + * offsets that are provided to us in the SPIR-V source.
+ + *
+ + * Scalars and vectors contribute 4 per component (8 for doubles); types
+ + * with more than one column use stride * columns; structs and interfaces
+ + * use the furthest end (offset + size) of any member; arrays use
+ + * stride * length.  The unit is presumably bytes -- TODO confirm.
+ + * Any other base type asserts and yields 0.
+ + */
+ +static unsigned
+ +vtn_type_block_size(struct vtn_type *type)
+ +{
+ +   enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ +   switch (base_type) {
+ +   case GLSL_TYPE_UINT:
+ +   case GLSL_TYPE_INT:
+ +   case GLSL_TYPE_FLOAT:
+ +   case GLSL_TYPE_BOOL:
+ +   case GLSL_TYPE_DOUBLE: {
+ +      /* For row-major types the vector element count is used as the
+ +       * number of strided "columns"; otherwise the matrix column count.
+ +       */
+ +      unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
+ +                                        glsl_get_matrix_columns(type->type);
+ +      if (cols > 1) {
+ +         assert(type->stride > 0);
+ +         return type->stride * cols;
+ +      } else if (base_type == GLSL_TYPE_DOUBLE) {
+ +         return glsl_get_vector_elements(type->type) * 8;
+ +      } else {
+ +         return glsl_get_vector_elements(type->type) * 4;
+ +      }
+ +   }
+ +
+ +   case GLSL_TYPE_STRUCT:
+ +   case GLSL_TYPE_INTERFACE: {
+ +      /* Members may appear in any offset order, so take the maximum end. */
+ +      unsigned size = 0;
+ +      unsigned num_fields = glsl_get_length(type->type);
+ +      for (unsigned f = 0; f < num_fields; f++) {
+ +         unsigned field_end = type->offsets[f] +
+ +                              vtn_type_block_size(type->members[f]);
+ +         size = MAX2(size, field_end);
+ +      }
+ +      return size;
+ +   }
+ +
+ +   case GLSL_TYPE_ARRAY:
+ +      assert(type->stride > 0);
+ +      assert(glsl_get_length(type->type) > 0);
+ +      return type->stride * glsl_get_length(type->type);
+ +
+ +   default:
+ +      assert(!"Invalid block type");
+ +      return 0;
+ +   }
+ +}
+ +
+ +/* Handles the SPIR-V variable and memory opcodes: OpVariable,
+ + * OpAccessChain / OpInBoundsAccessChain, OpCopyMemory, OpLoad, OpStore and
+ + * OpArrayLength.  Any other opcode (including OpCopyMemorySized) hits
+ + * unreachable().
+ + *
+ + * w is indexed as the instruction's words (e.g. w[2] is used as the result
+ + * id for OpVariable/OpLoad) and count is compared against word indices;
+ + * presumably count is the instruction's word count -- confirm with caller.
+ + */
+ +void
+ +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
+ +                     const uint32_t *w, unsigned count)
+ +{
+ +   switch (opcode) {
+ +   case SpvOpVariable: {
+ +      struct vtn_variable *var = rzalloc(b, struct vtn_variable);
+ +      var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ +
+ +      /* The variable carries its own zero-length access chain, which is
+ +       * what the result id refers to.
+ +       */
+ +      var->chain.var = var;
+ +      var->chain.length = 0;
+ +
+ +      struct vtn_value *val =
+ +         vtn_push_value(b, w[2], vtn_value_type_access_chain);
+ +      val->access_chain = &var->chain;
+ +
+ +      /* Strip all array levels to find the underlying element type. */
+ +      struct vtn_type *without_array = var->type;
+ +      while(glsl_type_is_array(without_array->type))
+ +         without_array = without_array->array_element;
+ +
+ +      /* nir_mode is only assigned for modes that create a nir_variable
+ +       * below (not for ubo, ssbo or push constant).
+ +       */
+ +      nir_variable_mode nir_mode;
+ +      switch ((SpvStorageClass)w[3]) {
+ +      case SpvStorageClassUniform:
+ +      case SpvStorageClassUniformConstant:
+ +         if (without_array->block) {
+ +            var->mode = vtn_variable_mode_ubo;
+ +            b->shader->info.num_ubos++;
+ +         } else if (without_array->buffer_block) {
+ +            var->mode = vtn_variable_mode_ssbo;
+ +            b->shader->info.num_ssbos++;
+ +         } else if (glsl_type_is_image(without_array->type)) {
+ +            var->mode = vtn_variable_mode_image;
+ +            nir_mode = nir_var_uniform;
+ +            b->shader->info.num_images++;
+ +         } else if (glsl_type_is_sampler(without_array->type)) {
+ +            var->mode = vtn_variable_mode_sampler;
+ +            nir_mode = nir_var_uniform;
+ +            b->shader->info.num_textures++;
+ +         } else {
+ +            assert(!"Invalid uniform variable type");
+ +         }
+ +         break;
+ +      case SpvStorageClassPushConstant:
+ +         var->mode = vtn_variable_mode_push_constant;
+ +         assert(b->shader->num_uniforms == 0);
+ +         /* NOTE(review): num_uniforms is set to four times the computed
+ +          * block size -- confirm the intended units.
+ +          */
+ +         b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
+ +         break;
+ +      case SpvStorageClassInput:
+ +         var->mode = vtn_variable_mode_input;
+ +         nir_mode = nir_var_shader_in;
+ +         break;
+ +      case SpvStorageClassOutput:
+ +         var->mode = vtn_variable_mode_output;
+ +         nir_mode = nir_var_shader_out;
+ +         break;
+ +      case SpvStorageClassPrivate:
+ +         var->mode = vtn_variable_mode_global;
+ +         nir_mode = nir_var_global;
+ +         break;
+ +      case SpvStorageClassFunction:
+ +         var->mode = vtn_variable_mode_local;
+ +         nir_mode = nir_var_local;
+ +         break;
+ +      case SpvStorageClassWorkgroup:
+ +         var->mode = vtn_variable_mode_workgroup;
+ +         nir_mode = nir_var_shared;
+ +         break;
+ +      case SpvStorageClassCrossWorkgroup:
+ +      case SpvStorageClassGeneric:
+ +      case SpvStorageClassAtomicCounter:
+ +      default:
+ +         unreachable("Unhandled variable storage class");
+ +      }
+ +
+ +      switch (var->mode) {
+ +      case vtn_variable_mode_local:
+ +      case vtn_variable_mode_global:
+ +      case vtn_variable_mode_image:
+ +      case vtn_variable_mode_sampler:
+ +      case vtn_variable_mode_workgroup:
+ +         /* For these, we create the variable normally */
+ +         var->var = rzalloc(b->shader, nir_variable);
+ +         var->var->name = ralloc_strdup(var->var, val->name);
+ +         var->var->type = var->type->type;
+ +         var->var->data.mode = nir_mode;
+ +
+ +         /* Only images and samplers get an interface type here. */
+ +         switch (var->mode) {
+ +         case vtn_variable_mode_image:
+ +         case vtn_variable_mode_sampler:
+ +            var->var->interface_type = without_array->type;
+ +            break;
+ +         default:
+ +            var->var->interface_type = NULL;
+ +            break;
+ +         }
+ +         break;
+ +
+ +      case vtn_variable_mode_input:
+ +      case vtn_variable_mode_output: {
+ +         /* For inputs and outputs, we immediately split structures.  This
+ +          * is for a couple of reasons.  For one, builtins may all come in
+ +          * a struct and we really want those split out into separate
+ +          * variables.  For another, interpolation qualifiers can be
+ +          * applied to members of the top-level struct and we need to be
+ +          * able to preserve that information.
+ +          */
+ +
+ +         int array_length = -1;
+ +         struct vtn_type *interface_type = var->type;
+ +         if (b->shader->stage == MESA_SHADER_GEOMETRY &&
+ +             glsl_type_is_array(var->type->type)) {
+ +            /* In Geometry shaders (and some tessellation), inputs come
+ +             * in per-vertex arrays.  However, some builtins come in
+ +             * non-per-vertex, hence the need for the is_array check.  In
+ +             * any case, there are no non-builtin arrays allowed so this
+ +             * check should be sufficient.
+ +             */
+ +            interface_type = var->type->array_element;
+ +            array_length = glsl_get_length(var->type->type);
+ +         }
+ +
+ +         if (glsl_type_is_struct(interface_type->type)) {
+ +            /* It's a struct.  Split it. */
+ +            unsigned num_members = glsl_get_length(interface_type->type);
+ +            var->members = ralloc_array(b, nir_variable *, num_members);
+ +
+ +            for (unsigned i = 0; i < num_members; i++) {
+ +               /* Re-wrap the member type in the per-vertex array when one
+ +                * was stripped above.
+ +                */
+ +               const struct glsl_type *mtype = interface_type->members[i]->type;
+ +               if (array_length >= 0)
+ +                  mtype = glsl_array_type(mtype, array_length);
+ +
+ +               var->members[i] = rzalloc(b->shader, nir_variable);
+ +               var->members[i]->name =
+ +                  ralloc_asprintf(var->members[i], "%s.%d", val->name, i);
+ +               var->members[i]->type = mtype;
+ +               var->members[i]->interface_type =
+ +                  interface_type->members[i]->type;
+ +               var->members[i]->data.mode = nir_mode;
+ +            }
+ +         } else {
+ +            var->var = rzalloc(b->shader, nir_variable);
+ +            var->var->name = ralloc_strdup(var->var, val->name);
+ +            var->var->type = var->type->type;
+ +            var->var->interface_type = interface_type->type;
+ +            var->var->data.mode = nir_mode;
+ +         }
+ +
+ +         /* For inputs and outputs, we need to grab locations and builtin
+ +          * information from the interface type.
+ +          */
+ +         vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var);
+ +         break;
+ +
+ +      /* This case label sits inside the input/output block's braces; it is
+ +       * reached only by switch dispatch because of the break above.
+ +       */
+ +      case vtn_variable_mode_param:
+ +         unreachable("Not created through OpVariable");
+ +      }
+ +
+ +      case vtn_variable_mode_ubo:
+ +      case vtn_variable_mode_ssbo:
+ +      case vtn_variable_mode_push_constant:
+ +         /* These don't need actual variables. */
+ +         break;
+ +      }
+ +
+ +      /* A fifth word is an optional constant initializer.
+ +       * NOTE(review): var->var is NULL for ubo/ssbo/push-constant and for
+ +       * split-struct inputs/outputs, so an initializer on one of those
+ +       * would dereference NULL here.
+ +       */
+ +      if (count > 4) {
+ +         assert(count == 5);
+ +         nir_constant *constant =
+ +            vtn_value(b, w[4], vtn_value_type_constant)->constant;
+ +         var->var->constant_initializer =
+ +            nir_constant_clone(constant, var->var);
+ +      }
+ +
+ +      vtn_foreach_decoration(b, val, var_decoration_cb, var);
+ +
+ +      if (var->mode == vtn_variable_mode_image ||
+ +          var->mode == vtn_variable_mode_sampler) {
+ +         /* XXX: We still need the binding information in the nir_variable
+ +          * for these. We should fix that.
+ +          */
+ +         var->var->data.binding = var->binding;
+ +         var->var->data.descriptor_set = var->descriptor_set;
+ +
+ +         if (var->mode == vtn_variable_mode_image)
+ +            var->var->data.image.format = without_array->image_format;
+ +      }
+ +
+ +      /* Register the created nir_variable(s): locals with the current
+ +       * function impl, everything else with the shader.
+ +       */
+ +      if (var->mode == vtn_variable_mode_local) {
+ +         assert(var->members == NULL && var->var != NULL);
+ +         nir_function_impl_add_variable(b->impl, var->var);
+ +      } else if (var->var) {
+ +         nir_shader_add_variable(b->shader, var->var);
+ +      } else if (var->members) {
+ +         /* Note: this local 'count' shadows the function parameter. */
+ +         unsigned count = glsl_get_length(without_array->type);
+ +         for (unsigned i = 0; i < count; i++) {
+ +            assert(var->members[i]->data.mode != nir_var_local);
+ +            nir_shader_add_variable(b->shader, var->members[i]);
+ +         }
+ +      } else {
+ +         assert(var->mode == vtn_variable_mode_ubo ||
+ +                var->mode == vtn_variable_mode_ssbo ||
+ +                var->mode == vtn_variable_mode_push_constant);
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpAccessChain:
+ +   case SpvOpInBoundsAccessChain: {
+ +      struct vtn_access_chain *base, *chain;
+ +      struct vtn_value *base_val = vtn_untyped_value(b, w[3]);
+ +      if (base_val->value_type == vtn_value_type_sampled_image) {
+ +         /* This is rather insane.  SPIR-V allows you to use OpSampledImage
+ +          * to combine an array of images with a single sampler to get an
+ +          * array of sampled images that all share the same sampler.
+ +          * Fortunately, this means that we can more-or-less ignore the
+ +          * sampler when crawling the access chain, but it does leave us
+ +          * with this rather awkward little special-case.
+ +          */
+ +         base = base_val->sampled_image->image;
+ +      } else {
+ +         assert(base_val->value_type == vtn_value_type_access_chain);
+ +         base = base_val->access_chain;
+ +      }
+ +
+ +      /* Words from w[4] on are the indices appended to the base chain. */
+ +      chain = vtn_access_chain_extend(b, base, count - 4);
+ +
+ +      unsigned idx = base->length;
+ +      for (int i = 4; i < count; i++) {
+ +         struct vtn_value *link_val = vtn_untyped_value(b, w[i]);
+ +         /* Constant indices are stored as literals; anything else keeps
+ +          * the SPIR-V id for later resolution.
+ +          */
+ +         if (link_val->value_type == vtn_value_type_constant) {
+ +            chain->link[idx].mode = vtn_access_mode_literal;
+ +            chain->link[idx].id = link_val->constant->value.u[0];
+ +         } else {
+ +            chain->link[idx].mode = vtn_access_mode_id;
+ +            chain->link[idx].id = w[i];
+ +         }
+ +         idx++;
+ +      }
+ +
+ +      if (base_val->value_type == vtn_value_type_sampled_image) {
+ +         /* Keep the base's sampler and pair it with the extended chain. */
+ +         struct vtn_value *val =
+ +            vtn_push_value(b, w[2], vtn_value_type_sampled_image);
+ +         val->sampled_image = ralloc(b, struct vtn_sampled_image);
+ +         val->sampled_image->image = chain;
+ +         val->sampled_image->sampler = base_val->sampled_image->sampler;
+ +      } else {
+ +         struct vtn_value *val =
+ +            vtn_push_value(b, w[2], vtn_value_type_access_chain);
+ +         val->access_chain = chain;
+ +      }
+ +      break;
+ +   }
+ +
+ +   case SpvOpCopyMemory: {
+ +      struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain);
+ +      struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain);
+ +
+ +      vtn_variable_copy(b, dest->access_chain, src->access_chain);
+ +      break;
+ +   }
+ +
+ +   case SpvOpLoad: {
+ +      struct vtn_access_chain *src =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +
+ +      /* Loads of images and samplers are not turned into SSA values; the
+ +       * result id simply refers to the same access chain.
+ +       */
+ +      if (src->var->mode == vtn_variable_mode_image ||
+ +          src->var->mode == vtn_variable_mode_sampler) {
+ +         vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src;
+ +         return;
+ +      }
+ +
+ +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +      val->ssa = vtn_variable_load(b, src);
+ +      break;
+ +   }
+ +
+ +   case SpvOpStore: {
+ +      struct vtn_access_chain *dest =
+ +         vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
+ +      struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
+ +      vtn_variable_store(b, src, dest);
+ +      break;
+ +   }
+ +
+ +   case SpvOpArrayLength: {
+ +      struct vtn_access_chain *chain =
+ +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ +
+ +      /* w[4] selects the struct member; its offset and array stride come
+ +       * from the variable's type.
+ +       */
+ +      const uint32_t offset = chain->var->type->offsets[w[4]];
+ +      const uint32_t stride = chain->var->type->members[w[4]]->stride;
+ +
+ +      unsigned chain_idx;
+ +      struct vtn_type *type;
+ +      nir_ssa_def *index =
+ +         get_vulkan_resource_index(b, chain, &type, &chain_idx);
+ +
+ +      /* Emit a get_buffer_size intrinsic on the resource index. */
+ +      nir_intrinsic_instr *instr =
+ +         nir_intrinsic_instr_create(b->nb.shader,
+ +                                    nir_intrinsic_get_buffer_size);
+ +      instr->src[0] = nir_src_for_ssa(index);
++      nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
+ +      nir_builder_instr_insert(&b->nb, &instr->instr);
+ +      nir_ssa_def *buf_size = &instr->dest.ssa;
+ +
+ +      /* array_length = max(buffer_size - offset, 0) / stride */
+ +      nir_ssa_def *array_length =
+ +         nir_idiv(&b->nb,
+ +                  nir_imax(&b->nb,
+ +                           nir_isub(&b->nb,
+ +                                    buf_size,
+ +                                    nir_imm_int(&b->nb, offset)),
+ +                           nir_imm_int(&b->nb, 0u)),
+ +                  nir_imm_int(&b->nb, stride));
+ +
+ +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ +      val->ssa = vtn_create_ssa_value(b, glsl_uint_type());
+ +      val->ssa->def = array_length;
+ +      break;
+ +   }
+ +
+ +   case SpvOpCopyMemorySized:
+ +   default:
+ +      unreachable("Unhandled opcode");
+ +   }
+ +}
diff --cc src/compiler/nir_types.h
Simple merge
diff --cc src/intel/vulkan/anv_meta_blit.c

index e23b697,0000000..218499a

mode 100644,000000..100644
--- 1/src/intel/vulkan/anv_meta_blit.c
--- /dev/null
+++ b/src/intel/vulkan/anv_meta_blit.c
@@@ -1,748 -1,0 +1,748 @@@
-    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+ +/*
+ + * Copyright © 2015 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + */
+ +
+ +#include "anv_meta.h"
+ +#include "nir/nir_builder.h"
+ +
+ +/* Source and destination offset/extent pairs; presumably one blit region
+ + * -- its users are outside this part of the file.
+ + */
+ +struct blit_region {
+ +   VkOffset3D src_offset;
+ +   VkExtent3D src_extent;
+ +   VkOffset3D dest_offset;
+ +   VkExtent3D dest_extent;
+ +};
+ +
+ +/* Builds the "meta_blit_vs" vertex shader with the NIR builder.
+ + *
+ + * The shader copies a_pos (VERT_ATTRIB_GENERIC0) to gl_Position and
+ + * a_tex_pos (VERT_ATTRIB_GENERIC1) to the smooth-interpolated varying
+ + * v_tex_pos (VARYING_SLOT_VAR0).  Returns the builder's shader.
+ + */
+ +static nir_shader *
+ +build_nir_vertex_shader(void)
+ +{
+ +   const struct glsl_type *vec4 = glsl_vec4_type();
+ +   nir_builder b;
+ +
+ +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+ +
+ +   /* Position: pass-through from vertex attribute to gl_Position. */
+ +   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ +                                              vec4, "a_pos");
+ +   pos_in->data.location = VERT_ATTRIB_GENERIC0;
+ +   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ +                                               vec4, "gl_Position");
+ +   pos_out->data.location = VARYING_SLOT_POS;
+ +   nir_copy_var(&b, pos_out, pos_in);
+ +
+ +   /* Texture coordinate: pass-through to the first generic varying. */
+ +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ +                                                  vec4, "a_tex_pos");
+ +   tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
+ +   nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ +                                                   vec4, "v_tex_pos");
+ +   tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ +   tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+ +   nir_copy_var(&b, tex_pos_out, tex_pos_in);
+ +
+ +   return b.shader;
+ +}
+ +
+ +/* Builds the "meta_blit_fs" fragment shader with the NIR builder.
+ + *
+ + * The shader loads v_tex_pos (VARYING_SLOT_VAR0), swizzles it into a
+ + * texture coordinate, samples the uniform sampler s_tex (descriptor set 0,
+ + * binding 0) with nir_texop_tex and stores the result to f_color
+ + * (FRAG_RESULT_DATA0).
+ + *
+ + * tex_dim is the sampler dimensionality used for both the sampler type and
+ + * the texture instruction.  Returns the builder's shader.
+ + */
+ +static nir_shader *
+ +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
+ +{
+ +   const struct glsl_type *vec4 = glsl_vec4_type();
+ +   nir_builder b;
+ +
+ +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
+ +
+ +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ +                                                  vec4, "v_tex_pos");
+ +   tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ +
+ +   /* Swizzle the array index which comes in as Z coordinate into the right
+ +    * position.
+ +    *
+ +    * For 1D this selects components (0, 2) as a 2-component coordinate;
+ +    * otherwise components (0, 1, 2) as a 3-component coordinate.
+ +    */
+ +   unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
+ +   nir_ssa_def *const tex_pos =
+ +      nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
+ +                  (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+ +
+ +   /* The third argument (tex_dim != GLSL_SAMPLER_DIM_3D) is presumably the
+ +    * "is array" flag, given is_array is queried from this type below.
+ +    */
+ +   const struct glsl_type *sampler_type =
+ +      glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+ +                        glsl_get_base_type(vec4));
+ +   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ +                                               sampler_type, "s_tex");
+ +   sampler->data.descriptor_set = 0;
+ +   sampler->data.binding = 0;
+ +
+ +   /* One source (the coordinate); the same variable is used for both the
+ +    * texture and the sampler deref.
+ +    */
+ +   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+ +   tex->sampler_dim = tex_dim;
+ +   tex->op = nir_texop_tex;
+ +   tex->src[0].src_type = nir_tex_src_coord;
+ +   tex->src[0].src = nir_src_for_ssa(tex_pos);
+ +   tex->dest_type = nir_type_float; /* TODO */
+ +   tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ +   tex->coord_components = tex_pos->num_components;
+ +   tex->texture = nir_deref_var_create(tex, sampler);
+ +   tex->sampler = nir_deref_var_create(tex, sampler);
+ +
++   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ +   nir_builder_instr_insert(&b, &tex->instr);
+ +
+ +   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ +                                                 vec4, "f_color");
+ +   color_out->data.location = FRAG_RESULT_DATA0;
+ +   /* NOTE(review): the last argument is 4 -- confirm whether nir_store_var
+ +    * expects a component count or a write mask here (a full 4-component
+ +    * mask would be 0xf).
+ +    */
+ +   nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+ +
+ +   return b.shader;
+ +}
+ +
+ +/* Saves command-buffer state into saved_state before a meta blit by calling
+ + * anv_meta_save with a mask containing only the VK_DYNAMIC_STATE_VIEWPORT
+ + * bit.
+ + */
+ +static void
+ +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
+ +                  struct anv_meta_saved_state *saved_state)
+ +{
+ +   anv_meta_save(saved_state, cmd_buffer,
+ +                 (1 << VK_DYNAMIC_STATE_VIEWPORT));
+ +}
+ +
+ +static void
+ +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
+ +               struct anv_image *src_image,
+ +               struct anv_image_view *src_iview,
+ +               VkOffset3D src_offset,
+ +               VkExtent3D src_extent,
+ +               struct anv_image *dest_image,
+ +               struct anv_image_view *dest_iview,
+ +               VkOffset3D dest_offset,
+ +               VkExtent3D dest_extent,
+ +               VkFilter blit_filter)
+ +{
+ +   struct anv_device *device = cmd_buffer->device;
+ +
+ +   struct blit_vb_data {
+ +      float pos[2];
+ +      float tex_coord[3];
+ +   } *vb_data;
+ +
+ +   assert(src_image->samples == dest_image->samples);
+ +
+ +   unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+ +
+ +   struct anv_state vb_state =
+ +      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ +   memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ +   vb_data = vb_state.map + sizeof(struct anv_vue_header);
+ +
+ +   vb_data[0] = (struct blit_vb_data) {
+ +      .pos = {
+ +         dest_offset.x + dest_extent.width,
+ +         dest_offset.y + dest_extent.height,
+ +      },
+ +      .tex_coord = {
+ +         (float)(src_offset.x + src_extent.width)
+ +            / (float)src_iview->extent.width,
+ +         (float)(src_offset.y + src_extent.height)
+ +            / (float)src_iview->extent.height,
+ +         (float)src_offset.z / (float)src_iview->extent.depth,
+ +      },
+ +   };
+ +
+ +   vb_data[1] = (struct blit_vb_data) {
+ +      .pos = {
+ +         dest_offset.x,
+ +         dest_offset.y + dest_extent.height,
+ +      },
+ +      .tex_coord = {
+ +         (float)src_offset.x / (float)src_iview->extent.width,
+ +         (float)(src_offset.y + src_extent.height) /
+ +            (float)src_iview->extent.height,
+ +         (float)src_offset.z / (float)src_iview->extent.depth,
+ +      },
+ +   };
+ +
+ +   vb_data[2] = (struct blit_vb_data) {
+ +      .pos = {
+ +         dest_offset.x,
+ +         dest_offset.y,
+ +      },
+ +      .tex_coord = {
+ +         (float)src_offset.x / (float)src_iview->extent.width,
+ +         (float)src_offset.y / (float)src_iview->extent.height,
+ +         (float)src_offset.z / (float)src_iview->extent.depth,
+ +      },
+ +   };
+ +
+ +   anv_state_clflush(vb_state);
+ +
+ +   struct anv_buffer vertex_buffer = {
+ +      .device = device,
+ +      .size = vb_size,
+ +      .bo = &device->dynamic_state_block_pool.bo,
+ +      .offset = vb_state.offset,
+ +   };
+ +
+ +   anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ +      (VkBuffer[]) {
+ +         anv_buffer_to_handle(&vertex_buffer),
+ +         anv_buffer_to_handle(&vertex_buffer)
+ +      },
+ +      (VkDeviceSize[]) {
+ +         0,
+ +         sizeof(struct anv_vue_header),
+ +      });
+ +
+ +   VkSampler sampler;
+ +   ANV_CALL(CreateSampler)(anv_device_to_handle(device),
+ +      &(VkSamplerCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ +         .magFilter = blit_filter,
+ +         .minFilter = blit_filter,
+ +      }, &cmd_buffer->pool->alloc, &sampler);
+ +
+ +   VkDescriptorPool desc_pool;
+ +   anv_CreateDescriptorPool(anv_device_to_handle(device),
+ +      &(const VkDescriptorPoolCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ +         .pNext = NULL,
+ +         .flags = 0,
+ +         .maxSets = 1,
+ +         .poolSizeCount = 1,
+ +         .pPoolSizes = (VkDescriptorPoolSize[]) {
+ +            {
+ +               .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ +               .descriptorCount = 1
+ +            },
+ +         }
+ +      }, &cmd_buffer->pool->alloc, &desc_pool);
+ +
+ +   VkDescriptorSet set;
+ +   anv_AllocateDescriptorSets(anv_device_to_handle(device),
+ +      &(VkDescriptorSetAllocateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ +         .descriptorPool = desc_pool,
+ +         .descriptorSetCount = 1,
+ +         .pSetLayouts = &device->meta_state.blit.ds_layout
+ +      }, &set);
+ +
+ +   anv_UpdateDescriptorSets(anv_device_to_handle(device),
+ +      1, /* writeCount */
+ +      (VkWriteDescriptorSet[]) {
+ +         {
+ +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ +            .dstSet = set,
+ +            .dstBinding = 0,
+ +            .dstArrayElement = 0,
+ +            .descriptorCount = 1,
+ +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ +            .pImageInfo = (VkDescriptorImageInfo[]) {
+ +               {
+ +                  .sampler = sampler,
+ +                  .imageView = anv_image_view_to_handle(src_iview),
+ +                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +               },
+ +            }
+ +         }
+ +      }, 0, NULL);
+ +
+ +   VkFramebuffer fb;
+ +   anv_CreateFramebuffer(anv_device_to_handle(device),
+ +      &(VkFramebufferCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ +         .attachmentCount = 1,
+ +         .pAttachments = (VkImageView[]) {
+ +            anv_image_view_to_handle(dest_iview),
+ +         },
+ +         .width = dest_iview->extent.width,
+ +         .height = dest_iview->extent.height,
+ +         .layers = 1
+ +      }, &cmd_buffer->pool->alloc, &fb);
+ +
+ +   ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ +      &(VkRenderPassBeginInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ +         .renderPass = device->meta_state.blit.render_pass,
+ +         .framebuffer = fb,
+ +         .renderArea = {
+ +            .offset = { dest_offset.x, dest_offset.y },
+ +            .extent = { dest_extent.width, dest_extent.height },
+ +         },
+ +         .clearValueCount = 0,
+ +         .pClearValues = NULL,
+ +      }, VK_SUBPASS_CONTENTS_INLINE);
+ +
+ +   VkPipeline pipeline;
+ +
+ +   switch (src_image->type) {
+ +   case VK_IMAGE_TYPE_1D:
+ +      pipeline = device->meta_state.blit.pipeline_1d_src;
+ +      break;
+ +   case VK_IMAGE_TYPE_2D:
+ +      pipeline = device->meta_state.blit.pipeline_2d_src;
+ +      break;
+ +   case VK_IMAGE_TYPE_3D:
+ +      pipeline = device->meta_state.blit.pipeline_3d_src;
+ +      break;
+ +   default:
+ +      unreachable(!"bad VkImageType");
+ +   }
+ +
+ +   if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
+ +      anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
+ +                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ +   }
+ +
+ +   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ +                      &(VkViewport) {
+ +                        .x = 0.0f,
+ +                        .y = 0.0f,
+ +                        .width = dest_iview->extent.width,
+ +                        .height = dest_iview->extent.height,
+ +                        .minDepth = 0.0f,
+ +                        .maxDepth = 1.0f,
+ +                      });
+ +
+ +   anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+ +                             VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +                             device->meta_state.blit.pipeline_layout, 0, 1,
+ +                             &set, 0, NULL);
+ +
+ +   ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ +
+ +   ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+ +
+ +   /* At the point where we emit the draw call, all data from the
+ +    * descriptor sets, etc. has been used.  We are free to delete it.
+ +    */
+ +   anv_DestroyDescriptorPool(anv_device_to_handle(device),
+ +                             desc_pool, &cmd_buffer->pool->alloc);
+ +   anv_DestroySampler(anv_device_to_handle(device), sampler,
+ +                      &cmd_buffer->pool->alloc);
+ +   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+ +                          &cmd_buffer->pool->alloc);
+ +}
+ +
+ +/**
+ + * Closes a meta blit sequence on a command buffer.
+ + *
+ + * Hands the state snapshot in saved_state back to anv_meta_restore() for
+ + * cmd_buffer; nothing else is done here.
+ + */
+ +static void
+ +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
+ +                 const struct anv_meta_saved_state *saved_state)
+ +{
+ +   /* Note the argument order: the saved state comes first. */
+ +   anv_meta_restore(saved_state, cmd_buffer);
+ +}
+ +
+ +/**
+ + * Driver entry point for vkCmdBlitImage.
+ + *
+ + * For every region this wraps one source subresource and one destination
+ + * layer in temporary image views and records the blit through
+ + * meta_emit_blit().  Regions that request flipping, more than one array
+ + * layer, or more than one depth slice are only reported with
+ + * anv_finishme(); recording then continues with the single-layer views
+ + * built below.
+ + *
+ + * srcImageLayout and destImageLayout are accepted but not consulted here.
+ + */
+ +void anv_CmdBlitImage(
+ +    VkCommandBuffer                             commandBuffer,
+ +    VkImage                                     srcImage,
+ +    VkImageLayout                               srcImageLayout,
+ +    VkImage                                     destImage,
+ +    VkImageLayout                               destImageLayout,
+ +    uint32_t                                    regionCount,
+ +    const VkImageBlit*                          pRegions,
+ +    VkFilter                                    filter)
+ +
+ +{
+ +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ +   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ +   ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ +   struct anv_meta_saved_state saved_state;
+ +
+ +   /* From the Vulkan 1.0 spec:
+ +    *
+ +    *    vkCmdBlitImage must not be used for multisampled source or
+ +    *    destination images. Use vkCmdResolveImage for this purpose.
+ +    */
+ +   assert(src_image->samples == 1);
+ +   assert(dest_image->samples == 1);
+ +
+ +   meta_prepare_blit(cmd_buffer, &saved_state);
+ +
+ +   for (unsigned r = 0; r < regionCount; r++) {
+ +      const VkImageBlit *region = &pRegions[r];
+ +      const VkOffset3D *src_lo = &region->srcOffsets[0];
+ +      const VkOffset3D *src_hi = &region->srcOffsets[1];
+ +      const VkOffset3D *dst_lo = &region->dstOffsets[0];
+ +      const VkOffset3D *dst_hi = &region->dstOffsets[1];
+ +
+ +      /* Sampled view of exactly one mip level and one array layer. */
+ +      VkImageViewCreateInfo src_view_info = {
+ +         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ +         .image = srcImage,
+ +         .viewType = anv_meta_get_view_type(src_image),
+ +         .format = src_image->vk_format,
+ +         .subresourceRange = {
+ +            .aspectMask = region->srcSubresource.aspectMask,
+ +            .baseMipLevel = region->srcSubresource.mipLevel,
+ +            .levelCount = 1,
+ +            .baseArrayLayer = region->srcSubresource.baseArrayLayer,
+ +            .layerCount = 1
+ +         },
+ +      };
+ +      struct anv_image_view src_iview;
+ +      anv_image_view_init(&src_iview, cmd_buffer->device, &src_view_info,
+ +                          cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT);
+ +
+ +      /* Only x/y of the first destination corner are used; z is forced
+ +       * to zero.
+ +       */
+ +      const VkOffset3D dest_offset = {
+ +         .x = dst_lo->x,
+ +         .y = dst_lo->y,
+ +         .z = 0,
+ +      };
+ +
+ +      /* A second corner that lies before the first one means a flip. */
+ +      if (dst_hi->x < dst_lo->x ||
+ +          dst_hi->y < dst_lo->y ||
+ +          src_hi->x < src_lo->x ||
+ +          src_hi->y < src_lo->y) {
+ +         anv_finishme("FINISHME: Allow flipping in blits");
+ +      }
+ +
+ +      const VkExtent3D dest_extent = {
+ +         .width = dst_hi->x - dst_lo->x,
+ +         .height = dst_hi->y - dst_lo->y,
+ +      };
+ +
+ +      const VkExtent3D src_extent = {
+ +         .width = src_hi->x - src_lo->x,
+ +         .height = src_hi->y - src_lo->y,
+ +      };
+ +
+ +      const uint32_t dest_array_slice =
+ +         anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
+ +                                  dst_lo);
+ +
+ +      if (region->srcSubresource.layerCount > 1) {
+ +         anv_finishme("FINISHME: copy multiple array layers");
+ +      }
+ +
+ +      if (src_lo->z + 1 != src_hi->z ||
+ +          dst_lo->z + 1 != dst_hi->z) {
+ +         anv_finishme("FINISHME: copy multiple depth layers");
+ +      }
+ +
+ +      /* Color-attachment view of the single destination layer. */
+ +      VkImageViewCreateInfo dest_view_info = {
+ +         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ +         .image = destImage,
+ +         .viewType = anv_meta_get_view_type(dest_image),
+ +         .format = dest_image->vk_format,
+ +         .subresourceRange = {
+ +            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ +            .baseMipLevel = region->dstSubresource.mipLevel,
+ +            .levelCount = 1,
+ +            .baseArrayLayer = dest_array_slice,
+ +            .layerCount = 1
+ +         },
+ +      };
+ +      struct anv_image_view dest_iview;
+ +      anv_image_view_init(&dest_iview, cmd_buffer->device, &dest_view_info,
+ +                          cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+ +
+ +      meta_emit_blit(cmd_buffer,
+ +                     src_image, &src_iview,
+ +                     *src_lo, src_extent,
+ +                     dest_image, &dest_iview,
+ +                     dest_offset, dest_extent,
+ +                     filter);
+ +   }
+ +
+ +   meta_finish_blit(cmd_buffer, &saved_state);
+ +}
+ +
+ +/**
+ + * Destroys the device-wide meta blit objects stored in
+ + * device->meta_state.blit: the render pass, the 1D/2D/3D source pipelines,
+ + * the pipeline layout and the descriptor set layout, in that order.
+ + */
+ +void
+ +anv_device_finish_meta_blit_state(struct anv_device *device)
+ +{
+ +   const VkDevice device_h = anv_device_to_handle(device);
+ +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ +
+ +   anv_DestroyRenderPass(device_h, device->meta_state.blit.render_pass,
+ +                         alloc);
+ +
+ +   anv_DestroyPipeline(device_h, device->meta_state.blit.pipeline_1d_src,
+ +                       alloc);
+ +   anv_DestroyPipeline(device_h, device->meta_state.blit.pipeline_2d_src,
+ +                       alloc);
+ +   anv_DestroyPipeline(device_h, device->meta_state.blit.pipeline_3d_src,
+ +                       alloc);
+ +
+ +   anv_DestroyPipelineLayout(device_h,
+ +                             device->meta_state.blit.pipeline_layout, alloc);
+ +   anv_DestroyDescriptorSetLayout(device_h,
+ +                                  device->meta_state.blit.ds_layout, alloc);
+ +}
+ +
+ +/**
+ + * Creates the device-wide objects used by the meta blit path and stores
+ + * them in device->meta_state.blit:
+ + *
+ + *  - a render pass with a single color attachment,
+ + *  - a descriptor set layout with one combined image/sampler visible to
+ + *    the fragment stage, and the pipeline layout built from it,
+ + *  - one graphics pipeline per source sampler dimensionality (1D, 2D, 3D).
+ + *
+ + * Returns VK_SUCCESS, or the VkResult of the first call that failed after
+ + * destroying whatever had been created up to that point.
+ + */
+ +VkResult
+ +anv_device_init_meta_blit_state(struct anv_device *device)
+ +{
+ +   const VkDevice device_h = anv_device_to_handle(device);
+ +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ +   VkResult result;
+ +
+ +   /* Render pass: load and store one color attachment, no depth/stencil. */
+ +   const VkAttachmentDescription color_attachment = {
+ +      .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ +      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ +      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ +      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +   };
+ +   const VkAttachmentReference color_ref = {
+ +      .attachment = 0,
+ +      .layout = VK_IMAGE_LAYOUT_GENERAL,
+ +   };
+ +   const VkAttachmentReference depth_stencil_ref = {
+ +      .attachment = VK_ATTACHMENT_UNUSED,
+ +      .layout = VK_IMAGE_LAYOUT_GENERAL,
+ +   };
+ +   const uint32_t preserve_attachments[] = { 0 };
+ +   const VkSubpassDescription subpass = {
+ +      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +      .inputAttachmentCount = 0,
+ +      .colorAttachmentCount = 1,
+ +      .pColorAttachments = &color_ref,
+ +      .pResolveAttachments = NULL,
+ +      .pDepthStencilAttachment = &depth_stencil_ref,
+ +      .preserveAttachmentCount = 1,
+ +      .pPreserveAttachments = preserve_attachments,
+ +   };
+ +   const VkRenderPassCreateInfo render_pass_info = {
+ +      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ +      .attachmentCount = 1,
+ +      .pAttachments = &color_attachment,
+ +      .subpassCount = 1,
+ +      .pSubpasses = &subpass,
+ +      .dependencyCount = 0,
+ +   };
+ +
+ +   result = anv_CreateRenderPass(device_h, &render_pass_info, alloc,
+ +                                 &device->meta_state.blit.render_pass);
+ +   if (result != VK_SUCCESS)
+ +      return result; /* Nothing has been created yet. */
+ +
+ +   /* We don't use a vertex shader for blitting, but instead build and pass
+ +    * the VUEs directly to the rasterization backend.  A vertex shader
+ +    * module still has to be supplied so that the compiler does not
+ +    * dead-code our inputs.
+ +    *
+ +    * The modules are declared one by one so the builders run in a fixed
+ +    * order: vertex, then 1D, 2D and 3D fragment shaders.
+ +    */
+ +   struct anv_shader_module vs = {
+ +      .nir = build_nir_vertex_shader(),
+ +   };
+ +   struct anv_shader_module fs_1d = {
+ +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
+ +   };
+ +   struct anv_shader_module fs_2d = {
+ +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
+ +   };
+ +   struct anv_shader_module fs_3d = {
+ +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
+ +   };
+ +
+ +   /* Binding 0 feeds the VUE header at instance rate with a zero stride;
+ +    * binding 1 feeds position (2 floats) plus texture coordinate (3 floats)
+ +    * per vertex.
+ +    */
+ +   const VkVertexInputBindingDescription vertex_bindings[] = {
+ +      {
+ +         .binding = 0,
+ +         .stride = 0,
+ +         .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
+ +      },
+ +      {
+ +         .binding = 1,
+ +         .stride = 5 * sizeof(float),
+ +         .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ +      },
+ +   };
+ +   const VkVertexInputAttributeDescription vertex_attributes[] = {
+ +      {
+ +         /* VUE Header */
+ +         .location = 0,
+ +         .binding = 0,
+ +         .format = VK_FORMAT_R32G32B32A32_UINT,
+ +         .offset = 0
+ +      },
+ +      {
+ +         /* Position */
+ +         .location = 1,
+ +         .binding = 1,
+ +         .format = VK_FORMAT_R32G32_SFLOAT,
+ +         .offset = 0
+ +      },
+ +      {
+ +         /* Texture Coordinate */
+ +         .location = 2,
+ +         .binding = 1,
+ +         .format = VK_FORMAT_R32G32B32_SFLOAT,
+ +         .offset = 8
+ +      },
+ +   };
+ +   VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ +      .vertexBindingDescriptionCount = 2,
+ +      .pVertexBindingDescriptions = vertex_bindings,
+ +      .vertexAttributeDescriptionCount = 3,
+ +      .pVertexAttributeDescriptions = vertex_attributes,
+ +   };
+ +
+ +   /* One combined image/sampler, read by the fragment shader only. */
+ +   const VkDescriptorSetLayoutBinding ds_bindings[] = {
+ +      {
+ +         .binding = 0,
+ +         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ +         .descriptorCount = 1,
+ +         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ +         .pImmutableSamplers = NULL
+ +      },
+ +   };
+ +   VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ +      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ +      .bindingCount = 1,
+ +      .pBindings = ds_bindings,
+ +   };
+ +   result = anv_CreateDescriptorSetLayout(device_h, &ds_layout_info, alloc,
+ +                                          &device->meta_state.blit.ds_layout);
+ +   if (result != VK_SUCCESS)
+ +      goto fail_render_pass;
+ +
+ +   const VkPipelineLayoutCreateInfo pipeline_layout_info = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ +      .setLayoutCount = 1,
+ +      .pSetLayouts = &device->meta_state.blit.ds_layout,
+ +   };
+ +   result = anv_CreatePipelineLayout(device_h, &pipeline_layout_info, alloc,
+ +                                     &device->meta_state.blit.pipeline_layout);
+ +   if (result != VK_SUCCESS)
+ +      goto fail_descriptor_set_layout;
+ +
+ +   /* Stage 1 (fragment) is a template; its module is filled in right
+ +    * before each pipeline is created.
+ +    */
+ +   VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ +      {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ +         .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ +         .module = anv_shader_module_to_handle(&vs),
+ +         .pName = "main",
+ +         .pSpecializationInfo = NULL
+ +      }, {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ +         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ +         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+ +         .pName = "main",
+ +         .pSpecializationInfo = NULL
+ +      },
+ +   };
+ +
+ +   const VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ +      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ +      .primitiveRestartEnable = false,
+ +   };
+ +   const VkPipelineViewportStateCreateInfo viewport_state = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ +      .viewportCount = 1,
+ +      .scissorCount = 1,
+ +   };
+ +   const VkPipelineRasterizationStateCreateInfo raster_state = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ +      .rasterizerDiscardEnable = false,
+ +      .polygonMode = VK_POLYGON_MODE_FILL,
+ +      .cullMode = VK_CULL_MODE_NONE,
+ +      .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+ +   };
+ +   const VkSampleMask sample_mask[] = { UINT32_MAX };
+ +   const VkPipelineMultisampleStateCreateInfo multisample_state = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ +      .rasterizationSamples = 1,
+ +      .sampleShadingEnable = false,
+ +      .pSampleMask = sample_mask,
+ +   };
+ +   const VkPipelineColorBlendAttachmentState blend_attachments[] = {
+ +      { .colorWriteMask =
+ +           VK_COLOR_COMPONENT_A_BIT |
+ +           VK_COLOR_COMPONENT_R_BIT |
+ +           VK_COLOR_COMPONENT_G_BIT |
+ +           VK_COLOR_COMPONENT_B_BIT },
+ +   };
+ +   const VkPipelineColorBlendStateCreateInfo blend_state = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ +      .attachmentCount = 1,
+ +      .pAttachments = blend_attachments,
+ +   };
+ +   const VkDynamicState dynamic_states[] = {
+ +      VK_DYNAMIC_STATE_VIEWPORT,
+ +      VK_DYNAMIC_STATE_SCISSOR,
+ +      VK_DYNAMIC_STATE_LINE_WIDTH,
+ +      VK_DYNAMIC_STATE_DEPTH_BIAS,
+ +      VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ +      VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ +      VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ +      VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ +      VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ +   };
+ +   const VkPipelineDynamicStateCreateInfo dynamic_state = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ +      .dynamicStateCount = ARRAY_SIZE(dynamic_states),
+ +      .pDynamicStates = dynamic_states,
+ +   };
+ +
+ +   const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ +      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ +      .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ +      .pStages = pipeline_shader_stages,
+ +      .pVertexInputState = &vi_create_info,
+ +      .pInputAssemblyState = &input_assembly,
+ +      .pViewportState = &viewport_state,
+ +      .pRasterizationState = &raster_state,
+ +      .pMultisampleState = &multisample_state,
+ +      .pColorBlendState = &blend_state,
+ +      .pDynamicState = &dynamic_state,
+ +      .flags = 0,
+ +      .layout = device->meta_state.blit.pipeline_layout,
+ +      .renderPass = device->meta_state.blit.render_pass,
+ +      .subpass = 0,
+ +   };
+ +
+ +   const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+ +      .color_attachment_count = -1,
+ +      .use_repclear = false,
+ +      .disable_viewport = true,
+ +      .disable_scissor = true,
+ +      .disable_vs = true,
+ +      .use_rectlist = true
+ +   };
+ +
+ +   /* The three pipelines differ only in their fragment shader module. */
+ +   const struct {
+ +      struct anv_shader_module *fs;
+ +      VkPipeline *pipeline;
+ +   } variants[] = {
+ +      { &fs_1d, &device->meta_state.blit.pipeline_1d_src },
+ +      { &fs_2d, &device->meta_state.blit.pipeline_2d_src },
+ +      { &fs_3d, &device->meta_state.blit.pipeline_3d_src },
+ +   };
+ +
+ +   unsigned num_created = 0;
+ +   while (num_created < ARRAY_SIZE(variants)) {
+ +      pipeline_shader_stages[1].module =
+ +         anv_shader_module_to_handle(variants[num_created].fs);
+ +      result = anv_graphics_pipeline_create(device_h, VK_NULL_HANDLE,
+ +                                            &vk_pipeline_info,
+ +                                            &anv_pipeline_info, alloc,
+ +                                            variants[num_created].pipeline);
+ +      if (result != VK_SUCCESS)
+ +         break;
+ +      num_created++;
+ +   }
+ +
+ +   if (result == VK_SUCCESS) {
+ +      /* The NIR is no longer needed once the pipelines exist. */
+ +      ralloc_free(vs.nir);
+ +      ralloc_free(fs_1d.nir);
+ +      ralloc_free(fs_2d.nir);
+ +      ralloc_free(fs_3d.nir);
+ +
+ +      return VK_SUCCESS;
+ +   }
+ +
+ +   /* Unwind in reverse creation order, starting with the pipelines that
+ +    * were successfully created before the failure.
+ +    */
+ +   while (num_created > 0) {
+ +      num_created--;
+ +      anv_DestroyPipeline(device_h, *variants[num_created].pipeline, alloc);
+ +   }
+ +
+ +   anv_DestroyPipelineLayout(device_h,
+ +                             device->meta_state.blit.pipeline_layout, alloc);
+ + fail_descriptor_set_layout:
+ +   anv_DestroyDescriptorSetLayout(device_h,
+ +                                  device->meta_state.blit.ds_layout, alloc);
+ + fail_render_pass:
+ +   anv_DestroyRenderPass(device_h, device->meta_state.blit.render_pass,
+ +                         alloc);
+ +
+ +   ralloc_free(vs.nir);
+ +   ralloc_free(fs_1d.nir);
+ +   ralloc_free(fs_2d.nir);
+ +   ralloc_free(fs_3d.nir);
+ +
+ +   return result;
+ +}
diff --cc src/intel/vulkan/anv_meta_blit2d.c

index 4a0bed1,0000000..87c3358

mode 100644,000000..100644
--- 1/src/intel/vulkan/anv_meta_blit2d.c
--- /dev/null
+++ b/src/intel/vulkan/anv_meta_blit2d.c
@@@ -1,723 -1,0 +1,723 @@@
-    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+ +/*
+ + * Copyright © 2016 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + */
+ +
+ +#include "anv_meta.h"
+ +#include "nir/nir_builder.h"
+ +
+ +/**
+ + * Maps a format block size in bytes to the VkFormat used for blit2d
+ + * surfaces of that size.  Sizes other than 1, 2, 3, 4, 6, 8, 12 and 16 are
+ + * treated as unreachable.
+ + *
+ + * The choice of UNORM and UINT formats is very intentional here.  Most of
+ + * the time, we want to use a UINT format to avoid any rounding error in
+ + * the blit.  For stencil blits, R8_UINT is required by the hardware.
+ + * (It's the only format allowed in conjunction with W-tiling.)  Also we
+ + * intentionally use the 4-channel formats whenever we can.  This is so
+ + * that, when we do a RGB <-> RGBX copy, the two formats will line up even
+ + * though one of them is 3/4 the size of the other.  The choice of UNORM
+ + * vs. UINT is also very intentional because Haswell doesn't handle 8 or
+ + * 16-bit RGB UINT formats at all so we have to use UNORM there.
+ + * Fortunately, the only time we should ever use two different formats in
+ + * the table below is for RGB -> RGBA blits and so we will never have any
+ + * UNORM/UINT mismatch.
+ + */
+ +static VkFormat
+ +vk_format_for_size(int bs)
+ +{
+ +   VkFormat format;
+ +
+ +   switch (bs) {
+ +   case 1:
+ +      format = VK_FORMAT_R8_UINT;
+ +      break;
+ +   case 2:
+ +      format = VK_FORMAT_R8G8_UINT;
+ +      break;
+ +   case 3:
+ +      format = VK_FORMAT_R8G8B8_UNORM;
+ +      break;
+ +   case 4:
+ +      format = VK_FORMAT_R8G8B8A8_UNORM;
+ +      break;
+ +   case 6:
+ +      format = VK_FORMAT_R16G16B16_UNORM;
+ +      break;
+ +   case 8:
+ +      format = VK_FORMAT_R16G16B16A16_UNORM;
+ +      break;
+ +   case 12:
+ +      format = VK_FORMAT_R32G32B32_UINT;
+ +      break;
+ +   case 16:
+ +      format = VK_FORMAT_R32G32B32A32_UINT;
+ +      break;
+ +   default:
+ +      unreachable("Invalid format block size");
+ +   }
+ +
+ +   return format;
+ +}
+ +
+ +/**
+ + * Records one blit2d draw into cmd_buffer.
+ + *
+ + * Three vertices are written to dynamic state.  Their positions are
+ + * (x + w, y + h), (x, y + h) and (x, y) of the destination rectangle given
+ + * by dest_offset/extent, and their texture coordinates are the matching
+ + * corners of the source rectangle at src_offset, passed as raw texel
+ + * values (they are not divided by the source view size).
+ + *
+ + * A descriptor pool, a descriptor set and a framebuffer are created for the
+ + * draw; the pool and framebuffer are destroyed again before returning.  The
+ + * VkResult of those creation calls is not checked here.
+ + *
+ + * \param cmd_buffer   command buffer to record into
+ + * \param src_iview    view bound as the sampled image at binding 0
+ + * \param src_offset   first corner of the source rectangle, in texels
+ + * \param dest_iview   view used as the sole framebuffer attachment
+ + * \param dest_offset  first corner of the destination rectangle
+ + * \param extent       width/height shared by both rectangles
+ + */
+ +static void
+ +meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ +                 struct anv_image_view *src_iview,
+ +                 VkOffset3D src_offset,
+ +                 struct anv_image_view *dest_iview,
+ +                 VkOffset3D dest_offset,
+ +                 VkExtent3D extent)
+ +{
+ +   struct anv_device *device = cmd_buffer->device;
+ +
+ +   struct blit_vb_data {
+ +      float pos[2];
+ +      float tex_coord[3];
+ +   } *vb_data;
+ +
+ +   /* Layout: one zeroed VUE header followed by three vertices. */
+ +   unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+ +
+ +   struct anv_state vb_state =
+ +      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ +   memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ +   vb_data = vb_state.map + sizeof(struct anv_vue_header);
+ +
+ +   vb_data[0] = (struct blit_vb_data) {
+ +      .pos = {
+ +         dest_offset.x + extent.width,
+ +         dest_offset.y + extent.height,
+ +      },
+ +      .tex_coord = {
+ +         src_offset.x + extent.width,
+ +         src_offset.y + extent.height,
+ +         src_offset.z,
+ +      },
+ +   };
+ +
+ +   vb_data[1] = (struct blit_vb_data) {
+ +      .pos = {
+ +         dest_offset.x,
+ +         dest_offset.y + extent.height,
+ +      },
+ +      .tex_coord = {
+ +         src_offset.x,
+ +         src_offset.y + extent.height,
+ +         src_offset.z,
+ +      },
+ +   };
+ +
+ +   vb_data[2] = (struct blit_vb_data) {
+ +      .pos = {
+ +         dest_offset.x,
+ +         dest_offset.y,
+ +      },
+ +      .tex_coord = {
+ +         src_offset.x,
+ +         src_offset.y,
+ +         src_offset.z,
+ +      },
+ +   };
+ +
+ +   anv_state_clflush(vb_state);
+ +
+ +   /* Stack-allocated buffer object aliasing the dynamic state we just
+ +    * filled in; it is bound twice below at two different offsets.
+ +    */
+ +   struct anv_buffer vertex_buffer = {
+ +      .device = device,
+ +      .size = vb_size,
+ +      .bo = &device->dynamic_state_block_pool.bo,
+ +      .offset = vb_state.offset,
+ +   };
+ +
+ +   /* Binding 0 starts at the VUE header, binding 1 right after it at the
+ +    * per-vertex data.
+ +    */
+ +   anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ +      (VkBuffer[]) {
+ +         anv_buffer_to_handle(&vertex_buffer),
+ +         anv_buffer_to_handle(&vertex_buffer)
+ +      },
+ +      (VkDeviceSize[]) {
+ +         0,
+ +         sizeof(struct anv_vue_header),
+ +      });
+ +
+ +   VkDescriptorPool desc_pool;
+ +   anv_CreateDescriptorPool(anv_device_to_handle(device),
+ +      &(const VkDescriptorPoolCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ +         .pNext = NULL,
+ +         .flags = 0,
+ +         .maxSets = 1,
+ +         .poolSizeCount = 1,
+ +         .pPoolSizes = (VkDescriptorPoolSize[]) {
+ +            {
+ +               .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ +               .descriptorCount = 1
+ +            },
+ +         }
+ +      }, &cmd_buffer->pool->alloc, &desc_pool);
+ +
+ +   VkDescriptorSet set;
+ +   anv_AllocateDescriptorSets(anv_device_to_handle(device),
+ +      &(VkDescriptorSetAllocateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ +         .descriptorPool = desc_pool,
+ +         .descriptorSetCount = 1,
+ +         .pSetLayouts = &device->meta_state.blit2d.ds_layout
+ +      }, &set);
+ +
+ +   anv_UpdateDescriptorSets(anv_device_to_handle(device),
+ +      1, /* writeCount */
+ +      (VkWriteDescriptorSet[]) {
+ +         {
+ +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ +            .dstSet = set,
+ +            .dstBinding = 0,
+ +            .dstArrayElement = 0,
+ +            .descriptorCount = 1,
+ +            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ +            .pImageInfo = (VkDescriptorImageInfo[]) {
+ +               {
+ +                  /* A sampled-image descriptor carries no sampler.
+ +                   * VkSampler is a non-dispatchable handle, so the null
+ +                   * value is VK_NULL_HANDLE rather than a NULL pointer.
+ +                   */
+ +                  .sampler = VK_NULL_HANDLE,
+ +                  .imageView = anv_image_view_to_handle(src_iview),
+ +                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +               },
+ +            }
+ +         }
+ +      }, 0, NULL);
+ +
+ +   VkFramebuffer fb;
+ +   anv_CreateFramebuffer(anv_device_to_handle(device),
+ +      &(VkFramebufferCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ +         .attachmentCount = 1,
+ +         .pAttachments = (VkImageView[]) {
+ +            anv_image_view_to_handle(dest_iview),
+ +         },
+ +         .width = dest_iview->extent.width,
+ +         .height = dest_iview->extent.height,
+ +         .layers = 1
+ +      }, &cmd_buffer->pool->alloc, &fb);
+ +
+ +   /* The render area covers only the destination rectangle. */
+ +   ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ +      &(VkRenderPassBeginInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ +         .renderPass = device->meta_state.blit2d.render_pass,
+ +         .framebuffer = fb,
+ +         .renderArea = {
+ +            .offset = { dest_offset.x, dest_offset.y },
+ +            .extent = { extent.width, extent.height },
+ +         },
+ +         .clearValueCount = 0,
+ +         .pClearValues = NULL,
+ +      }, VK_SUBPASS_CONTENTS_INLINE);
+ +
+ +   VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src;
+ +
+ +   /* Skip the bind when this pipeline is already current. */
+ +   if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
+ +      anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
+ +                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ +   }
+ +
+ +   /* The viewport spans the whole destination view, not just the rect. */
+ +   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ +                      &(VkViewport) {
+ +                        .x = 0.0f,
+ +                        .y = 0.0f,
+ +                        .width = dest_iview->extent.width,
+ +                        .height = dest_iview->extent.height,
+ +                        .minDepth = 0.0f,
+ +                        .maxDepth = 1.0f,
+ +                      });
+ +
+ +   anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+ +                             VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +                             device->meta_state.blit2d.pipeline_layout, 0, 1,
+ +                             &set, 0, NULL);
+ +
+ +   ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ +
+ +   ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+ +
+ +   /* At the point where we emit the draw call, all data from the
+ +    * descriptor sets, etc. has been used.  We are free to delete it.
+ +    */
+ +   anv_DestroyDescriptorPool(anv_device_to_handle(device),
+ +                             desc_pool, &cmd_buffer->pool->alloc);
+ +   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+ +                          &cmd_buffer->pool->alloc);
+ +}
+ +
+ +/**
+ + * Ends a blit2d sequence: passes the state captured in save back to
+ + * anv_meta_restore() for cmd_buffer.
+ + */
+ +void
+ +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ +                    struct anv_meta_saved_state *save)
+ +{
+ +   /* Note the argument order: the saved state comes first. */
+ +   anv_meta_restore(save, cmd_buffer);
+ +}
+ +
+ +/**
+ + * Begins a blit2d sequence: calls anv_meta_save() on cmd_buffer with a
+ + * dynamic-state mask that contains only the viewport bit, storing the
+ + * result in save.
+ + */
+ +void
+ +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ +                      struct anv_meta_saved_state *save)
+ +{
+ +   const uint32_t dynamic_mask = 1 << VK_DYNAMIC_STATE_VIEWPORT;
+ +
+ +   anv_meta_save(save, cmd_buffer, dynamic_mask);
+ +}
+ +/**
+ + * Blit `num_rects` rectangles from the `src` surface to the `dst` surface.
+ + *
+ + * For every entry of `rects` this function:
+ + *   1. creates a temporary source and destination VkImage whose format is
+ + *      chosen by vk_format_for_size() from the surface's `bs` field and
+ + *      whose width is `pitch / bs`,
+ + *   2. points both images directly at the caller's buffer objects
+ + *      (`bo` / `base_offset`) instead of binding memory through Vulkan,
+ + *   3. initializes image views at the intratile offset returned by
+ + *      isl_surf_get_image_intratile_offset_el_xy(),
+ + *   4. emits the blit with meta_emit_blit2d(), and
+ + *   5. destroys the two temporary images.
+ + *
+ + * NOTE(review): the results of anv_image_create() are not checked; on
+ + * failure the image handles would be used uninitialized.
+ + *
+ + * \param cmd_buffer  command buffer the blit is recorded into
+ + * \param src         source surface description
+ + * \param dst         destination surface description
+ + * \param num_rects   number of entries in `rects`
+ + * \param rects       rectangles to copy (src/dst x, y plus width and height)
+ + */
+ +void
+ +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ +                struct anv_meta_blit2d_surf *src,
+ +                struct anv_meta_blit2d_surf *dst,
+ +                unsigned num_rects,
+ +                struct anv_meta_blit2d_rect *rects)
+ +{
+ +   VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+ +   VkFormat src_format = vk_format_for_size(src->bs);
+ +   VkFormat dst_format = vk_format_for_size(dst->bs);
+ +   VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
+ +   VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ +
+ +   for (unsigned r = 0; r < num_rects; ++r) {
+ +
+ +      /* Create VkImages */
+ +      VkImageCreateInfo image_info = {
+ +         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ +         .imageType = VK_IMAGE_TYPE_2D,
+ +         .format = 0, /* TEMPLATE */
+ +         .extent = {
+ +            .width = 0, /* TEMPLATE */
+ +            .height = 0, /* TEMPLATE */
+ +            .depth = 1,
+ +         },
+ +         .mipLevels = 1,
+ +         .arrayLayers = 1,
+ +         .samples = 1,
+ +         .tiling = 0, /* TEMPLATE */
+ +         .usage = 0, /* TEMPLATE */
+ +      };
+ +      struct anv_image_create_info anv_image_info = {
+ +         .vk_info = &image_info,
+ +         .isl_tiling_flags = 0, /* TEMPLATE */
+ +      };
+ +
+ +      /* The image height is the rect height + src/dst y-offset from the
+ +       * tile-aligned base address.
+ +       */
+ +      struct isl_tile_info tile_info;
+ +
+ +      /* Fill in the template fields for the source image. */
+ +      anv_image_info.isl_tiling_flags = 1 << src->tiling;
+ +      image_info.tiling = src->tiling == ISL_TILING_LINEAR ?
+ +                          VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ +      image_info.usage = src_usage;
+ +      image_info.format = src_format;
+ +      isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs,
+ +                          &tile_info);
+ +      image_info.extent.height = rects[r].height +
+ +                                 rects[r].src_y % tile_info.height;
+ +      image_info.extent.width = src->pitch / src->bs;
+ +      VkImage src_image;
+ +      anv_image_create(vk_device, &anv_image_info,
+ +                       &cmd_buffer->pool->alloc, &src_image);
+ +
+ +      /* Re-use the same create-info structs for the destination image. */
+ +      anv_image_info.isl_tiling_flags = 1 << dst->tiling;
+ +      image_info.tiling = dst->tiling == ISL_TILING_LINEAR ?
+ +                          VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ +      image_info.usage = dst_usage;
+ +      image_info.format = dst_format;
+ +      isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs,
+ +                          &tile_info);
+ +      image_info.extent.height = rects[r].height +
+ +                                 rects[r].dst_y % tile_info.height;
+ +      image_info.extent.width = dst->pitch / dst->bs;
+ +      VkImage dst_image;
+ +      anv_image_create(vk_device, &anv_image_info,
+ +                       &cmd_buffer->pool->alloc, &dst_image);
+ +
+ +      /* We could use a vk call to bind memory, but that would require
+ +       * creating a dummy memory object etc. so there's really no point.
+ +       */
+ +      anv_image_from_handle(src_image)->bo = src->bo;
+ +      anv_image_from_handle(src_image)->offset = src->base_offset;
+ +      anv_image_from_handle(dst_image)->bo = dst->bo;
+ +      anv_image_from_handle(dst_image)->offset = dst->base_offset;
+ +
+ +      /* Create VkImageViews */
+ +      VkImageViewCreateInfo iview_info = {
+ +         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ +         .image = 0, /* TEMPLATE */
+ +         .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ +         .format = 0, /* TEMPLATE */
+ +         .subresourceRange = {
+ +            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ +            .baseMipLevel = 0,
+ +            .levelCount = 1,
+ +            .baseArrayLayer = 0,
+ +            .layerCount = 1
+ +         },
+ +      };
+ +      /* Offset output of the intratile query; overwritten for src, then dst,
+ +       * and consumed by the anv_image_view_init() call that follows each.
+ +       */
+ +      uint32_t img_o = 0;
+ +
+ +      iview_info.image = src_image;
+ +      iview_info.format = src_format;
+ +      VkOffset3D src_offset_el = {0};
+ +      isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
+ +                                                &anv_image_from_handle(src_image)->
+ +                                                   color_surface.isl,
+ +                                                rects[r].src_x,
+ +                                                rects[r].src_y,
+ +                                                &img_o,
+ +                                                (uint32_t*)&src_offset_el.x,
+ +                                                (uint32_t*)&src_offset_el.y);
+ +
+ +      struct anv_image_view src_iview;
+ +      anv_image_view_init(&src_iview, cmd_buffer->device,
+ +         &iview_info, cmd_buffer, img_o, src_usage);
+ +
+ +      iview_info.image = dst_image;
+ +      iview_info.format = dst_format;
+ +      VkOffset3D dst_offset_el = {0};
+ +      isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
+ +                                                &anv_image_from_handle(dst_image)->
+ +                                                   color_surface.isl,
+ +                                                rects[r].dst_x,
+ +                                                rects[r].dst_y,
+ +                                                &img_o,
+ +                                                (uint32_t*)&dst_offset_el.x,
+ +                                                (uint32_t*)&dst_offset_el.y);
+ +      struct anv_image_view dst_iview;
+ +      anv_image_view_init(&dst_iview, cmd_buffer->device,
+ +         &iview_info, cmd_buffer, img_o, dst_usage);
+ +
+ +      /* Perform blit */
+ +      meta_emit_blit2d(cmd_buffer,
+ +                     &src_iview,
+ +                     src_offset_el,
+ +                     &dst_iview,
+ +                     dst_offset_el,
+ +                     (VkExtent3D){rects[r].width, rects[r].height, 1});
+ +
+ +      anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc);
+ +      anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc);
+ +   }
+ +}
+ +
+ +/**
+ + * Build the pass-through vertex shader (named "meta_blit_vs").
+ + *
+ + * The shader copies two vec4 inputs straight to outputs:
+ + *   - "a_pos" (VERT_ATTRIB_GENERIC0)     -> "gl_Position" (VARYING_SLOT_POS)
+ + *   - "a_tex_pos" (VERT_ATTRIB_GENERIC1) -> "v_tex_pos" (VARYING_SLOT_VAR0),
+ + *     with smooth interpolation.
+ + *
+ + * \return the nir_shader owned by the builder; both callers in this file
+ + *         release it with ralloc_free().
+ + */
+ +static nir_shader *
+ +build_nir_vertex_shader(void)
+ +{
+ +   const struct glsl_type *vec4 = glsl_vec4_type();
+ +   nir_builder b;
+ +
+ +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+ +
+ +   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ +                                              vec4, "a_pos");
+ +   pos_in->data.location = VERT_ATTRIB_GENERIC0;
+ +   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ +                                               vec4, "gl_Position");
+ +   pos_out->data.location = VARYING_SLOT_POS;
+ +   nir_copy_var(&b, pos_out, pos_in);
+ +
+ +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ +                                                  vec4, "a_tex_pos");
+ +   tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
+ +   nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ +                                                   vec4, "v_tex_pos");
+ +   tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ +   tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+ +   nir_copy_var(&b, tex_pos_out, tex_pos_in);
+ +
+ +   return b.shader;
+ +}
+ +/**
+ + * Build the copy fragment shader (named "meta_blit2d_fs").
+ + *
+ + * The shader converts the vec3 varying "v_tex_pos" (VARYING_SLOT_VAR0) to
+ + * integers, uses it as the coordinate of a txf texel fetch at LOD 0 from the
+ + * sampler "s_tex" (descriptor set 0, binding 0) and writes the fetched vec4
+ + * to "f_color" (FRAG_RESULT_DATA0).
+ + *
+ + * \param tex_dim  sampler dimensionality; every dimension other than
+ + *                 GLSL_SAMPLER_DIM_3D is declared as an array sampler.
+ + * \return the nir_shader owned by the builder.
+ + */
+ +static nir_shader *
+ +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
+ +{
+ +   const struct glsl_type *vec4 = glsl_vec4_type();
+ +   const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ +   nir_builder b;
+ +
+ +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+ +
+ +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ +                                                  vec3, "v_tex_pos");
+ +   tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ +   nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+ +
+ +   const struct glsl_type *sampler_type =
+ +      glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+ +                        glsl_get_base_type(vec4));
+ +   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ +                                               sampler_type, "s_tex");
+ +   sampler->data.descriptor_set = 0;
+ +   sampler->data.binding = 0;
+ +
+ +   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
+ +   tex->sampler_dim = tex_dim;
+ +   tex->op = nir_texop_txf;
+ +   tex->src[0].src_type = nir_tex_src_coord;
+ +   tex->src[0].src = nir_src_for_ssa(tex_pos);
+ +   tex->src[1].src_type = nir_tex_src_lod;
+ +   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ +   tex->dest_type = nir_type_float; /* TODO */
+ +   tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ +   tex->coord_components = tex_pos->num_components;
+ +   tex->texture = nir_deref_var_create(tex, sampler);
+ +   tex->sampler = NULL;
+ +
++   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ +   nir_builder_instr_insert(&b, &tex->instr);
+ +
+ +   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ +                                                 vec4, "f_color");
+ +   color_out->data.location = FRAG_RESULT_DATA0;
+ +   /* The last argument is a per-component write mask, not a component
+ +    * count: 0xf selects all four channels of the vec4 output.
+ +    */
+ +   nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
+ +
+ +   return b.shader;
+ +}
+ +/**
+ + * Destroy the blit2d objects stored in device->meta_state.blit2d: the render
+ + * pass, the 2D-source pipeline, the pipeline layout and the descriptor set
+ + * layout.  All of them are released with the device->meta_state.alloc
+ + * allocation callbacks.
+ + */
+ +void
+ +anv_device_finish_meta_blit2d_state(struct anv_device *device)
+ +{
+ +   anv_DestroyRenderPass(anv_device_to_handle(device),
+ +                         device->meta_state.blit2d.render_pass,
+ +                         &device->meta_state.alloc);
+ +   anv_DestroyPipeline(anv_device_to_handle(device),
+ +                       device->meta_state.blit2d.pipeline_2d_src,
+ +                       &device->meta_state.alloc);
+ +   anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ +                             device->meta_state.blit2d.pipeline_layout,
+ +                             &device->meta_state.alloc);
+ +   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ +                                  device->meta_state.blit2d.ds_layout,
+ +                                  &device->meta_state.alloc);
+ +}
+ +/**
+ + * Create the objects used by the blit2d meta operation and store them in
+ + * device->meta_state.blit2d: a single-subpass render pass, a descriptor set
+ + * layout with one sampled-image binding for the fragment stage, a pipeline
+ + * layout using that set layout, and the graphics pipeline `pipeline_2d_src`.
+ + *
+ + * The temporary NIR shaders are freed on success and on every failure path
+ + * that is reached after they were built.  On failure, the objects created so
+ + * far are destroyed in reverse order through the fail_* labels.
+ + *
+ + * \return VK_SUCCESS, or the first failing VkResult of the create calls.
+ + */
+ +VkResult
+ +anv_device_init_meta_blit2d_state(struct anv_device *device)
+ +{
+ +   VkResult result;
+ +
+ +   result = anv_CreateRenderPass(anv_device_to_handle(device),
+ +      &(VkRenderPassCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ +         .attachmentCount = 1,
+ +         .pAttachments = &(VkAttachmentDescription) {
+ +            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ +            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ +            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ +            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +         },
+ +         .subpassCount = 1,
+ +         .pSubpasses = &(VkSubpassDescription) {
+ +            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +            .inputAttachmentCount = 0,
+ +            .colorAttachmentCount = 1,
+ +            .pColorAttachments = &(VkAttachmentReference) {
+ +               .attachment = 0,
+ +               .layout = VK_IMAGE_LAYOUT_GENERAL,
+ +            },
+ +            .pResolveAttachments = NULL,
+ +            .pDepthStencilAttachment = &(VkAttachmentReference) {
+ +               .attachment = VK_ATTACHMENT_UNUSED,
+ +               .layout = VK_IMAGE_LAYOUT_GENERAL,
+ +            },
+ +            .preserveAttachmentCount = 1, /* NOTE(review): preserves attachment 0, which is also the color attachment -- confirm this is intended */
+ +            .pPreserveAttachments = (uint32_t[]) { 0 },
+ +         },
+ +         .dependencyCount = 0,
+ +      }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
+ +   if (result != VK_SUCCESS)
+ +      goto fail;
+ +
+ +   /* We don't use a vertex shader for blitting (disable_vs is set below),
+ +    * but instead build and pass the VUEs directly to the rasterization
+ +    * backend.  However, we do need to provide a NIR vertex shader so that
+ +    * the compiler does not dead-code our inputs.
+ +    */
+ +   struct anv_shader_module vs = {
+ +      .nir = build_nir_vertex_shader(),
+ +   };
+ +
+ +   struct anv_shader_module fs_2d = {
+ +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
+ +   };
+ +
+ +   VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ +      .vertexBindingDescriptionCount = 2,
+ +      .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ +         {
+ +            .binding = 0,
+ +            .stride = 0,
+ +            .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
+ +         },
+ +         {
+ +            .binding = 1,
+ +            .stride = 5 * sizeof(float), /* 2 position floats + 3 texture-coordinate floats, see the attributes below */
+ +            .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ +         },
+ +      },
+ +      .vertexAttributeDescriptionCount = 3,
+ +      .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ +         {
+ +            /* VUE Header */
+ +            .location = 0,
+ +            .binding = 0,
+ +            .format = VK_FORMAT_R32G32B32A32_UINT,
+ +            .offset = 0
+ +         },
+ +         {
+ +            /* Position */
+ +            .location = 1,
+ +            .binding = 1,
+ +            .format = VK_FORMAT_R32G32_SFLOAT,
+ +            .offset = 0
+ +         },
+ +         {
+ +            /* Texture Coordinate */
+ +            .location = 2,
+ +            .binding = 1,
+ +            .format = VK_FORMAT_R32G32B32_SFLOAT,
+ +            .offset = 8
+ +         }
+ +      }
+ +   };
+ +
+ +   VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ +      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ +      .bindingCount = 1,
+ +      .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ +         {
+ +            .binding = 0,
+ +            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ +            .descriptorCount = 1,
+ +            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ +            .pImmutableSamplers = NULL
+ +         },
+ +      }
+ +   };
+ +   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+ +                                          &ds_layout_info,
+ +                                          &device->meta_state.alloc,
+ +                                          &device->meta_state.blit2d.ds_layout);
+ +   if (result != VK_SUCCESS)
+ +      goto fail_render_pass;
+ +
+ +   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+ +      &(VkPipelineLayoutCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ +         .setLayoutCount = 1,
+ +         .pSetLayouts = &device->meta_state.blit2d.ds_layout,
+ +      },
+ +      &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout);
+ +   if (result != VK_SUCCESS)
+ +      goto fail_descriptor_set_layout;
+ +
+ +   VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ +      {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ +         .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ +         .module = anv_shader_module_to_handle(&vs),
+ +         .pName = "main",
+ +         .pSpecializationInfo = NULL
+ +      }, {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ +         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ +         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+ +         .pName = "main",
+ +         .pSpecializationInfo = NULL
+ +      },
+ +   };
+ +
+ +   const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ +      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ +      .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ +      .pStages = pipeline_shader_stages,
+ +      .pVertexInputState = &vi_create_info,
+ +      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ +         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ +         .primitiveRestartEnable = false,
+ +      },
+ +      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ +         .viewportCount = 1,
+ +         .scissorCount = 1,
+ +      },
+ +      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ +         .rasterizerDiscardEnable = false,
+ +         .polygonMode = VK_POLYGON_MODE_FILL,
+ +         .cullMode = VK_CULL_MODE_NONE,
+ +         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+ +      },
+ +      .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ +         .rasterizationSamples = 1,
+ +         .sampleShadingEnable = false,
+ +         .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+ +      },
+ +      .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ +         .attachmentCount = 1,
+ +         .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ +            { .colorWriteMask =
+ +                 VK_COLOR_COMPONENT_A_BIT |
+ +                 VK_COLOR_COMPONENT_R_BIT |
+ +                 VK_COLOR_COMPONENT_G_BIT |
+ +                 VK_COLOR_COMPONENT_B_BIT },
+ +         }
+ +      },
+ +      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ +         .dynamicStateCount = 9, /* must equal the number of entries listed below */
+ +         .pDynamicStates = (VkDynamicState[]) {
+ +            VK_DYNAMIC_STATE_VIEWPORT,
+ +            VK_DYNAMIC_STATE_SCISSOR,
+ +            VK_DYNAMIC_STATE_LINE_WIDTH,
+ +            VK_DYNAMIC_STATE_DEPTH_BIAS,
+ +            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ +            VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ +            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ +            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ +            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ +         },
+ +      },
+ +      .flags = 0,
+ +      .layout = device->meta_state.blit2d.pipeline_layout,
+ +      .renderPass = device->meta_state.blit2d.render_pass,
+ +      .subpass = 0,
+ +   };
+ +
+ +   const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+ +      .color_attachment_count = -1,
+ +      .use_repclear = false,
+ +      .disable_viewport = true,
+ +      .disable_scissor = true,
+ +      .disable_vs = true,
+ +      .use_rectlist = true
+ +   };
+ +
+ +   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); /* fill in the fragment-stage template slot */
+ +   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ +      VK_NULL_HANDLE,
+ +      &vk_pipeline_info, &anv_pipeline_info,
+ +      &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src);
+ +   if (result != VK_SUCCESS)
+ +      goto fail_pipeline_layout;
+ +
+ +   /* The NIR shaders are only needed while the pipeline is being created. */
+ +   ralloc_free(vs.nir);
+ +   ralloc_free(fs_2d.nir);
+ +
+ +   return VK_SUCCESS;
+ +
+ + fail_pipeline_layout:
+ +   anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ +                             device->meta_state.blit2d.pipeline_layout,
+ +                             &device->meta_state.alloc);
+ + fail_descriptor_set_layout:
+ +   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ +                                  device->meta_state.blit2d.ds_layout,
+ +                                  &device->meta_state.alloc);
+ + fail_render_pass:
+ +   anv_DestroyRenderPass(anv_device_to_handle(device),
+ +                         device->meta_state.blit2d.render_pass,
+ +                         &device->meta_state.alloc);
+ +
+ +   /* Every path that reaches this point has already built both shaders. */
+ +   ralloc_free(vs.nir);
+ +   ralloc_free(fs_2d.nir);
+ + fail:
+ +   return result;
+ +}
diff --cc src/intel/vulkan/anv_meta_resolve.c

index f50af52,0000000..3e7c7d3

mode 100644,000000..100644
--- 1/src/intel/vulkan/anv_meta_resolve.c
--- /dev/null
+++ b/src/intel/vulkan/anv_meta_resolve.c
@@@ -1,902 -1,0 +1,902 @@@
-       nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex");
+ +/*
+ + * Copyright © 2016 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + */
+ +
+ +#include <assert.h>
+ +#include <stdbool.h>
+ +
+ +#include "anv_meta.h"
+ +#include "anv_private.h"
+ +#include "nir/nir_builder.h"
+ +
+ +/**
+ + * Vertex attributes used by all pipelines.
+ + *
+ + * create_pipeline() uses sizeof() of this struct as the vertex-buffer stride
+ + * and offsetof() of each member as the attribute offset, so the member order
+ + * here defines the vertex layout.
+ + */
+ +struct vertex_attrs {
+ +   struct anv_vue_header vue_header;
+ +   float position[2]; /**< 3DPRIM_RECTLIST */
+ +   float tex_position[2]; /**< texture coordinate, fetched as R32G32_SFLOAT */
+ +};
+ +/**
+ + * Save command-buffer state before a resolve.
+ + *
+ + * Calls anv_meta_save() with a dynamic-state mask containing the viewport
+ + * and scissor bits, then sets the command buffer's dynamic viewport and
+ + * scissor counts to zero.
+ + */
+ +static void
+ +meta_resolve_save(struct anv_meta_saved_state *saved_state,
+ +                  struct anv_cmd_buffer *cmd_buffer)
+ +{
+ +   anv_meta_save(saved_state, cmd_buffer,
+ +                 (1 << VK_DYNAMIC_STATE_VIEWPORT) |
+ +                 (1 << VK_DYNAMIC_STATE_SCISSOR));
+ +
+ +   cmd_buffer->state.dynamic.viewport.count = 0;
+ +   cmd_buffer->state.dynamic.scissor.count = 0;
+ +}
+ +/**
+ + * Counterpart of meta_resolve_save(): hands `saved_state` back to the
+ + * command buffer through anv_meta_restore().
+ + */
+ +static void
+ +meta_resolve_restore(struct anv_meta_saved_state *saved_state,
+ +                     struct anv_cmd_buffer *cmd_buffer)
+ +{
+ +   anv_meta_restore(saved_state, cmd_buffer);
+ +}
+ +/**
+ + * Return a pointer to the slot in device->meta_state.resolve.pipelines that
+ + * belongs to the given sample count.
+ + *
+ + * The slot index is ffs(samples) - 2, i.e. 2 samples -> 0, 4 -> 1, 8 -> 2
+ + * for power-of-two counts.  The asserts require samples >= 2 and an index
+ + * inside the array; they run after the index has been computed.
+ + */
+ +static VkPipeline *
+ +get_pipeline_h(struct anv_device *device, uint32_t samples)
+ +{
+ +   uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
+ +
+ +   assert(samples >= 2);
+ +   assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));
+ +
+ +   return &device->meta_state.resolve.pipelines[i];
+ +}
+ +/**
+ + * Build the pass-through vertex shader (named "meta_resolve_vs").
+ + *
+ + * Declares two vec4 inputs and two vec4 outputs and copies
+ + *   - "a_position" (VERT_ATTRIB_GENERIC0)     -> "gl_Position"
+ + *   - "a_tex_position" (VERT_ATTRIB_GENERIC1) -> "v_tex_position"
+ + *     (VARYING_SLOT_VAR0).
+ + *
+ + * \return the nir_shader owned by the builder.
+ + */
+ +static nir_shader *
+ +build_nir_vs(void)
+ +{
+ +   const struct glsl_type *vec4 = glsl_vec4_type();
+ +
+ +   nir_builder b;
+ +   nir_variable *a_position;
+ +   nir_variable *v_position;
+ +   nir_variable *a_tex_position;
+ +   nir_variable *v_tex_position;
+ +
+ +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ +   b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
+ +
+ +   a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ +                                    "a_position");
+ +   a_position->data.location = VERT_ATTRIB_GENERIC0;
+ +
+ +   v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ +                                    "gl_Position");
+ +   v_position->data.location = VARYING_SLOT_POS;
+ +
+ +   a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ +                                    "a_tex_position");
+ +   a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
+ +
+ +   v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ +                                    "v_tex_position");
+ +   v_tex_position->data.location = VARYING_SLOT_VAR0;
+ +
+ +   /* Both outputs are plain copies of the corresponding inputs. */
+ +   nir_copy_var(&b, v_position, a_position);
+ +   nir_copy_var(&b, v_tex_position, a_tex_position);
+ +
+ +   return b.shader;
+ +}
+ +/**
+ + * Build the resolve fragment shader for `num_samples` samples (named
+ + * "meta_resolve_fs_samplesNN").
+ + *
+ + * The shader converts "v_tex_position" to integers, issues one txf_ms fetch
+ + * from the multisampled sampler "u_tex" (descriptor set 0, binding 0) for
+ + * every sample index in [0, num_samples), adds the results together, divides
+ + * the sum by num_samples and stores the average in "f_color"
+ + * (FRAG_RESULT_DATA0).
+ + *
+ + * \param num_samples  number of samples to average
+ + * \return the nir_shader owned by the builder.
+ + */
+ +static nir_shader *
+ +build_nir_fs(uint32_t num_samples)
+ +{
+ +   const struct glsl_type *vec4 = glsl_vec4_type();
+ +
+ +   const struct glsl_type *sampler2DMS =
+ +         glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+ +                           /*is_shadow*/ false,
+ +                           /*is_array*/ false,
+ +                           GLSL_TYPE_FLOAT);
+ +
+ +   nir_builder b;
+ +   nir_variable *u_tex; /* uniform sampler */
+ +   nir_variable *v_position; /* vec4, varying fragment position */
+ +   nir_variable *v_tex_position; /* vec4, varying texture coordinate */
+ +   nir_variable *f_color; /* vec4, fragment output color */
+ +   nir_ssa_def *accum; /* vec4, accumulation of sample values */
+ +
+ +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ +   b.shader->info.name = ralloc_asprintf(b.shader,
+ +                                         "meta_resolve_fs_samples%02d",
+ +                                         num_samples);
+ +
+ +   u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
+ +                                   "u_tex");
+ +   u_tex->data.descriptor_set = 0;
+ +   u_tex->data.binding = 0;
+ +
+ +   v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ +                                     "v_position");
+ +   v_position->data.location = VARYING_SLOT_POS;
+ +   v_position->data.origin_upper_left = true;
+ +
+ +   v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ +                                    "v_tex_position");
+ +   v_tex_position->data.location = VARYING_SLOT_VAR0;
+ +
+ +   f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ +                                 "f_color");
+ +   f_color->data.location = FRAG_RESULT_DATA0;
+ +
+ +   accum = nir_imm_vec4(&b, 0, 0, 0, 0);
+ +
+ +   nir_ssa_def *tex_position_ivec =
+ +      nir_f2i(&b, nir_load_var(&b, v_tex_position));
+ +
+ +   /* Unrolled at shader-build time: one texel fetch per sample index. */
+ +   for (uint32_t i = 0; i < num_samples; ++i) {
+ +      nir_tex_instr *tex;
+ +
+ +      tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
+ +      tex->texture = nir_deref_var_create(tex, u_tex);
+ +      tex->sampler = nir_deref_var_create(tex, u_tex);
+ +      tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ +      tex->op = nir_texop_txf_ms;
+ +      tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
+ +      tex->src[0].src_type = nir_tex_src_coord;
+ +      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ +      tex->src[1].src_type = nir_tex_src_ms_index;
+ +      tex->dest_type = nir_type_float;
+ +      tex->is_array = false;
+ +      tex->coord_components = 3;
++      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ +      nir_builder_instr_insert(&b, &tex->instr);
+ +
+ +      accum = nir_fadd(&b, accum, &tex->dest.ssa);
+ +   }
+ +
+ +   accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
+ +   /* The write mask is a per-component bit mask, not a component count:
+ +    * 0xf stores all four channels of the averaged color.
+ +    */
+ +   nir_store_var(&b, f_color, accum, /*writemask*/ 0xf);
+ +
+ +   return b.shader;
+ +}
+ +/**
+ + * Create the render pass used by the resolve pipelines and store it in
+ + * device->meta_state.resolve.pass.
+ + *
+ + * The pass has a single-sample color attachment (load/store, GENERAL
+ + * layout) and one graphics subpass that writes to it; it has no depth/stencil
+ + * attachment, no preserve attachments and no dependencies.
+ + *
+ + * \return the VkResult of anv_CreateRenderPass().
+ + */
+ +static VkResult
+ +create_pass(struct anv_device *device)
+ +{
+ +   VkResult result;
+ +   VkDevice device_h = anv_device_to_handle(device);
+ +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ +
+ +   result = anv_CreateRenderPass(device_h,
+ +      &(VkRenderPassCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ +         .attachmentCount = 1,
+ +         .pAttachments = &(VkAttachmentDescription) {
+ +            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ +            .samples = 1,
+ +            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ +            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ +            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +         },
+ +         .subpassCount = 1,
+ +         .pSubpasses = &(VkSubpassDescription) {
+ +            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +            .inputAttachmentCount = 0,
+ +            .colorAttachmentCount = 1,
+ +            .pColorAttachments = &(VkAttachmentReference) {
+ +               .attachment = 0,
+ +               .layout = VK_IMAGE_LAYOUT_GENERAL,
+ +            },
+ +            .pResolveAttachments = NULL,
+ +            .pDepthStencilAttachment = &(VkAttachmentReference) {
+ +               .attachment = VK_ATTACHMENT_UNUSED,
+ +            },
+ +            .preserveAttachmentCount = 0,
+ +            .pPreserveAttachments = NULL,
+ +         },
+ +         .dependencyCount = 0,
+ +      },
+ +      alloc,
+ +      &device->meta_state.resolve.pass);
+ +
+ +   return result;
+ +}
+ +/**
+ + * Create the resolve pipeline for `num_samples` samples and store its handle
+ + * in the slot returned by get_pipeline_h().
+ + *
+ + * The fragment shader is built here with build_nir_fs() and freed before
+ + * returning, whether or not pipeline creation succeeded.  The vertex layout
+ + * (stride and attribute offsets) is taken from struct vertex_attrs.
+ + *
+ + * \param device       device owning the meta state
+ + * \param num_samples  sample count the pipeline resolves
+ + * \param vs_module_h  handle of the vertex shader module to use
+ + * \return VK_ERROR_OUT_OF_HOST_MEMORY if the fragment shader could not be
+ + *         built, otherwise the VkResult of anv_graphics_pipeline_create().
+ + */
+ +static VkResult
+ +create_pipeline(struct anv_device *device,
+ +                uint32_t num_samples,
+ +                VkShaderModule vs_module_h)
+ +{
+ +   VkResult result;
+ +   VkDevice device_h = anv_device_to_handle(device);
+ +
+ +   struct anv_shader_module fs_module = {
+ +      .nir = build_nir_fs(num_samples),
+ +   };
+ +
+ +   if (!fs_module.nir) {
+ +      /* XXX: Need more accurate error */
+ +      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ +      goto cleanup;
+ +   }
+ +
+ +   result = anv_graphics_pipeline_create(device_h,
+ +      VK_NULL_HANDLE,
+ +      &(VkGraphicsPipelineCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ +         .stageCount = 2,
+ +         .pStages = (VkPipelineShaderStageCreateInfo[]) {
+ +            {
+ +               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ +               .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ +               .module = vs_module_h,
+ +               .pName = "main",
+ +            },
+ +            {
+ +               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ +               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ +               .module = anv_shader_module_to_handle(&fs_module),
+ +               .pName = "main",
+ +            },
+ +         },
+ +         .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ +            .vertexBindingDescriptionCount = 1,
+ +            .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ +               {
+ +                  .binding = 0,
+ +                  .stride = sizeof(struct vertex_attrs),
+ +                  .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ +               },
+ +            },
+ +            .vertexAttributeDescriptionCount = 3,
+ +            .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ +               {
+ +                  /* VUE Header */
+ +                  .location = 0,
+ +                  .binding = 0,
+ +                  .format = VK_FORMAT_R32G32B32A32_UINT,
+ +                  .offset = offsetof(struct vertex_attrs, vue_header),
+ +               },
+ +               {
+ +                  /* Position */
+ +                  .location = 1,
+ +                  .binding = 0,
+ +                  .format = VK_FORMAT_R32G32_SFLOAT,
+ +                  .offset = offsetof(struct vertex_attrs, position),
+ +               },
+ +               {
+ +                  /* Texture Coordinate */
+ +                  .location = 2,
+ +                  .binding = 0,
+ +                  .format = VK_FORMAT_R32G32_SFLOAT,
+ +                  .offset = offsetof(struct vertex_attrs, tex_position),
+ +               },
+ +            },
+ +         },
+ +         .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ +            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ +            .primitiveRestartEnable = false,
+ +         },
+ +         .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ +            .viewportCount = 1,
+ +            .scissorCount = 1,
+ +         },
+ +         .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ +            .depthClampEnable = false,
+ +            .rasterizerDiscardEnable = false,
+ +            .polygonMode = VK_POLYGON_MODE_FILL,
+ +            .cullMode = VK_CULL_MODE_NONE,
+ +            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ +         },
+ +         .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ +            .rasterizationSamples = 1,
+ +            .sampleShadingEnable = false,
+ +            .pSampleMask = (VkSampleMask[]) { 0x1 },
+ +            .alphaToCoverageEnable = false,
+ +            .alphaToOneEnable = false,
+ +         },
+ +         .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ +            .logicOpEnable = false,
+ +            .attachmentCount = 1,
+ +            .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ +               {
+ +                  .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ +                                    VK_COLOR_COMPONENT_G_BIT |
+ +                                    VK_COLOR_COMPONENT_B_BIT |
+ +                                    VK_COLOR_COMPONENT_A_BIT,
+ +               },
+ +            },
+ +         },
+ +         .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ +            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ +            .dynamicStateCount = 2,
+ +            .pDynamicStates = (VkDynamicState[]) {
+ +               VK_DYNAMIC_STATE_VIEWPORT,
+ +               VK_DYNAMIC_STATE_SCISSOR,
+ +            },
+ +         },
+ +         .layout = device->meta_state.resolve.pipeline_layout,
+ +         .renderPass = device->meta_state.resolve.pass,
+ +         .subpass = 0,
+ +      },
+ +      &(struct anv_graphics_pipeline_create_info) {
+ +         .color_attachment_count = -1,
+ +         .use_repclear = false,
+ +         .disable_viewport = true,
+ +         .disable_scissor = true,
+ +         .disable_vs = true,
+ +         .use_rectlist = true
+ +      },
+ +      &device->meta_state.alloc,
+ +      get_pipeline_h(device, num_samples));
+ +
+ +   /* Success and failure share the same exit: the fragment shader is only
+ +    * needed while the pipeline is being created, and `result` already holds
+ +    * the value to return.
+ +    */
+ +cleanup:
+ +   ralloc_free(fs_module.nir);
+ +   return result;
+ +}
+ +/* Destroy the render pass, layouts and pipelines held in meta_state.resolve. */
+ +void
+ +anv_device_finish_meta_resolve_state(struct anv_device *device)
+ +{
+ +   struct anv_meta_state *state = &device->meta_state;
+ +   VkDevice device_h = anv_device_to_handle(device);
+ +   VkRenderPass pass_h = device->meta_state.resolve.pass;
+ +   VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
+ +   VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
+ +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ +   /* Null handles are skipped, so a partially created state can be torn down. */
+ +   if (pass_h)
+ +      ANV_CALL(DestroyRenderPass)(device_h, pass_h,
+ +                                  &device->meta_state.alloc); /* same as alloc */
+ +
+ +   if (pipeline_layout_h)
+ +      ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);
+ +
+ +   if (ds_layout_h)
+ +      ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);
+ +   /* Walk every pipeline slot; slots that were never filled are skipped. */
+ +   for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
+ +      VkPipeline pipeline_h = state->resolve.pipelines[i];
+ +
+ +      if (pipeline_h) {
+ +         ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
+ +      }
+ +   }
+ +}
+ +/* Create the resolve layouts, render pass and a pipeline per supported sample count. */
+ +VkResult
+ +anv_device_init_meta_resolve_state(struct anv_device *device)
+ +{
+ +   VkResult res = VK_SUCCESS;
+ +   VkDevice device_h = anv_device_to_handle(device);
+ +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ +
+ +   const isl_sample_count_mask_t sample_count_mask =
+ +      isl_device_get_sample_counts(&device->isl_dev);
+ +   /* Zero every handle first so the fail path only destroys what was created. */
+ +   zero(device->meta_state.resolve);
+ +   /* A single vertex shader is built here and passed to every create_pipeline(). */
+ +   struct anv_shader_module vs_module = { .nir = build_nir_vs() };
+ +   if (!vs_module.nir) {
+ +      /* XXX: Need more accurate error */
+ +      res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ +      goto fail;
+ +   }
+ +
+ +   VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);
+ +   /* One combined image sampler at binding 0, visible to the fragment stage. */
+ +   res = anv_CreateDescriptorSetLayout(device_h,
+ +      &(VkDescriptorSetLayoutCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ +         .bindingCount = 1,
+ +         .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ +            {
+ +               .binding = 0,
+ +               .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ +               .descriptorCount = 1,
+ +               .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ +            },
+ +         },
+ +      },
+ +      alloc,
+ +      &device->meta_state.resolve.ds_layout);
+ +   if (res != VK_SUCCESS)
+ +      goto fail;
+ +   /* The pipeline layout contains only the descriptor set layout created above. */
+ +   res = anv_CreatePipelineLayout(device_h,
+ +      &(VkPipelineLayoutCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ +         .setLayoutCount = 1,
+ +         .pSetLayouts = (VkDescriptorSetLayout[]) {
+ +            device->meta_state.resolve.ds_layout,
+ +         },
+ +      },
+ +      alloc,
+ +      &device->meta_state.resolve.pipeline_layout);
+ +   if (res != VK_SUCCESS)
+ +      goto fail;
+ +
+ +   res = create_pass(device);
+ +   if (res != VK_SUCCESS)
+ +      goto fail;
+ +
+ +   for (uint32_t i = 0;
+ +        i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {
+ +      /* Iteration i covers sample count 2, 4, 8, ...; unsupported counts are skipped. */
+ +      uint32_t sample_count = 1 << (1 + i);
+ +      if (!(sample_count_mask & sample_count))
+ +         continue;
+ +
+ +      res = create_pipeline(device, sample_count, vs_module_h);
+ +      if (res != VK_SUCCESS)
+ +         goto fail;
+ +   }
+ +   /* Success: bypass the teardown, but still free the shader NIR below. */
+ +   goto cleanup;
+ +
+ +fail:
+ +   anv_device_finish_meta_resolve_state(device);
+ +
+ +cleanup:
+ +   ralloc_free(vs_module.nir);
+ +
+ +   return res;
+ +}
+ +/* Record one resolve draw that samples src_iview; dest_iview is not referenced below. */
+ +static void
+ +emit_resolve(struct anv_cmd_buffer *cmd_buffer,
+ +             struct anv_image_view *src_iview,
+ +             const VkOffset2D *src_offset,
+ +             struct anv_image_view *dest_iview,
+ +             const VkOffset2D *dest_offset,
+ +             const VkExtent2D *resolve_extent)
+ +{
+ +   struct anv_device *device = cmd_buffer->device;
+ +   VkDevice device_h = anv_device_to_handle(device);
+ +   VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+ +   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ +   const struct anv_image *src_image = src_iview->image;
+ +   /* Three rectangle corners, in order: (x+w, y+h), (x, y+h), (x, y). */
+ +   const struct vertex_attrs vertex_data[3] = {
+ +      {
+ +         .vue_header = {0},
+ +         .position = {
+ +            dest_offset->x + resolve_extent->width,
+ +            dest_offset->y + resolve_extent->height,
+ +         },
+ +         .tex_position = {
+ +            src_offset->x + resolve_extent->width,
+ +            src_offset->y + resolve_extent->height,
+ +         },
+ +      },
+ +      {
+ +         .vue_header = {0},
+ +         .position = {
+ +            dest_offset->x,
+ +            dest_offset->y + resolve_extent->height,
+ +         },
+ +         .tex_position = {
+ +            src_offset->x,
+ +            src_offset->y + resolve_extent->height,
+ +         },
+ +      },
+ +      {
+ +         .vue_header = {0},
+ +         .position = {
+ +            dest_offset->x,
+ +            dest_offset->y,
+ +         },
+ +         .tex_position = {
+ +            src_offset->x,
+ +            src_offset->y,
+ +         },
+ +      },
+ +   };
+ +   /* Upload the vertices; the last argument (16) is presumably the alignment. */
+ +   struct anv_state vertex_mem =
+ +      anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
+ +                                  sizeof(vertex_data), 16);
+ +   /* Stack-local buffer object describing that upload, used to obtain a VkBuffer. */
+ +   struct anv_buffer vertex_buffer = {
+ +      .device = device,
+ +      .size = sizeof(vertex_data),
+ +      .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
+ +      .offset = vertex_mem.offset,
+ +   };
+ +
+ +   VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);
+ +
+ +   anv_CmdBindVertexBuffers(cmd_buffer_h,
+ +      /*firstBinding*/ 0,
+ +      /*bindingCount*/ 1,
+ +      (VkBuffer[]) { vertex_buffer_h },
+ +      (VkDeviceSize[]) { 0 });
+ +   /* NOTE(review): return values of the three creation calls below are ignored. */
+ +   VkSampler sampler_h;
+ +   ANV_CALL(CreateSampler)(device_h,
+ +      &(VkSamplerCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ +         .magFilter = VK_FILTER_NEAREST,
+ +         .minFilter = VK_FILTER_NEAREST,
+ +         .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ +         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ +         .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ +         .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ +         .mipLodBias = 0.0,
+ +         .anisotropyEnable = false,
+ +         .compareEnable = false,
+ +         .minLod = 0.0,
+ +         .maxLod = 0.0,
+ +         .unnormalizedCoordinates = false,
+ +      },
+ +      &cmd_buffer->pool->alloc,
+ +      &sampler_h);
+ +   /* Throwaway pool sized for exactly one combined-image-sampler descriptor set. */
+ +   VkDescriptorPool desc_pool;
+ +   anv_CreateDescriptorPool(anv_device_to_handle(device),
+ +      &(const VkDescriptorPoolCreateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ +         .pNext = NULL,
+ +         .flags = 0,
+ +         .maxSets = 1,
+ +         .poolSizeCount = 1,
+ +         .pPoolSizes = (VkDescriptorPoolSize[]) {
+ +            {
+ +               .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ +               .descriptorCount = 1
+ +            },
+ +         }
+ +      }, &cmd_buffer->pool->alloc, &desc_pool);
+ +
+ +   VkDescriptorSet desc_set_h;
+ +   anv_AllocateDescriptorSets(device_h,
+ +      &(VkDescriptorSetAllocateInfo) {
+ +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ +         .descriptorPool = desc_pool,
+ +         .descriptorSetCount = 1,
+ +         .pSetLayouts = (VkDescriptorSetLayout[]) {
+ +            device->meta_state.resolve.ds_layout,
+ +         },
+ +      },
+ +      &desc_set_h);
+ +   /* Point binding 0 of the set at the source view with the sampler created above. */
+ +   anv_UpdateDescriptorSets(device_h,
+ +      /*writeCount*/ 1,
+ +      (VkWriteDescriptorSet[]) {
+ +         {
+ +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ +            .dstSet = desc_set_h,
+ +            .dstBinding = 0,
+ +            .dstArrayElement = 0,
+ +            .descriptorCount = 1,
+ +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ +            .pImageInfo = (VkDescriptorImageInfo[]) {
+ +               {
+ +                  .sampler = sampler_h,
+ +                  .imageView = anv_image_view_to_handle(src_iview),
+ +                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ +               },
+ +            },
+ +         },
+ +      },
+ +      /*copyCount*/ 0,
+ +      /*copies */ NULL);
+ +   /* Viewport and scissor both cover the whole current framebuffer. */
+ +   ANV_CALL(CmdSetViewport)(cmd_buffer_h,
+ +      /*firstViewport*/ 0,
+ +      /*viewportCount*/ 1,
+ +      (VkViewport[]) {
+ +         {
+ +            .x = 0,
+ +            .y = 0,
+ +            .width = fb->width,
+ +            .height = fb->height,
+ +            .minDepth = 0.0,
+ +            .maxDepth = 1.0,
+ +         },
+ +      });
+ +
+ +   ANV_CALL(CmdSetScissor)(cmd_buffer_h,
+ +      /*firstScissor*/ 0,
+ +      /*scissorCount*/ 1,
+ +      (VkRect2D[]) {
+ +         {
+ +            .offset = { 0, 0 },
+ +            .extent = (VkExtent2D) { fb->width, fb->height },
+ +         },
+ +      });
+ +   /* Pick the pipeline by the source sample count; bind only if not already bound. */
+ +   VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
+ +   ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);
+ +
+ +   if (cmd_buffer->state.pipeline != pipeline) {
+ +      anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +                          pipeline_h);
+ +   }
+ +
+ +   anv_CmdBindDescriptorSets(cmd_buffer_h,
+ +      VK_PIPELINE_BIND_POINT_GRAPHICS,
+ +      device->meta_state.resolve.pipeline_layout,
+ +      /*firstSet*/ 0,
+ +      /* setCount */ 1,
+ +      (VkDescriptorSet[]) {
+ +         desc_set_h,
+ +      },
+ +      /*copyCount*/ 0,
+ +      /*copies */ NULL);
+ +
+ +   ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); /* 3 vertices, 1 instance */
+ +
+ +   /* All objects below are consumed by the draw call. We may safely destroy
+ +    * them.
+ +    */
+ +   anv_DestroyDescriptorPool(anv_device_to_handle(device),
+ +                             desc_pool, &cmd_buffer->pool->alloc);
+ +   anv_DestroySampler(device_h, sampler_h,
+ +                      &cmd_buffer->pool->alloc);
+ +}
+ +/* vkCmdResolveImage: one render pass and resolve draw per layer of each region. */
+ +void anv_CmdResolveImage(
+ +    VkCommandBuffer                             cmd_buffer_h,
+ +    VkImage                                     src_image_h,
+ +    VkImageLayout                               src_image_layout,
+ +    VkImage                                     dest_image_h,
+ +    VkImageLayout                               dest_image_layout,
+ +    uint32_t                                    region_count,
+ +    const VkImageResolve*                       regions)
+ +{
+ +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
+ +   ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
+ +   ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
+ +   struct anv_device *device = cmd_buffer->device;
+ +   struct anv_meta_saved_state state;
+ +   VkDevice device_h = anv_device_to_handle(device);
+ +   /* Note: src_image_layout and dest_image_layout are not referenced below. */
+ +   meta_resolve_save(&state, cmd_buffer);
+ +   /* The source must be multisampled and the destination single-sampled. */
+ +   assert(src_image->samples > 1);
+ +   assert(dest_image->samples == 1);
+ +
+ +   if (src_image->samples >= 16) {
+ +      /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
+ +       * glBlitFramebuffer workaround for samples >= 16.
+ +       */
+ +      anv_finishme("vkCmdResolveImage: need interpolation workaround when "
+ +                   "samples >= 16");
+ +   }
+ +
+ +   if (src_image->array_size > 1)
+ +      anv_finishme("vkCmdResolveImage: multisample array images");
+ +
+ +   for (uint32_t r = 0; r < region_count; ++r) {
+ +      const VkImageResolve *region = &regions[r];
+ +
+ +      /* From the Vulkan 1.0 spec:
+ +       *
+ +       *    - The aspectMask member of srcSubresource and dstSubresource must
+ +       *      only contain VK_IMAGE_ASPECT_COLOR_BIT
+ +       *
+ +       *    - The layerCount member of srcSubresource and dstSubresource must
+ +       *      match
+ +       */
+ +      assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ +      assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ +      assert(region->srcSubresource.layerCount ==
+ +             region->dstSubresource.layerCount);
+ +      /* First view layer on each side; the per-layer loop below adds to these. */
+ +      const uint32_t src_base_layer =
+ +         anv_meta_get_iview_layer(src_image, &region->srcSubresource,
+ +                                  &region->srcOffset);
+ +
+ +      const uint32_t dest_base_layer =
+ +         anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
+ +                                  &region->dstOffset);
+ +
+ +      /**
+ +       * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
+ +       *
+ +       *    extent is the size in texels of the source image to resolve in width,
+ +       *    height and depth. 1D images use only x and width. 2D images use x, y,
+ +       *    width and height. 3D images use x, y, z, width, height and depth.
+ +       *
+ +       *    srcOffset and dstOffset select the initial x, y, and z offsets in
+ +       *    texels of the sub-regions of the source and destination image data.
+ +       *    extent is the size in texels of the source image to resolve in width,
+ +       *    height and depth. 1D images use only x and width. 2D images use x, y,
+ +       *    width and height. 3D images use x, y, z, width, height and depth.
+ +       */
+ +      const struct VkExtent3D extent =
+ +         anv_sanitize_image_extent(src_image->type, region->extent);
+ +      const struct VkOffset3D srcOffset =
+ +         anv_sanitize_image_offset(src_image->type, region->srcOffset);
+ +      const struct VkOffset3D dstOffset =
+ +         anv_sanitize_image_offset(dest_image->type, region->dstOffset);
+ +
+ +      /* Each layer gets its own image views, framebuffer and render pass instance. */
+ +      for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
+ +           ++layer) {
+ +
+ +         struct anv_image_view src_iview;
+ +         anv_image_view_init(&src_iview, cmd_buffer->device,
+ +            &(VkImageViewCreateInfo) {
+ +               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ +               .image = src_image_h,
+ +               .viewType = anv_meta_get_view_type(src_image),
+ +               .format = src_image->format->vk_format,
+ +               .subresourceRange = {
+ +                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ +                  .baseMipLevel = region->srcSubresource.mipLevel,
+ +                  .levelCount = 1,
+ +                  .baseArrayLayer = src_base_layer + layer,
+ +                  .layerCount = 1,
+ +               },
+ +            },
+ +            cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT);
+ +
+ +         struct anv_image_view dest_iview;
+ +         anv_image_view_init(&dest_iview, cmd_buffer->device,
+ +            &(VkImageViewCreateInfo) {
+ +               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ +               .image = dest_image_h,
+ +               .viewType = anv_meta_get_view_type(dest_image),
+ +               .format = dest_image->format->vk_format,
+ +               .subresourceRange = {
+ +                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ +                  .baseMipLevel = region->dstSubresource.mipLevel,
+ +                  .levelCount = 1,
+ +                  .baseArrayLayer = dest_base_layer + layer,
+ +                  .layerCount = 1,
+ +               },
+ +            },
+ +            cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+ +         /* Temporary framebuffer sized to the destination mip level. NOTE(review): result ignored. */
+ +         VkFramebuffer fb_h;
+ +         anv_CreateFramebuffer(device_h,
+ +            &(VkFramebufferCreateInfo) {
+ +               .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ +               .attachmentCount = 1,
+ +               .pAttachments = (VkImageView[]) {
+ +                  anv_image_view_to_handle(&dest_iview),
+ +               },
+ +               .width = anv_minify(dest_image->extent.width,
+ +                                   region->dstSubresource.mipLevel),
+ +               .height = anv_minify(dest_image->extent.height,
+ +                                    region->dstSubresource.mipLevel),
+ +               .layers = 1
+ +            },
+ +            &cmd_buffer->pool->alloc,
+ +            &fb_h);
+ +         /* The render area is the destination rectangle of this region. */
+ +         ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
+ +            &(VkRenderPassBeginInfo) {
+ +               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ +               .renderPass = device->meta_state.resolve.pass,
+ +               .framebuffer = fb_h,
+ +               .renderArea = {
+ +                  .offset = {
+ +                     dstOffset.x,
+ +                     dstOffset.y,
+ +                  },
+ +                  .extent = {
+ +                     extent.width,
+ +                     extent.height,
+ +                  }
+ +               },
+ +               .clearValueCount = 0,
+ +               .pClearValues = NULL,
+ +            },
+ +            VK_SUBPASS_CONTENTS_INLINE);
+ +
+ +         emit_resolve(cmd_buffer,
+ +             &src_iview,
+ +             &(VkOffset2D) {
+ +               .x = srcOffset.x,
+ +               .y = srcOffset.y,
+ +             },
+ +             &dest_iview,
+ +             &(VkOffset2D) {
+ +               .x = dstOffset.x,
+ +               .y = dstOffset.y,
+ +             },
+ +             &(VkExtent2D) {
+ +               .width = extent.width,
+ +               .height = extent.height,
+ +             });
+ +
+ +         ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);
+ +         /* The framebuffer is destroyed as soon as its render pass has been recorded. */
+ +         anv_DestroyFramebuffer(device_h, fb_h,
+ +                                &cmd_buffer->pool->alloc);
+ +      }
+ +   }
+ +
+ +   meta_resolve_restore(&state, cmd_buffer);
+ +}
+ +
+ +/**
+ + * Emit any needed resolves for the current subpass.
+ + */
+ +void
+ +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
+ +{
+ +   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ +   struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ +   struct anv_meta_saved_state saved_state;
+ +
+ +   /* FINISHME(perf): Skip clears for resolve attachments.
+ +    *
+ +    * From the Vulkan 1.0 spec:
+ +    *
+ +    *    If the first use of an attachment in a render pass is as a resolve
+ +    *    attachment, then the loadOp is effectively ignored as the resolve is
+ +    *    guaranteed to overwrite all pixels in the render area.
+ +    */
+ +
+ +   if (!subpass->has_resolve)
+ +      return;
+ +
+ +   meta_resolve_save(&saved_state, cmd_buffer);
+ +   /* resolve_attachments[i] is the resolve target of color_attachments[i]. */
+ +   for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ +      uint32_t src_att = subpass->color_attachments[i];
+ +      uint32_t dest_att = subpass->resolve_attachments[i];
+ +
+ +      if (dest_att == VK_ATTACHMENT_UNUSED)
+ +         continue;
+ +
+ +      struct anv_image_view *src_iview = fb->attachments[src_att];
+ +      struct anv_image_view *dest_iview = fb->attachments[dest_att];
+ +      /* Temporary subpass whose only color attachment is the resolve destination. */
+ +      struct anv_subpass resolve_subpass = {
+ +         .color_count = 1,
+ +         .color_attachments = (uint32_t[]) { dest_att },
+ +         .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
+ +      };
+ +
+ +      anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+ +
+ +      /* Subpass resolves must respect the render area. We can ignore the
+ +       * render area here because vkCmdBeginRenderPass set the render area
+ +       * with 3DSTATE_DRAWING_RECTANGLE.
+ +       *
+ +       * XXX(chadv): Does the hardware really respect
+ +       * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
+ +       */
+ +      emit_resolve(cmd_buffer,
+ +          src_iview,
+ +          &(VkOffset2D) { 0, 0 },
+ +          dest_iview,
+ +          &(VkOffset2D) { 0, 0 },
+ +          &(VkExtent2D) { fb->width, fb->height }); /* whole framebuffer */
+ +   }
+ +   /* Put back the caller's subpass pointer (resolve_subpass is loop-local), then the saved state. */
+ +   cmd_buffer->state.subpass = subpass;
+ +   meta_resolve_restore(&saved_state, cmd_buffer);
+ +}
diff --cc src/intel/vulkan/anv_nir_apply_dynamic_offsets.c

index 46bc5d2,0000000..234855c

mode 100644,000000..100644
--- 1/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
--- /dev/null
+++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
@@@ -1,171 -1,0 +1,172 @@@
-       nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
+ +/*
+ + * Copyright © 2015 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + */
+ +
+ +#include "anv_nir.h"
+ +#include "nir/nir_builder.h"
+ +/* State handed to apply_dynamic_offsets_block() through its void pointer argument. */
+ +struct apply_dynamic_offsets_state {
+ +   nir_shader *shader; /* shader being lowered */
+ +   nir_builder builder; /* re-initialized for each function impl */
+ +
+ +   const struct anv_pipeline_layout *layout; /* taken from pipeline->layout */
+ +
+ +   uint32_t indices_start; /* shader->num_uniforms when the pass starts */
+ +};
+ +/* Per-block callback: add the dynamic offset to UBO/SSBO accesses and predicate them. */
+ +static bool
+ +apply_dynamic_offsets_block(nir_block *block, void *void_state)
+ +{
+ +   struct apply_dynamic_offsets_state *state = void_state;
+ +   struct anv_descriptor_set_layout *set_layout;
+ +
+ +   nir_builder *b = &state->builder;
+ +
+ +   nir_foreach_instr_safe(block, instr) {
+ +      if (instr->type != nir_instr_type_intrinsic)
+ +         continue;
+ +
+ +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ +      /* Which source holds the buffer block: src[0] for loads, src[1] for SSBO stores. */
+ +      unsigned block_idx_src;
+ +      switch (intrin->intrinsic) {
+ +      case nir_intrinsic_load_ubo:
+ +      case nir_intrinsic_load_ssbo:
+ +         block_idx_src = 0;
+ +         break;
+ +      case nir_intrinsic_store_ssbo:
+ +         block_idx_src = 1;
+ +         break;
+ +      default:
+ +         continue; /* the loop */
+ +      }
+ +      /* The block index must come straight from a vulkan_resource_index intrinsic. */
+ +      nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
+ +      assert(res_instr->type == nir_instr_type_intrinsic);
+ +      nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
+ +      assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+ +
+ +      unsigned set = res_intrin->const_index[0];
+ +      unsigned binding = res_intrin->const_index[1];
+ +      /* Bindings with a negative dynamic_offset_index are left untouched. */
+ +      set_layout = state->layout->set[set].layout;
+ +      if (set_layout->binding[binding].dynamic_offset_index < 0)
+ +         continue;
+ +
+ +      b->cursor = nir_before_instr(&intrin->instr);
+ +
+ +      /* First, we need to generate the uniform load for the buffer offset */
+ +      uint32_t index = state->layout->set[set].dynamic_offset_start +
+ +                       set_layout->binding[binding].dynamic_offset_index;
+ +      /* 2-component load at indices_start + index * 8, indirectly offset by src[0] * 8. */
+ +      nir_intrinsic_instr *offset_load =
+ +         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
+ +      offset_load->num_components = 2;
+ +      offset_load->const_index[0] = state->indices_start + index * 8;
+ +      offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
+ +                                                     nir_imm_int(b, 8)));
+ +
-                            intrin->num_components, NULL);
++      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, NULL);
+ +      nir_builder_instr_insert(b, &offset_load->instr);
+ +
+ +      nir_src *offset_src = nir_get_io_offset_src(intrin);
+ +      nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
+ +                                         &offset_load->dest.ssa);
+ +
+ +      /* In order to avoid out-of-bounds access, we predicate */
+ +      nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
+ +                                  offset_src->ssa);
+ +      nir_if *if_stmt = nir_if_create(b->shader);
+ +      if_stmt->condition = nir_src_for_ssa(pred);
+ +      nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+ +      /* Move the original access into the then-list, now using the adjusted offset. */
+ +      nir_instr_remove(&intrin->instr);
+ +      *offset_src = nir_src_for_ssa(new_offset);
+ +      nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);
+ +
+ +      if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
+ +         /* It's a load, we need a phi node */
+ +         nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+ +         nir_ssa_dest_init(&phi->instr, &phi->dest,
-             (nir_const_value) { .u = { 0, 0, 0, 0 } });
++                           intrin->num_components,
++                           intrin->dest.ssa.bit_size, NULL);
+ +         /* Phi source 1: the load result, from the tail block of the then-list. */
+ +         nir_phi_src *src1 = ralloc(phi, nir_phi_src);
+ +         struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
+ +         src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
+ +         src1->src = nir_src_for_ssa(&intrin->dest.ssa);
+ +         exec_list_push_tail(&phi->srcs, &src1->node);
+ +         /* Phi source 2: an all-zero immediate built at the end of the else-list. */
+ +         b->cursor = nir_after_cf_list(&if_stmt->else_list);
+ +         nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
++            (nir_const_value) { .u32 = { 0, 0, 0, 0 } });
+ +
+ +         nir_phi_src *src2 = ralloc(phi, nir_phi_src);
+ +         struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
+ +         src2->pred = exec_node_data(nir_block, enode, cf_node.node);
+ +         src2->src = nir_src_for_ssa(zero);
+ +         exec_list_push_tail(&phi->srcs, &src2->node);
+ +         /* Redirect users of the load to the phi, then insert the phi after the if. */
+ +         assert(intrin->dest.is_ssa);
+ +         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ +                                  nir_src_for_ssa(&phi->dest.ssa));
+ +
+ +         nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
+ +      }
+ +   }
+ +
+ +   return true;
+ +}
+ +/* Run the dynamic-offset lowering over every function and append the offset uniforms. */
+ +void
+ +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ +                              nir_shader *shader,
+ +                              struct brw_stage_prog_data *prog_data)
+ +{
+ +   struct apply_dynamic_offsets_state state = {
+ +      .shader = shader,
+ +      .layout = pipeline->layout,
+ +      .indices_start = shader->num_uniforms,
+ +   };
+ +   /* Nothing to do without a layout or when this stage has no dynamic offsets. */
+ +   if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
+ +      return;
+ +
+ +   nir_foreach_function(shader, function) {
+ +      if (function->impl) {
+ +         nir_builder_init(&state.builder, function->impl);
+ +         nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
+ +         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ +                                               nir_metadata_dominance);
+ +      }
+ +   }
+ +   /* param[] entries are field addresses taken from a NULL base, i.e. struct offsets. */
+ +   struct anv_push_constants *null_data = NULL;
+ +   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
+ +      prog_data->param[i * 2 + shader->num_uniforms / 4] =
+ +         (const union gl_constant_value *)&null_data->dynamic[i].offset;
+ +      prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] =
+ +         (const union gl_constant_value *)&null_data->dynamic[i].range;
+ +   }
+ +   /* 8 units per dynamic buffer; given the "/ 4" above, presumably bytes (2 dwords). */
+ +   shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
+ +}
diff --cc src/intel/vulkan/anv_nir_apply_pipeline_layout.c

index eeb9b97,0000000..ef81afa

mode 100644,000000..100644
--- 1/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
--- /dev/null
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@@ -1,387 -1,0 +1,387 @@@
-       block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]);
+ +/*
+ + * Copyright © 2015 Intel Corporation
+ + *
+ + * Permission is hereby granted, free of charge, to any person obtaining a
+ + * copy of this software and associated documentation files (the "Software"),
+ + * to deal in the Software without restriction, including without limitation
+ + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ + * and/or sell copies of the Software, and to permit persons to whom the
+ + * Software is furnished to do so, subject to the following conditions:
+ + *
+ + * The above copyright notice and this permission notice (including the next
+ + * paragraph) shall be included in all copies or substantial portions of the
+ + * Software.
+ + *
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ + * IN THE SOFTWARE.
+ + */
+ +
+ +#include "anv_nir.h"
+ +#include "program/prog_parameter.h"
+ +#include "nir/nir_builder.h"
+ +
+ +/* State shared by the passes in this file while a pipeline layout is being
+ + * applied to one shader.
+ + */
+ +struct apply_pipeline_layout_state {
+ +   nir_shader *shader;
+ +   /* Re-initialized for each function implementation before lowering. */
+ +   nir_builder builder;
+ +
+ +   /* Per-descriptor-set tables; every array is indexed by binding number. */
+ +   struct {
+ +      /* Bitset with one bit per binding, set through add_binding(). */
+ +      BITSET_WORD *used;
+ +      /* First surface index assigned to the binding. */
+ +      uint8_t *surface_offsets;
+ +      /* First sampler index assigned to the binding. */
+ +      uint8_t *sampler_offsets;
+ +      /* First image index assigned to the binding. */
+ +      uint8_t *image_offsets;
+ +   } set[MAX_SETS];
+ +};
+ +
+ +/* Flag (set, binding) as referenced by the shader. */
+ +static void
+ +add_binding(struct apply_pipeline_layout_state *state,
+ +            uint32_t set, uint32_t binding)
+ +{
+ +   BITSET_WORD *used_bits = state->set[set].used;
+ +
+ +   BITSET_SET(used_bits, binding);
+ +}
+ +
+ +/* Flag the descriptor set / binding pair stored on a variable as used. */
+ +static void
+ +add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
+ +{
+ +   const uint32_t var_set = var->data.descriptor_set;
+ +   const uint32_t var_binding = var->data.binding;
+ +
+ +   add_binding(state, var_set, var_binding);
+ +}
+ +
+ +/* Block callback: mark every binding referenced by a resource-index
+ + * intrinsic, an image intrinsic or a texture instruction in this block.
+ + * Always returns true.
+ + */
+ +static bool
+ +get_used_bindings_block(nir_block *block, void *void_state)
+ +{
+ +   struct apply_pipeline_layout_state *state = void_state;
+ +
+ +   nir_foreach_instr_safe(block, instr) {
+ +      if (instr->type == nir_instr_type_tex) {
+ +         /* Textures always carry a texture deref; the sampler is optional. */
+ +         nir_tex_instr *tex = nir_instr_as_tex(instr);
+ +         assert(tex->texture);
+ +         add_var_binding(state, tex->texture->var);
+ +         if (tex->sampler)
+ +            add_var_binding(state, tex->sampler->var);
+ +         continue;
+ +      }
+ +
+ +      if (instr->type != nir_instr_type_intrinsic)
+ +         continue;
+ +
+ +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ +      switch (intrin->intrinsic) {
+ +      case nir_intrinsic_vulkan_resource_index:
+ +         /* Set and binding are carried on the intrinsic itself. */
+ +         add_binding(state, nir_intrinsic_desc_set(intrin),
+ +                     nir_intrinsic_binding(intrin));
+ +         break;
+ +
+ +      case nir_intrinsic_image_load:
+ +      case nir_intrinsic_image_store:
+ +      case nir_intrinsic_image_atomic_add:
+ +      case nir_intrinsic_image_atomic_min:
+ +      case nir_intrinsic_image_atomic_max:
+ +      case nir_intrinsic_image_atomic_and:
+ +      case nir_intrinsic_image_atomic_or:
+ +      case nir_intrinsic_image_atomic_xor:
+ +      case nir_intrinsic_image_atomic_exchange:
+ +      case nir_intrinsic_image_atomic_comp_swap:
+ +      case nir_intrinsic_image_size:
+ +      case nir_intrinsic_image_samples:
+ +         /* Image intrinsics name their binding through the variable. */
+ +         add_var_binding(state, intrin->variables[0]->var);
+ +         break;
+ +
+ +      default:
+ +         break;
+ +      }
+ +   }
+ +
+ +   return true;
+ +}
+ +
+ +/* Replace a vulkan_resource_index intrinsic with a plain block index.
+ + *
+ + * The index is the surface offset recorded for the intrinsic's (set, binding)
+ + * plus the intrinsic's first source.  All uses of the intrinsic's result are
+ + * rewritten to the new value and the intrinsic is removed.
+ + */
+ +static void
+ +lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
+ +                          struct apply_pipeline_layout_state *state)
+ +{
+ +   nir_builder *b = &state->builder;
+ +
+ +   /* New instructions are emitted just ahead of the intrinsic. */
+ +   b->cursor = nir_before_instr(&intrin->instr);
+ +
+ +   uint32_t set = nir_intrinsic_desc_set(intrin);
+ +   uint32_t binding = nir_intrinsic_binding(intrin);
+ +
+ +   uint32_t surface_index = state->set[set].surface_offsets[binding];
+ +
+ +   nir_const_value *const_block_idx =
+ +      nir_src_as_const_value(intrin->src[0]);
+ +
+ +   nir_ssa_def *block_index;
+ +   if (const_block_idx) {
+ +      /* Constant source: fold the addition into a single immediate. */
++      block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
+ +   } else {
+ +      /* Non-constant source: emit the addition as an instruction. */
+ +      block_index = nir_iadd(b, nir_imm_int(b, surface_index),
+ +                             nir_ssa_for_src(b, intrin->src[0], 1));
+ +   }
+ +
+ +   assert(intrin->dest.is_ssa);
+ +   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
+ +   nir_instr_remove(&intrin->instr);
+ +}
+ +
+ +/* Fold an array deref on a texture/sampler variable into the instruction.
+ + *
+ + * If the deref has a child it must be an array deref.  Its base_offset is
+ + * added to *const_index, and when the array access is indirect the indirect
+ + * value is appended to the instruction as a new source of type src_type.
+ + * A deref without a child leaves the instruction untouched.
+ + *
+ + * The state argument is not used by this function.
+ + */
+ +static void
+ +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
+ +                unsigned *const_index, nir_tex_src_type src_type,
+ +                struct apply_pipeline_layout_state *state)
+ +{
+ +   if (deref->deref.child) {
+ +      assert(deref->deref.child->deref_type == nir_deref_type_array);
+ +      nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+ +
+ +      *const_index += deref_array->base_offset;
+ +
+ +      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ +         /* Grow the source array by one slot for the indirect index. */
+ +         nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
+ +                                               tex->num_srcs + 1);
+ +
+ +         /* Carry the existing sources over before the old array is freed. */
+ +         for (unsigned i = 0; i < tex->num_srcs; i++) {
+ +            new_srcs[i].src_type = tex->src[i].src_type;
+ +            nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
+ +         }
+ +
+ +         ralloc_free(tex->src);
+ +         tex->src = new_srcs;
+ +
+ +         /* Now we can go ahead and move the source over to being a
+ +          * first-class texture source.
+ +          */
+ +         tex->src[tex->num_srcs].src_type = src_type;
+ +         tex->num_srcs++;
+ +         assert(deref_array->indirect.is_ssa);
+ +         nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
+ +                               deref_array->indirect);
+ +      }
+ +   }
+ +}
+ +
+ +/* Clear the indirect source held by an indirect array deref on a texture or
+ + * sampler variable.  Derefs with no child, or with a non-indirect array
+ + * child, are left alone.
+ + */
+ +static void
+ +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
+ +{
+ +   if (deref->deref.child != NULL) {
+ +      nir_deref_array *arr = nir_deref_as_array(deref->deref.child);
+ +
+ +      if (arr->deref_array_type == nir_deref_array_type_indirect) {
+ +         nir_instr_rewrite_src(&tex->instr, &arr->indirect, NIR_SRC_INIT);
+ +      }
+ +   }
+ +}
+ +
+ +/* Turn the texture (and optional sampler) variable derefs of a texture
+ + * instruction into flat indices taken from the per-binding offset tables,
+ + * then drop the derefs from the instruction.
+ + */
+ +static void
+ +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+ +{
+ +   /* The texture deref must still be there: nothing lowers it before us. */
+ +   assert(tex->texture);
+ +
+ +   const unsigned tex_set = tex->texture->var->data.descriptor_set;
+ +   const unsigned tex_binding = tex->texture->var->data.binding;
+ +
+ +   tex->texture_index = state->set[tex_set].surface_offsets[tex_binding];
+ +   lower_tex_deref(tex, tex->texture, &tex->texture_index,
+ +                   nir_tex_src_texture_offset, state);
+ +
+ +   if (tex->sampler) {
+ +      const unsigned samp_set = tex->sampler->var->data.descriptor_set;
+ +      const unsigned samp_binding = tex->sampler->var->data.binding;
+ +
+ +      tex->sampler_index = state->set[samp_set].sampler_offsets[samp_binding];
+ +      lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
+ +                      nir_tex_src_sampler_offset, state);
+ +   }
+ +
+ +   /* The backend only looks at this to mark surfaces as used; we skip that
+ +    * optimization, so any non-zero value will do.
+ +    */
+ +   tex->texture_array_size = 1;
+ +
+ +   /* Release the indirect sources still held by the derefs, then detach
+ +    * the derefs from the instruction.
+ +    */
+ +   cleanup_tex_deref(tex, tex->texture);
+ +   if (tex->sampler)
+ +      cleanup_tex_deref(tex, tex->sampler);
+ +
+ +   tex->texture = NULL;
+ +   tex->sampler = NULL;
+ +}
+ +
+ +/* Block callback: lower every texture instruction and every
+ + * vulkan_resource_index intrinsic found in the block.  Always returns true.
+ + */
+ +static bool
+ +apply_pipeline_layout_block(nir_block *block, void *void_state)
+ +{
+ +   struct apply_pipeline_layout_state *state = void_state;
+ +
+ +   nir_foreach_instr_safe(block, instr) {
+ +      if (instr->type == nir_instr_type_tex) {
+ +         lower_tex(nir_instr_as_tex(instr), state);
+ +      } else if (instr->type == nir_instr_type_intrinsic) {
+ +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ +
+ +         /* Other intrinsics pass through untouched. */
+ +         if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index)
+ +            lower_res_index_intrinsic(intrin, state);
+ +      }
+ +   }
+ +
+ +   return true;
+ +}
+ +
+ +/* Point the first n entries of params at values[0..n-1] and point any
+ + * remaining entries, up to index 3, at a shared constant zero.
+ + */
+ +static void
+ +setup_vec4_uniform_value(const union gl_constant_value **params,
+ +                         const union gl_constant_value *values,
+ +                         unsigned n)
+ +{
+ +   static const gl_constant_value zero = { 0 };
+ +   unsigned slot = 0;
+ +
+ +   /* Entries backed by real values. */
+ +   for (; slot < n; slot++)
+ +      params[slot] = values + slot;
+ +
+ +   /* Pad the rest of the vec4 with zero. */
+ +   for (; slot < 4; slot++)
+ +      params[slot] = &zero;
+ +}
+ +
+ +/* Apply the pipeline layout to one shader.
+ + *
+ + * Works in several passes:
+ + *   1. find which (set, binding) pairs the shader references;
+ + *   2. add the array sizes of those bindings to the counters in map;
+ + *   3. hand out consecutive surface/sampler/image indices per used binding
+ + *      and record the surface/sampler -> descriptor mapping in map;
+ + *   4. rewrite texture instructions and resource-index intrinsics to use
+ + *      the assigned indices;
+ + *   5. if any images are used, give image variables a driver_location and
+ + *      append the image params after the existing uniforms.
+ + *
+ + *   pipeline   - supplies the pipeline layout
+ + *   shader     - NIR shader to rewrite; num_uniforms grows when images exist
+ + *   prog_data  - receives the image param entries
+ + *   map        - counters and surface/sampler tables that are filled in
+ + */
+ +void
+ +anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
+ +                              nir_shader *shader,
+ +                              struct brw_stage_prog_data *prog_data,
+ +                              struct anv_pipeline_bind_map *map)
+ +{
+ +   struct anv_pipeline_layout *layout = pipeline->layout;
+ +
+ +   struct apply_pipeline_layout_state state = {
+ +      .shader = shader,
+ +   };
+ +
+ +   /* Scratch context for the per-set tables; released before returning. */
+ +   void *mem_ctx = ralloc_context(NULL);
+ +
+ +   for (unsigned s = 0; s < layout->num_sets; s++) {
+ +      const unsigned count = layout->set[s].layout->binding_count;
+ +      const unsigned words = BITSET_WORDS(count);
+ +      state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
+ +      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+ +      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+ +      state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+ +   }
+ +
+ +   /* Pass 1: collect the bindings the shader actually references. */
+ +   nir_foreach_function(shader, function) {
+ +      if (function->impl)
+ +         nir_foreach_block(function->impl, get_used_bindings_block, &state);
+ +   }
+ +
+ +   /* Pass 2: size the map.  The counters are only ever added to here.
+ +    * NOTE(review): this assumes the caller zeroed them -- confirm.
+ +    */
+ +   for (uint32_t set = 0; set < layout->num_sets; set++) {
+ +      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+ +
+ +      BITSET_WORD b, _tmp;
+ +      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
+ +                         set_layout->binding_count) {
+ +         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
+ +            map->surface_count += set_layout->binding[b].array_size;
+ +         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
+ +            map->sampler_count += set_layout->binding[b].array_size;
+ +         if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
+ +            map->image_count += set_layout->binding[b].array_size;
+ +      }
+ +   }
+ +
+ +   /* Pass 3: assign consecutive indices to the used bindings.
+ +    * NOTE(review): the offset tables are uint8_t while these running
+ +    * counters are unsigned; an index above 255 would be truncated on
+ +    * store -- confirm the limits guarantee this cannot happen.
+ +    */
+ +   unsigned surface = 0;
+ +   unsigned sampler = 0;
+ +   unsigned image = 0;
+ +   for (uint32_t set = 0; set < layout->num_sets; set++) {
+ +      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+ +
+ +      BITSET_WORD b, _tmp;
+ +      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
+ +                         set_layout->binding_count) {
+ +         unsigned array_size = set_layout->binding[b].array_size;
+ +         unsigned set_offset = set_layout->binding[b].descriptor_index;
+ +
+ +         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
+ +            state.set[set].surface_offsets[b] = surface;
+ +            for (unsigned i = 0; i < array_size; i++) {
+ +               map->surface_to_descriptor[surface + i].set = set;
+ +               map->surface_to_descriptor[surface + i].offset = set_offset + i;
+ +            }
+ +            surface += array_size;
+ +         }
+ +
+ +         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
+ +            state.set[set].sampler_offsets[b] = sampler;
+ +            for (unsigned i = 0; i < array_size; i++) {
+ +               map->sampler_to_descriptor[sampler + i].set = set;
+ +               map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
+ +            }
+ +            sampler += array_size;
+ +         }
+ +
+ +         /* Images only get an index here; no descriptor table is filled. */
+ +         if (set_layout->binding[b].stage[shader->stage].image_index >= 0) {
+ +            state.set[set].image_offsets[b] = image;
+ +            image += array_size;
+ +         }
+ +      }
+ +   }
+ +
+ +   /* Pass 4: rewrite the shader to use the indices assigned above. */
+ +   nir_foreach_function(shader, function) {
+ +      if (function->impl) {
+ +         nir_builder_init(&state.builder, function->impl);
+ +         nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
+ +         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ +                                               nir_metadata_dominance);
+ +      }
+ +   }
+ +
+ +   /* Pass 5: image params. */
+ +   if (map->image_count > 0) {
+ +      assert(map->image_count <= MAX_IMAGES);
+ +      nir_foreach_variable(var, &shader->uniforms) {
+ +         if (glsl_type_is_image(var->type) ||
+ +             (glsl_type_is_array(var->type) &&
+ +              glsl_type_is_image(glsl_get_array_element(var->type)))) {
+ +            /* Images are represented as uniform push constants and the actual
+ +             * information required for reading/writing to/from the image is
+ +             * stored in the uniform.
+ +             */
+ +            unsigned set = var->data.descriptor_set;
+ +            unsigned binding = var->data.binding;
+ +            unsigned image_index = state.set[set].image_offsets[binding];
+ +
+ +            var->data.driver_location = shader->num_uniforms +
+ +                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
+ +         }
+ +      }
+ +
+ +      /* null_data is never dereferenced: member addresses taken off a NULL
+ +       * base give pointer values equal to the member offsets inside
+ +       * struct anv_push_constants.
+ +       */
+ +      struct anv_push_constants *null_data = NULL;
+ +      const gl_constant_value **param =
+ +         prog_data->param + (shader->num_uniforms / 4);
+ +      const struct brw_image_param *image_param = null_data->images;
+ +      for (uint32_t i = 0; i < map->image_count; i++) {
+ +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ +            (const union gl_constant_value *)&image_param->surface_idx, 1);
+ +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ +            (const union gl_constant_value *)image_param->offset, 2);
+ +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ +            (const union gl_constant_value *)image_param->size, 3);
+ +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ +            (const union gl_constant_value *)image_param->stride, 4);
+ +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ +            (const union gl_constant_value *)image_param->tiling, 3);
+ +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ +            (const union gl_constant_value *)image_param->swizzling, 2);
+ +
+ +         /* Advance to the next image's param slots and source struct. */
+ +         param += BRW_IMAGE_PARAM_SIZE;
+ +         image_param ++;
+ +      }
+ +
+ +      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+ +   }
+ +
+ +   ralloc_free(mem_ctx);
+ +}
diff --cc src/mesa/drivers/dri/i965/brw_compiler.c
Simple merge
diff --cc src/mesa/drivers/dri/i965/brw_fs.cpp
Simple merge
diff --cc src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 29ef609,4de5599..aa4c745
--- 1/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
--- 2/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@@ -2411,82 -2328,6 +2411,82 @@@ fs_visitor::nir_emit_cs_intrinsic(cons
         nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
         break;
   
-          offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
+ +   case nir_intrinsic_load_shared: {
+ +      assert(devinfo->gen >= 7);
+ +
+ +      fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+ +
+ +      /* Get the offset to read from */
+ +      fs_reg offset_reg;
+ +      nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ +      if (const_offset) {
-             offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
++         offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ +      } else {
+ +         offset_reg = vgrf(glsl_type::uint_type);
+ +         bld.ADD(offset_reg,
+ +                 retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ +                 brw_imm_ud(instr->const_index[0]));
+ +      }
+ +
+ +      /* Read the vector */
+ +      fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ +                                             1 /* dims */,
+ +                                             instr->num_components,
+ +                                             BRW_PREDICATE_NONE);
+ +      read_result.type = dest.type;
+ +      for (int i = 0; i < instr->num_components; i++)
+ +         bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
+ +
+ +      break;
+ +   }
+ +
+ +   case nir_intrinsic_store_shared: {
+ +      assert(devinfo->gen >= 7);
+ +
+ +      /* Block index */
+ +      fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+ +
+ +      /* Value */
+ +      fs_reg val_reg = get_nir_src(instr->src[0]);
+ +
+ +      /* Writemask */
+ +      unsigned writemask = instr->const_index[1];
+ +
+ +      /* Combine groups of consecutive enabled channels in one write
+ +       * message. We use ffs to find the first enabled channel and then ffs on
+ +       * the bit-inverse, down-shifted writemask to determine the length of
+ +       * the block of enabled bits.
+ +       */
+ +      while (writemask) {
+ +         unsigned first_component = ffs(writemask) - 1;
+ +         unsigned length = ffs(~(writemask >> first_component)) - 1;
+ +         fs_reg offset_reg;
+ +
+ +         nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ +         if (const_offset) {
++            offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
+ +                                    4 * first_component);
+ +         } else {
+ +            offset_reg = vgrf(glsl_type::uint_type);
+ +            bld.ADD(offset_reg,
+ +                    retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
+ +                    brw_imm_ud(instr->const_index[0] + 4 * first_component));
+ +         }
+ +
+ +         emit_untyped_write(bld, surf_index, offset_reg,
+ +                            offset(val_reg, bld, first_component),
+ +                            1 /* dims */, length,
+ +                            BRW_PREDICATE_NONE);
+ +
+ +         /* Clear the bits in the writemask that we just wrote, then try
+ +          * again to see if more channels are left.
+ +          */
+ +         writemask &= (15 << (first_component + length));
+ +      }
+ +
+ +      break;
+ +   }
+ +
      default:
         nir_emit_intrinsic(bld, instr);
         break;
@@@ -2695,30 -2536,14 +2695,30 @@@ fs_visitor::nir_emit_intrinsic(const fs
         nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
         if (const_offset) {
            /* Offsets are in bytes but they should always be multiples of 4 */
-          assert(const_offset->u[0] % 4 == 0);
-          src.reg_offset = const_offset->u[0] / 4;
+          assert(const_offset->u32[0] % 4 == 0);
+          src.reg_offset = const_offset->u32[0] / 4;
+ +
+ +         for (unsigned j = 0; j < instr->num_components; j++) {
+ +            bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ +         }
         } else {
- -         src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
- -      }
+ +         fs_reg indirect = retype(get_nir_src(instr->src[0]),
+ +                                  BRW_REGISTER_TYPE_UD);
   
- -      for (unsigned j = 0; j < instr->num_components; j++) {
- -         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ +         /* We need to pass a size to the MOV_INDIRECT but we don't want it to
+ +          * go past the end of the uniform.  In order to keep the n'th
+ +          * component from running past, we subtract off the size of all but
+ +          * one component of the vector.
+ +          */
+ +         assert(instr->const_index[1] >= instr->num_components * 4);
+ +         unsigned read_size = instr->const_index[1] -
+ +                              (instr->num_components - 1) * 4;
+ +
+ +         for (unsigned j = 0; j < instr->num_components; j++) {
+ +            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+ +                     offset(dest, bld, j), offset(src, bld, j),
+ +                     indirect, brw_imm_ud(read_size));
+ +         }
         }
         break;
      }
diff --cc src/mesa/drivers/dri/i965/brw_nir.c
Simple merge
diff --cc src/mesa/drivers/dri/i965/brw_vec4.cpp
Simple merge
diff --cc src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index 4686f20,eef3940..7c06f92
--- 1/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
--- 2/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@@ -697,16 -706,14 +697,16 @@@ vec4_visitor::nir_emit_intrinsic(nir_in
         nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
         if (const_offset) {
            /* Offsets are in bytes but they should always be multiples of 16 */
-          assert(const_offset->u[0] % 16 == 0);
-          src.reg_offset = const_offset->u[0] / 16;
+          assert(const_offset->u32[0] % 16 == 0);
+          src.reg_offset = const_offset->u32[0] / 16;
+ +
+ +         emit(MOV(dest, src));
         } else {
- -         src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
- -         src.reladdr = new(mem_ctx) src_reg(tmp);
- -      }
+ +         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
   
- -      emit(MOV(dest, src));
+ +         emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
+ +              indirect, brw_imm_ud(instr->const_index[1]));
+ +      }
         break;
      }
   
diff --cc src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
Simple merge
diff --cc src/mesa/main/mtypes.h
Simple merge
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
		1	2
configure.ac	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/Makefile.sources	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/glsl/Makefile.sources	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/glsl/glsl_parser_extras.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/glsl_to_nir.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir_builder.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir_lower_io.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir_lower_system_values.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir_opcodes.py	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir_opt_algebraic.py	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/nir_print.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/compiler/nir/spirv/spirv_to_nir.c	patch \|	diff1 \|	\|	blob \| history
src/compiler/nir/spirv/vtn_glsl450.c	patch \|	diff1 \|	\|	blob \| history
src/compiler/nir/spirv/vtn_variables.c	patch \|	diff1 \|	\|	blob \| history
src/compiler/nir_types.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/intel/vulkan/anv_meta_blit.c	patch \|	diff1 \|	\|	blob \| history
src/intel/vulkan/anv_meta_blit2d.c	patch \|	diff1 \|	\|	blob \| history
src/intel/vulkan/anv_meta_resolve.c	patch \|	diff1 \|	\|	blob \| history
src/intel/vulkan/anv_nir_apply_dynamic_offsets.c	patch \|	diff1 \|	\|	blob \| history
src/intel/vulkan/anv_nir_apply_pipeline_layout.c	patch \|	diff1 \|	\|	blob \| history
src/mesa/drivers/dri/i965/brw_compiler.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/drivers/dri/i965/brw_fs.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/drivers/dri/i965/brw_nir.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/drivers/dri/i965/brw_vec4.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/mesa/main/mtypes.h	patch \|	diff1 \|	diff2 \|	blob \| history