
Merge remote-tracking branch 'public/master' into vulkan
author    Jason Ekstrand <jason.ekstrand@intel.com>
          Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
          Fri, 25 Mar 2016 00:30:14 +0000 (17:30 -0700)
29 files changed:
configure.ac
src/compiler/Makefile.sources
src/compiler/glsl/Makefile.sources
src/compiler/glsl/glsl_parser_extras.cpp
src/compiler/nir/glsl_to_nir.cpp
src/compiler/nir/nir.h
src/compiler/nir/nir_builder.h
src/compiler/nir/nir_lower_io.c
src/compiler/nir/nir_lower_system_values.c
src/compiler/nir/nir_opcodes.py
src/compiler/nir/nir_opt_algebraic.py
src/compiler/nir/nir_print.c
src/compiler/nir/spirv/spirv_to_nir.c
src/compiler/nir/spirv/vtn_glsl450.c
src/compiler/nir/spirv/vtn_variables.c
src/compiler/nir_types.h
src/intel/vulkan/anv_meta_blit.c
src/intel/vulkan/anv_meta_blit2d.c
src/intel/vulkan/anv_meta_resolve.c
src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
src/mesa/drivers/dri/i965/brw_compiler.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
src/mesa/main/mtypes.h

diff --cc configure.ac
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -55,77 -55,9 +55,77 @@@ convert_block(nir_block *block, void *v
  
        b->cursor = nir_after_instr(&load_var->instr);
  
 -      nir_intrinsic_op sysval_op =
 -         nir_intrinsic_from_system_value(var->data.location);
 -      nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0);
 +      nir_ssa_def *sysval;
 +      switch (var->data.location) {
 +      case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
 +         /* From the GLSL man page for gl_GlobalInvocationID:
 +          *
 +          *    "The value of gl_GlobalInvocationID is equal to
 +          *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
 +          */
 +
 +         nir_const_value local_size;
-          local_size.u[0] = b->shader->info.cs.local_size[0];
-          local_size.u[1] = b->shader->info.cs.local_size[1];
-          local_size.u[2] = b->shader->info.cs.local_size[2];
++         local_size.u32[0] = b->shader->info.cs.local_size[0];
++         local_size.u32[1] = b->shader->info.cs.local_size[1];
++         local_size.u32[2] = b->shader->info.cs.local_size[2];
 +
 +         nir_ssa_def *group_id =
 +            nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
 +         nir_ssa_def *local_id =
 +            nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
 +
 +         sysval = nir_iadd(b, nir_imul(b, group_id,
 +                                          nir_build_imm(b, 3, local_size)),
 +                              local_id);
 +         break;
 +      }
 +
 +      case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
 +         /* From the GLSL man page for gl_LocalInvocationIndex:
 +          *
 +          *    "The value of gl_LocalInvocationIndex is equal to
 +          *    gl_LocalInvocationID.z * gl_WorkGroupSize.x *
 +          *    gl_WorkGroupSize.y + gl_LocalInvocationID.y *
 +          *    gl_WorkGroupSize.x + gl_LocalInvocationID.x"
 +          */
 +         nir_ssa_def *local_id =
 +            nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
 +
 +         unsigned stride_y = b->shader->info.cs.local_size[0];
 +         unsigned stride_z = b->shader->info.cs.local_size[0] *
 +                             b->shader->info.cs.local_size[1];
 +
 +         sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2),
 +                                          nir_imm_int(b, stride_z)),
 +                              nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1),
 +                                                      nir_imm_int(b, stride_y)),
 +                                          nir_channel(b, local_id, 0)));
 +         break;
 +      }
 +
 +      case SYSTEM_VALUE_VERTEX_ID:
 +         if (b->shader->options->vertex_id_zero_based) {
 +            sysval = nir_iadd(b,
 +               nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0),
 +               nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0));
 +         } else {
 +            sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0);
 +         }
 +         break;
 +
 +      case SYSTEM_VALUE_INSTANCE_INDEX:
 +         sysval = nir_iadd(b,
 +            nir_load_system_value(b, nir_intrinsic_load_instance_id, 0),
 +            nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
 +         break;
 +
 +      default: {
 +         nir_intrinsic_op sysval_op =
 +            nir_intrinsic_from_system_value(var->data.location);
 +         sysval = nir_load_system_value(b, sysval_op, 0);
 +         break;
 +      } /* default */
 +      }
  
        nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
        nir_instr_remove(&load_var->instr);
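
The two formulas quoted from the GLSL man pages above reduce to plain integer arithmetic. For reference only, a minimal standalone C sketch (the uvec3 struct and helper names are illustrative and not part of this commit):

#include <stdint.h>

struct uvec3 { uint32_t x, y, z; };

/* gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID */
static struct uvec3
global_invocation_id(struct uvec3 group_id, struct uvec3 local_size,
                     struct uvec3 local_id)
{
   return (struct uvec3) {
      group_id.x * local_size.x + local_id.x,
      group_id.y * local_size.y + local_id.y,
      group_id.z * local_size.z + local_id.z,
   };
}

/* gl_LocalInvocationIndex is the flattened index with x varying fastest:
 * z * size.x * size.y + y * size.x + x, matching stride_y/stride_z above. */
static uint32_t
local_invocation_index(struct uvec3 local_id, struct uvec3 local_size)
{
   return local_id.z * local_size.x * local_size.y +
          local_id.y * local_size.x +
          local_id.x;
}
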
@@@ -170,13 -174,12 +174,13 @@@ unop_convert("u2f", tfloat32, tuint32, 
  # Unary floating-point rounding operations.
  
  
- unop("ftrunc", tfloat, "truncf(src0)")
- unop("fceil", tfloat, "ceilf(src0)")
- unop("ffloor", tfloat, "floorf(src0)")
- unop("ffract", tfloat, "src0 - floorf(src0)")
- unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+ unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
+ unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
+ unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
+ unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
+ unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
  
 +unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
  
  # Trigonometric operations.
  
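
The constant-folding expression added for fquantize2f16 above amounts to the reference function below; a minimal sketch that assumes Mesa's half-float helpers (the header path is an assumption, not taken from this diff):

#include <math.h>
#include "util/half_float.h"  /* assumed home of _mesa_float_to_half()/_mesa_half_to_float() */

/* Magnitudes below 2^-14 (the smallest normal half-float) flush to a signed
 * zero; everything else is rounded by a round trip through 16-bit float. */
static float
quantize2f16_ref(float x)
{
   if (fabsf(x) < ldexpf(1.0f, -14))
      return copysignf(0.0f, x);
   return _mesa_half_to_float(_mesa_float_to_half(x));
}
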
@@@ -75,27 -81,29 +82,30 @@@ optimizations = 
     (('imul', a, 1), a),
     (('fmul', a, -1.0), ('fneg', a)),
     (('imul', a, -1), ('ineg', a)),
-    (('ffma', 0.0, a, b), b),
-    (('ffma', a, 0.0, b), b),
-    (('ffma', a, b, 0.0), ('fmul', a, b)),
 +   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+    (('~ffma', 0.0, a, b), b),
+    (('~ffma', a, 0.0, b), b),
+    (('~ffma', a, b, 0.0), ('fmul', a, b)),
     (('ffma', a, 1.0, b), ('fadd', a, b)),
     (('ffma', 1.0, a, b), ('fadd', a, b)),
-    (('flrp', a, b, 0.0), a),
-    (('flrp', a, b, 1.0), b),
-    (('flrp', a, a, b), a),
-    (('flrp', 0.0, a, b), ('fmul', a, b)),
+    (('~flrp', a, b, 0.0), a),
+    (('~flrp', a, b, 1.0), b),
+    (('~flrp', a, a, b), a),
+    (('~flrp', 0.0, a, b), ('fmul', a, b)),
+    (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
     (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
     (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
-    (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
-    (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+    (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
+    (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp'),
+    (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
+    (('~fadd', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
     (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
-    (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+    (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
     # Comparison simplifications
-    (('inot', ('flt', a, b)), ('fge', a, b)),
-    (('inot', ('fge', a, b)), ('flt', a, b)),
-    (('inot', ('feq', a, b)), ('fne', a, b)),
-    (('inot', ('fne', a, b)), ('feq', a, b)),
+    (('~inot', ('flt', a, b)), ('fge', a, b)),
+    (('~inot', ('fge', a, b)), ('flt', a, b)),
+    (('~inot', ('feq', a, b)), ('fne', a, b)),
+    (('~inot', ('fne', a, b)), ('feq', a, b)),
     (('inot', ('ilt', a, b)), ('ige', a, b)),
     (('inot', ('ige', a, b)), ('ilt', a, b)),
     (('inot', ('ieq', a, b)), ('ine', a, b)),
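
Two notes on the notation in the rules above: a leading '~' marks a pattern as inexact, so it is skipped for operations flagged exact, and the new b2f patterns rely on a lerp by a boolean-derived factor being just a select. A minimal C sketch of that identity (flrp_ref/b2f_ref/bcsel_ref are illustrative names, not NIR code):

/* flrp(a, b, t) = a + t * (b - a), as in the lower_flrp rule above.  When the
 * factor comes from b2f(c) it is exactly 0.0f or 1.0f, so the lerp collapses
 * to bcsel(c, b, a), i.e. c ? b : a. */
static float flrp_ref(float a, float b, float t) { return a + t * (b - a); }
static float b2f_ref(int c)                      { return c ? 1.0f : 0.0f; }
static float bcsel_ref(int c, float b, float a)  { return c ? b : a; }
/* For finite a and b: flrp_ref(a, b, b2f_ref(c)) == bcsel_ref(c, b, a). */
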
Simple merge
index 5a7184a,0000000..42a1f95
mode 100644,000000..100644
--- /dev/null
@@@ -1,2704 -1,0 +1,2712 @@@
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + *    Jason Ekstrand (jason@jlekstrand.net)
 + *
 + */
 +
 +#include "vtn_private.h"
 +#include "nir/nir_vla.h"
 +#include "nir/nir_control_flow.h"
 +#include "nir/nir_constant_expressions.h"
 +
 +static struct vtn_ssa_value *
 +vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
 +{
 +   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
 +   val->type = type;
 +
 +   if (glsl_type_is_vector_or_scalar(type)) {
 +      unsigned num_components = glsl_get_vector_elements(val->type);
 +      nir_ssa_undef_instr *undef =
 +         nir_ssa_undef_instr_create(b->shader, num_components);
 +
 +      nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr);
 +      val->def = &undef->def;
 +   } else {
 +      unsigned elems = glsl_get_length(val->type);
 +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
 +      if (glsl_type_is_matrix(type)) {
 +         const struct glsl_type *elem_type =
 +            glsl_vector_type(glsl_get_base_type(type),
 +                             glsl_get_vector_elements(type));
 +
 +         for (unsigned i = 0; i < elems; i++)
 +            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
 +      } else if (glsl_type_is_array(type)) {
 +         const struct glsl_type *elem_type = glsl_get_array_element(type);
 +         for (unsigned i = 0; i < elems; i++)
 +            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
 +      } else {
 +         for (unsigned i = 0; i < elems; i++) {
 +            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
 +            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
 +         }
 +      }
 +   }
 +
 +   return val;
 +}
 +
 +static struct vtn_ssa_value *
 +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
 +                    const struct glsl_type *type)
 +{
 +   struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
 +
 +   if (entry)
 +      return entry->data;
 +
 +   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
 +   val->type = type;
 +
 +   switch (glsl_get_base_type(type)) {
 +   case GLSL_TYPE_INT:
 +   case GLSL_TYPE_UINT:
 +   case GLSL_TYPE_BOOL:
 +   case GLSL_TYPE_FLOAT:
 +   case GLSL_TYPE_DOUBLE:
 +      if (glsl_type_is_vector_or_scalar(type)) {
 +         unsigned num_components = glsl_get_vector_elements(val->type);
 +         nir_load_const_instr *load =
 +            nir_load_const_instr_create(b->shader, num_components);
 +
 +         for (unsigned i = 0; i < num_components; i++)
-             load->value.u[i] = constant->value.u[i];
++            load->value.u32[i] = constant->value.u[i];
 +
 +         nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
 +         val->def = &load->def;
 +      } else {
 +         assert(glsl_type_is_matrix(type));
 +         unsigned rows = glsl_get_vector_elements(val->type);
 +         unsigned columns = glsl_get_matrix_columns(val->type);
 +         val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
 +
 +         for (unsigned i = 0; i < columns; i++) {
 +            struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
 +            col_val->type = glsl_get_column_type(val->type);
 +            nir_load_const_instr *load =
 +               nir_load_const_instr_create(b->shader, rows);
 +
 +            for (unsigned j = 0; j < rows; j++)
-                load->value.u[j] = constant->value.u[rows * i + j];
++               load->value.u32[j] = constant->value.u[rows * i + j];
 +
 +            nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
 +            col_val->def = &load->def;
 +
 +            val->elems[i] = col_val;
 +         }
 +      }
 +      break;
 +
 +   case GLSL_TYPE_ARRAY: {
 +      unsigned elems = glsl_get_length(val->type);
 +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
 +      const struct glsl_type *elem_type = glsl_get_array_element(val->type);
 +      for (unsigned i = 0; i < elems; i++)
 +         val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
 +                                             elem_type);
 +      break;
 +   }
 +
 +   case GLSL_TYPE_STRUCT: {
 +      unsigned elems = glsl_get_length(val->type);
 +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
 +      for (unsigned i = 0; i < elems; i++) {
 +         const struct glsl_type *elem_type =
 +            glsl_get_struct_field(val->type, i);
 +         val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
 +                                             elem_type);
 +      }
 +      break;
 +   }
 +
 +   default:
 +      unreachable("bad constant type");
 +   }
 +
 +   return val;
 +}
 +
 +struct vtn_ssa_value *
 +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
 +{
 +   struct vtn_value *val = vtn_untyped_value(b, value_id);
 +   switch (val->value_type) {
 +   case vtn_value_type_undef:
 +      return vtn_undef_ssa_value(b, val->type->type);
 +
 +   case vtn_value_type_constant:
 +      return vtn_const_ssa_value(b, val->constant, val->const_type);
 +
 +   case vtn_value_type_ssa:
 +      return val->ssa;
 +
 +   case vtn_value_type_access_chain:
 +      /* This is needed for function parameters */
 +      return vtn_variable_load(b, val->access_chain);
 +
 +   default:
 +      unreachable("Invalid type for an SSA value");
 +   }
 +}
 +
 +static char *
 +vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
 +                   unsigned word_count, unsigned *words_used)
 +{
 +   char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words));
 +   if (words_used) {
 +      /* Amount of space taken by the string (including the null) */
 +      unsigned len = strlen(dup) + 1;
 +      *words_used = DIV_ROUND_UP(len, sizeof(*words));
 +   }
 +   return dup;
 +}
 +
 +const uint32_t *
 +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
 +                        const uint32_t *end, vtn_instruction_handler handler)
 +{
 +   b->file = NULL;
 +   b->line = -1;
 +   b->col = -1;
 +
 +   const uint32_t *w = start;
 +   while (w < end) {
 +      SpvOp opcode = w[0] & SpvOpCodeMask;
 +      unsigned count = w[0] >> SpvWordCountShift;
 +      assert(count >= 1 && w + count <= end);
 +
 +      switch (opcode) {
 +      case SpvOpNop:
 +         break; /* Do nothing */
 +
 +      case SpvOpLine:
 +         b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
 +         b->line = w[2];
 +         b->col = w[3];
 +         break;
 +
 +      case SpvOpNoLine:
 +         b->file = NULL;
 +         b->line = -1;
 +         b->col = -1;
 +         break;
 +
 +      default:
 +         if (!handler(b, opcode, w, count))
 +            return w;
 +         break;
 +      }
 +
 +      w += count;
 +   }
 +   assert(w == end);
 +   return w;
 +}
 +
 +static void
 +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
 +                     const uint32_t *w, unsigned count)
 +{
 +   switch (opcode) {
 +   case SpvOpExtInstImport: {
 +      struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
 +      if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
 +         val->ext_handler = vtn_handle_glsl450_instruction;
 +      } else {
 +         assert(!"Unsupported extension");
 +      }
 +      break;
 +   }
 +
 +   case SpvOpExtInst: {
 +      struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
 +      bool handled = val->ext_handler(b, w[4], w, count);
 +      (void)handled;
 +      assert(handled);
 +      break;
 +   }
 +
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +}
 +
 +static void
 +_foreach_decoration_helper(struct vtn_builder *b,
 +                           struct vtn_value *base_value,
 +                           int parent_member,
 +                           struct vtn_value *value,
 +                           vtn_decoration_foreach_cb cb, void *data)
 +{
 +   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
 +      int member;
 +      if (dec->scope == VTN_DEC_DECORATION) {
 +         member = parent_member;
 +      } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
 +         assert(parent_member == -1);
 +         member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
 +      } else {
 +         /* Not a decoration */
 +         continue;
 +      }
 +
 +      if (dec->group) {
 +         assert(dec->group->value_type == vtn_value_type_decoration_group);
 +         _foreach_decoration_helper(b, base_value, member, dec->group,
 +                                    cb, data);
 +      } else {
 +         cb(b, base_value, member, dec, data);
 +      }
 +   }
 +}
 +
 +/** Iterates (recursively if needed) over all of the decorations on a value
 + *
 + * This function iterates over all of the decorations applied to a given
 + * value.  If it encounters a decoration group, it recurses into the group
 + * and iterates over all of those decorations as well.
 + */
 +void
 +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
 +                       vtn_decoration_foreach_cb cb, void *data)
 +{
 +   _foreach_decoration_helper(b, value, -1, value, cb, data);
 +}
 +
 +void
 +vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
 +                           vtn_execution_mode_foreach_cb cb, void *data)
 +{
 +   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
 +      if (dec->scope != VTN_DEC_EXECUTION_MODE)
 +         continue;
 +
 +      assert(dec->group == NULL);
 +      cb(b, value, dec, data);
 +   }
 +}
 +
 +static void
 +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
 +                      const uint32_t *w, unsigned count)
 +{
 +   const uint32_t *w_end = w + count;
 +   const uint32_t target = w[1];
 +   w += 2;
 +
 +   switch (opcode) {
 +   case SpvOpDecorationGroup:
 +      vtn_push_value(b, target, vtn_value_type_decoration_group);
 +      break;
 +
 +   case SpvOpDecorate:
 +   case SpvOpMemberDecorate:
 +   case SpvOpExecutionMode: {
 +      struct vtn_value *val = &b->values[target];
 +
 +      struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
 +      switch (opcode) {
 +      case SpvOpDecorate:
 +         dec->scope = VTN_DEC_DECORATION;
 +         break;
 +      case SpvOpMemberDecorate:
 +         dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
 +         break;
 +      case SpvOpExecutionMode:
 +         dec->scope = VTN_DEC_EXECUTION_MODE;
 +         break;
 +      default:
 +         unreachable("Invalid decoration opcode");
 +      }
 +      dec->decoration = *(w++);
 +      dec->literals = w;
 +
 +      /* Link into the list */
 +      dec->next = val->decoration;
 +      val->decoration = dec;
 +      break;
 +   }
 +
 +   case SpvOpGroupMemberDecorate:
 +   case SpvOpGroupDecorate: {
 +      struct vtn_value *group =
 +         vtn_value(b, target, vtn_value_type_decoration_group);
 +
 +      for (; w < w_end; w++) {
 +         struct vtn_value *val = vtn_untyped_value(b, *w);
 +         struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
 +
 +         dec->group = group;
 +         if (opcode == SpvOpGroupDecorate) {
 +            dec->scope = VTN_DEC_DECORATION;
 +         } else {
 +            dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
 +         }
 +
 +         /* Link into the list */
 +         dec->next = val->decoration;
 +         val->decoration = dec;
 +      }
 +      break;
 +   }
 +
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +}
 +
 +struct member_decoration_ctx {
 +   unsigned num_fields;
 +   struct glsl_struct_field *fields;
 +   struct vtn_type *type;
 +};
 +
 +/* does a shallow copy of a vtn_type */
 +
 +static struct vtn_type *
 +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
 +{
 +   struct vtn_type *dest = ralloc(b, struct vtn_type);
 +   dest->type = src->type;
 +   dest->is_builtin = src->is_builtin;
 +   if (src->is_builtin)
 +      dest->builtin = src->builtin;
 +
 +   if (!glsl_type_is_scalar(src->type)) {
 +      switch (glsl_get_base_type(src->type)) {
 +      case GLSL_TYPE_INT:
 +      case GLSL_TYPE_UINT:
 +      case GLSL_TYPE_BOOL:
 +      case GLSL_TYPE_FLOAT:
 +      case GLSL_TYPE_DOUBLE:
 +      case GLSL_TYPE_ARRAY:
 +         dest->row_major = src->row_major;
 +         dest->stride = src->stride;
 +         dest->array_element = src->array_element;
 +         break;
 +
 +      case GLSL_TYPE_STRUCT: {
 +         unsigned elems = glsl_get_length(src->type);
 +
 +         dest->members = ralloc_array(b, struct vtn_type *, elems);
 +         memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *));
 +
 +         dest->offsets = ralloc_array(b, unsigned, elems);
 +         memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned));
 +         break;
 +      }
 +
 +      default:
 +         unreachable("unhandled type");
 +      }
 +   }
 +
 +   return dest;
 +}
 +
 +static struct vtn_type *
 +mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
 +{
 +   type->members[member] = vtn_type_copy(b, type->members[member]);
 +   type = type->members[member];
 +
 +   /* We may have an array of matrices.... Oh, joy! */
 +   while (glsl_type_is_array(type->type)) {
 +      type->array_element = vtn_type_copy(b, type->array_element);
 +      type = type->array_element;
 +   }
 +
 +   assert(glsl_type_is_matrix(type->type));
 +
 +   return type;
 +}
 +
 +static void
 +struct_member_decoration_cb(struct vtn_builder *b,
 +                            struct vtn_value *val, int member,
 +                            const struct vtn_decoration *dec, void *void_ctx)
 +{
 +   struct member_decoration_ctx *ctx = void_ctx;
 +
 +   if (member < 0)
 +      return;
 +
 +   assert(member < ctx->num_fields);
 +
 +   switch (dec->decoration) {
 +   case SpvDecorationRelaxedPrecision:
 +      break; /* FIXME: Do nothing with this for now. */
 +   case SpvDecorationNoPerspective:
 +      ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
 +      break;
 +   case SpvDecorationFlat:
 +      ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT;
 +      break;
 +   case SpvDecorationCentroid:
 +      ctx->fields[member].centroid = true;
 +      break;
 +   case SpvDecorationSample:
 +      ctx->fields[member].sample = true;
 +      break;
 +   case SpvDecorationLocation:
 +      ctx->fields[member].location = dec->literals[0];
 +      break;
 +   case SpvDecorationBuiltIn:
 +      ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
 +      ctx->type->members[member]->is_builtin = true;
 +      ctx->type->members[member]->builtin = dec->literals[0];
 +      ctx->type->builtin_block = true;
 +      break;
 +   case SpvDecorationOffset:
 +      ctx->type->offsets[member] = dec->literals[0];
 +      break;
 +   case SpvDecorationMatrixStride:
 +      mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0];
 +      break;
 +   case SpvDecorationColMajor:
 +      break; /* Nothing to do here.  Column-major is the default. */
 +   case SpvDecorationRowMajor:
 +      mutable_matrix_member(b, ctx->type, member)->row_major = true;
 +      break;
 +   default:
 +      unreachable("Unhandled member decoration");
 +   }
 +}
 +
 +static void
 +type_decoration_cb(struct vtn_builder *b,
 +                   struct vtn_value *val, int member,
 +                    const struct vtn_decoration *dec, void *ctx)
 +{
 +   struct vtn_type *type = val->type;
 +
 +   if (member != -1)
 +      return;
 +
 +   switch (dec->decoration) {
 +   case SpvDecorationArrayStride:
 +      type->stride = dec->literals[0];
 +      break;
 +   case SpvDecorationBlock:
 +      type->block = true;
 +      break;
 +   case SpvDecorationBufferBlock:
 +      type->buffer_block = true;
 +      break;
 +   case SpvDecorationGLSLShared:
 +   case SpvDecorationGLSLPacked:
 +      /* Ignore these, since we get explicit offsets anyways */
 +      break;
 +
 +   case SpvDecorationStream:
 +      assert(dec->literals[0] == 0);
 +      break;
 +
 +   default:
 +      unreachable("Unhandled type decoration");
 +   }
 +}
 +
 +static unsigned
 +translate_image_format(SpvImageFormat format)
 +{
 +   switch (format) {
 +   case SpvImageFormatUnknown:      return 0;      /* GL_NONE */
 +   case SpvImageFormatRgba32f:      return 0x8814; /* GL_RGBA32F */
 +   case SpvImageFormatRgba16f:      return 0x881A; /* GL_RGBA16F */
 +   case SpvImageFormatR32f:         return 0x822E; /* GL_R32F */
 +   case SpvImageFormatRgba8:        return 0x8058; /* GL_RGBA8 */
 +   case SpvImageFormatRgba8Snorm:   return 0x8F97; /* GL_RGBA8_SNORM */
 +   case SpvImageFormatRg32f:        return 0x8230; /* GL_RG32F */
 +   case SpvImageFormatRg16f:        return 0x822F; /* GL_RG16F */
 +   case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */
 +   case SpvImageFormatR16f:         return 0x822D; /* GL_R16F */
 +   case SpvImageFormatRgba16:       return 0x805B; /* GL_RGBA16 */
 +   case SpvImageFormatRgb10A2:      return 0x8059; /* GL_RGB10_A2 */
 +   case SpvImageFormatRg16:         return 0x822C; /* GL_RG16 */
 +   case SpvImageFormatRg8:          return 0x822B; /* GL_RG8 */
 +   case SpvImageFormatR16:          return 0x822A; /* GL_R16 */
 +   case SpvImageFormatR8:           return 0x8229; /* GL_R8 */
 +   case SpvImageFormatRgba16Snorm:  return 0x8F9B; /* GL_RGBA16_SNORM */
 +   case SpvImageFormatRg16Snorm:    return 0x8F99; /* GL_RG16_SNORM */
 +   case SpvImageFormatRg8Snorm:     return 0x8F95; /* GL_RG8_SNORM */
 +   case SpvImageFormatR16Snorm:     return 0x8F98; /* GL_R16_SNORM */
 +   case SpvImageFormatR8Snorm:      return 0x8F94; /* GL_R8_SNORM */
 +   case SpvImageFormatRgba32i:      return 0x8D82; /* GL_RGBA32I */
 +   case SpvImageFormatRgba16i:      return 0x8D88; /* GL_RGBA16I */
 +   case SpvImageFormatRgba8i:       return 0x8D8E; /* GL_RGBA8I */
 +   case SpvImageFormatR32i:         return 0x8235; /* GL_R32I */
 +   case SpvImageFormatRg32i:        return 0x823B; /* GL_RG32I */
 +   case SpvImageFormatRg16i:        return 0x8239; /* GL_RG16I */
 +   case SpvImageFormatRg8i:         return 0x8237; /* GL_RG8I */
 +   case SpvImageFormatR16i:         return 0x8233; /* GL_R16I */
 +   case SpvImageFormatR8i:          return 0x8231; /* GL_R8I */
 +   case SpvImageFormatRgba32ui:     return 0x8D70; /* GL_RGBA32UI */
 +   case SpvImageFormatRgba16ui:     return 0x8D76; /* GL_RGBA16UI */
 +   case SpvImageFormatRgba8ui:      return 0x8D7C; /* GL_RGBA8UI */
 +   case SpvImageFormatR32ui:        return 0x8236; /* GL_R32UI */
 +   case SpvImageFormatRgb10a2ui:    return 0x906F; /* GL_RGB10_A2UI */
 +   case SpvImageFormatRg32ui:       return 0x823C; /* GL_RG32UI */
 +   case SpvImageFormatRg16ui:       return 0x823A; /* GL_RG16UI */
 +   case SpvImageFormatRg8ui:        return 0x8238; /* GL_RG8UI */
 +   case SpvImageFormatR16ui:        return 0x8234; /* GL_R16UI */
 +   case SpvImageFormatR8ui:         return 0x8232; /* GL_R8UI */
 +   default:
 +      assert(!"Invalid image format");
 +      return 0;
 +   }
 +}
 +
 +static void
 +vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
 +                const uint32_t *w, unsigned count)
 +{
 +   struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type);
 +
 +   val->type = rzalloc(b, struct vtn_type);
 +   val->type->is_builtin = false;
 +   val->type->val = val;
 +
 +   switch (opcode) {
 +   case SpvOpTypeVoid:
 +      val->type->type = glsl_void_type();
 +      break;
 +   case SpvOpTypeBool:
 +      val->type->type = glsl_bool_type();
 +      break;
 +   case SpvOpTypeInt: {
 +      const bool signedness = w[3];
 +      val->type->type = (signedness ? glsl_int_type() : glsl_uint_type());
 +      break;
 +   }
 +   case SpvOpTypeFloat:
 +      val->type->type = glsl_float_type();
 +      break;
 +
 +   case SpvOpTypeVector: {
 +      struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
 +      unsigned elems = w[3];
 +
 +      assert(glsl_type_is_scalar(base->type));
 +      val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
 +
 +      /* Vectors implicitly have sizeof(base_type) stride.  For now, this
 +       * is always 4 bytes.  This will have to change if we want to start
 +       * supporting doubles or half-floats.
 +       */
 +      val->type->stride = 4;
 +      val->type->array_element = base;
 +      break;
 +   }
 +
 +   case SpvOpTypeMatrix: {
 +      struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
 +      unsigned columns = w[3];
 +
 +      assert(glsl_type_is_vector(base->type));
 +      val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
 +                                         glsl_get_vector_elements(base->type),
 +                                         columns);
 +      assert(!glsl_type_is_error(val->type->type));
 +      val->type->array_element = base;
 +      val->type->row_major = false;
 +      val->type->stride = 0;
 +      break;
 +   }
 +
 +   case SpvOpTypeRuntimeArray:
 +   case SpvOpTypeArray: {
 +      struct vtn_type *array_element =
 +         vtn_value(b, w[2], vtn_value_type_type)->type;
 +
 +      unsigned length;
 +      if (opcode == SpvOpTypeRuntimeArray) {
 +         /* A length of 0 is used to denote unsized arrays */
 +         length = 0;
 +      } else {
 +         length =
 +            vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0];
 +      }
 +
 +      val->type->type = glsl_array_type(array_element->type, length);
 +      val->type->array_element = array_element;
 +      val->type->stride = 0;
 +      break;
 +   }
 +
 +   case SpvOpTypeStruct: {
 +      unsigned num_fields = count - 2;
 +      val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
 +      val->type->offsets = ralloc_array(b, unsigned, num_fields);
 +
 +      NIR_VLA(struct glsl_struct_field, fields, count);
 +      for (unsigned i = 0; i < num_fields; i++) {
 +         val->type->members[i] =
 +            vtn_value(b, w[i + 2], vtn_value_type_type)->type;
 +         fields[i] = (struct glsl_struct_field) {
 +            .type = val->type->members[i]->type,
 +            .name = ralloc_asprintf(b, "field%d", i),
 +            .location = -1,
 +         };
 +      }
 +
 +      struct member_decoration_ctx ctx = {
 +         .num_fields = num_fields,
 +         .fields = fields,
 +         .type = val->type
 +      };
 +
 +      vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
 +
 +      const char *name = val->name ? val->name : "struct";
 +
 +      val->type->type = glsl_struct_type(fields, num_fields, name);
 +      break;
 +   }
 +
 +   case SpvOpTypeFunction: {
 +      const struct glsl_type *return_type =
 +         vtn_value(b, w[2], vtn_value_type_type)->type->type;
 +      NIR_VLA(struct glsl_function_param, params, count - 3);
 +      for (unsigned i = 0; i < count - 3; i++) {
 +         params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type;
 +
 +         /* FIXME: */
 +         params[i].in = true;
 +         params[i].out = true;
 +      }
 +      val->type->type = glsl_function_type(return_type, params, count - 3);
 +      break;
 +   }
 +
 +   case SpvOpTypePointer:
 +      /* FIXME:  For now, we'll just do the really lame thing and return
 +       * the same type.  The validator should ensure that the proper number
 +       * of dereferences happen
 +       */
 +      val->type = vtn_value(b, w[3], vtn_value_type_type)->type;
 +      break;
 +
 +   case SpvOpTypeImage: {
 +      const struct glsl_type *sampled_type =
 +         vtn_value(b, w[2], vtn_value_type_type)->type->type;
 +
 +      assert(glsl_type_is_vector_or_scalar(sampled_type));
 +
 +      enum glsl_sampler_dim dim;
 +      switch ((SpvDim)w[3]) {
 +      case SpvDim1D:       dim = GLSL_SAMPLER_DIM_1D;    break;
 +      case SpvDim2D:       dim = GLSL_SAMPLER_DIM_2D;    break;
 +      case SpvDim3D:       dim = GLSL_SAMPLER_DIM_3D;    break;
 +      case SpvDimCube:     dim = GLSL_SAMPLER_DIM_CUBE;  break;
 +      case SpvDimRect:     dim = GLSL_SAMPLER_DIM_RECT;  break;
 +      case SpvDimBuffer:   dim = GLSL_SAMPLER_DIM_BUF;   break;
 +      default:
 +         unreachable("Invalid SPIR-V Sampler dimension");
 +      }
 +
 +      bool is_shadow = w[4];
 +      bool is_array = w[5];
 +      bool multisampled = w[6];
 +      unsigned sampled = w[7];
 +      SpvImageFormat format = w[8];
 +
 +      if (count > 9)
 +         val->type->access_qualifier = w[9];
 +      else
 +         val->type->access_qualifier = SpvAccessQualifierReadWrite;
 +
 +      if (multisampled) {
 +         assert(dim == GLSL_SAMPLER_DIM_2D);
 +         dim = GLSL_SAMPLER_DIM_MS;
 +      }
 +
 +      val->type->image_format = translate_image_format(format);
 +
 +      if (sampled == 1) {
 +         val->type->type = glsl_sampler_type(dim, is_shadow, is_array,
 +                                             glsl_get_base_type(sampled_type));
 +      } else if (sampled == 2) {
 +         assert(format);
 +         assert(!is_shadow);
 +         val->type->type = glsl_image_type(dim, is_array,
 +                                           glsl_get_base_type(sampled_type));
 +      } else {
 +         assert(!"We need to know if the image will be sampled");
 +      }
 +      break;
 +   }
 +
 +   case SpvOpTypeSampledImage:
 +      val->type = vtn_value(b, w[2], vtn_value_type_type)->type;
 +      break;
 +
 +   case SpvOpTypeSampler:
 +      /* The actual sampler type here doesn't really matter.  It gets
 +       * thrown away the moment you combine it with an image.  What really
 +       * matters is that it's a sampler type as opposed to an integer type
 +       * so the backend knows what to do.
 +       */
 +      val->type->type = glsl_bare_sampler_type();
 +      break;
 +
 +   case SpvOpTypeOpaque:
 +   case SpvOpTypeEvent:
 +   case SpvOpTypeDeviceEvent:
 +   case SpvOpTypeReserveId:
 +   case SpvOpTypeQueue:
 +   case SpvOpTypePipe:
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +
 +   vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
 +}
 +
 +static nir_constant *
 +vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type)
 +{
 +   nir_constant *c = rzalloc(b, nir_constant);
 +
 +   switch (glsl_get_base_type(type)) {
 +   case GLSL_TYPE_INT:
 +   case GLSL_TYPE_UINT:
 +   case GLSL_TYPE_BOOL:
 +   case GLSL_TYPE_FLOAT:
 +   case GLSL_TYPE_DOUBLE:
 +      /* Nothing to do here.  It's already initialized to zero */
 +      break;
 +
 +   case GLSL_TYPE_ARRAY:
 +      assert(glsl_get_length(type) > 0);
 +      c->num_elements = glsl_get_length(type);
 +      c->elements = ralloc_array(b, nir_constant *, c->num_elements);
 +
 +      c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type));
 +      for (unsigned i = 1; i < c->num_elements; i++)
 +         c->elements[i] = c->elements[0];
 +      break;
 +
 +   case GLSL_TYPE_STRUCT:
 +      c->num_elements = glsl_get_length(type);
 +      c->elements = ralloc_array(b, nir_constant *, c->num_elements);
 +
 +      for (unsigned i = 0; i < c->num_elements; i++) {
 +         c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i));
 +      }
 +      break;
 +
 +   default:
 +      unreachable("Invalid type for null constant");
 +   }
 +
 +   return c;
 +}
 +
 +static void
 +spec_constant_decoration_cb(struct vtn_builder *b, struct vtn_value *v,
 +                             int member, const struct vtn_decoration *dec,
 +                             void *data)
 +{
 +   assert(member == -1);
 +   if (dec->decoration != SpvDecorationSpecId)
 +      return;
 +
 +   uint32_t *const_value = data;
 +
 +   for (unsigned i = 0; i < b->num_specializations; i++) {
 +      if (b->specializations[i].id == dec->literals[0]) {
 +         *const_value = b->specializations[i].data;
 +         return;
 +      }
 +   }
 +}
 +
 +static uint32_t
 +get_specialization(struct vtn_builder *b, struct vtn_value *val,
 +                   uint32_t const_value)
 +{
 +   vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &const_value);
 +   return const_value;
 +}
 +
 +static void
 +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
 +                    const uint32_t *w, unsigned count)
 +{
 +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
 +   val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
 +   val->constant = rzalloc(b, nir_constant);
 +   switch (opcode) {
 +   case SpvOpConstantTrue:
 +      assert(val->const_type == glsl_bool_type());
 +      val->constant->value.u[0] = NIR_TRUE;
 +      break;
 +   case SpvOpConstantFalse:
 +      assert(val->const_type == glsl_bool_type());
 +      val->constant->value.u[0] = NIR_FALSE;
 +      break;
 +
 +   case SpvOpSpecConstantTrue:
 +   case SpvOpSpecConstantFalse: {
 +      assert(val->const_type == glsl_bool_type());
 +      uint32_t int_val =
 +         get_specialization(b, val, (opcode == SpvOpSpecConstantTrue));
 +      val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE;
 +      break;
 +   }
 +
 +   case SpvOpConstant:
 +      assert(glsl_type_is_scalar(val->const_type));
 +      val->constant->value.u[0] = w[3];
 +      break;
 +   case SpvOpSpecConstant:
 +      assert(glsl_type_is_scalar(val->const_type));
 +      val->constant->value.u[0] = get_specialization(b, val, w[3]);
 +      break;
 +   case SpvOpSpecConstantComposite:
 +   case SpvOpConstantComposite: {
 +      unsigned elem_count = count - 3;
 +      nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
 +      for (unsigned i = 0; i < elem_count; i++)
 +         elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant;
 +
 +      switch (glsl_get_base_type(val->const_type)) {
 +      case GLSL_TYPE_UINT:
 +      case GLSL_TYPE_INT:
 +      case GLSL_TYPE_FLOAT:
 +      case GLSL_TYPE_BOOL:
 +         if (glsl_type_is_matrix(val->const_type)) {
 +            unsigned rows = glsl_get_vector_elements(val->const_type);
 +            assert(glsl_get_matrix_columns(val->const_type) == elem_count);
 +            for (unsigned i = 0; i < elem_count; i++)
 +               for (unsigned j = 0; j < rows; j++)
 +                  val->constant->value.u[rows * i + j] = elems[i]->value.u[j];
 +         } else {
 +            assert(glsl_type_is_vector(val->const_type));
 +            assert(glsl_get_vector_elements(val->const_type) == elem_count);
 +            for (unsigned i = 0; i < elem_count; i++)
 +               val->constant->value.u[i] = elems[i]->value.u[0];
 +         }
 +         ralloc_free(elems);
 +         break;
 +
 +      case GLSL_TYPE_STRUCT:
 +      case GLSL_TYPE_ARRAY:
 +         ralloc_steal(val->constant, elems);
 +         val->constant->num_elements = elem_count;
 +         val->constant->elements = elems;
 +         break;
 +
 +      default:
 +         unreachable("Unsupported type for constants");
 +      }
 +      break;
 +   }
 +
 +   case SpvOpSpecConstantOp: {
 +      SpvOp opcode = get_specialization(b, val, w[3]);
 +      switch (opcode) {
 +      case SpvOpVectorShuffle: {
 +         struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
 +         struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
 +         unsigned len0 = glsl_get_vector_elements(v0->const_type);
 +         unsigned len1 = glsl_get_vector_elements(v1->const_type);
 +
 +         uint32_t u[8];
 +         for (unsigned i = 0; i < len0; i++)
 +            u[i] = v0->constant->value.u[i];
 +         for (unsigned i = 0; i < len1; i++)
 +            u[len0 + i] = v1->constant->value.u[i];
 +
 +         for (unsigned i = 0; i < count - 6; i++) {
 +            uint32_t comp = w[i + 6];
 +            if (comp == (uint32_t)-1) {
 +               val->constant->value.u[i] = 0xdeadbeef;
 +            } else {
 +               val->constant->value.u[i] = u[comp];
 +            }
 +         }
 +         return;
 +      }
 +
 +      case SpvOpCompositeExtract:
 +      case SpvOpCompositeInsert: {
 +         struct vtn_value *comp;
 +         unsigned deref_start;
 +         struct nir_constant **c;
 +         if (opcode == SpvOpCompositeExtract) {
 +            comp = vtn_value(b, w[4], vtn_value_type_constant);
 +            deref_start = 5;
 +            c = &comp->constant;
 +         } else {
 +            comp = vtn_value(b, w[5], vtn_value_type_constant);
 +            deref_start = 6;
 +            val->constant = nir_constant_clone(comp->constant,
 +                                               (nir_variable *)b);
 +            c = &val->constant;
 +         }
 +
 +         int elem = -1;
 +         const struct glsl_type *type = comp->const_type;
 +         for (unsigned i = deref_start; i < count; i++) {
 +            switch (glsl_get_base_type(type)) {
 +            case GLSL_TYPE_UINT:
 +            case GLSL_TYPE_INT:
 +            case GLSL_TYPE_FLOAT:
 +            case GLSL_TYPE_BOOL:
 +               /* If we hit this granularity, we're picking off an element */
 +               if (elem < 0)
 +                  elem = 0;
 +
 +               if (glsl_type_is_matrix(type)) {
 +                  elem += w[i] * glsl_get_vector_elements(type);
 +                  type = glsl_get_column_type(type);
 +               } else {
 +                  assert(glsl_type_is_vector(type));
 +                  elem += w[i];
 +                  type = glsl_scalar_type(glsl_get_base_type(type));
 +               }
 +               continue;
 +
 +            case GLSL_TYPE_ARRAY:
 +               c = &(*c)->elements[w[i]];
 +               type = glsl_get_array_element(type);
 +               continue;
 +
 +            case GLSL_TYPE_STRUCT:
 +               c = &(*c)->elements[w[i]];
 +               type = glsl_get_struct_field(type, w[i]);
 +               continue;
 +
 +            default:
 +               unreachable("Invalid constant type");
 +            }
 +         }
 +
 +         if (opcode == SpvOpCompositeExtract) {
 +            if (elem == -1) {
 +               val->constant = *c;
 +            } else {
 +               unsigned num_components = glsl_get_vector_elements(type);
 +               for (unsigned i = 0; i < num_components; i++)
 +                  val->constant->value.u[i] = (*c)->value.u[elem + i];
 +            }
 +         } else {
 +            struct vtn_value *insert =
 +               vtn_value(b, w[4], vtn_value_type_constant);
 +            assert(insert->const_type == type);
 +            if (elem == -1) {
 +               *c = insert->constant;
 +            } else {
 +               unsigned num_components = glsl_get_vector_elements(type);
 +               for (unsigned i = 0; i < num_components; i++)
 +                  (*c)->value.u[elem + i] = insert->constant->value.u[i];
 +            }
 +         }
 +         return;
 +      }
 +
 +      default: {
 +         bool swap;
 +         nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
 +
 +         unsigned num_components = glsl_get_vector_elements(val->const_type);
++         unsigned bit_size =
++            glsl_get_bit_size(glsl_get_base_type(val->const_type));
 +
 +         nir_const_value src[3];
 +         assert(count <= 7);
 +         for (unsigned i = 0; i < count - 4; i++) {
 +            nir_constant *c =
 +               vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
 +
 +            unsigned j = swap ? 1 - i : i;
++            assert(bit_size == 32);
 +            for (unsigned k = 0; k < num_components; k++)
-                src[j].u[k] = c->value.u[k];
++               src[j].u32[k] = c->value.u[k];
 +         }
 +
-          nir_const_value res = nir_eval_const_opcode(op, num_components, src);
++         nir_const_value res = nir_eval_const_opcode(op, num_components,
++                                                     bit_size, src);
 +
 +         for (unsigned k = 0; k < num_components; k++)
-             val->constant->value.u[k] = res.u[k];
++            val->constant->value.u[k] = res.u32[k];
 +
 +         return;
 +      } /* default */
 +      }
 +   }
 +
 +   case SpvOpConstantNull:
 +      val->constant = vtn_null_constant(b, val->const_type);
 +      break;
 +
 +   case SpvOpConstantSampler:
 +      assert(!"OpConstantSampler requires Kernel Capability");
 +      break;
 +
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +}
 +
 +static void
 +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
 +                         const uint32_t *w, unsigned count)
 +{
 +   struct nir_function *callee =
 +      vtn_value(b, w[3], vtn_value_type_function)->func->impl->function;
 +
 +   nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
 +   for (unsigned i = 0; i < call->num_params; i++) {
 +      unsigned arg_id = w[4 + i];
 +      struct vtn_value *arg = vtn_untyped_value(b, arg_id);
 +      if (arg->value_type == vtn_value_type_access_chain) {
 +         nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain);
 +         call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref));
 +      } else {
 +         struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
 +
 +         /* Make a temporary to store the argument in */
 +         nir_variable *tmp =
 +            nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp");
 +         call->params[i] = nir_deref_var_create(call, tmp);
 +
 +         vtn_local_store(b, arg_ssa, call->params[i]);
 +      }
 +   }
 +
 +   nir_variable *out_tmp = NULL;
 +   if (!glsl_type_is_void(callee->return_type)) {
 +      out_tmp = nir_local_variable_create(b->impl, callee->return_type,
 +                                          "out_tmp");
 +      call->return_deref = nir_deref_var_create(call, out_tmp);
 +   }
 +
 +   nir_builder_instr_insert(&b->nb, &call->instr);
 +
 +   if (glsl_type_is_void(callee->return_type)) {
 +      vtn_push_value(b, w[2], vtn_value_type_undef);
 +   } else {
 +      struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +      retval->ssa = vtn_local_load(b, call->return_deref);
 +   }
 +}
 +
 +struct vtn_ssa_value *
 +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
 +{
 +   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
 +   val->type = type;
 +
 +   if (!glsl_type_is_vector_or_scalar(type)) {
 +      unsigned elems = glsl_get_length(type);
 +      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
 +      for (unsigned i = 0; i < elems; i++) {
 +         const struct glsl_type *child_type;
 +
 +         switch (glsl_get_base_type(type)) {
 +         case GLSL_TYPE_INT:
 +         case GLSL_TYPE_UINT:
 +         case GLSL_TYPE_BOOL:
 +         case GLSL_TYPE_FLOAT:
 +         case GLSL_TYPE_DOUBLE:
 +            child_type = glsl_get_column_type(type);
 +            break;
 +         case GLSL_TYPE_ARRAY:
 +            child_type = glsl_get_array_element(type);
 +            break;
 +         case GLSL_TYPE_STRUCT:
 +            child_type = glsl_get_struct_field(type, i);
 +            break;
 +         default:
 +            unreachable("unknown base type");
 +         }
 +
 +         val->elems[i] = vtn_create_ssa_value(b, child_type);
 +      }
 +   }
 +
 +   return val;
 +}
 +
 +static nir_tex_src
 +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
 +{
 +   nir_tex_src src;
 +   src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def);
 +   src.src_type = type;
 +   return src;
 +}
 +
 +static void
 +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
 +                   const uint32_t *w, unsigned count)
 +{
 +   if (opcode == SpvOpSampledImage) {
 +      struct vtn_value *val =
 +         vtn_push_value(b, w[2], vtn_value_type_sampled_image);
 +      val->sampled_image = ralloc(b, struct vtn_sampled_image);
 +      val->sampled_image->image =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +      val->sampled_image->sampler =
 +         vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain;
 +      return;
 +   } else if (opcode == SpvOpImage) {
 +      struct vtn_value *val =
 +         vtn_push_value(b, w[2], vtn_value_type_access_chain);
 +      struct vtn_value *src_val = vtn_untyped_value(b, w[3]);
 +      if (src_val->value_type == vtn_value_type_sampled_image) {
 +         val->access_chain = src_val->sampled_image->image;
 +      } else {
 +         assert(src_val->value_type == vtn_value_type_access_chain);
 +         val->access_chain = src_val->access_chain;
 +      }
 +      return;
 +   }
 +
 +   struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type;
 +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +
 +   struct vtn_sampled_image sampled;
 +   struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
 +   if (sampled_val->value_type == vtn_value_type_sampled_image) {
 +      sampled = *sampled_val->sampled_image;
 +   } else {
 +      assert(sampled_val->value_type == vtn_value_type_access_chain);
 +      sampled.image = NULL;
 +      sampled.sampler = sampled_val->access_chain;
 +   }
 +
 +   const struct glsl_type *image_type;
 +   if (sampled.image) {
 +      image_type = sampled.image->var->var->interface_type;
 +   } else {
 +      image_type = sampled.sampler->var->var->interface_type;
 +   }
 +
 +   nir_tex_src srcs[8]; /* 8 should be enough */
 +   nir_tex_src *p = srcs;
 +
 +   unsigned idx = 4;
 +
 +   bool has_coord = false;
 +   switch (opcode) {
 +   case SpvOpImageSampleImplicitLod:
 +   case SpvOpImageSampleExplicitLod:
 +   case SpvOpImageSampleDrefImplicitLod:
 +   case SpvOpImageSampleDrefExplicitLod:
 +   case SpvOpImageSampleProjImplicitLod:
 +   case SpvOpImageSampleProjExplicitLod:
 +   case SpvOpImageSampleProjDrefImplicitLod:
 +   case SpvOpImageSampleProjDrefExplicitLod:
 +   case SpvOpImageFetch:
 +   case SpvOpImageGather:
 +   case SpvOpImageDrefGather:
 +   case SpvOpImageQueryLod: {
 +      /* All these types have the coordinate as their first real argument */
 +      struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
 +      has_coord = true;
 +      p->src = nir_src_for_ssa(coord->def);
 +      p->src_type = nir_tex_src_coord;
 +      p++;
 +      break;
 +   }
 +
 +   default:
 +      break;
 +   }
 +
 +   /* These all have an explicit depth value as their next source */
 +   switch (opcode) {
 +   case SpvOpImageSampleDrefImplicitLod:
 +   case SpvOpImageSampleDrefExplicitLod:
 +   case SpvOpImageSampleProjDrefImplicitLod:
 +   case SpvOpImageSampleProjDrefExplicitLod:
 +      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
 +      break;
 +   default:
 +      break;
 +   }
 +
 +   /* For OpImageQuerySizeLod, we always have an LOD */
 +   if (opcode == SpvOpImageQuerySizeLod)
 +      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
 +
 +   /* Figure out the base texture operation */
 +   nir_texop texop;
 +   switch (opcode) {
 +   case SpvOpImageSampleImplicitLod:
 +   case SpvOpImageSampleDrefImplicitLod:
 +   case SpvOpImageSampleProjImplicitLod:
 +   case SpvOpImageSampleProjDrefImplicitLod:
 +      texop = nir_texop_tex;
 +      break;
 +
 +   case SpvOpImageSampleExplicitLod:
 +   case SpvOpImageSampleDrefExplicitLod:
 +   case SpvOpImageSampleProjExplicitLod:
 +   case SpvOpImageSampleProjDrefExplicitLod:
 +      texop = nir_texop_txl;
 +      break;
 +
 +   case SpvOpImageFetch:
 +      if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {
 +         texop = nir_texop_txf_ms;
 +      } else {
 +         texop = nir_texop_txf;
 +      }
 +      break;
 +
 +   case SpvOpImageGather:
 +   case SpvOpImageDrefGather:
 +      texop = nir_texop_tg4;
 +      break;
 +
 +   case SpvOpImageQuerySizeLod:
 +   case SpvOpImageQuerySize:
 +      texop = nir_texop_txs;
 +      break;
 +
 +   case SpvOpImageQueryLod:
 +      texop = nir_texop_lod;
 +      break;
 +
 +   case SpvOpImageQueryLevels:
 +      texop = nir_texop_query_levels;
 +      break;
 +
 +   case SpvOpImageQuerySamples:
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +
 +   /* Now we need to handle some number of optional arguments */
 +   if (idx < count) {
 +      uint32_t operands = w[idx++];
 +
 +      if (operands & SpvImageOperandsBiasMask) {
 +         assert(texop == nir_texop_tex);
 +         texop = nir_texop_txb;
 +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias);
 +      }
 +
 +      if (operands & SpvImageOperandsLodMask) {
 +         assert(texop == nir_texop_txl || texop == nir_texop_txf ||
 +                texop == nir_texop_txf_ms || texop == nir_texop_txs);
 +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
 +      }
 +
 +      if (operands & SpvImageOperandsGradMask) {
 +         assert(texop == nir_texop_tex);
 +         texop = nir_texop_txd;
 +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
 +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
 +      }
 +
 +      if (operands & SpvImageOperandsOffsetMask ||
 +          operands & SpvImageOperandsConstOffsetMask)
 +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset);
 +
 +      if (operands & SpvImageOperandsConstOffsetsMask)
 +         assert(!"Constant offsets to texture gather not yet implemented");
 +
 +      if (operands & SpvImageOperandsSampleMask) {
 +         assert(texop == nir_texop_txf_ms);
 +         texop = nir_texop_txf_ms;
 +         (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
 +      }
 +   }
 +   /* We should have now consumed exactly all of the arguments */
 +   assert(idx == count);
 +
 +   nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
 +   instr->op = texop;
 +
 +   memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
 +
 +   instr->sampler_dim = glsl_get_sampler_dim(image_type);
 +   instr->is_array = glsl_sampler_type_is_array(image_type);
 +   instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
 +   instr->is_new_style_shadow = instr->is_shadow;
 +
 +   if (has_coord) {
 +      switch (instr->sampler_dim) {
 +      case GLSL_SAMPLER_DIM_1D:
 +      case GLSL_SAMPLER_DIM_BUF:
 +         instr->coord_components = 1;
 +         break;
 +      case GLSL_SAMPLER_DIM_2D:
 +      case GLSL_SAMPLER_DIM_RECT:
 +      case GLSL_SAMPLER_DIM_MS:
 +         instr->coord_components = 2;
 +         break;
 +      case GLSL_SAMPLER_DIM_3D:
 +      case GLSL_SAMPLER_DIM_CUBE:
 +         instr->coord_components = 3;
 +         break;
 +      default:
 +         assert(!"Invalid sampler type");
 +      }
 +
 +      if (instr->is_array)
 +         instr->coord_components++;
 +   } else {
 +      instr->coord_components = 0;
 +   }
 +
 +   switch (glsl_get_sampler_result_type(image_type)) {
 +   case GLSL_TYPE_FLOAT:   instr->dest_type = nir_type_float;     break;
 +   case GLSL_TYPE_INT:     instr->dest_type = nir_type_int;       break;
 +   case GLSL_TYPE_UINT:    instr->dest_type = nir_type_uint;      break;
 +   case GLSL_TYPE_BOOL:    instr->dest_type = nir_type_bool;      break;
 +   default:
 +      unreachable("Invalid base type for sampler result");
 +   }
 +
 +   nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler);
 +   if (sampled.image) {
 +      nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image);
 +      instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref));
 +   } else {
 +      instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
 +   }
 +
 +   switch (instr->op) {
 +   case nir_texop_tex:
 +   case nir_texop_txb:
 +   case nir_texop_txl:
 +   case nir_texop_txd:
 +      /* These operations require a sampler */
 +      instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
 +      break;
 +   case nir_texop_txf:
 +   case nir_texop_txf_ms:
 +   case nir_texop_txs:
 +   case nir_texop_lod:
 +   case nir_texop_tg4:
 +   case nir_texop_query_levels:
 +   case nir_texop_texture_samples:
 +   case nir_texop_samples_identical:
 +      /* These don't */
 +      instr->sampler = NULL;
 +      break;
 +   }
 +
 +   nir_ssa_dest_init(&instr->instr, &instr->dest,
-       nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
++                     nir_tex_instr_dest_size(instr), 32, NULL);
 +
 +   assert(glsl_get_vector_elements(ret_type->type) ==
 +          nir_tex_instr_dest_size(instr));
 +
 +   val->ssa = vtn_create_ssa_value(b, ret_type->type);
 +   val->ssa->def = &instr->dest.ssa;
 +
 +   nir_builder_instr_insert(&b->nb, &instr->instr);
 +}
 +
 +static nir_ssa_def *
 +get_image_coord(struct vtn_builder *b, uint32_t value)
 +{
 +   struct vtn_ssa_value *coord = vtn_ssa_value(b, value);
 +
 +   /* The image_load_store intrinsics assume a 4-dim coordinate */
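 +   /* e.g. a 2-component coordinate (x, y) is padded out to (x, y, y, y) by
 +    * repeating the last valid component into the unused slots.
 +    */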
 +   unsigned dim = glsl_get_vector_elements(coord->type);
 +   unsigned swizzle[4];
 +   for (unsigned i = 0; i < 4; i++)
 +      swizzle[i] = MIN2(i, dim - 1);
 +
 +   return nir_swizzle(&b->nb, coord->def, swizzle, 4, false);
 +}
 +
 +static void
 +vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
 +                 const uint32_t *w, unsigned count)
 +{
 +   /* Just get this one out of the way */
 +   if (opcode == SpvOpImageTexelPointer) {
 +      struct vtn_value *val =
 +         vtn_push_value(b, w[2], vtn_value_type_image_pointer);
 +      val->image = ralloc(b, struct vtn_image_pointer);
 +
 +      val->image->image =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +      val->image->coord = get_image_coord(b, w[4]);
 +      val->image->sample = vtn_ssa_value(b, w[5])->def;
 +      return;
 +   }
 +
 +   struct vtn_image_pointer image;
 +
 +   switch (opcode) {
 +   case SpvOpAtomicExchange:
 +   case SpvOpAtomicCompareExchange:
 +   case SpvOpAtomicCompareExchangeWeak:
 +   case SpvOpAtomicIIncrement:
 +   case SpvOpAtomicIDecrement:
 +   case SpvOpAtomicIAdd:
 +   case SpvOpAtomicISub:
 +   case SpvOpAtomicSMin:
 +   case SpvOpAtomicUMin:
 +   case SpvOpAtomicSMax:
 +   case SpvOpAtomicUMax:
 +   case SpvOpAtomicAnd:
 +   case SpvOpAtomicOr:
 +   case SpvOpAtomicXor:
 +      image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
 +      break;
 +
 +   case SpvOpImageQuerySize:
 +      image.image =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +      image.coord = NULL;
 +      image.sample = NULL;
 +      break;
 +
 +   case SpvOpImageRead:
 +      image.image =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +      image.coord = get_image_coord(b, w[4]);
 +
 +      if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) {
 +         assert(w[5] == SpvImageOperandsSampleMask);
 +         image.sample = vtn_ssa_value(b, w[6])->def;
 +      } else {
 +         image.sample = nir_ssa_undef(&b->nb, 1);
 +      }
 +      break;
 +
 +   case SpvOpImageWrite:
 +      image.image =
 +         vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
 +      image.coord = get_image_coord(b, w[2]);
 +
 +      /* texel = w[3] */
 +
 +      if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) {
 +         assert(w[4] == SpvImageOperandsSampleMask);
 +         image.sample = vtn_ssa_value(b, w[5])->def;
 +      } else {
 +         image.sample = nir_ssa_undef(&b->nb, 1);
 +      }
 +      break;
 +
 +   default:
 +      unreachable("Invalid image opcode");
 +   }
 +
 +   nir_intrinsic_op op;
 +   switch (opcode) {
 +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break;
 +   OP(ImageQuerySize,         size)
 +   OP(ImageRead,              load)
 +   OP(ImageWrite,             store)
 +   OP(AtomicExchange,         atomic_exchange)
 +   OP(AtomicCompareExchange,  atomic_comp_swap)
 +   OP(AtomicIIncrement,       atomic_add)
 +   OP(AtomicIDecrement,       atomic_add)
 +   OP(AtomicIAdd,             atomic_add)
 +   OP(AtomicISub,             atomic_add)
 +   OP(AtomicSMin,             atomic_min)
 +   OP(AtomicUMin,             atomic_min)
 +   OP(AtomicSMax,             atomic_max)
 +   OP(AtomicUMax,             atomic_max)
 +   OP(AtomicAnd,              atomic_and)
 +   OP(AtomicOr,               atomic_or)
 +   OP(AtomicXor,              atomic_xor)
 +#undef OP
 +   default:
 +      unreachable("Invalid image opcode");
 +   }
 +
 +   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
 +
 +   nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image);
 +   intrin->variables[0] =
 +      nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref));
 +
 +   /* ImageQuerySize doesn't take any extra parameters */
 +   if (opcode != SpvOpImageQuerySize) {
 +      /* The image coordinate is always 4 components but we may not have that
 +       * many.  Swizzle to compensate.
 +       */
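 +      /* e.g. a 2-component coordinate (x, y) is sent as (x, y, x, x). */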
 +      unsigned swiz[4];
 +      for (unsigned i = 0; i < 4; i++)
 +         swiz[i] = i < image.coord->num_components ? i : 0;
 +      intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord,
 +                                                   swiz, 4, false));
 +      intrin->src[1] = nir_src_for_ssa(image.sample);
 +   }
 +
 +   switch (opcode) {
 +   case SpvOpImageQuerySize:
 +   case SpvOpImageRead:
 +      break;
 +   case SpvOpImageWrite:
 +      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
 +      break;
 +   case SpvOpAtomicIIncrement:
 +      intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
 +      break;
 +   case SpvOpAtomicIDecrement:
 +      intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
 +      break;
 +
 +   case SpvOpAtomicExchange:
 +   case SpvOpAtomicIAdd:
 +   case SpvOpAtomicSMin:
 +   case SpvOpAtomicUMin:
 +   case SpvOpAtomicSMax:
 +   case SpvOpAtomicUMax:
 +   case SpvOpAtomicAnd:
 +   case SpvOpAtomicOr:
 +   case SpvOpAtomicXor:
 +      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
 +      break;
 +
 +   case SpvOpAtomicCompareExchange:
 +      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
 +      intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
 +      break;
 +
 +   case SpvOpAtomicISub:
 +      intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
 +      break;
 +
 +   default:
 +      unreachable("Invalid image opcode");
 +   }
 +
 +   if (opcode != SpvOpImageWrite) {
 +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +      struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
-    nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
++      nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL);
 +
 +      nir_builder_instr_insert(&b->nb, &intrin->instr);
 +
 +      /* The image intrinsics always return 4 channels but we may not want
 +       * that many.  Emit a mov to trim it down.
 +       */
 +      unsigned swiz[4] = {0, 1, 2, 3};
 +      val->ssa = vtn_create_ssa_value(b, type->type);
 +      val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz,
 +                                  glsl_get_vector_elements(type->type), false);
 +   } else {
 +      nir_builder_instr_insert(&b->nb, &intrin->instr);
 +   }
 +}
 +
 +static nir_intrinsic_op
 +get_ssbo_nir_atomic_op(SpvOp opcode)
 +{
 +   switch (opcode) {
 +#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N;
 +   OP(AtomicExchange,         atomic_exchange)
 +   OP(AtomicCompareExchange,  atomic_comp_swap)
 +   OP(AtomicIIncrement,       atomic_add)
 +   OP(AtomicIDecrement,       atomic_add)
 +   OP(AtomicIAdd,             atomic_add)
 +   OP(AtomicISub,             atomic_add)
 +   OP(AtomicSMin,             atomic_imin)
 +   OP(AtomicUMin,             atomic_umin)
 +   OP(AtomicSMax,             atomic_imax)
 +   OP(AtomicUMax,             atomic_umax)
 +   OP(AtomicAnd,              atomic_and)
 +   OP(AtomicOr,               atomic_or)
 +   OP(AtomicXor,              atomic_xor)
 +#undef OP
 +   default:
 +      unreachable("Invalid SSBO atomic");
 +   }
 +}
 +
 +static nir_intrinsic_op
 +get_shared_nir_atomic_op(SpvOp opcode)
 +{
 +   switch (opcode) {
 +#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N;
 +   OP(AtomicExchange,         atomic_exchange)
 +   OP(AtomicCompareExchange,  atomic_comp_swap)
 +   OP(AtomicIIncrement,       atomic_add)
 +   OP(AtomicIDecrement,       atomic_add)
 +   OP(AtomicIAdd,             atomic_add)
 +   OP(AtomicISub,             atomic_add)
 +   OP(AtomicSMin,             atomic_imin)
 +   OP(AtomicUMin,             atomic_umin)
 +   OP(AtomicSMax,             atomic_imax)
 +   OP(AtomicUMax,             atomic_umax)
 +   OP(AtomicAnd,              atomic_and)
 +   OP(AtomicOr,               atomic_or)
 +   OP(AtomicXor,              atomic_xor)
 +#undef OP
 +   default:
 +      unreachable("Invalid shared atomic");
 +   }
 +}
 +
 +static void
 +fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
 +                           const uint32_t *w, nir_src *src)
 +{
 +   switch (opcode) {
 +   case SpvOpAtomicIIncrement:
 +      src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
 +      break;
 +
 +   case SpvOpAtomicIDecrement:
 +      src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
 +      break;
 +
 +   case SpvOpAtomicISub:
 +      src[0] =
 +         nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
 +      break;
 +
 +   case SpvOpAtomicCompareExchange:
 +      src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
 +      src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
 +      break;
 +
 +   case SpvOpAtomicExchange:
 +   case SpvOpAtomicIAdd:
 +   case SpvOpAtomicSMin:
 +   case SpvOpAtomicUMin:
 +   case SpvOpAtomicSMax:
 +   case SpvOpAtomicUMax:
 +   case SpvOpAtomicAnd:
 +   case SpvOpAtomicOr:
 +   case SpvOpAtomicXor:
 +      src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
 +      break;
 +
 +   default:
 +      unreachable("Invalid SPIR-V atomic");
 +   }
 +}
 +
 +static void
 +vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
 +                                 const uint32_t *w, unsigned count)
 +{
 +   struct vtn_access_chain *chain =
 +      vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +   nir_intrinsic_instr *atomic;
 +
 +   /*
 +   SpvScope scope = w[4];
 +   SpvMemorySemanticsMask semantics = w[5];
 +   */
 +
 +   if (chain->var->mode == vtn_variable_mode_workgroup) {
 +      nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref;
 +      nir_intrinsic_op op = get_shared_nir_atomic_op(opcode);
 +      atomic = nir_intrinsic_instr_create(b->nb.shader, op);
 +      atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref));
 +      fill_common_atomic_sources(b, opcode, w, &atomic->src[0]);
 +   } else {
 +      assert(chain->var->mode == vtn_variable_mode_ssbo);
 +      struct vtn_type *type;
 +      nir_ssa_def *offset, *index;
 +      offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false);
 +
 +      nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
 +
 +      atomic = nir_intrinsic_instr_create(b->nb.shader, op);
 +      atomic->src[0] = nir_src_for_ssa(index);
 +      atomic->src[1] = nir_src_for_ssa(offset);
 +      fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
 +   }
 +
- create_vec(nir_shader *shader, unsigned num_components)
++   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
 +
 +   struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
 +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +   val->ssa = rzalloc(b, struct vtn_ssa_value);
 +   val->ssa->def = &atomic->dest.ssa;
 +   val->ssa->type = type->type;
 +
 +   nir_builder_instr_insert(&b->nb, &atomic->instr);
 +}
 +
 +static nir_alu_instr *
-    nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
++create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size)
 +{
 +   nir_op op;
 +   switch (num_components) {
 +   case 1: op = nir_op_fmov; break;
 +   case 2: op = nir_op_vec2; break;
 +   case 3: op = nir_op_vec3; break;
 +   case 4: op = nir_op_vec4; break;
 +   default: unreachable("bad vector size");
 +   }
 +
 +   nir_alu_instr *vec = nir_alu_instr_create(shader, op);
-                                       glsl_get_matrix_columns(src->type));
++   nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
++                     bit_size, NULL);
 +   vec->dest.write_mask = (1 << num_components) - 1;
 +
 +   return vec;
 +}
 +
 +struct vtn_ssa_value *
 +vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
 +{
 +   if (src->transposed)
 +      return src->transposed;
 +
 +   struct vtn_ssa_value *dest =
 +      vtn_create_ssa_value(b, glsl_transposed_type(src->type));
 +
 +   for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
 +      nir_alu_instr *vec = create_vec(b->shader,
-    nir_alu_instr *vec = create_vec(b->shader, src->num_components);
++                                      glsl_get_matrix_columns(src->type),
++                                      glsl_get_bit_size(glsl_get_base_type(src->type)));
 +      if (glsl_type_is_vector_or_scalar(src->type)) {
 +          vec->src[0].src = nir_src_for_ssa(src->def);
 +          vec->src[0].swizzle[0] = i;
 +      } else {
 +         for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
 +            vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
 +            vec->src[j].swizzle[0] = i;
 +         }
 +      }
 +      nir_builder_instr_insert(&b->nb, &vec->instr);
 +      dest->elems[i]->def = &vec->dest.dest.ssa;
 +   }
 +
 +   dest->transposed = src;
 +
 +   return dest;
 +}
 +
 +nir_ssa_def *
 +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index)
 +{
 +   unsigned swiz[4] = { index };
 +   return nir_swizzle(&b->nb, src, swiz, 1, true);
 +}
 +
 +nir_ssa_def *
 +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
 +                  unsigned index)
 +{
-    nir_alu_instr *vec = create_vec(b->shader, num_components);
++   nir_alu_instr *vec = create_vec(b->shader, src->num_components,
++                                   src->bit_size);
 +
 +   for (unsigned i = 0; i < src->num_components; i++) {
 +      if (i == index) {
 +         vec->src[i].src = nir_src_for_ssa(insert);
 +      } else {
 +         vec->src[i].src = nir_src_for_ssa(src);
 +         vec->src[i].swizzle[0] = i;
 +      }
 +   }
 +
 +   nir_builder_instr_insert(&b->nb, &vec->instr);
 +
 +   return &vec->dest.dest.ssa;
 +}
 +
 +nir_ssa_def *
 +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
 +                           nir_ssa_def *index)
 +{
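 +   /* Select the component by chaining bcsel instructions: start with
 +    * component 0 and replace the result whenever index matches a higher
 +    * component.
 +    */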
 +   nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
 +   for (unsigned i = 1; i < src->num_components; i++)
 +      dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
 +                       vtn_vector_extract(b, src, i), dest);
 +
 +   return dest;
 +}
 +
 +nir_ssa_def *
 +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
 +                          nir_ssa_def *insert, nir_ssa_def *index)
 +{
 +   nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0);
 +   for (unsigned i = 1; i < src->num_components; i++)
 +      dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
 +                       vtn_vector_insert(b, src, insert, i), dest);
 +
 +   return dest;
 +}
 +
 +static nir_ssa_def *
 +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
 +                   nir_ssa_def *src0, nir_ssa_def *src1,
 +                   const uint32_t *indices)
 +{
-    nir_alu_instr *vec = create_vec(b->shader, num_components);
++   nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size);
 +
 +   nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
 +   nir_builder_instr_insert(&b->nb, &undef->instr);
 +
 +   for (unsigned i = 0; i < num_components; i++) {
 +      uint32_t index = indices[i];
 +      if (index == 0xffffffff) {
 +         vec->src[i].src = nir_src_for_ssa(&undef->def);
 +      } else if (index < src0->num_components) {
 +         vec->src[i].src = nir_src_for_ssa(src0);
 +         vec->src[i].swizzle[0] = index;
 +      } else {
 +         vec->src[i].src = nir_src_for_ssa(src1);
 +         vec->src[i].swizzle[0] = index - src0->num_components;
 +      }
 +   }
 +
 +   nir_builder_instr_insert(&b->nb, &vec->instr);
 +
 +   return &vec->dest.dest.ssa;
 +}
 +
 +/*
 + * Concatenates a number of vectors/scalars together to produce a vector
 + */
 +static nir_ssa_def *
 +vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
 +                     unsigned num_srcs, nir_ssa_def **srcs)
 +{
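 +   /* e.g. two vec2 sources (a.xy, b.xy) produce the vec4 (a.x, a.y, b.x, b.y);
 +    * each source component is copied into the next destination slot in order.
 +    */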
++   nir_alu_instr *vec = create_vec(b->shader, num_components,
++                                   srcs[0]->bit_size);
 +
 +   unsigned dest_idx = 0;
 +   for (unsigned i = 0; i < num_srcs; i++) {
 +      nir_ssa_def *src = srcs[i];
 +      for (unsigned j = 0; j < src->num_components; j++) {
 +         vec->src[dest_idx].src = nir_src_for_ssa(src);
 +         vec->src[dest_idx].swizzle[0] = j;
 +         dest_idx++;
 +      }
 +   }
 +
 +   nir_builder_instr_insert(&b->nb, &vec->instr);
 +
 +   return &vec->dest.dest.ssa;
 +}
 +
 +static struct vtn_ssa_value *
 +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
 +{
 +   struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
 +   dest->type = src->type;
 +
 +   if (glsl_type_is_vector_or_scalar(src->type)) {
 +      dest->def = src->def;
 +   } else {
 +      unsigned elems = glsl_get_length(src->type);
 +
 +      dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
 +      for (unsigned i = 0; i < elems; i++)
 +         dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
 +   }
 +
 +   return dest;
 +}
 +
 +static struct vtn_ssa_value *
 +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
 +                     struct vtn_ssa_value *insert, const uint32_t *indices,
 +                     unsigned num_indices)
 +{
 +   struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
 +
 +   struct vtn_ssa_value *cur = dest;
 +   unsigned i;
 +   for (i = 0; i < num_indices - 1; i++) {
 +      cur = cur->elems[indices[i]];
 +   }
 +
 +   if (glsl_type_is_vector_or_scalar(cur->type)) {
 +      /* According to the SPIR-V spec, OpCompositeInsert may work down to
 +       * the component granularity. In that case, the last index is the
 +       * component of the vector at which to insert the scalar.
 +       */
 +
 +      cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]);
 +   } else {
 +      cur->elems[indices[i]] = insert;
 +   }
 +
 +   return dest;
 +}
 +
 +static struct vtn_ssa_value *
 +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
 +                      const uint32_t *indices, unsigned num_indices)
 +{
 +   struct vtn_ssa_value *cur = src;
 +   for (unsigned i = 0; i < num_indices; i++) {
 +      if (glsl_type_is_vector_or_scalar(cur->type)) {
 +         assert(i == num_indices - 1);
 +         /* According to the SPIR-V spec, OpCompositeExtract may work down to
 +          * the component granularity. The last index is the component of the
 +          * vector to extract.
 +          */
 +
 +         struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value);
 +         ret->type = glsl_scalar_type(glsl_get_base_type(cur->type));
 +         ret->def = vtn_vector_extract(b, cur->def, indices[i]);
 +         return ret;
 +      } else {
 +         cur = cur->elems[indices[i]];
 +      }
 +   }
 +
 +   return cur;
 +}
 +
 +static void
 +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
 +                     const uint32_t *w, unsigned count)
 +{
 +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +   const struct glsl_type *type =
 +      vtn_value(b, w[1], vtn_value_type_type)->type->type;
 +   val->ssa = vtn_create_ssa_value(b, type);
 +
 +   switch (opcode) {
 +   case SpvOpVectorExtractDynamic:
 +      val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
 +                                                 vtn_ssa_value(b, w[4])->def);
 +      break;
 +
 +   case SpvOpVectorInsertDynamic:
 +      val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
 +                                                vtn_ssa_value(b, w[4])->def,
 +                                                vtn_ssa_value(b, w[5])->def);
 +      break;
 +
 +   case SpvOpVectorShuffle:
 +      val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type),
 +                                         vtn_ssa_value(b, w[3])->def,
 +                                         vtn_ssa_value(b, w[4])->def,
 +                                         w + 5);
 +      break;
 +
 +   case SpvOpCompositeConstruct: {
 +      unsigned elems = count - 3;
 +      if (glsl_type_is_vector_or_scalar(type)) {
 +         nir_ssa_def *srcs[4];
 +         for (unsigned i = 0; i < elems; i++)
 +            srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
 +         val->ssa->def =
 +            vtn_vector_construct(b, glsl_get_vector_elements(type),
 +                                 elems, srcs);
 +      } else {
 +         val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
 +         for (unsigned i = 0; i < elems; i++)
 +            val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
 +      }
 +      break;
 +   }
 +   case SpvOpCompositeExtract:
 +      val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
 +                                       w + 4, count - 4);
 +      break;
 +
 +   case SpvOpCompositeInsert:
 +      val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
 +                                      vtn_ssa_value(b, w[3]),
 +                                      w + 5, count - 5);
 +      break;
 +
 +   case SpvOpCopyObject:
 +      val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
 +      break;
 +
 +   default:
 +      unreachable("unknown composite operation");
 +   }
 +}
 +
 +static void
 +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
 +                   const uint32_t *w, unsigned count)
 +{
 +   nir_intrinsic_op intrinsic_op;
 +   switch (opcode) {
 +   case SpvOpEmitVertex:
 +   case SpvOpEmitStreamVertex:
 +      intrinsic_op = nir_intrinsic_emit_vertex;
 +      break;
 +   case SpvOpEndPrimitive:
 +   case SpvOpEndStreamPrimitive:
 +      intrinsic_op = nir_intrinsic_end_primitive;
 +      break;
 +   case SpvOpMemoryBarrier:
 +      intrinsic_op = nir_intrinsic_memory_barrier;
 +      break;
 +   case SpvOpControlBarrier:
 +      intrinsic_op = nir_intrinsic_barrier;
 +      break;
 +   default:
 +      unreachable("unknown barrier instruction");
 +   }
 +
 +   nir_intrinsic_instr *intrin =
 +      nir_intrinsic_instr_create(b->shader, intrinsic_op);
 +
 +   if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
 +      nir_intrinsic_set_stream_id(intrin, w[1]);
 +
 +   nir_builder_instr_insert(&b->nb, &intrin->instr);
 +}
 +
 +static unsigned
 +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
 +{
 +   switch (mode) {
 +   case SpvExecutionModeInputPoints:
 +   case SpvExecutionModeOutputPoints:
 +      return 0; /* GL_POINTS */
 +   case SpvExecutionModeInputLines:
 +      return 1; /* GL_LINES */
 +   case SpvExecutionModeInputLinesAdjacency:
 +      return 0x000A; /* GL_LINES_ADJACENCY_ARB */
 +   case SpvExecutionModeTriangles:
 +      return 4; /* GL_TRIANGLES */
 +   case SpvExecutionModeInputTrianglesAdjacency:
 +      return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */
 +   case SpvExecutionModeQuads:
 +      return 7; /* GL_QUADS */
 +   case SpvExecutionModeIsolines:
 +      return 0x8E7A; /* GL_ISOLINES */
 +   case SpvExecutionModeOutputLineStrip:
 +      return 3; /* GL_LINE_STRIP */
 +   case SpvExecutionModeOutputTriangleStrip:
 +      return 5; /* GL_TRIANGLE_STRIP */
 +   default:
 +      assert(!"Invalid primitive type");
 +      return 4;
 +   }
 +}
 +
 +static unsigned
 +vertices_in_from_spv_execution_mode(SpvExecutionMode mode)
 +{
 +   switch (mode) {
 +   case SpvExecutionModeInputPoints:
 +      return 1;
 +   case SpvExecutionModeInputLines:
 +      return 2;
 +   case SpvExecutionModeInputLinesAdjacency:
 +      return 4;
 +   case SpvExecutionModeTriangles:
 +      return 3;
 +   case SpvExecutionModeInputTrianglesAdjacency:
 +      return 6;
 +   default:
 +      assert(!"Invalid GS input mode");
 +      return 0;
 +   }
 +}
 +
 +static gl_shader_stage
 +stage_for_execution_model(SpvExecutionModel model)
 +{
 +   switch (model) {
 +   case SpvExecutionModelVertex:
 +      return MESA_SHADER_VERTEX;
 +   case SpvExecutionModelTessellationControl:
 +      return MESA_SHADER_TESS_CTRL;
 +   case SpvExecutionModelTessellationEvaluation:
 +      return MESA_SHADER_TESS_EVAL;
 +   case SpvExecutionModelGeometry:
 +      return MESA_SHADER_GEOMETRY;
 +   case SpvExecutionModelFragment:
 +      return MESA_SHADER_FRAGMENT;
 +   case SpvExecutionModelGLCompute:
 +      return MESA_SHADER_COMPUTE;
 +   default:
 +      unreachable("Unsupported execution model");
 +   }
 +}
 +
 +static bool
 +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
 +                                const uint32_t *w, unsigned count)
 +{
 +   switch (opcode) {
 +   case SpvOpSource:
 +   case SpvOpSourceExtension:
 +   case SpvOpSourceContinued:
 +   case SpvOpExtension:
 +      /* Unhandled, but these are for debug so that's ok. */
 +      break;
 +
 +   case SpvOpCapability: {
 +      SpvCapability cap = w[1];
 +      switch (cap) {
 +      case SpvCapabilityMatrix:
 +      case SpvCapabilityShader:
 +      case SpvCapabilityGeometry:
 +      case SpvCapabilityTessellationPointSize:
 +      case SpvCapabilityGeometryPointSize:
 +      case SpvCapabilityUniformBufferArrayDynamicIndexing:
 +      case SpvCapabilitySampledImageArrayDynamicIndexing:
 +      case SpvCapabilityStorageBufferArrayDynamicIndexing:
 +      case SpvCapabilityStorageImageArrayDynamicIndexing:
 +      case SpvCapabilityImageRect:
 +      case SpvCapabilitySampledRect:
 +      case SpvCapabilitySampled1D:
 +      case SpvCapabilityImage1D:
 +      case SpvCapabilitySampledCubeArray:
 +      case SpvCapabilitySampledBuffer:
 +      case SpvCapabilityImageBuffer:
 +      case SpvCapabilityImageQuery:
 +         break;
 +      case SpvCapabilityClipDistance:
 +      case SpvCapabilityCullDistance:
 +      case SpvCapabilityGeometryStreams:
 +         fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n");
 +         break;
 +      default:
 +         assert(!"Unsupported capability");
 +      }
 +      break;
 +   }
 +
 +   case SpvOpExtInstImport:
 +      vtn_handle_extension(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpMemoryModel:
 +      assert(w[1] == SpvAddressingModelLogical);
 +      assert(w[2] == SpvMemoryModelGLSL450);
 +      break;
 +
 +   case SpvOpEntryPoint: {
 +      struct vtn_value *entry_point = &b->values[w[2]];
 +      /* Record the name as a label regardless of whether this is the
 +       * entry point we are looking for.
 +       */
 +      unsigned name_words;
 +      entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
 +
 +      if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
 +          stage_for_execution_model(w[1]) != b->entry_point_stage)
 +         break;
 +
 +      assert(b->entry_point == NULL);
 +      b->entry_point = entry_point;
 +      break;
 +   }
 +
 +   case SpvOpString:
 +      vtn_push_value(b, w[1], vtn_value_type_string)->str =
 +         vtn_string_literal(b, &w[2], count - 2, NULL);
 +      break;
 +
 +   case SpvOpName:
 +      b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
 +      break;
 +
 +   case SpvOpMemberName:
 +      /* TODO */
 +      break;
 +
 +   case SpvOpExecutionMode:
 +   case SpvOpDecorationGroup:
 +   case SpvOpDecorate:
 +   case SpvOpMemberDecorate:
 +   case SpvOpGroupDecorate:
 +   case SpvOpGroupMemberDecorate:
 +      vtn_handle_decoration(b, opcode, w, count);
 +      break;
 +
 +   default:
 +      return false; /* End of preamble */
 +   }
 +
 +   return true;
 +}
 +
 +static void
 +vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
 +                          const struct vtn_decoration *mode, void *data)
 +{
 +   assert(b->entry_point == entry_point);
 +
 +   switch(mode->exec_mode) {
 +   case SpvExecutionModeOriginUpperLeft:
 +   case SpvExecutionModeOriginLowerLeft:
 +      b->origin_upper_left =
 +         (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
 +      break;
 +
 +   case SpvExecutionModeEarlyFragmentTests:
 +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
 +      b->shader->info.fs.early_fragment_tests = true;
 +      break;
 +
 +   case SpvExecutionModeInvocations:
 +      assert(b->shader->stage == MESA_SHADER_GEOMETRY);
 +      b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
 +      break;
 +
 +   case SpvExecutionModeDepthReplacing:
 +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
 +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
 +      break;
 +   case SpvExecutionModeDepthGreater:
 +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
 +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
 +      break;
 +   case SpvExecutionModeDepthLess:
 +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
 +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
 +      break;
 +   case SpvExecutionModeDepthUnchanged:
 +      assert(b->shader->stage == MESA_SHADER_FRAGMENT);
 +      b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
 +      break;
 +
 +   case SpvExecutionModeLocalSize:
 +      assert(b->shader->stage == MESA_SHADER_COMPUTE);
 +      b->shader->info.cs.local_size[0] = mode->literals[0];
 +      b->shader->info.cs.local_size[1] = mode->literals[1];
 +      b->shader->info.cs.local_size[2] = mode->literals[2];
 +      break;
 +   case SpvExecutionModeLocalSizeHint:
 +      break; /* Nothing to do with this */
 +
 +   case SpvExecutionModeOutputVertices:
 +      assert(b->shader->stage == MESA_SHADER_GEOMETRY);
 +      b->shader->info.gs.vertices_out = mode->literals[0];
 +      break;
 +
 +   case SpvExecutionModeInputPoints:
 +   case SpvExecutionModeInputLines:
 +   case SpvExecutionModeInputLinesAdjacency:
 +   case SpvExecutionModeTriangles:
 +   case SpvExecutionModeInputTrianglesAdjacency:
 +   case SpvExecutionModeQuads:
 +   case SpvExecutionModeIsolines:
 +      if (b->shader->stage == MESA_SHADER_GEOMETRY) {
 +         b->shader->info.gs.vertices_in =
 +            vertices_in_from_spv_execution_mode(mode->exec_mode);
 +      } else {
 +         assert(!"Tessellation shaders not yet supported");
 +      }
 +      break;
 +
 +   case SpvExecutionModeOutputPoints:
 +   case SpvExecutionModeOutputLineStrip:
 +   case SpvExecutionModeOutputTriangleStrip:
 +      assert(b->shader->stage == MESA_SHADER_GEOMETRY);
 +      b->shader->info.gs.output_primitive =
 +         gl_primitive_from_spv_execution_mode(mode->exec_mode);
 +      break;
 +
 +   case SpvExecutionModeSpacingEqual:
 +   case SpvExecutionModeSpacingFractionalEven:
 +   case SpvExecutionModeSpacingFractionalOdd:
 +   case SpvExecutionModeVertexOrderCw:
 +   case SpvExecutionModeVertexOrderCcw:
 +   case SpvExecutionModePointMode:
 +      assert(!"TODO: Add tessellation metadata");
 +      break;
 +
 +   case SpvExecutionModePixelCenterInteger:
 +   case SpvExecutionModeXfb:
 +      assert(!"Unhandled execution mode");
 +      break;
 +
 +   case SpvExecutionModeVecTypeHint:
 +   case SpvExecutionModeContractionOff:
 +      break; /* OpenCL */
 +   }
 +}
 +
 +static bool
 +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
 +                                        const uint32_t *w, unsigned count)
 +{
 +   switch (opcode) {
 +   case SpvOpSource:
 +   case SpvOpSourceContinued:
 +   case SpvOpSourceExtension:
 +   case SpvOpExtension:
 +   case SpvOpCapability:
 +   case SpvOpExtInstImport:
 +   case SpvOpMemoryModel:
 +   case SpvOpEntryPoint:
 +   case SpvOpExecutionMode:
 +   case SpvOpString:
 +   case SpvOpName:
 +   case SpvOpMemberName:
 +   case SpvOpDecorationGroup:
 +   case SpvOpDecorate:
 +   case SpvOpMemberDecorate:
 +   case SpvOpGroupDecorate:
 +   case SpvOpGroupMemberDecorate:
 +      assert(!"Invalid opcode types and variables section");
 +      break;
 +
 +   case SpvOpTypeVoid:
 +   case SpvOpTypeBool:
 +   case SpvOpTypeInt:
 +   case SpvOpTypeFloat:
 +   case SpvOpTypeVector:
 +   case SpvOpTypeMatrix:
 +   case SpvOpTypeImage:
 +   case SpvOpTypeSampler:
 +   case SpvOpTypeSampledImage:
 +   case SpvOpTypeArray:
 +   case SpvOpTypeRuntimeArray:
 +   case SpvOpTypeStruct:
 +   case SpvOpTypeOpaque:
 +   case SpvOpTypePointer:
 +   case SpvOpTypeFunction:
 +   case SpvOpTypeEvent:
 +   case SpvOpTypeDeviceEvent:
 +   case SpvOpTypeReserveId:
 +   case SpvOpTypeQueue:
 +   case SpvOpTypePipe:
 +      vtn_handle_type(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpConstantTrue:
 +   case SpvOpConstantFalse:
 +   case SpvOpConstant:
 +   case SpvOpConstantComposite:
 +   case SpvOpConstantSampler:
 +   case SpvOpConstantNull:
 +   case SpvOpSpecConstantTrue:
 +   case SpvOpSpecConstantFalse:
 +   case SpvOpSpecConstant:
 +   case SpvOpSpecConstantComposite:
 +   case SpvOpSpecConstantOp:
 +      vtn_handle_constant(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpVariable:
 +      vtn_handle_variables(b, opcode, w, count);
 +      break;
 +
 +   default:
 +      return false; /* End of preamble */
 +   }
 +
 +   return true;
 +}
 +
 +static bool
 +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
 +                            const uint32_t *w, unsigned count)
 +{
 +   switch (opcode) {
 +   case SpvOpLabel:
 +      break;
 +
 +   case SpvOpLoopMerge:
 +   case SpvOpSelectionMerge:
 +      /* This is handled by cfg pre-pass and walk_blocks */
 +      break;
 +
 +   case SpvOpUndef: {
 +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
 +      val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
 +      break;
 +   }
 +
 +   case SpvOpExtInst:
 +      vtn_handle_extension(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpVariable:
 +   case SpvOpLoad:
 +   case SpvOpStore:
 +   case SpvOpCopyMemory:
 +   case SpvOpCopyMemorySized:
 +   case SpvOpAccessChain:
 +   case SpvOpInBoundsAccessChain:
 +   case SpvOpArrayLength:
 +      vtn_handle_variables(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpFunctionCall:
 +      vtn_handle_function_call(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpSampledImage:
 +   case SpvOpImage:
 +   case SpvOpImageSampleImplicitLod:
 +   case SpvOpImageSampleExplicitLod:
 +   case SpvOpImageSampleDrefImplicitLod:
 +   case SpvOpImageSampleDrefExplicitLod:
 +   case SpvOpImageSampleProjImplicitLod:
 +   case SpvOpImageSampleProjExplicitLod:
 +   case SpvOpImageSampleProjDrefImplicitLod:
 +   case SpvOpImageSampleProjDrefExplicitLod:
 +   case SpvOpImageFetch:
 +   case SpvOpImageGather:
 +   case SpvOpImageDrefGather:
 +   case SpvOpImageQuerySizeLod:
 +   case SpvOpImageQueryLod:
 +   case SpvOpImageQueryLevels:
 +   case SpvOpImageQuerySamples:
 +      vtn_handle_texture(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpImageRead:
 +   case SpvOpImageWrite:
 +   case SpvOpImageTexelPointer:
 +      vtn_handle_image(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpImageQuerySize: {
 +      struct vtn_access_chain *image =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +      if (glsl_type_is_image(image->var->var->interface_type)) {
 +         vtn_handle_image(b, opcode, w, count);
 +      } else {
 +         vtn_handle_texture(b, opcode, w, count);
 +      }
 +      break;
 +   }
 +
 +   case SpvOpAtomicExchange:
 +   case SpvOpAtomicCompareExchange:
 +   case SpvOpAtomicCompareExchangeWeak:
 +   case SpvOpAtomicIIncrement:
 +   case SpvOpAtomicIDecrement:
 +   case SpvOpAtomicIAdd:
 +   case SpvOpAtomicISub:
 +   case SpvOpAtomicSMin:
 +   case SpvOpAtomicUMin:
 +   case SpvOpAtomicSMax:
 +   case SpvOpAtomicUMax:
 +   case SpvOpAtomicAnd:
 +   case SpvOpAtomicOr:
 +   case SpvOpAtomicXor: {
 +      struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
 +      if (pointer->value_type == vtn_value_type_image_pointer) {
 +         vtn_handle_image(b, opcode, w, count);
 +      } else {
 +         assert(pointer->value_type == vtn_value_type_access_chain);
 +         vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count);
 +      }
 +      break;
 +   }
 +
 +   case SpvOpSNegate:
 +   case SpvOpFNegate:
 +   case SpvOpNot:
 +   case SpvOpAny:
 +   case SpvOpAll:
 +   case SpvOpConvertFToU:
 +   case SpvOpConvertFToS:
 +   case SpvOpConvertSToF:
 +   case SpvOpConvertUToF:
 +   case SpvOpUConvert:
 +   case SpvOpSConvert:
 +   case SpvOpFConvert:
 +   case SpvOpQuantizeToF16:
 +   case SpvOpConvertPtrToU:
 +   case SpvOpConvertUToPtr:
 +   case SpvOpPtrCastToGeneric:
 +   case SpvOpGenericCastToPtr:
 +   case SpvOpBitcast:
 +   case SpvOpIsNan:
 +   case SpvOpIsInf:
 +   case SpvOpIsFinite:
 +   case SpvOpIsNormal:
 +   case SpvOpSignBitSet:
 +   case SpvOpLessOrGreater:
 +   case SpvOpOrdered:
 +   case SpvOpUnordered:
 +   case SpvOpIAdd:
 +   case SpvOpFAdd:
 +   case SpvOpISub:
 +   case SpvOpFSub:
 +   case SpvOpIMul:
 +   case SpvOpFMul:
 +   case SpvOpUDiv:
 +   case SpvOpSDiv:
 +   case SpvOpFDiv:
 +   case SpvOpUMod:
 +   case SpvOpSRem:
 +   case SpvOpSMod:
 +   case SpvOpFRem:
 +   case SpvOpFMod:
 +   case SpvOpVectorTimesScalar:
 +   case SpvOpDot:
 +   case SpvOpIAddCarry:
 +   case SpvOpISubBorrow:
 +   case SpvOpUMulExtended:
 +   case SpvOpSMulExtended:
 +   case SpvOpShiftRightLogical:
 +   case SpvOpShiftRightArithmetic:
 +   case SpvOpShiftLeftLogical:
 +   case SpvOpLogicalEqual:
 +   case SpvOpLogicalNotEqual:
 +   case SpvOpLogicalOr:
 +   case SpvOpLogicalAnd:
 +   case SpvOpLogicalNot:
 +   case SpvOpBitwiseOr:
 +   case SpvOpBitwiseXor:
 +   case SpvOpBitwiseAnd:
 +   case SpvOpSelect:
 +   case SpvOpIEqual:
 +   case SpvOpFOrdEqual:
 +   case SpvOpFUnordEqual:
 +   case SpvOpINotEqual:
 +   case SpvOpFOrdNotEqual:
 +   case SpvOpFUnordNotEqual:
 +   case SpvOpULessThan:
 +   case SpvOpSLessThan:
 +   case SpvOpFOrdLessThan:
 +   case SpvOpFUnordLessThan:
 +   case SpvOpUGreaterThan:
 +   case SpvOpSGreaterThan:
 +   case SpvOpFOrdGreaterThan:
 +   case SpvOpFUnordGreaterThan:
 +   case SpvOpULessThanEqual:
 +   case SpvOpSLessThanEqual:
 +   case SpvOpFOrdLessThanEqual:
 +   case SpvOpFUnordLessThanEqual:
 +   case SpvOpUGreaterThanEqual:
 +   case SpvOpSGreaterThanEqual:
 +   case SpvOpFOrdGreaterThanEqual:
 +   case SpvOpFUnordGreaterThanEqual:
 +   case SpvOpDPdx:
 +   case SpvOpDPdy:
 +   case SpvOpFwidth:
 +   case SpvOpDPdxFine:
 +   case SpvOpDPdyFine:
 +   case SpvOpFwidthFine:
 +   case SpvOpDPdxCoarse:
 +   case SpvOpDPdyCoarse:
 +   case SpvOpFwidthCoarse:
 +   case SpvOpBitFieldInsert:
 +   case SpvOpBitFieldSExtract:
 +   case SpvOpBitFieldUExtract:
 +   case SpvOpBitReverse:
 +   case SpvOpBitCount:
 +   case SpvOpTranspose:
 +   case SpvOpOuterProduct:
 +   case SpvOpMatrixTimesScalar:
 +   case SpvOpVectorTimesMatrix:
 +   case SpvOpMatrixTimesVector:
 +   case SpvOpMatrixTimesMatrix:
 +      vtn_handle_alu(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpVectorExtractDynamic:
 +   case SpvOpVectorInsertDynamic:
 +   case SpvOpVectorShuffle:
 +   case SpvOpCompositeConstruct:
 +   case SpvOpCompositeExtract:
 +   case SpvOpCompositeInsert:
 +   case SpvOpCopyObject:
 +      vtn_handle_composite(b, opcode, w, count);
 +      break;
 +
 +   case SpvOpEmitVertex:
 +   case SpvOpEndPrimitive:
 +   case SpvOpEmitStreamVertex:
 +   case SpvOpEndStreamPrimitive:
 +   case SpvOpControlBarrier:
 +   case SpvOpMemoryBarrier:
 +      vtn_handle_barrier(b, opcode, w, count);
 +      break;
 +
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +
 +   return true;
 +}
 +
 +nir_function *
 +spirv_to_nir(const uint32_t *words, size_t word_count,
 +             struct nir_spirv_specialization *spec, unsigned num_spec,
 +             gl_shader_stage stage, const char *entry_point_name,
 +             const nir_shader_compiler_options *options)
 +{
 +   const uint32_t *word_end = words + word_count;
 +
 +   /* Handle the SPIR-V header (first 5 dwords) */
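 +   /* The header words are: magic number, version, generator id, value id
 +    * bound, and a reserved schema word that must be zero.
 +    */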
 +   assert(word_count > 5);
 +
 +   assert(words[0] == SpvMagicNumber);
 +   assert(words[1] >= 0x10000);
 +   /* words[2] == generator magic */
 +   unsigned value_id_bound = words[3];
 +   assert(words[4] == 0);
 +
 +   words += 5;
 +
 +   /* Initialize the vtn_builder object */
 +   struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
 +   b->value_id_bound = value_id_bound;
 +   b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
 +   exec_list_make_empty(&b->functions);
 +   b->entry_point_stage = stage;
 +   b->entry_point_name = entry_point_name;
 +
 +   /* Handle all the preamble instructions */
 +   words = vtn_foreach_instruction(b, words, word_end,
 +                                   vtn_handle_preamble_instruction);
 +
 +   if (b->entry_point == NULL) {
 +      assert(!"Entry point not found");
 +      ralloc_free(b);
 +      return NULL;
 +   }
 +
 +   b->shader = nir_shader_create(NULL, stage, options);
 +
 +   /* Parse execution modes */
 +   vtn_foreach_execution_mode(b, b->entry_point,
 +                              vtn_handle_execution_mode, NULL);
 +
 +   b->specializations = spec;
 +   b->num_specializations = num_spec;
 +
 +   /* Handle all variable, type, and constant instructions */
 +   words = vtn_foreach_instruction(b, words, word_end,
 +                                   vtn_handle_variable_or_type_instruction);
 +
 +   vtn_build_cfg(b, words, word_end);
 +
 +   foreach_list_typed(struct vtn_function, func, node, &b->functions) {
 +      b->impl = func->impl;
 +      b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
 +                                               _mesa_key_pointer_equal);
 +
 +      vtn_function_emit(b, func, vtn_handle_body_instruction);
 +   }
 +
 +   assert(b->entry_point->value_type == vtn_value_type_function);
 +   nir_function *entry_point = b->entry_point->func->impl->function;
 +   assert(entry_point);
 +
 +   ralloc_free(b);
 +
 +   return entry_point;
 +}
index 6b649fd,0000000..3360fda
mode 100644,000000..100644
--- /dev/null
@@@ -1,669 -1,0 +1,671 @@@
-                      glsl_get_vector_elements(val->ssa->type), val->name);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + *    Jason Ekstrand (jason@jlekstrand.net)
 + *
 + */
 +
 +#include "vtn_private.h"
 +#include "GLSL.std.450.h"
 +
 +#define M_PIf   ((float) M_PI)
 +#define M_PI_2f ((float) M_PI_2)
 +#define M_PI_4f ((float) M_PI_4)
 +
 +static nir_ssa_def *
 +build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
 +{
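 +   /* det = col0.x * col1.y - col0.y * col1.x: multiply col0 by col1
 +    * swizzled to (y, x) and subtract the two resulting channels.
 +    */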
 +   unsigned swiz[4] = {1, 0, 0, 0};
 +   nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
 +   return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
 +}
 +
 +static nir_ssa_def *
 +build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
 +{
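 +   /* det = dot(col0, cross(col1, col2)), with the cross product written out
 +    * as col1.yzx * col2.zxy - col1.zxy * col2.yzx.
 +    */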
 +   unsigned yzx[4] = {1, 2, 0, 0};
 +   unsigned zxy[4] = {2, 0, 1, 0};
 +
 +   nir_ssa_def *prod0 =
 +      nir_fmul(b, col[0],
 +               nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
 +                           nir_swizzle(b, col[2], zxy, 3, true)));
 +   nir_ssa_def *prod1 =
 +      nir_fmul(b, col[0],
 +               nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
 +                           nir_swizzle(b, col[2], yzx, 3, true)));
 +
 +   nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
 +
 +   return nir_fadd(b, nir_channel(b, diff, 0),
 +                      nir_fadd(b, nir_channel(b, diff, 1),
 +                                  nir_channel(b, diff, 2)));
 +}
 +
 +static nir_ssa_def *
 +build_mat4_det(nir_builder *b, nir_ssa_def **col)
 +{
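 +   /* Cofactor expansion along the first column: each subdet[i] is the 3x3
 +    * minor built from columns 1-3 with row i removed; the minors are then
 +    * combined with alternating signs.
 +    */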
 +   nir_ssa_def *subdet[4];
 +   for (unsigned i = 0; i < 4; i++) {
 +      unsigned swiz[3];
 +      for (unsigned j = 0; j < 3; j++)
 +         swiz[j] = j + (j >= i);
 +
 +      nir_ssa_def *subcol[3];
 +      subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
 +      subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
 +      subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
 +
 +      subdet[i] = build_mat3_det(b, subcol);
 +   }
 +
 +   nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
 +
 +   return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
 +                                  nir_channel(b, prod, 1)),
 +                      nir_fsub(b, nir_channel(b, prod, 2),
 +                                  nir_channel(b, prod, 3)));
 +}
 +
 +static nir_ssa_def *
 +build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
 +{
 +   unsigned size = glsl_get_vector_elements(src->type);
 +
 +   nir_ssa_def *cols[4];
 +   for (unsigned i = 0; i < size; i++)
 +      cols[i] = src->elems[i]->def;
 +
 +   switch(size) {
 +   case 2: return build_mat2_det(&b->nb, cols);
 +   case 3: return build_mat3_det(&b->nb, cols);
 +   case 4: return build_mat4_det(&b->nb, cols);
 +   default:
 +      unreachable("Invalid matrix size");
 +   }
 +}
 +
 +/* Computes the determinant of the submatrix obtained by removing the
 + * specified row and column from src.
 + */
 +static nir_ssa_def *
 +build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
 +                 unsigned size, unsigned row, unsigned col)
 +{
 +   assert(row < size && col < size);
 +   if (size == 2) {
 +      return nir_channel(b, src->elems[1 - col]->def, 1 - row);
 +   } else {
 +      /* Swizzle to get all but the specified row */
 +      unsigned swiz[3];
 +      for (unsigned j = 0; j < 3; j++)
 +         swiz[j] = j + (j >= row);
 +
 +      /* Grab all but the specified column */
 +      nir_ssa_def *subcol[3];
 +      for (unsigned j = 0; j < size; j++) {
 +         if (j != col) {
 +            subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
 +                                                swiz, size - 1, true);
 +         }
 +      }
 +
 +      if (size == 3) {
 +         return build_mat2_det(b, subcol);
 +      } else {
 +         assert(size == 4);
 +         return build_mat3_det(b, subcol);
 +      }
 +   }
 +}
 +
 +static struct vtn_ssa_value *
 +matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
 +{
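 +   /* inverse(M) = adjugate(M) / det(M).  The adjugate is the transpose of
 +    * the cofactor matrix, which is why the subdeterminant below is taken at
 +    * (c, r) rather than (r, c).
 +    */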
 +   nir_ssa_def *adj_col[4];
 +   unsigned size = glsl_get_vector_elements(src->type);
 +
 +   /* Build up an adjugate matrix */
 +   for (unsigned c = 0; c < size; c++) {
 +      nir_ssa_def *elem[4];
 +      for (unsigned r = 0; r < size; r++) {
 +         elem[r] = build_mat_subdet(&b->nb, src, size, c, r);
 +
 +         if ((r + c) % 2)
 +            elem[r] = nir_fneg(&b->nb, elem[r]);
 +      }
 +
 +      adj_col[c] = nir_vec(&b->nb, elem, size);
 +   }
 +
 +   nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));
 +
 +   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
 +   for (unsigned i = 0; i < size; i++)
 +      val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);
 +
 +   return val;
 +}
 +
 +static nir_ssa_def*
 +build_length(nir_builder *b, nir_ssa_def *vec)
 +{
 +   switch (vec->num_components) {
 +   case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
 +   case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
 +   case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
 +   case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
 +   default:
 +      unreachable("Invalid number of components");
 +   }
 +}
 +
 +static inline nir_ssa_def *
 +build_fclamp(nir_builder *b,
 +             nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
 +{
 +   return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
 +}
 +
 +/**
 + * Return e^x.
 + */
 +static nir_ssa_def *
 +build_exp(nir_builder *b, nir_ssa_def *x)
 +{
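 +   /* e^x = 2^(x * log2(e)) */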
 +   return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
 +}
 +
 +/**
 + * Return ln(x) - the natural logarithm of x.
 + */
 +static nir_ssa_def *
 +build_log(nir_builder *b, nir_ssa_def *x)
 +{
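 +   /* ln(x) = log2(x) / log2(e) */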
 +   return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
 +}
 +
 +/**
 + * Approximate asin(x) by the formula:
 + *    asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
 + *
 + * which is correct to first order at x=0 and x=±1 regardless of the p
 + * coefficients but can be made second-order correct at both ends by selecting
 + * the fit coefficients appropriately.  Different p coefficients can be used
 + * in the asin and acos implementation to minimize some relative error metric
 + * in each case.
 + */
 +static nir_ssa_def *
 +build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
 +{
 +   nir_ssa_def *abs_x = nir_fabs(b, x);
 +   return nir_fmul(b, nir_fsign(b, x),
 +                   nir_fsub(b, nir_imm_float(b, M_PI_2f),
 +                            nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
 +                                     nir_fadd(b, nir_imm_float(b, M_PI_2f),
 +                                              nir_fmul(b, abs_x,
 +                                                       nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
 +                                                                nir_fmul(b, abs_x,
 +                                                                         nir_fadd(b, nir_imm_float(b, p0),
 +                                                                                  nir_fmul(b, abs_x,
 +                                                                                           nir_imm_float(b, p1))))))))));
 +}
 +
 +/**
 + * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
 + */
 +static nir_ssa_def *
 +build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
 +{
 +   nir_ssa_def *accum = xs[0];
 +
 +   for (int i = 1; i < terms; i++)
 +      accum = nir_fadd(b, accum, xs[i]);
 +
 +   return accum;
 +}
 +
 +static nir_ssa_def *
 +build_atan(nir_builder *b, nir_ssa_def *y_over_x)
 +{
 +   nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
 +   nir_ssa_def *one = nir_imm_float(b, 1.0f);
 +
 +   /*
 +    * range-reduction, first step:
 +    *
 +    *      / y_over_x         if |y_over_x| <= 1.0;
 +    * x = <
 +    *      \ 1.0 / y_over_x   otherwise
 +    */
 +   nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
 +                                nir_fmax(b, abs_y_over_x, one));
 +
 +   /*
 +    * approximate atan by evaluating polynomial:
 +    *
 +    * x   * 0.9999793128310355 - x^3  * 0.3326756418091246 +
 +    * x^5 * 0.1938924977115610 - x^7  * 0.1173503194786851 +
 +    * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
 +    */
 +   nir_ssa_def *x_2  = nir_fmul(b, x,   x);
 +   nir_ssa_def *x_3  = nir_fmul(b, x_2, x);
 +   nir_ssa_def *x_5  = nir_fmul(b, x_3, x_2);
 +   nir_ssa_def *x_7  = nir_fmul(b, x_5, x_2);
 +   nir_ssa_def *x_9  = nir_fmul(b, x_7, x_2);
 +   nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
 +
 +   nir_ssa_def *polynomial_terms[] = {
 +      nir_fmul(b, x,    nir_imm_float(b,  0.9999793128310355f)),
 +      nir_fmul(b, x_3,  nir_imm_float(b, -0.3326756418091246f)),
 +      nir_fmul(b, x_5,  nir_imm_float(b,  0.1938924977115610f)),
 +      nir_fmul(b, x_7,  nir_imm_float(b, -0.1173503194786851f)),
 +      nir_fmul(b, x_9,  nir_imm_float(b,  0.0536813784310406f)),
 +      nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
 +   };
 +
 +   nir_ssa_def *tmp =
 +      build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
 +
 +   /* range-reduction fixup */
 +   tmp = nir_fadd(b, tmp,
 +                  nir_fmul(b,
 +                           nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
 +                           nir_fadd(b, nir_fmul(b, tmp,
 +                                                nir_imm_float(b, -2.0f)),
 +                                       nir_imm_float(b, M_PI_2f))));
 +
 +   /* sign fixup */
 +   return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
 +}
 +
 +static nir_ssa_def *
 +build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
 +{
 +   nir_ssa_def *zero = nir_imm_float(b, 0.0f);
 +
 +   /* If |x| >= 1.0e-8 * |y|: */
 +   nir_ssa_def *condition =
 +      nir_fge(b, nir_fabs(b, x),
 +              nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
 +
 +   /* Then...call atan(y/x) and fix it up: */
 +   nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
 +   nir_ssa_def *r_then =
 +      nir_bcsel(b, nir_flt(b, x, zero),
 +                   nir_fadd(b, atan1,
 +                               nir_bcsel(b, nir_fge(b, y, zero),
 +                                            nir_imm_float(b, M_PIf),
 +                                            nir_imm_float(b, -M_PIf))),
 +                   atan1);
 +
 +   /* Else... */
 +   nir_ssa_def *r_else =
 +      nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
 +
 +   return nir_bcsel(b, condition, r_then, r_else);
 +}
 +
 +static nir_ssa_def *
 +build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
 +{
 +   nir_ssa_def *abs_x = nir_fabs(b, x);
 +   nir_ssa_def *zero = nir_imm_float(b, 0.0f);
 +
 +   /* Single-precision floating-point values are stored as
 +    *   1 sign bit;
 +    *   8 exponent bits;
 +    *   23 mantissa bits.
 +    *
 +    * An exponent shift of 23 will shift the mantissa out, leaving only the
 +    * exponent and sign bit (which itself may be zero, if the absolute value
 +    * was taken before the bitcast and shift).
 +    */
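 +   /* e.g. frexp(8.0) yields a mantissa of 0.5 and an exponent of 4, since
 +    * 8.0 == 0.5 * 2^4.
 +    */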
 +   nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
 +   nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
 +
 +   nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
 +
 +   /* Exponent of floating-point values in the range [0.5, 1.0). */
 +   nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
 +
 +   nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
 +
 +   *exponent =
 +      nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
 +                  nir_bcsel(b, is_not_zero, exponent_bias, zero));
 +
 +   return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
 +                     nir_bcsel(b, is_not_zero, exponent_value, zero));
 +}
 +
 +static void
 +handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
 +                   const uint32_t *w, unsigned count)
 +{
 +   struct nir_builder *nb = &b->nb;
 +   const struct glsl_type *dest_type =
 +      vtn_value(b, w[1], vtn_value_type_type)->type->type;
 +
 +   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +   val->ssa = vtn_create_ssa_value(b, dest_type);
 +
 +   /* Collect the various SSA sources */
 +   unsigned num_inputs = count - 5;
 +   nir_ssa_def *src[3];
 +   for (unsigned i = 0; i < num_inputs; i++)
 +      src[i] = vtn_ssa_value(b, w[i + 5])->def;
 +
 +   nir_op op;
 +   switch (entrypoint) {
 +   case GLSLstd450Round:       op = nir_op_fround_even;   break; /* TODO */
 +   case GLSLstd450RoundEven:   op = nir_op_fround_even;   break;
 +   case GLSLstd450Trunc:       op = nir_op_ftrunc;        break;
 +   case GLSLstd450FAbs:        op = nir_op_fabs;          break;
 +   case GLSLstd450SAbs:        op = nir_op_iabs;          break;
 +   case GLSLstd450FSign:       op = nir_op_fsign;         break;
 +   case GLSLstd450SSign:       op = nir_op_isign;         break;
 +   case GLSLstd450Floor:       op = nir_op_ffloor;        break;
 +   case GLSLstd450Ceil:        op = nir_op_fceil;         break;
 +   case GLSLstd450Fract:       op = nir_op_ffract;        break;
 +   case GLSLstd450Radians:
 +      val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
 +      return;
 +   case GLSLstd450Degrees:
 +      val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
 +      return;
 +   case GLSLstd450Sin:         op = nir_op_fsin;       break;
 +   case GLSLstd450Cos:         op = nir_op_fcos;       break;
 +   case GLSLstd450Tan:
 +      val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
 +                               nir_fcos(nb, src[0]));
 +      return;
 +   case GLSLstd450Pow:         op = nir_op_fpow;       break;
 +   case GLSLstd450Exp2:        op = nir_op_fexp2;      break;
 +   case GLSLstd450Log2:        op = nir_op_flog2;      break;
 +   case GLSLstd450Sqrt:        op = nir_op_fsqrt;      break;
 +   case GLSLstd450InverseSqrt: op = nir_op_frsq;       break;
 +
 +   case GLSLstd450Modf: {
 +      nir_ssa_def *sign = nir_fsign(nb, src[0]);
 +      nir_ssa_def *abs = nir_fabs(nb, src[0]);
 +      val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
 +      nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
 +                          nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
 +      return;
 +   }
 +
 +   case GLSLstd450ModfStruct: {
 +      nir_ssa_def *sign = nir_fsign(nb, src[0]);
 +      nir_ssa_def *abs = nir_fabs(nb, src[0]);
 +      assert(glsl_type_is_struct(val->ssa->type));
 +      val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
 +      val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
 +      return;
 +   }
 +
 +   case GLSLstd450FMin:        op = nir_op_fmin;       break;
 +   case GLSLstd450UMin:        op = nir_op_umin;       break;
 +   case GLSLstd450SMin:        op = nir_op_imin;       break;
 +   case GLSLstd450FMax:        op = nir_op_fmax;       break;
 +   case GLSLstd450UMax:        op = nir_op_umax;       break;
 +   case GLSLstd450SMax:        op = nir_op_imax;       break;
 +   case GLSLstd450FMix:        op = nir_op_flrp;       break;
 +   case GLSLstd450Step:
 +      val->ssa->def = nir_sge(nb, src[1], src[0]);
 +      return;
 +
 +   case GLSLstd450Fma:         op = nir_op_ffma;       break;
 +   case GLSLstd450Ldexp:       op = nir_op_ldexp;      break;
 +
 +   /* Packing/Unpacking functions */
 +   case GLSLstd450PackSnorm4x8:      op = nir_op_pack_snorm_4x8;      break;
 +   case GLSLstd450PackUnorm4x8:      op = nir_op_pack_unorm_4x8;      break;
 +   case GLSLstd450PackSnorm2x16:     op = nir_op_pack_snorm_2x16;     break;
 +   case GLSLstd450PackUnorm2x16:     op = nir_op_pack_unorm_2x16;     break;
 +   case GLSLstd450PackHalf2x16:      op = nir_op_pack_half_2x16;      break;
 +   case GLSLstd450UnpackSnorm4x8:    op = nir_op_unpack_snorm_4x8;    break;
 +   case GLSLstd450UnpackUnorm4x8:    op = nir_op_unpack_unorm_4x8;    break;
 +   case GLSLstd450UnpackSnorm2x16:   op = nir_op_unpack_snorm_2x16;   break;
 +   case GLSLstd450UnpackUnorm2x16:   op = nir_op_unpack_unorm_2x16;   break;
 +   case GLSLstd450UnpackHalf2x16:    op = nir_op_unpack_half_2x16;    break;
 +
 +   case GLSLstd450Length:
 +      val->ssa->def = build_length(nb, src[0]);
 +      return;
 +   case GLSLstd450Distance:
 +      val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
 +      return;
 +   case GLSLstd450Normalize:
 +      val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
 +      return;
 +
 +   case GLSLstd450Exp:
 +      val->ssa->def = build_exp(nb, src[0]);
 +      return;
 +
 +   case GLSLstd450Log:
 +      val->ssa->def = build_log(nb, src[0]);
 +      return;
 +
 +   case GLSLstd450FClamp:
 +      val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
 +      return;
 +   case GLSLstd450UClamp:
 +      val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
 +      return;
 +   case GLSLstd450SClamp:
 +      val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
 +      return;
 +
 +   case GLSLstd450Cross: {
 +      unsigned yzx[4] = { 1, 2, 0, 0 };
 +      unsigned zxy[4] = { 2, 0, 1, 0 };
 +      val->ssa->def =
 +         nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
 +                                   nir_swizzle(nb, src[1], zxy, 3, true)),
 +                      nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
 +                                   nir_swizzle(nb, src[1], yzx, 3, true)));
 +      return;
 +   }
 +
 +   case GLSLstd450SmoothStep: {
 +      /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
 +      nir_ssa_def *t =
 +         build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
 +                                       nir_fsub(nb, src[1], src[0])),
 +                          nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
 +      /* result = t * t * (3 - 2 * t) */
 +      val->ssa->def =
 +         nir_fmul(nb, t, nir_fmul(nb, t,
 +            nir_fsub(nb, nir_imm_float(nb, 3.0),
 +                         nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
 +      return;
 +   }
 +
 +   case GLSLstd450FaceForward:
 +      val->ssa->def =
 +         nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
 +                                   nir_imm_float(nb, 0.0)),
 +                       src[0], nir_fneg(nb, src[0]));
 +      return;
 +
 +   case GLSLstd450Reflect:
 +      /* I - 2 * dot(N, I) * N */
 +      val->ssa->def =
 +         nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
 +                              nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
 +                                           src[1])));
 +      return;
 +
 +   case GLSLstd450Refract: {
 +      nir_ssa_def *I = src[0];
 +      nir_ssa_def *N = src[1];
 +      nir_ssa_def *eta = src[2];
 +      nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
 +      nir_ssa_def *one = nir_imm_float(nb, 1.0);
 +      nir_ssa_def *zero = nir_imm_float(nb, 0.0);
 +      /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
 +      nir_ssa_def *k =
 +         nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
 +                      nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
 +      nir_ssa_def *result =
 +         nir_fsub(nb, nir_fmul(nb, eta, I),
 +                      nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
 +                                                nir_fsqrt(nb, k)), N));
 +      /* XXX: bcsel, or if statement? */
 +      val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
 +      return;
 +   }
 +
 +   case GLSLstd450Sinh:
 +      /* 0.5 * (e^x - e^(-x)) */
 +      val->ssa->def =
 +         nir_fmul(nb, nir_imm_float(nb, 0.5f),
 +                      nir_fsub(nb, build_exp(nb, src[0]),
 +                                   build_exp(nb, nir_fneg(nb, src[0]))));
 +      return;
 +
 +   case GLSLstd450Cosh:
 +      /* 0.5 * (e^x + e^(-x)) */
 +      val->ssa->def =
 +         nir_fmul(nb, nir_imm_float(nb, 0.5f),
 +                      nir_fadd(nb, build_exp(nb, src[0]),
 +                                   build_exp(nb, nir_fneg(nb, src[0]))));
 +      return;
 +
 +   case GLSLstd450Tanh:
 +      /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
 +      val->ssa->def =
 +         nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
 +                                   nir_fsub(nb, build_exp(nb, src[0]),
 +                                                build_exp(nb, nir_fneg(nb, src[0])))),
 +                      nir_fmul(nb, nir_imm_float(nb, 0.5f),
 +                                   nir_fadd(nb, build_exp(nb, src[0]),
 +                                                build_exp(nb, nir_fneg(nb, src[0])))));
 +      return;
 +
 +   case GLSLstd450Asinh:
 +      val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
 +         build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
 +                       nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
 +                                                  nir_imm_float(nb, 1.0f))))));
 +      return;
 +   case GLSLstd450Acosh:
 +      val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
 +         nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
 +                                    nir_imm_float(nb, 1.0f)))));
 +      return;
 +   case GLSLstd450Atanh: {
 +      nir_ssa_def *one = nir_imm_float(nb, 1.0);
 +      val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
 +         build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
 +                                    nir_fsub(nb, one, src[0]))));
 +      return;
 +   }
 +
 +   case GLSLstd450FindILsb:   op = nir_op_find_lsb;   break;
 +   case GLSLstd450FindSMsb:   op = nir_op_ifind_msb;  break;
 +   case GLSLstd450FindUMsb:   op = nir_op_ufind_msb;  break;
 +
 +   case GLSLstd450Asin:
 +      val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
 +      return;
 +
 +   case GLSLstd450Acos:
 +      val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f),
 +                               build_asin(nb, src[0], 0.08132463, -0.02363318));
 +      return;
 +
 +   case GLSLstd450Atan:
 +      val->ssa->def = build_atan(nb, src[0]);
 +      return;
 +
 +   case GLSLstd450Atan2:
 +      val->ssa->def = build_atan2(nb, src[0], src[1]);
 +      return;
 +
 +   case GLSLstd450Frexp: {
 +      nir_ssa_def *exponent;
 +      val->ssa->def = build_frexp(nb, src[0], &exponent);
 +      nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
 +      return;
 +   }
 +
 +   case GLSLstd450FrexpStruct: {
 +      assert(glsl_type_is_struct(val->ssa->type));
 +      val->ssa->elems[0]->def = build_frexp(nb, src[0],
 +                                            &val->ssa->elems[1]->def);
 +      return;
 +   }
 +
 +   case GLSLstd450PackDouble2x32:
 +   case GLSLstd450UnpackDouble2x32:
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +
 +   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
 +   nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
++                     glsl_get_vector_elements(val->ssa->type),
++                     glsl_get_bit_size(glsl_get_base_type(val->ssa->type)),
++                     val->name);
 +   instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
 +   val->ssa->def = &instr->dest.dest.ssa;
 +
 +   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
 +      instr->src[i].src = nir_src_for_ssa(src[i]);
 +
 +   nir_builder_instr_insert(nb, &instr->instr);
 +}
 +
 +bool
 +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
 +                               const uint32_t *w, unsigned count)
 +{
 +   switch ((enum GLSLstd450)ext_opcode) {
 +   case GLSLstd450Determinant: {
 +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +      val->ssa = rzalloc(b, struct vtn_ssa_value);
 +      val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
 +      val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
 +      break;
 +   }
 +
 +   case GLSLstd450MatrixInverse: {
 +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +      val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
 +      break;
 +   }
 +
 +   case GLSLstd450InterpolateAtCentroid:
 +   case GLSLstd450InterpolateAtSample:
 +   case GLSLstd450InterpolateAtOffset:
 +      unreachable("Unhandled opcode");
 +
 +   default:
 +      handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
 +   }
 +
 +   return true;
 +}
index 31bf416,0000000..3cbac1e
mode 100644,000000..100644
--- /dev/null
@@@ -1,1412 -1,0 +1,1415 @@@
-                            intrin->num_components, NULL);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + *    Jason Ekstrand (jason@jlekstrand.net)
 + *
 + */
 +
 +#include "vtn_private.h"
 +
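 +/* Returns a copy of @old with room for @new_ids additional links at the end;
 + * the new links are left for the caller to fill in.
 + */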
 +static struct vtn_access_chain *
 +vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old,
 +                        unsigned new_ids)
 +{
 +   struct vtn_access_chain *chain;
 +
 +   unsigned new_len = old->length + new_ids;
 +   chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0]));
 +
 +   chain->var = old->var;
 +   chain->length = new_len;
 +
 +   for (unsigned i = 0; i < old->length; i++)
 +      chain->link[i] = old->link[i];
 +
 +   return chain;
 +}
 +
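 +/* Converts a single access-chain link into an SSA offset scaled by @stride,
 + * turning literal links into immediate constants.
 + */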
 +static nir_ssa_def *
 +vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
 +                       unsigned stride)
 +{
 +   assert(stride > 0);
 +   if (link.mode == vtn_access_mode_literal) {
 +      return nir_imm_int(&b->nb, link.id * stride);
 +   } else if (stride == 1) {
 +      return vtn_ssa_value(b, link.id)->def;
 +   } else {
 +      return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def,
 +                              nir_imm_int(&b->nb, stride));
 +   }
 +}
 +
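 +/* Walks the entire access chain and returns the vtn_type it finally selects. */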
 +static struct vtn_type *
 +vtn_access_chain_tail_type(struct vtn_builder *b,
 +                           struct vtn_access_chain *chain)
 +{
 +   struct vtn_type *type = chain->var->type;
 +   for (unsigned i = 0; i < chain->length; i++) {
 +      if (glsl_type_is_struct(type->type)) {
 +         assert(chain->link[i].mode == vtn_access_mode_literal);
 +         type = type->members[chain->link[i].id];
 +      } else {
 +         type = type->array_element;
 +      }
 +   }
 +   return type;
 +}
 +
 +/* Crawls a chain of array derefs and rewrites the types so that the
 + * lengths stay the same but the terminal type is the given type.  This is
 + * useful for split structures.
 + */
 +static void
 +rewrite_deref_types(nir_deref *deref, const struct glsl_type *type)
 +{
 +   deref->type = type;
 +   if (deref->child) {
 +      assert(deref->child->deref_type == nir_deref_type_array);
 +      assert(glsl_type_is_array(deref->type));
 +      rewrite_deref_types(deref->child, glsl_get_array_element(type));
 +   }
 +}
 +
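 +/* Builds a NIR deref chain for the given access chain, handling both ordinary
 + * variables and variables that were split into per-member nir_variables.
 + */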
 +nir_deref_var *
 +vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
 +{
 +   nir_deref_var *deref_var;
 +   if (chain->var->var) {
 +      deref_var = nir_deref_var_create(b, chain->var->var);
 +   } else {
 +      assert(chain->var->members);
 +      /* Create the deref_var manually.  It will get filled out later. */
 +      deref_var = rzalloc(b, nir_deref_var);
 +      deref_var->deref.deref_type = nir_deref_type_var;
 +   }
 +
 +   struct vtn_type *deref_type = chain->var->type;
 +   nir_deref *tail = &deref_var->deref;
 +   nir_variable **members = chain->var->members;
 +
 +   for (unsigned i = 0; i < chain->length; i++) {
 +      enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
 +      switch (base_type) {
 +      case GLSL_TYPE_UINT:
 +      case GLSL_TYPE_INT:
 +      case GLSL_TYPE_FLOAT:
 +      case GLSL_TYPE_DOUBLE:
 +      case GLSL_TYPE_BOOL:
 +      case GLSL_TYPE_ARRAY: {
 +         deref_type = deref_type->array_element;
 +
 +         nir_deref_array *deref_arr = nir_deref_array_create(b);
 +         deref_arr->deref.type = deref_type->type;
 +
 +         if (chain->link[i].mode == vtn_access_mode_literal) {
 +            deref_arr->deref_array_type = nir_deref_array_type_direct;
 +            deref_arr->base_offset = chain->link[i].id;
 +         } else {
 +            assert(chain->link[i].mode == vtn_access_mode_id);
 +            deref_arr->deref_array_type = nir_deref_array_type_indirect;
 +            deref_arr->base_offset = 0;
 +            deref_arr->indirect =
 +               nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def);
 +         }
 +         tail->child = &deref_arr->deref;
 +         tail = tail->child;
 +         break;
 +      }
 +
 +      case GLSL_TYPE_STRUCT: {
 +         assert(chain->link[i].mode == vtn_access_mode_literal);
 +         unsigned idx = chain->link[i].id;
 +         deref_type = deref_type->members[idx];
 +         if (members) {
 +            /* This is a pre-split structure. */
 +            deref_var->var = members[idx];
 +            rewrite_deref_types(&deref_var->deref, members[idx]->type);
 +            assert(tail->type == deref_type->type);
 +            members = NULL;
 +         } else {
 +            nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx);
 +            deref_struct->deref.type = deref_type->type;
 +            tail->child = &deref_struct->deref;
 +            tail = tail->child;
 +         }
 +         break;
 +      }
 +      default:
 +         unreachable("Invalid type for deref");
 +      }
 +   }
 +
 +   assert(members == NULL);
 +   return deref_var;
 +}
 +
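 +/* Recursively loads or stores @inout through the deref chain, emitting one
 + * load_var/store_var intrinsic per vector or scalar.
 + */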
 +static void
 +_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
 +                      nir_deref *tail, struct vtn_ssa_value *inout)
 +{
 +   /* The deref tail may contain a deref to select a component of a vector (in
 +    * other words, it might not be an actual tail) so we have to save it away
 +    * here since we overwrite it later.
 +    */
 +   nir_deref *old_child = tail->child;
 +
 +   if (glsl_type_is_vector_or_scalar(tail->type)) {
 +      /* Terminate the deref chain in case there is one more link to pick
 +       * off a component of the vector.
 +       */
 +      tail->child = NULL;
 +
 +      nir_intrinsic_op op = load ? nir_intrinsic_load_var :
 +                                   nir_intrinsic_store_var;
 +
 +      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
 +      intrin->variables[0] =
 +         nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
 +      intrin->num_components = glsl_get_vector_elements(tail->type);
 +
 +      if (load) {
 +         nir_ssa_dest_init(&intrin->instr, &intrin->dest,
-    nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++                           intrin->num_components,
++                           glsl_get_bit_size(glsl_get_base_type(tail->type)),
++                           NULL);
 +         inout->def = &intrin->dest.ssa;
 +      } else {
 +         nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
 +         intrin->src[0] = nir_src_for_ssa(inout->def);
 +      }
 +
 +      nir_builder_instr_insert(&b->nb, &intrin->instr);
 +   } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
 +              glsl_type_is_matrix(tail->type)) {
 +      unsigned elems = glsl_get_length(tail->type);
 +      nir_deref_array *deref_arr = nir_deref_array_create(b);
 +      deref_arr->deref_array_type = nir_deref_array_type_direct;
 +      deref_arr->deref.type = glsl_get_array_element(tail->type);
 +      tail->child = &deref_arr->deref;
 +      for (unsigned i = 0; i < elems; i++) {
 +         deref_arr->base_offset = i;
 +         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
 +      }
 +   } else {
 +      assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
 +      unsigned elems = glsl_get_length(tail->type);
 +      nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
 +      tail->child = &deref_struct->deref;
 +      for (unsigned i = 0; i < elems; i++) {
 +         deref_struct->index = i;
 +         deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
 +         _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
 +      }
 +   }
 +
 +   tail->child = old_child;
 +}
 +
 +nir_deref_var *
 +vtn_nir_deref(struct vtn_builder *b, uint32_t id)
 +{
 +   struct vtn_access_chain *chain =
 +      vtn_value(b, id, vtn_value_type_access_chain)->access_chain;
 +
 +   return vtn_access_chain_to_deref(b, chain);
 +}
 +
 +/*
 + * Gets the NIR-level deref tail.  The tail may still have an array-deref
 + * child that selects a single component, since OpAccessChain supports
 + * per-component indexing in SPIR-V.
 + */
 +static nir_deref *
 +get_deref_tail(nir_deref_var *deref)
 +{
 +   nir_deref *cur = &deref->deref;
 +   while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child)
 +      cur = cur->child;
 +
 +   return cur;
 +}
 +
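 +/* Loads the value behind a deref, including the case where the deref tail
 + * picks out a single component of a vector.
 + */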
 +struct vtn_ssa_value *
 +vtn_local_load(struct vtn_builder *b, nir_deref_var *src)
 +{
 +   nir_deref *src_tail = get_deref_tail(src);
 +   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type);
 +   _vtn_local_load_store(b, true, src, src_tail, val);
 +
 +   if (src_tail->child) {
 +      nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child);
 +      assert(vec_deref->deref.child == NULL);
 +      val->type = vec_deref->deref.type;
 +      if (vec_deref->deref_array_type == nir_deref_array_type_direct)
 +         val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset);
 +      else
 +         val->def = vtn_vector_extract_dynamic(b, val->def,
 +                                               vec_deref->indirect.ssa);
 +   }
 +
 +   return val;
 +}
 +
 +void
 +vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
 +                nir_deref_var *dest)
 +{
 +   nir_deref *dest_tail = get_deref_tail(dest);
 +
 +   if (dest_tail->child) {
 +      struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
 +      _vtn_local_load_store(b, true, dest, dest_tail, val);
 +      nir_deref_array *deref = nir_deref_as_array(dest_tail->child);
 +      assert(deref->deref.child == NULL);
 +      if (deref->deref_array_type == nir_deref_array_type_direct)
 +         val->def = vtn_vector_insert(b, val->def, src->def,
 +                                      deref->base_offset);
 +      else
 +         val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
 +                                              deref->indirect.ssa);
 +      _vtn_local_load_store(b, false, dest, dest_tail, val);
 +   } else {
 +      _vtn_local_load_store(b, false, dest, dest_tail, src);
 +   }
 +}
 +
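 +/* Emits a vulkan_resource_index intrinsic for the descriptor backing the
 + * chain's variable (consuming the first link as the array index when the
 + * binding is arrayed) and returns its result; push constants have no binding,
 + * so NULL is returned instead.
 + */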
 +static nir_ssa_def *
 +get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
 +                          struct vtn_type **type, unsigned *chain_idx)
 +{
 +   /* Push constants have no explicit binding */
 +   if (chain->var->mode == vtn_variable_mode_push_constant) {
 +      *chain_idx = 0;
 +      *type = chain->var->type;
 +      return NULL;
 +   }
 +
 +   nir_ssa_def *array_index;
 +   if (glsl_type_is_array(chain->var->type->type)) {
 +      assert(chain->length > 0);
 +      array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
 +      *chain_idx = 1;
 +      *type = chain->var->type->array_element;
 +   } else {
 +      array_index = nir_imm_int(&b->nb, 0);
 +      *chain_idx = 0;
 +      *type = chain->var->type;
 +   }
 +
 +   nir_intrinsic_instr *instr =
 +      nir_intrinsic_instr_create(b->nb.shader,
 +                                 nir_intrinsic_vulkan_resource_index);
 +   instr->src[0] = nir_src_for_ssa(array_index);
 +   nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
 +   nir_intrinsic_set_binding(instr, chain->var->binding);
 +
-                         instr->num_components, NULL);
++   nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
 +   nir_builder_instr_insert(&b->nb, &instr->instr);
 +
 +   return &instr->dest.ssa;
 +}
 +
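 +/* Converts an access chain into a byte offset (and, via index_out, a resource
 + * index) using the strides and member offsets recorded in the vtn_type.  With
 + * stop_at_matrix set, the walk stops at the first matrix, vector, or scalar
 + * and the index of the first unconsumed link is returned in end_idx_out.
 + */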
 +nir_ssa_def *
 +vtn_access_chain_to_offset(struct vtn_builder *b,
 +                           struct vtn_access_chain *chain,
 +                           nir_ssa_def **index_out, struct vtn_type **type_out,
 +                           unsigned *end_idx_out, bool stop_at_matrix)
 +{
 +   unsigned idx = 0;
 +   struct vtn_type *type;
 +   *index_out = get_vulkan_resource_index(b, chain, &type, &idx);
 +
 +   nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
 +   for (; idx < chain->length; idx++) {
 +      enum glsl_base_type base_type = glsl_get_base_type(type->type);
 +      switch (base_type) {
 +      case GLSL_TYPE_UINT:
 +      case GLSL_TYPE_INT:
 +      case GLSL_TYPE_FLOAT:
 +      case GLSL_TYPE_DOUBLE:
 +      case GLSL_TYPE_BOOL:
 +         /* Some users may not want matrix or vector derefs */
 +         if (stop_at_matrix)
 +            goto end;
 +         /* Fall through */
 +
 +      case GLSL_TYPE_ARRAY:
 +         offset = nir_iadd(&b->nb, offset,
 +                           vtn_access_link_as_ssa(b, chain->link[idx],
 +                                                  type->stride));
 +
 +         type = type->array_element;
 +         break;
 +
 +      case GLSL_TYPE_STRUCT: {
 +         assert(chain->link[idx].mode == vtn_access_mode_literal);
 +         unsigned member = chain->link[idx].id;
 +         offset = nir_iadd(&b->nb, offset,
 +                           nir_imm_int(&b->nb, type->offsets[member]));
 +         type = type->members[member];
 +         break;
 +      }
 +
 +      default:
 +         unreachable("Invalid type for deref");
 +      }
 +   }
 +
 +end:
 +   *type_out = type;
 +   if (end_idx_out)
 +      *end_idx_out = idx;
 +
 +   return offset;
 +}
 +
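 +/* Emits a single block load or store intrinsic for one vector or scalar at
 + * the given index and offset, converting booleans on load.
 + */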
 +static void
 +_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
 +                     nir_ssa_def *index, nir_ssa_def *offset,
 +                     struct vtn_ssa_value **inout, const struct glsl_type *type)
 +{
 +   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
 +   instr->num_components = glsl_get_vector_elements(type);
 +
 +   int src = 0;
 +   if (!load) {
 +      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
 +      instr->src[src++] = nir_src_for_ssa((*inout)->def);
 +   }
 +
 +   /* We set the base and size for push constant load to the entire push
 +    * constant block for now.
 +    */
 +   if (op == nir_intrinsic_load_push_constant) {
 +      nir_intrinsic_set_base(instr, 0);
 +      nir_intrinsic_set_range(instr, 128);
 +   }
 +
 +   if (index)
 +      instr->src[src++] = nir_src_for_ssa(index);
 +
 +   instr->src[src++] = nir_src_for_ssa(offset);
 +
 +   if (load) {
 +      nir_ssa_dest_init(&instr->instr, &instr->dest,
-       nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++                        instr->num_components,
++                        glsl_get_bit_size(glsl_get_base_type(type)), NULL);
 +      (*inout)->def = &instr->dest.ssa;
 +   }
 +
 +   nir_builder_instr_insert(&b->nb, &instr->instr);
 +
 +   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
 +      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
 +}
 +
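 +/* Recursively loads or stores block (UBO/SSBO/push-constant) data,
 + * decomposing matrices, arrays, and structs into per-vector accesses and
 + * giving row-major matrices special treatment.
 + */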
 +static void
 +_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
 +                      nir_ssa_def *index, nir_ssa_def *offset,
 +                      struct vtn_access_chain *chain, unsigned chain_idx,
 +                      struct vtn_type *type, struct vtn_ssa_value **inout)
 +{
 +   if (chain && chain_idx >= chain->length)
 +      chain = NULL;
 +
 +   if (load && chain == NULL && *inout == NULL)
 +      *inout = vtn_create_ssa_value(b, type->type);
 +
 +   enum glsl_base_type base_type = glsl_get_base_type(type->type);
 +   switch (base_type) {
 +   case GLSL_TYPE_UINT:
 +   case GLSL_TYPE_INT:
 +   case GLSL_TYPE_FLOAT:
 +   case GLSL_TYPE_BOOL:
 +      /* This is where things get interesting.  At this point, we've hit
 +       * a vector, a scalar, or a matrix.
 +       */
 +      if (glsl_type_is_matrix(type->type)) {
 +         if (chain == NULL) {
 +            /* Loading the whole matrix */
 +            struct vtn_ssa_value *transpose;
 +            unsigned num_ops, vec_width;
 +            if (type->row_major) {
 +               num_ops = glsl_get_vector_elements(type->type);
 +               vec_width = glsl_get_matrix_columns(type->type);
 +               if (load) {
 +                  const struct glsl_type *transpose_type =
 +                     glsl_matrix_type(base_type, vec_width, num_ops);
 +                  *inout = vtn_create_ssa_value(b, transpose_type);
 +               } else {
 +                  transpose = vtn_ssa_transpose(b, *inout);
 +                  inout = &transpose;
 +               }
 +            } else {
 +               num_ops = glsl_get_matrix_columns(type->type);
 +               vec_width = glsl_get_vector_elements(type->type);
 +            }
 +
 +            for (unsigned i = 0; i < num_ops; i++) {
 +               nir_ssa_def *elem_offset =
 +                  nir_iadd(&b->nb, offset,
 +                           nir_imm_int(&b->nb, i * type->stride));
 +               _vtn_load_store_tail(b, op, load, index, elem_offset,
 +                                    &(*inout)->elems[i],
 +                                    glsl_vector_type(base_type, vec_width));
 +            }
 +
 +            if (load && type->row_major)
 +               *inout = vtn_ssa_transpose(b, *inout);
 +         } else if (type->row_major) {
 +            /* Row-major but with an access chain. */
 +            nir_ssa_def *col_offset =
 +               vtn_access_link_as_ssa(b, chain->link[chain_idx],
 +                                      type->array_element->stride);
 +            offset = nir_iadd(&b->nb, offset, col_offset);
 +
 +            if (chain_idx + 1 < chain->length) {
 +               /* Picking off a single element */
 +               nir_ssa_def *row_offset =
 +                  vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
 +                                         type->stride);
 +               offset = nir_iadd(&b->nb, offset, row_offset);
 +               if (load)
 +                  *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
 +               _vtn_load_store_tail(b, op, load, index, offset, inout,
 +                                    glsl_scalar_type(base_type));
 +            } else {
 +               /* Grabbing a column; picking one element off each row */
 +               unsigned num_comps = glsl_get_vector_elements(type->type);
 +               const struct glsl_type *column_type =
 +                  glsl_get_column_type(type->type);
 +
 +               nir_ssa_def *comps[4];
 +               for (unsigned i = 0; i < num_comps; i++) {
 +                  nir_ssa_def *elem_offset =
 +                     nir_iadd(&b->nb, offset,
 +                              nir_imm_int(&b->nb, i * type->stride));
 +
 +                  struct vtn_ssa_value *comp, temp_val;
 +                  if (!load) {
 +                     temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
 +                     temp_val.type = glsl_scalar_type(base_type);
 +                  }
 +                  comp = &temp_val;
 +                  _vtn_load_store_tail(b, op, load, index, elem_offset,
 +                                       &comp, glsl_scalar_type(base_type));
 +                  comps[i] = comp->def;
 +               }
 +
 +               if (load) {
 +                  if (*inout == NULL)
 +                     *inout = vtn_create_ssa_value(b, column_type);
 +
 +                  (*inout)->def = nir_vec(&b->nb, comps, num_comps);
 +               }
 +            }
 +         } else {
 +            /* Column-major with a deref. Fall through to array case. */
 +            nir_ssa_def *col_offset =
 +               vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
 +            offset = nir_iadd(&b->nb, offset, col_offset);
 +
 +            _vtn_block_load_store(b, op, load, index, offset,
 +                                  chain, chain_idx + 1,
 +                                  type->array_element, inout);
 +         }
 +      } else if (chain == NULL) {
 +         /* Single whole vector */
 +         assert(glsl_type_is_vector_or_scalar(type->type));
 +         _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
 +      } else {
 +         /* Single component of a vector. Fall through to array case. */
 +         nir_ssa_def *elem_offset =
 +            vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
 +         offset = nir_iadd(&b->nb, offset, elem_offset);
 +
 +         _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
 +                               type->array_element, inout);
 +      }
 +      return;
 +
 +   case GLSL_TYPE_ARRAY: {
 +      unsigned elems = glsl_get_length(type->type);
 +      for (unsigned i = 0; i < elems; i++) {
 +         nir_ssa_def *elem_off =
 +            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
 +         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
 +                               type->array_element, &(*inout)->elems[i]);
 +      }
 +      return;
 +   }
 +
 +   case GLSL_TYPE_STRUCT: {
 +      unsigned elems = glsl_get_length(type->type);
 +      for (unsigned i = 0; i < elems; i++) {
 +         nir_ssa_def *elem_off =
 +            nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
 +         _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
 +                               type->members[i], &(*inout)->elems[i]);
 +      }
 +      return;
 +   }
 +
 +   default:
 +      unreachable("Invalid block member type");
 +   }
 +}
 +
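 +/* Loads whatever the access chain selects from a UBO, SSBO, or push-constant
 + * block.
 + */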
 +static struct vtn_ssa_value *
 +vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src)
 +{
 +   nir_intrinsic_op op;
 +   switch (src->var->mode) {
 +   case vtn_variable_mode_ubo:
 +      op = nir_intrinsic_load_ubo;
 +      break;
 +   case vtn_variable_mode_ssbo:
 +      op = nir_intrinsic_load_ssbo;
 +      break;
 +   case vtn_variable_mode_push_constant:
 +      op = nir_intrinsic_load_push_constant;
 +      break;
 +   default:
 +      assert(!"Invalid block variable mode");
 +   }
 +
 +   nir_ssa_def *offset, *index = NULL;
 +   struct vtn_type *type;
 +   unsigned chain_idx;
 +   offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true);
 +
 +   struct vtn_ssa_value *value = NULL;
 +   _vtn_block_load_store(b, op, true, index, offset,
 +                         src, chain_idx, type, &value);
 +   return value;
 +}
 +
 +static void
 +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
 +                struct vtn_access_chain *dst)
 +{
 +   nir_ssa_def *offset, *index = NULL;
 +   struct vtn_type *type;
 +   unsigned chain_idx;
 +   offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true);
 +
 +   _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset,
 +                         dst, chain_idx, type, &src);
 +}
 +
 +static bool
 +vtn_variable_is_external_block(struct vtn_variable *var)
 +{
 +   return var->mode == vtn_variable_mode_ssbo ||
 +          var->mode == vtn_variable_mode_ubo ||
 +          var->mode == vtn_variable_mode_push_constant;
 +}
 +
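 +/* Recursively loads or stores a non-block variable, descending through arrays
 + * and structs until it reaches something vtn_local_load/vtn_local_store can
 + * handle directly.
 + */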
 +static void
 +_vtn_variable_load_store(struct vtn_builder *b, bool load,
 +                         struct vtn_access_chain *chain,
 +                         struct vtn_type *tail_type,
 +                         struct vtn_ssa_value **inout)
 +{
 +   enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
 +   switch (base_type) {
 +   case GLSL_TYPE_UINT:
 +   case GLSL_TYPE_INT:
 +   case GLSL_TYPE_FLOAT:
 +   case GLSL_TYPE_BOOL:
 +      /* At this point, we have a scalar, vector, or matrix so we know that
 +       * there cannot be any structure splitting still in the way.  By
 +       * stopping at the matrix level rather than the vector level, we
 +       * ensure that matrices get loaded in the optimal way even if they
 +       * are stored row-major in a UBO.
 +       */
 +      if (load) {
 +         *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain));
 +      } else {
 +         vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain));
 +      }
 +      return;
 +
 +   case GLSL_TYPE_ARRAY:
 +   case GLSL_TYPE_STRUCT: {
 +      struct vtn_access_chain *new_chain =
 +         vtn_access_chain_extend(b, chain, 1);
 +      new_chain->link[chain->length].mode = vtn_access_mode_literal;
 +      unsigned elems = glsl_get_length(tail_type->type);
 +      if (load) {
 +         assert(*inout == NULL);
 +         *inout = rzalloc(b, struct vtn_ssa_value);
 +         (*inout)->type = tail_type->type;
 +         (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems);
 +      }
 +      for (unsigned i = 0; i < elems; i++) {
 +         new_chain->link[chain->length].id = i;
 +         struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
 +            tail_type->array_element : tail_type->members[i];
 +         _vtn_variable_load_store(b, load, new_chain, elem_type,
 +                                  &(*inout)->elems[i]);
 +      }
 +      return;
 +   }
 +
 +   default:
 +      unreachable("Invalid access chain type");
 +   }
 +}
 +
 +struct vtn_ssa_value *
 +vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src)
 +{
 +   if (vtn_variable_is_external_block(src->var)) {
 +      return vtn_block_load(b, src);
 +   } else {
 +      struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
 +      struct vtn_ssa_value *val = NULL;
 +      _vtn_variable_load_store(b, true, src, tail_type, &val);
 +      return val;
 +   }
 +}
 +
 +void
 +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
 +                   struct vtn_access_chain *dest)
 +{
 +   if (vtn_variable_is_external_block(dest->var)) {
 +      assert(dest->var->mode == vtn_variable_mode_ssbo);
 +      vtn_block_store(b, src, dest);
 +   } else {
 +      struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest);
 +      _vtn_variable_load_store(b, false, dest, tail_type, &src);
 +   }
 +}
 +
 +static void
 +_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
 +                   struct vtn_access_chain *src, struct vtn_type *tail_type)
 +{
 +   enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
 +   switch (base_type) {
 +   case GLSL_TYPE_UINT:
 +   case GLSL_TYPE_INT:
 +   case GLSL_TYPE_FLOAT:
 +   case GLSL_TYPE_BOOL:
 +      /* At this point, we have a scalar, vector, or matrix so we know that
 +       * there cannot be any structure splitting still in the way.  By
 +       * stopping at the matrix level rather than the vector level, we
 +       * ensure that matrices get loaded in the optimal way even if they
 +       * are stored row-major in a UBO.
 +       */
 +      vtn_variable_store(b, vtn_variable_load(b, src), dest);
 +      return;
 +
 +   case GLSL_TYPE_ARRAY:
 +   case GLSL_TYPE_STRUCT: {
 +      struct vtn_access_chain *new_src, *new_dest;
 +      new_src = vtn_access_chain_extend(b, src, 1);
 +      new_dest = vtn_access_chain_extend(b, dest, 1);
 +      new_src->link[src->length].mode = vtn_access_mode_literal;
 +      new_dest->link[dest->length].mode = vtn_access_mode_literal;
 +      unsigned elems = glsl_get_length(tail_type->type);
 +      for (unsigned i = 0; i < elems; i++) {
 +         new_src->link[src->length].id = i;
 +         new_dest->link[dest->length].id = i;
 +         struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
 +            tail_type->array_element : tail_type->members[i];
 +         _vtn_variable_copy(b, new_dest, new_src, elem_type);
 +      }
 +      return;
 +   }
 +
 +   default:
 +      unreachable("Invalid access chain type");
 +   }
 +}
 +
 +static void
 +vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
 +                  struct vtn_access_chain *src)
 +{
 +   struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
 +   assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type);
 +
 +   /* TODO: At some point, we should add a special-case for when we can
 +    * just emit a copy_var intrinsic.
 +    */
 +   _vtn_variable_copy(b, dest, src, tail_type);
 +}
 +
 +static void
 +set_mode_system_value(nir_variable_mode *mode)
 +{
 +   assert(*mode == nir_var_system_value || *mode == nir_var_shader_in);
 +   *mode = nir_var_system_value;
 +}
 +
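 +/* Translates a SPIR-V builtin into a Mesa varying slot, fragment result, or
 + * system value, adjusting the variable mode where required.
 + */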
 +static void
 +vtn_get_builtin_location(struct vtn_builder *b,
 +                         SpvBuiltIn builtin, int *location,
 +                         nir_variable_mode *mode)
 +{
 +   switch (builtin) {
 +   case SpvBuiltInPosition:
 +      *location = VARYING_SLOT_POS;
 +      break;
 +   case SpvBuiltInPointSize:
 +      *location = VARYING_SLOT_PSIZ;
 +      break;
 +   case SpvBuiltInClipDistance:
 +      *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */
 +      break;
 +   case SpvBuiltInCullDistance:
 +      /* XXX figure this out */
 +      break;
 +   case SpvBuiltInVertexIndex:
 +      *location = SYSTEM_VALUE_VERTEX_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInVertexId:
 +      /* Vulkan defines VertexID to be zero-based and reserves the new
 +       * builtin keyword VertexIndex to indicate the non-zero-based value.
 +       */
 +      *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInInstanceIndex:
 +      *location = SYSTEM_VALUE_INSTANCE_INDEX;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInInstanceId:
 +      *location = SYSTEM_VALUE_INSTANCE_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInPrimitiveId:
 +      *location = VARYING_SLOT_PRIMITIVE_ID;
 +      *mode = nir_var_shader_out;
 +      break;
 +   case SpvBuiltInInvocationId:
 +      *location = SYSTEM_VALUE_INVOCATION_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInLayer:
 +      *location = VARYING_SLOT_LAYER;
 +      *mode = nir_var_shader_out;
 +      break;
 +   case SpvBuiltInViewportIndex:
 +      *location = VARYING_SLOT_VIEWPORT;
 +      if (b->shader->stage == MESA_SHADER_GEOMETRY)
 +         *mode = nir_var_shader_out;
 +      else if (b->shader->stage == MESA_SHADER_FRAGMENT)
 +         *mode = nir_var_shader_in;
 +      else
 +         unreachable("invalid stage for SpvBuiltInViewportIndex");
 +      break;
 +   case SpvBuiltInTessLevelOuter:
 +   case SpvBuiltInTessLevelInner:
 +   case SpvBuiltInTessCoord:
 +   case SpvBuiltInPatchVertices:
 +      unreachable("no tessellation support");
 +   case SpvBuiltInFragCoord:
 +      *location = VARYING_SLOT_POS;
 +      assert(*mode == nir_var_shader_in);
 +      break;
 +   case SpvBuiltInPointCoord:
 +      *location = VARYING_SLOT_PNTC;
 +      assert(*mode == nir_var_shader_in);
 +      break;
 +   case SpvBuiltInFrontFacing:
 +      *location = VARYING_SLOT_FACE;
 +      assert(*mode == nir_var_shader_in);
 +      break;
 +   case SpvBuiltInSampleId:
 +      *location = SYSTEM_VALUE_SAMPLE_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInSamplePosition:
 +      *location = SYSTEM_VALUE_SAMPLE_POS;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInSampleMask:
 +      *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInFragDepth:
 +      *location = FRAG_RESULT_DEPTH;
 +      assert(*mode == nir_var_shader_out);
 +      break;
 +   case SpvBuiltInNumWorkgroups:
 +      *location = SYSTEM_VALUE_NUM_WORK_GROUPS;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInWorkgroupSize:
 +      /* This should already be handled */
 +      unreachable("unsupported builtin");
 +      break;
 +   case SpvBuiltInWorkgroupId:
 +      *location = SYSTEM_VALUE_WORK_GROUP_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInLocalInvocationId:
 +      *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInLocalInvocationIndex:
 +      *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInGlobalInvocationId:
 +      *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
 +      set_mode_system_value(mode);
 +      break;
 +   case SpvBuiltInHelperInvocation:
 +   default:
 +      unreachable("unsupported builtin");
 +   }
 +}
 +
 +static void
 +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
 +                  const struct vtn_decoration *dec, void *void_var)
 +{
 +   struct vtn_variable *vtn_var = void_var;
 +
 +   /* Handle decorations that apply to a vtn_variable as a whole */
 +   switch (dec->decoration) {
 +   case SpvDecorationBinding:
 +      vtn_var->binding = dec->literals[0];
 +      return;
 +   case SpvDecorationDescriptorSet:
 +      vtn_var->descriptor_set = dec->literals[0];
 +      return;
 +
 +   case SpvDecorationLocation: {
 +      unsigned location = dec->literals[0];
 +      bool is_vertex_input;
 +      if (b->shader->stage == MESA_SHADER_FRAGMENT &&
 +          vtn_var->mode == vtn_variable_mode_output) {
 +         is_vertex_input = false;
 +         location += FRAG_RESULT_DATA0;
 +      } else if (b->shader->stage == MESA_SHADER_VERTEX &&
 +                 vtn_var->mode == vtn_variable_mode_input) {
 +         is_vertex_input = true;
 +         location += VERT_ATTRIB_GENERIC0;
 +      } else if (vtn_var->mode == vtn_variable_mode_input ||
 +                 vtn_var->mode == vtn_variable_mode_output) {
 +         is_vertex_input = false;
 +         location += VARYING_SLOT_VAR0;
 +      } else {
 +         assert(!"Location must be on input or output variable");
 +      }
 +
 +      if (vtn_var->var) {
 +         vtn_var->var->data.location = location;
 +         vtn_var->var->data.explicit_location = true;
 +      } else {
 +         assert(vtn_var->members);
 +         unsigned length = glsl_get_length(vtn_var->type->type);
 +         for (unsigned i = 0; i < length; i++) {
 +            vtn_var->members[i]->data.location = location;
 +            vtn_var->members[i]->data.explicit_location = true;
 +            location +=
 +               glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
 +                                          is_vertex_input);
 +         }
 +      }
 +      return;
 +   }
 +
 +   default:
 +      break;
 +   }
 +
 +   /* Now we handle decorations that apply to a particular nir_variable */
 +   nir_variable *nir_var = vtn_var->var;
 +   if (val->value_type == vtn_value_type_access_chain) {
 +      assert(val->access_chain->length == 0);
 +      assert(val->access_chain->var == void_var);
 +      assert(member == -1);
 +   } else {
 +      assert(val->value_type == vtn_value_type_type);
 +      if (member != -1)
 +         nir_var = vtn_var->members[member];
 +   }
 +
 +   if (nir_var == NULL)
 +      return;
 +
 +   switch (dec->decoration) {
 +   case SpvDecorationRelaxedPrecision:
 +      break; /* FIXME: Do nothing with this for now. */
 +   case SpvDecorationNoPerspective:
 +      nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
 +      break;
 +   case SpvDecorationFlat:
 +      nir_var->data.interpolation = INTERP_QUALIFIER_FLAT;
 +      break;
 +   case SpvDecorationCentroid:
 +      nir_var->data.centroid = true;
 +      break;
 +   case SpvDecorationSample:
 +      nir_var->data.sample = true;
 +      break;
 +   case SpvDecorationInvariant:
 +      nir_var->data.invariant = true;
 +      break;
 +   case SpvDecorationConstant:
 +      assert(nir_var->constant_initializer != NULL);
 +      nir_var->data.read_only = true;
 +      break;
 +   case SpvDecorationNonWritable:
 +      nir_var->data.read_only = true;
 +      break;
 +   case SpvDecorationComponent:
 +      nir_var->data.location_frac = dec->literals[0];
 +      break;
 +   case SpvDecorationIndex:
 +      nir_var->data.explicit_index = true;
 +      nir_var->data.index = dec->literals[0];
 +      break;
 +   case SpvDecorationBuiltIn: {
 +      SpvBuiltIn builtin = dec->literals[0];
 +
 +      if (builtin == SpvBuiltInWorkgroupSize) {
 +         /* This shouldn't be a builtin.  It's actually a constant. */
 +         nir_var->data.mode = nir_var_global;
 +         nir_var->data.read_only = true;
 +
 +         nir_constant *c = rzalloc(nir_var, nir_constant);
 +         c->value.u[0] = b->shader->info.cs.local_size[0];
 +         c->value.u[1] = b->shader->info.cs.local_size[1];
 +         c->value.u[2] = b->shader->info.cs.local_size[2];
 +         nir_var->constant_initializer = c;
 +         break;
 +      }
 +
 +      nir_variable_mode mode = nir_var->data.mode;
 +      vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode);
 +      nir_var->data.explicit_location = true;
 +      nir_var->data.mode = mode;
 +
 +      if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition)
 +         nir_var->data.origin_upper_left = b->origin_upper_left;
 +      break;
 +   }
 +   case SpvDecorationRowMajor:
 +   case SpvDecorationColMajor:
 +   case SpvDecorationGLSLShared:
 +   case SpvDecorationPatch:
 +   case SpvDecorationRestrict:
 +   case SpvDecorationAliased:
 +   case SpvDecorationVolatile:
 +   case SpvDecorationCoherent:
 +   case SpvDecorationNonReadable:
 +   case SpvDecorationUniform:
 +      /* This is really nice but we have no use for it right now. */
 +   case SpvDecorationCPacked:
 +   case SpvDecorationSaturatedConversion:
 +   case SpvDecorationStream:
 +   case SpvDecorationOffset:
 +   case SpvDecorationXfbBuffer:
 +   case SpvDecorationFuncParamAttr:
 +   case SpvDecorationFPRoundingMode:
 +   case SpvDecorationFPFastMathMode:
 +   case SpvDecorationLinkageAttributes:
 +   case SpvDecorationSpecId:
 +      break;
 +   default:
 +      unreachable("Unhandled variable decoration");
 +   }
 +}
 +
 +/* Tries to compute the size of an interface block based on the strides and
 + * offsets that are provided to us in the SPIR-V source.
 + */
 +static unsigned
 +vtn_type_block_size(struct vtn_type *type)
 +{
 +   enum glsl_base_type base_type = glsl_get_base_type(type->type);
 +   switch (base_type) {
 +   case GLSL_TYPE_UINT:
 +   case GLSL_TYPE_INT:
 +   case GLSL_TYPE_FLOAT:
 +   case GLSL_TYPE_BOOL:
 +   case GLSL_TYPE_DOUBLE: {
 +      unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
 +                                        glsl_get_matrix_columns(type->type);
 +      if (cols > 1) {
 +         assert(type->stride > 0);
 +         return type->stride * cols;
 +      } else if (base_type == GLSL_TYPE_DOUBLE) {
 +         return glsl_get_vector_elements(type->type) * 8;
 +      } else {
 +         return glsl_get_vector_elements(type->type) * 4;
 +      }
 +   }
 +
 +   case GLSL_TYPE_STRUCT:
 +   case GLSL_TYPE_INTERFACE: {
 +      unsigned size = 0;
 +      unsigned num_fields = glsl_get_length(type->type);
 +      for (unsigned f = 0; f < num_fields; f++) {
 +         unsigned field_end = type->offsets[f] +
 +                              vtn_type_block_size(type->members[f]);
 +         size = MAX2(size, field_end);
 +      }
 +      return size;
 +   }
 +
 +   case GLSL_TYPE_ARRAY:
 +      assert(type->stride > 0);
 +      assert(glsl_get_length(type->type) > 0);
 +      return type->stride * glsl_get_length(type->type);
 +
 +   default:
 +      assert(!"Invalid block type");
 +      return 0;
 +   }
 +}
 +
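 +/* Top-level handler for the SPIR-V variable-related opcodes. */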
 +void
 +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
 +                     const uint32_t *w, unsigned count)
 +{
 +   switch (opcode) {
 +   case SpvOpVariable: {
 +      struct vtn_variable *var = rzalloc(b, struct vtn_variable);
 +      var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
 +
 +      var->chain.var = var;
 +      var->chain.length = 0;
 +
 +      struct vtn_value *val =
 +         vtn_push_value(b, w[2], vtn_value_type_access_chain);
 +      val->access_chain = &var->chain;
 +
 +      struct vtn_type *without_array = var->type;
 +      while(glsl_type_is_array(without_array->type))
 +         without_array = without_array->array_element;
 +
 +      nir_variable_mode nir_mode;
 +      switch ((SpvStorageClass)w[3]) {
 +      case SpvStorageClassUniform:
 +      case SpvStorageClassUniformConstant:
 +         if (without_array->block) {
 +            var->mode = vtn_variable_mode_ubo;
 +            b->shader->info.num_ubos++;
 +         } else if (without_array->buffer_block) {
 +            var->mode = vtn_variable_mode_ssbo;
 +            b->shader->info.num_ssbos++;
 +         } else if (glsl_type_is_image(without_array->type)) {
 +            var->mode = vtn_variable_mode_image;
 +            nir_mode = nir_var_uniform;
 +            b->shader->info.num_images++;
 +         } else if (glsl_type_is_sampler(without_array->type)) {
 +            var->mode = vtn_variable_mode_sampler;
 +            nir_mode = nir_var_uniform;
 +            b->shader->info.num_textures++;
 +         } else {
 +            assert(!"Invalid uniform variable type");
 +         }
 +         break;
 +      case SpvStorageClassPushConstant:
 +         var->mode = vtn_variable_mode_push_constant;
 +         assert(b->shader->num_uniforms == 0);
 +         b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
 +         break;
 +      case SpvStorageClassInput:
 +         var->mode = vtn_variable_mode_input;
 +         nir_mode = nir_var_shader_in;
 +         break;
 +      case SpvStorageClassOutput:
 +         var->mode = vtn_variable_mode_output;
 +         nir_mode = nir_var_shader_out;
 +         break;
 +      case SpvStorageClassPrivate:
 +         var->mode = vtn_variable_mode_global;
 +         nir_mode = nir_var_global;
 +         break;
 +      case SpvStorageClassFunction:
 +         var->mode = vtn_variable_mode_local;
 +         nir_mode = nir_var_local;
 +         break;
 +      case SpvStorageClassWorkgroup:
 +         var->mode = vtn_variable_mode_workgroup;
 +         nir_mode = nir_var_shared;
 +         break;
 +      case SpvStorageClassCrossWorkgroup:
 +      case SpvStorageClassGeneric:
 +      case SpvStorageClassAtomicCounter:
 +      default:
 +         unreachable("Unhandled variable storage class");
 +      }
 +
 +      switch (var->mode) {
 +      case vtn_variable_mode_local:
 +      case vtn_variable_mode_global:
 +      case vtn_variable_mode_image:
 +      case vtn_variable_mode_sampler:
 +      case vtn_variable_mode_workgroup:
 +         /* For these, we create the variable normally */
 +         var->var = rzalloc(b->shader, nir_variable);
 +         var->var->name = ralloc_strdup(var->var, val->name);
 +         var->var->type = var->type->type;
 +         var->var->data.mode = nir_mode;
 +
 +         switch (var->mode) {
 +         case vtn_variable_mode_image:
 +         case vtn_variable_mode_sampler:
 +            var->var->interface_type = without_array->type;
 +            break;
 +         default:
 +            var->var->interface_type = NULL;
 +            break;
 +         }
 +         break;
 +
 +      case vtn_variable_mode_input:
 +      case vtn_variable_mode_output: {
 +         /* For inputs and outputs, we immediately split structures.  This
 +          * is for a couple of reasons.  For one, builtins may all come in
 +          * a struct and we really want those split out into separate
 +          * variables.  For another, interpolation qualifiers can be
 +          * applied to members of the top-level struct and we need to be
 +          * able to preserve that information.
 +          */
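 +         /* For example, a gl_PerVertex output block (gl_Position,
 +          * gl_PointSize, gl_ClipDistance[], ...) arrives as a single
 +          * struct and is split into one nir_variable per member below.
 +          */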
 +
 +         int array_length = -1;
 +         struct vtn_type *interface_type = var->type;
 +         if (b->shader->stage == MESA_SHADER_GEOMETRY &&
 +             glsl_type_is_array(var->type->type)) {
 +            /* In Geometry shaders (and some tessellation), inputs come
 +             * in per-vertex arrays.  However, some builtins come in
 +             * non-per-vertex, hence the need for the is_array check.  In
 +             * any case, there are no non-builtin arrays allowed so this
 +             * check should be sufficient.
 +             */
 +            interface_type = var->type->array_element;
 +            array_length = glsl_get_length(var->type->type);
 +         }
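 +         /* e.g. a geometry shader with triangles_adjacency input sees
 +          * gl_in[6], so array_length == 6 and every split member below
 +          * becomes a 6-element array.
 +          */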
 +
 +         if (glsl_type_is_struct(interface_type->type)) {
 +            /* It's a struct.  Split it. */
 +            unsigned num_members = glsl_get_length(interface_type->type);
 +            var->members = ralloc_array(b, nir_variable *, num_members);
 +
 +            for (unsigned i = 0; i < num_members; i++) {
 +               const struct glsl_type *mtype = interface_type->members[i]->type;
 +               if (array_length >= 0)
 +                  mtype = glsl_array_type(mtype, array_length);
 +
 +               var->members[i] = rzalloc(b->shader, nir_variable);
 +               var->members[i]->name =
 +                  ralloc_asprintf(var->members[i], "%s.%d", val->name, i);
 +               var->members[i]->type = mtype;
 +               var->members[i]->interface_type =
 +                  interface_type->members[i]->type;
 +               var->members[i]->data.mode = nir_mode;
 +            }
 +         } else {
 +            var->var = rzalloc(b->shader, nir_variable);
 +            var->var->name = ralloc_strdup(var->var, val->name);
 +            var->var->type = var->type->type;
 +            var->var->interface_type = interface_type->type;
 +            var->var->data.mode = nir_mode;
 +         }
 +
 +         /* For inputs and outputs, we need to grab locations and builtin
 +          * information from the interface type.
 +          */
 +         vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var);
 +         break;
 +      }
 +
 +      case vtn_variable_mode_param:
 +         unreachable("Not created through OpVariable");
 +
 +      case vtn_variable_mode_ubo:
 +      case vtn_variable_mode_ssbo:
 +      case vtn_variable_mode_push_constant:
 +         /* These don't need actual variables. */
 +         break;
 +      }
 +
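 +      /* OpVariable may carry an optional constant initializer as its
 +       * fourth operand (w[4]), giving a total word count of 5.
 +       */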
 +      if (count > 4) {
 +         assert(count == 5);
 +         nir_constant *constant =
 +            vtn_value(b, w[4], vtn_value_type_constant)->constant;
 +         var->var->constant_initializer =
 +            nir_constant_clone(constant, var->var);
 +      }
 +
 +      vtn_foreach_decoration(b, val, var_decoration_cb, var);
 +
 +      if (var->mode == vtn_variable_mode_image ||
 +          var->mode == vtn_variable_mode_sampler) {
 +         /* XXX: We still need the binding information in the nir_variable
 +          * for these. We should fix that.
 +          */
 +         var->var->data.binding = var->binding;
 +         var->var->data.descriptor_set = var->descriptor_set;
 +
 +         if (var->mode == vtn_variable_mode_image)
 +            var->var->data.image.format = without_array->image_format;
 +      }
 +
 +      if (var->mode == vtn_variable_mode_local) {
 +         assert(var->members == NULL && var->var != NULL);
 +         nir_function_impl_add_variable(b->impl, var->var);
 +      } else if (var->var) {
 +         nir_shader_add_variable(b->shader, var->var);
 +      } else if (var->members) {
 +         unsigned count = glsl_get_length(without_array->type);
 +         for (unsigned i = 0; i < count; i++) {
 +            assert(var->members[i]->data.mode != nir_var_local);
 +            nir_shader_add_variable(b->shader, var->members[i]);
 +         }
 +      } else {
 +         assert(var->mode == vtn_variable_mode_ubo ||
 +                var->mode == vtn_variable_mode_ssbo ||
 +                var->mode == vtn_variable_mode_push_constant);
 +      }
 +      break;
 +   }
 +
 +   case SpvOpAccessChain:
 +   case SpvOpInBoundsAccessChain: {
 +      struct vtn_access_chain *base, *chain;
 +      struct vtn_value *base_val = vtn_untyped_value(b, w[3]);
 +      if (base_val->value_type == vtn_value_type_sampled_image) {
 +         /* This is rather insane.  SPIR-V allows you to use OpSampledImage
 +          * to combine an array of images with a single sampler to get an
 +          * array of sampled images that all share the same sampler.
 +          * Fortunately, this means that we can more-or-less ignore the
 +          * sampler when crawling the access chain, but it does leave us
 +          * with this rather awkward little special-case.
 +          */
 +         base = base_val->sampled_image->image;
 +      } else {
 +         assert(base_val->value_type == vtn_value_type_access_chain);
 +         base = base_val->access_chain;
 +      }
 +
 +      chain = vtn_access_chain_extend(b, base, count - 4);
 +
 +      unsigned idx = base->length;
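 +      /* Constant indices (which SPIR-V requires for struct members) are
 +       * stored as literals; any other index keeps its value id and is
 +       * resolved when the chain is lowered.
 +       */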
 +      for (int i = 4; i < count; i++) {
 +         struct vtn_value *link_val = vtn_untyped_value(b, w[i]);
 +         if (link_val->value_type == vtn_value_type_constant) {
 +            chain->link[idx].mode = vtn_access_mode_literal;
 +            chain->link[idx].id = link_val->constant->value.u[0];
 +         } else {
 +            chain->link[idx].mode = vtn_access_mode_id;
 +            chain->link[idx].id = w[i];
 +         }
 +         idx++;
 +      }
 +
 +      if (base_val->value_type == vtn_value_type_sampled_image) {
 +         struct vtn_value *val =
 +            vtn_push_value(b, w[2], vtn_value_type_sampled_image);
 +         val->sampled_image = ralloc(b, struct vtn_sampled_image);
 +         val->sampled_image->image = chain;
 +         val->sampled_image->sampler = base_val->sampled_image->sampler;
 +      } else {
 +         struct vtn_value *val =
 +            vtn_push_value(b, w[2], vtn_value_type_access_chain);
 +         val->access_chain = chain;
 +      }
 +      break;
 +   }
 +
 +   case SpvOpCopyMemory: {
 +      struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain);
 +      struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain);
 +
 +      vtn_variable_copy(b, dest->access_chain, src->access_chain);
 +      break;
 +   }
 +
 +   case SpvOpLoad: {
 +      struct vtn_access_chain *src =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +
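 +      /* Loads of images and samplers just propagate the access chain;
 +       * the descriptor itself is only resolved when the image or sampler
 +       * is actually used.
 +       */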
 +      if (src->var->mode == vtn_variable_mode_image ||
 +          src->var->mode == vtn_variable_mode_sampler) {
 +         vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src;
 +         return;
 +      }
 +
 +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +      val->ssa = vtn_variable_load(b, src);
 +      break;
 +   }
 +
 +   case SpvOpStore: {
 +      struct vtn_access_chain *dest =
 +         vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
 +      struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
 +      vtn_variable_store(b, src, dest);
 +      break;
 +   }
 +
 +   case SpvOpArrayLength: {
 +      struct vtn_access_chain *chain =
 +         vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
 +
 +      const uint32_t offset = chain->var->type->offsets[w[4]];
 +      const uint32_t stride = chain->var->type->members[w[4]]->stride;
 +
 +      unsigned chain_idx;
 +      struct vtn_type *type;
 +      nir_ssa_def *index =
 +         get_vulkan_resource_index(b, chain, &type, &chain_idx);
 +
 +      nir_intrinsic_instr *instr =
 +         nir_intrinsic_instr_create(b->nb.shader,
 +                                    nir_intrinsic_get_buffer_size);
 +      instr->src[0] = nir_src_for_ssa(index);
++      nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
 +      nir_builder_instr_insert(&b->nb, &instr->instr);
 +      nir_ssa_def *buf_size = &instr->dest.ssa;
 +
 +      /* array_length = max(buffer_size - offset, 0) / stride */
 +      nir_ssa_def *array_length =
 +         nir_idiv(&b->nb,
 +                  nir_imax(&b->nb,
 +                           nir_isub(&b->nb,
 +                                    buf_size,
 +                                    nir_imm_int(&b->nb, offset)),
 +                           nir_imm_int(&b->nb, 0u)),
 +                  nir_imm_int(&b->nb, stride));
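 +      /* e.g. with buf_size = 256, offset = 16 and stride = 16 this gives
 +       * max(256 - 16, 0) / 16 = 15 elements in the runtime array.
 +       */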
 +
 +      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
 +      val->ssa = vtn_create_ssa_value(b, glsl_uint_type());
 +      val->ssa->def = array_length;
 +      break;
 +   }
 +
 +   case SpvOpCopyMemorySized:
 +   default:
 +      unreachable("Unhandled opcode");
 +   }
 +}
Simple merge
index e23b697,0000000..218499a
mode 100644,000000..100644
--- /dev/null
@@@ -1,748 -1,0 +1,748 @@@
-    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include "anv_meta.h"
 +#include "nir/nir_builder.h"
 +
 +struct blit_region {
 +   VkOffset3D src_offset;
 +   VkExtent3D src_extent;
 +   VkOffset3D dest_offset;
 +   VkExtent3D dest_extent;
 +};
 +
 +static nir_shader *
 +build_nir_vertex_shader(void)
 +{
 +   const struct glsl_type *vec4 = glsl_vec4_type();
 +   nir_builder b;
 +
 +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
 +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
 +
 +   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 +                                              vec4, "a_pos");
 +   pos_in->data.location = VERT_ATTRIB_GENERIC0;
 +   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 +                                               vec4, "gl_Position");
 +   pos_out->data.location = VARYING_SLOT_POS;
 +   nir_copy_var(&b, pos_out, pos_in);
 +
 +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 +                                                  vec4, "a_tex_pos");
 +   tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
 +   nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 +                                                   vec4, "v_tex_pos");
 +   tex_pos_out->data.location = VARYING_SLOT_VAR0;
 +   tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
 +   nir_copy_var(&b, tex_pos_out, tex_pos_in);
 +
 +   return b.shader;
 +}
 +
 +static nir_shader *
 +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
 +{
 +   const struct glsl_type *vec4 = glsl_vec4_type();
 +   nir_builder b;
 +
 +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
 +
 +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 +                                                  vec4, "v_tex_pos");
 +   tex_pos_in->data.location = VARYING_SLOT_VAR0;
 +
 +   /* Swizzle the array index which comes in as Z coordinate into the right
 +    * position.
 +    */
 +   unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
 +   nir_ssa_def *const tex_pos =
 +      nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
 +                  (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
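 +   /* For a 1D array image the coordinate becomes (x, layer); for 2D and
 +    * 3D sources all three components (x, y, z/layer) are kept.
 +    */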
 +
 +   const struct glsl_type *sampler_type =
 +      glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
 +                        glsl_get_base_type(vec4));
 +   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
 +                                               sampler_type, "s_tex");
 +   sampler->data.descriptor_set = 0;
 +   sampler->data.binding = 0;
 +
 +   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
 +   tex->sampler_dim = tex_dim;
 +   tex->op = nir_texop_tex;
 +   tex->src[0].src_type = nir_tex_src_coord;
 +   tex->src[0].src = nir_src_for_ssa(tex_pos);
 +   tex->dest_type = nir_type_float; /* TODO */
 +   tex->is_array = glsl_sampler_type_is_array(sampler_type);
 +   tex->coord_components = tex_pos->num_components;
 +   tex->texture = nir_deref_var_create(tex, sampler);
 +   tex->sampler = nir_deref_var_create(tex, sampler);
 +
++   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
 +   nir_builder_instr_insert(&b, &tex->instr);
 +
 +   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
 +                                                 vec4, "f_color");
 +   color_out->data.location = FRAG_RESULT_DATA0;
 +   nir_store_var(&b, color_out, &tex->dest.ssa, 4);
 +
 +   return b.shader;
 +}
 +
 +static void
 +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
 +                  struct anv_meta_saved_state *saved_state)
 +{
 +   anv_meta_save(saved_state, cmd_buffer,
 +                 (1 << VK_DYNAMIC_STATE_VIEWPORT));
 +}
 +
 +static void
 +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
 +               struct anv_image *src_image,
 +               struct anv_image_view *src_iview,
 +               VkOffset3D src_offset,
 +               VkExtent3D src_extent,
 +               struct anv_image *dest_image,
 +               struct anv_image_view *dest_iview,
 +               VkOffset3D dest_offset,
 +               VkExtent3D dest_extent,
 +               VkFilter blit_filter)
 +{
 +   struct anv_device *device = cmd_buffer->device;
 +
 +   struct blit_vb_data {
 +      float pos[2];
 +      float tex_coord[3];
 +   } *vb_data;
 +
 +   assert(src_image->samples == dest_image->samples);
 +
 +   unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
 +
 +   struct anv_state vb_state =
 +      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
 +   memset(vb_state.map, 0, sizeof(struct anv_vue_header));
 +   vb_data = vb_state.map + sizeof(struct anv_vue_header);
 +
 +   vb_data[0] = (struct blit_vb_data) {
 +      .pos = {
 +         dest_offset.x + dest_extent.width,
 +         dest_offset.y + dest_extent.height,
 +      },
 +      .tex_coord = {
 +         (float)(src_offset.x + src_extent.width)
 +            / (float)src_iview->extent.width,
 +         (float)(src_offset.y + src_extent.height)
 +            / (float)src_iview->extent.height,
 +         (float)src_offset.z / (float)src_iview->extent.depth,
 +      },
 +   };
 +
 +   vb_data[1] = (struct blit_vb_data) {
 +      .pos = {
 +         dest_offset.x,
 +         dest_offset.y + dest_extent.height,
 +      },
 +      .tex_coord = {
 +         (float)src_offset.x / (float)src_iview->extent.width,
 +         (float)(src_offset.y + src_extent.height) /
 +            (float)src_iview->extent.height,
 +         (float)src_offset.z / (float)src_iview->extent.depth,
 +      },
 +   };
 +
 +   vb_data[2] = (struct blit_vb_data) {
 +      .pos = {
 +         dest_offset.x,
 +         dest_offset.y,
 +      },
 +      .tex_coord = {
 +         (float)src_offset.x / (float)src_iview->extent.width,
 +         (float)src_offset.y / (float)src_iview->extent.height,
 +         (float)src_offset.z / (float)src_iview->extent.depth,
 +      },
 +   };
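 +   /* The three vertices above describe the destination rectangle as a
 +    * RECTLIST primitive (the blit pipelines are created with
 +    * use_rectlist); the texture coordinates are normalized against the
 +    * source view extent.
 +    */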
 +
 +   anv_state_clflush(vb_state);
 +
 +   struct anv_buffer vertex_buffer = {
 +      .device = device,
 +      .size = vb_size,
 +      .bo = &device->dynamic_state_block_pool.bo,
 +      .offset = vb_state.offset,
 +   };
 +
 +   anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
 +      (VkBuffer[]) {
 +         anv_buffer_to_handle(&vertex_buffer),
 +         anv_buffer_to_handle(&vertex_buffer)
 +      },
 +      (VkDeviceSize[]) {
 +         0,
 +         sizeof(struct anv_vue_header),
 +      });
 +
 +   VkSampler sampler;
 +   ANV_CALL(CreateSampler)(anv_device_to_handle(device),
 +      &(VkSamplerCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 +         .magFilter = blit_filter,
 +         .minFilter = blit_filter,
 +      }, &cmd_buffer->pool->alloc, &sampler);
 +
 +   VkDescriptorPool desc_pool;
 +   anv_CreateDescriptorPool(anv_device_to_handle(device),
 +      &(const VkDescriptorPoolCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
 +         .pNext = NULL,
 +         .flags = 0,
 +         .maxSets = 1,
 +         .poolSizeCount = 1,
 +         .pPoolSizes = (VkDescriptorPoolSize[]) {
 +            {
 +               .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 +               .descriptorCount = 1
 +            },
 +         }
 +      }, &cmd_buffer->pool->alloc, &desc_pool);
 +
 +   VkDescriptorSet set;
 +   anv_AllocateDescriptorSets(anv_device_to_handle(device),
 +      &(VkDescriptorSetAllocateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
 +         .descriptorPool = desc_pool,
 +         .descriptorSetCount = 1,
 +         .pSetLayouts = &device->meta_state.blit.ds_layout
 +      }, &set);
 +
 +   anv_UpdateDescriptorSets(anv_device_to_handle(device),
 +      1, /* writeCount */
 +      (VkWriteDescriptorSet[]) {
 +         {
 +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 +            .dstSet = set,
 +            .dstBinding = 0,
 +            .dstArrayElement = 0,
 +            .descriptorCount = 1,
 +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 +            .pImageInfo = (VkDescriptorImageInfo[]) {
 +               {
 +                  .sampler = sampler,
 +                  .imageView = anv_image_view_to_handle(src_iview),
 +                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 +               },
 +            }
 +         }
 +      }, 0, NULL);
 +
 +   VkFramebuffer fb;
 +   anv_CreateFramebuffer(anv_device_to_handle(device),
 +      &(VkFramebufferCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = (VkImageView[]) {
 +            anv_image_view_to_handle(dest_iview),
 +         },
 +         .width = dest_iview->extent.width,
 +         .height = dest_iview->extent.height,
 +         .layers = 1
 +      }, &cmd_buffer->pool->alloc, &fb);
 +
 +   ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
 +      &(VkRenderPassBeginInfo) {
 +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
 +         .renderPass = device->meta_state.blit.render_pass,
 +         .framebuffer = fb,
 +         .renderArea = {
 +            .offset = { dest_offset.x, dest_offset.y },
 +            .extent = { dest_extent.width, dest_extent.height },
 +         },
 +         .clearValueCount = 0,
 +         .pClearValues = NULL,
 +      }, VK_SUBPASS_CONTENTS_INLINE);
 +
 +   VkPipeline pipeline;
 +
 +   switch (src_image->type) {
 +   case VK_IMAGE_TYPE_1D:
 +      pipeline = device->meta_state.blit.pipeline_1d_src;
 +      break;
 +   case VK_IMAGE_TYPE_2D:
 +      pipeline = device->meta_state.blit.pipeline_2d_src;
 +      break;
 +   case VK_IMAGE_TYPE_3D:
 +      pipeline = device->meta_state.blit.pipeline_3d_src;
 +      break;
 +   default:
 +      unreachable("bad VkImageType");
 +   }
 +
 +   if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
 +      anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
 +                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
 +   }
 +
 +   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
 +                      &(VkViewport) {
 +                        .x = 0.0f,
 +                        .y = 0.0f,
 +                        .width = dest_iview->extent.width,
 +                        .height = dest_iview->extent.height,
 +                        .minDepth = 0.0f,
 +                        .maxDepth = 1.0f,
 +                      });
 +
 +   anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
 +                             VK_PIPELINE_BIND_POINT_GRAPHICS,
 +                             device->meta_state.blit.pipeline_layout, 0, 1,
 +                             &set, 0, NULL);
 +
 +   ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
 +
 +   ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
 +
 +   /* At the point where we emit the draw call, all data from the
 +    * descriptor sets, etc. has been used.  We are free to delete it.
 +    */
 +   anv_DestroyDescriptorPool(anv_device_to_handle(device),
 +                             desc_pool, &cmd_buffer->pool->alloc);
 +   anv_DestroySampler(anv_device_to_handle(device), sampler,
 +                      &cmd_buffer->pool->alloc);
 +   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
 +                          &cmd_buffer->pool->alloc);
 +}
 +
 +static void
 +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
 +                 const struct anv_meta_saved_state *saved_state)
 +{
 +   anv_meta_restore(saved_state, cmd_buffer);
 +}
 +
 +void anv_CmdBlitImage(
 +    VkCommandBuffer                             commandBuffer,
 +    VkImage                                     srcImage,
 +    VkImageLayout                               srcImageLayout,
 +    VkImage                                     destImage,
 +    VkImageLayout                               destImageLayout,
 +    uint32_t                                    regionCount,
 +    const VkImageBlit*                          pRegions,
 +    VkFilter                                    filter)
 +
 +{
 +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 +   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
 +   ANV_FROM_HANDLE(anv_image, dest_image, destImage);
 +   struct anv_meta_saved_state saved_state;
 +
 +   /* From the Vulkan 1.0 spec:
 +    *
 +    *    vkCmdBlitImage must not be used for multisampled source or
 +    *    destination images. Use vkCmdResolveImage for this purpose.
 +    */
 +   assert(src_image->samples == 1);
 +   assert(dest_image->samples == 1);
 +
 +   meta_prepare_blit(cmd_buffer, &saved_state);
 +
 +   for (unsigned r = 0; r < regionCount; r++) {
 +      struct anv_image_view src_iview;
 +      anv_image_view_init(&src_iview, cmd_buffer->device,
 +         &(VkImageViewCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 +            .image = srcImage,
 +            .viewType = anv_meta_get_view_type(src_image),
 +            .format = src_image->vk_format,
 +            .subresourceRange = {
 +               .aspectMask = pRegions[r].srcSubresource.aspectMask,
 +               .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
 +               .levelCount = 1,
 +               .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
 +               .layerCount = 1
 +            },
 +         },
 +         cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT);
 +
 +      const VkOffset3D dest_offset = {
 +         .x = pRegions[r].dstOffsets[0].x,
 +         .y = pRegions[r].dstOffsets[0].y,
 +         .z = 0,
 +      };
 +
 +      if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
 +          pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
 +          pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
 +          pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
 +         anv_finishme("FINISHME: Allow flipping in blits");
 +
 +      const VkExtent3D dest_extent = {
 +         .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
 +         .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
 +      };
 +
 +      const VkExtent3D src_extent = {
 +         .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
 +         .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
 +      };
 +
 +      const uint32_t dest_array_slice =
 +         anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
 +                                  &pRegions[r].dstOffsets[0]);
 +
 +      if (pRegions[r].srcSubresource.layerCount > 1)
 +         anv_finishme("FINISHME: copy multiple array layers");
 +
 +      if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
 +          pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
 +         anv_finishme("FINISHME: copy multiple depth layers");
 +
 +      struct anv_image_view dest_iview;
 +      anv_image_view_init(&dest_iview, cmd_buffer->device,
 +         &(VkImageViewCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 +            .image = destImage,
 +            .viewType = anv_meta_get_view_type(dest_image),
 +            .format = dest_image->vk_format,
 +            .subresourceRange = {
 +               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 +               .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
 +               .levelCount = 1,
 +               .baseArrayLayer = dest_array_slice,
 +               .layerCount = 1
 +            },
 +         },
 +         cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
 +
 +      meta_emit_blit(cmd_buffer,
 +                     src_image, &src_iview,
 +                     pRegions[r].srcOffsets[0], src_extent,
 +                     dest_image, &dest_iview,
 +                     dest_offset, dest_extent,
 +                     filter);
 +   }
 +
 +   meta_finish_blit(cmd_buffer, &saved_state);
 +}
 +
 +void
 +anv_device_finish_meta_blit_state(struct anv_device *device)
 +{
 +   anv_DestroyRenderPass(anv_device_to_handle(device),
 +                         device->meta_state.blit.render_pass,
 +                         &device->meta_state.alloc);
 +   anv_DestroyPipeline(anv_device_to_handle(device),
 +                       device->meta_state.blit.pipeline_1d_src,
 +                       &device->meta_state.alloc);
 +   anv_DestroyPipeline(anv_device_to_handle(device),
 +                       device->meta_state.blit.pipeline_2d_src,
 +                       &device->meta_state.alloc);
 +   anv_DestroyPipeline(anv_device_to_handle(device),
 +                       device->meta_state.blit.pipeline_3d_src,
 +                       &device->meta_state.alloc);
 +   anv_DestroyPipelineLayout(anv_device_to_handle(device),
 +                             device->meta_state.blit.pipeline_layout,
 +                             &device->meta_state.alloc);
 +   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
 +                                  device->meta_state.blit.ds_layout,
 +                                  &device->meta_state.alloc);
 +}
 +
 +VkResult
 +anv_device_init_meta_blit_state(struct anv_device *device)
 +{
 +   VkResult result;
 +
 +   result = anv_CreateRenderPass(anv_device_to_handle(device),
 +      &(VkRenderPassCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = &(VkAttachmentDescription) {
 +            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
 +            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 +            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
 +            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
 +            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
 +         },
 +         .subpassCount = 1,
 +         .pSubpasses = &(VkSubpassDescription) {
 +            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
 +            .inputAttachmentCount = 0,
 +            .colorAttachmentCount = 1,
 +            .pColorAttachments = &(VkAttachmentReference) {
 +               .attachment = 0,
 +               .layout = VK_IMAGE_LAYOUT_GENERAL,
 +            },
 +            .pResolveAttachments = NULL,
 +            .pDepthStencilAttachment = &(VkAttachmentReference) {
 +               .attachment = VK_ATTACHMENT_UNUSED,
 +               .layout = VK_IMAGE_LAYOUT_GENERAL,
 +            },
 +            .preserveAttachmentCount = 1,
 +            .pPreserveAttachments = (uint32_t[]) { 0 },
 +         },
 +         .dependencyCount = 0,
 +      }, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
 +   if (result != VK_SUCCESS)
 +      goto fail;
 +
 +   /* We don't use a vertex shader for blitting, but instead build and pass
 +    * the VUEs directly to the rasterization backend.  However, we do need
 +    * to provide GLSL source for the vertex shader so that the compiler
 +    * does not dead-code our inputs.
 +    */
 +   struct anv_shader_module vs = {
 +      .nir = build_nir_vertex_shader(),
 +   };
 +
 +   struct anv_shader_module fs_1d = {
 +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
 +   };
 +
 +   struct anv_shader_module fs_2d = {
 +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
 +   };
 +
 +   struct anv_shader_module fs_3d = {
 +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
 +   };
 +
 +   VkPipelineVertexInputStateCreateInfo vi_create_info = {
 +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
 +      .vertexBindingDescriptionCount = 2,
 +      .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
 +         {
 +            .binding = 0,
 +            .stride = 0,
 +            .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
 +         },
 +         {
 +            .binding = 1,
 +            .stride = 5 * sizeof(float),
 +            .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
 +         },
 +      },
 +      .vertexAttributeDescriptionCount = 3,
 +      .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
 +         {
 +            /* VUE Header */
 +            .location = 0,
 +            .binding = 0,
 +            .format = VK_FORMAT_R32G32B32A32_UINT,
 +            .offset = 0
 +         },
 +         {
 +            /* Position */
 +            .location = 1,
 +            .binding = 1,
 +            .format = VK_FORMAT_R32G32_SFLOAT,
 +            .offset = 0
 +         },
 +         {
 +            /* Texture Coordinate */
 +            .location = 2,
 +            .binding = 1,
 +            .format = VK_FORMAT_R32G32B32_SFLOAT,
 +            .offset = 8
 +         }
 +      }
 +   };
 +
 +   VkDescriptorSetLayoutCreateInfo ds_layout_info = {
 +      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 +      .bindingCount = 1,
 +      .pBindings = (VkDescriptorSetLayoutBinding[]) {
 +         {
 +            .binding = 0,
 +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 +            .descriptorCount = 1,
 +            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
 +            .pImmutableSamplers = NULL
 +         },
 +      }
 +   };
 +   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
 +                                          &ds_layout_info,
 +                                          &device->meta_state.alloc,
 +                                          &device->meta_state.blit.ds_layout);
 +   if (result != VK_SUCCESS)
 +      goto fail_render_pass;
 +
 +   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
 +      &(VkPipelineLayoutCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 +         .setLayoutCount = 1,
 +         .pSetLayouts = &device->meta_state.blit.ds_layout,
 +      },
 +      &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
 +   if (result != VK_SUCCESS)
 +      goto fail_descriptor_set_layout;
 +
 +   VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
 +      {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 +         .stage = VK_SHADER_STAGE_VERTEX_BIT,
 +         .module = anv_shader_module_to_handle(&vs),
 +         .pName = "main",
 +         .pSpecializationInfo = NULL
 +      }, {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 +         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
 +         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
 +         .pName = "main",
 +         .pSpecializationInfo = NULL
 +      },
 +   };
 +
 +   const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
 +      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 +      .stageCount = ARRAY_SIZE(pipeline_shader_stages),
 +      .pStages = pipeline_shader_stages,
 +      .pVertexInputState = &vi_create_info,
 +      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
 +         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
 +         .primitiveRestartEnable = false,
 +      },
 +      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
 +         .viewportCount = 1,
 +         .scissorCount = 1,
 +      },
 +      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
 +         .rasterizerDiscardEnable = false,
 +         .polygonMode = VK_POLYGON_MODE_FILL,
 +         .cullMode = VK_CULL_MODE_NONE,
 +         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
 +      },
 +      .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
 +         .rasterizationSamples = 1,
 +         .sampleShadingEnable = false,
 +         .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
 +      },
 +      .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = (VkPipelineColorBlendAttachmentState []) {
 +            { .colorWriteMask =
 +                 VK_COLOR_COMPONENT_A_BIT |
 +                 VK_COLOR_COMPONENT_R_BIT |
 +                 VK_COLOR_COMPONENT_G_BIT |
 +                 VK_COLOR_COMPONENT_B_BIT },
 +         }
 +      },
 +      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 +         .dynamicStateCount = 9,
 +         .pDynamicStates = (VkDynamicState[]) {
 +            VK_DYNAMIC_STATE_VIEWPORT,
 +            VK_DYNAMIC_STATE_SCISSOR,
 +            VK_DYNAMIC_STATE_LINE_WIDTH,
 +            VK_DYNAMIC_STATE_DEPTH_BIAS,
 +            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
 +            VK_DYNAMIC_STATE_DEPTH_BOUNDS,
 +            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
 +            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
 +            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
 +         },
 +      },
 +      .flags = 0,
 +      .layout = device->meta_state.blit.pipeline_layout,
 +      .renderPass = device->meta_state.blit.render_pass,
 +      .subpass = 0,
 +   };
 +
 +   const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
 +      .color_attachment_count = -1,
 +      .use_repclear = false,
 +      .disable_viewport = true,
 +      .disable_scissor = true,
 +      .disable_vs = true,
 +      .use_rectlist = true
 +   };
 +
 +   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
 +   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
 +      VK_NULL_HANDLE,
 +      &vk_pipeline_info, &anv_pipeline_info,
 +      &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
 +   if (result != VK_SUCCESS)
 +      goto fail_pipeline_layout;
 +
 +   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
 +   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
 +      VK_NULL_HANDLE,
 +      &vk_pipeline_info, &anv_pipeline_info,
 +      &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
 +   if (result != VK_SUCCESS)
 +      goto fail_pipeline_1d;
 +
 +   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
 +   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
 +      VK_NULL_HANDLE,
 +      &vk_pipeline_info, &anv_pipeline_info,
 +      &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
 +   if (result != VK_SUCCESS)
 +      goto fail_pipeline_2d;
 +
 +   ralloc_free(vs.nir);
 +   ralloc_free(fs_1d.nir);
 +   ralloc_free(fs_2d.nir);
 +   ralloc_free(fs_3d.nir);
 +
 +   return VK_SUCCESS;
 +
 + fail_pipeline_2d:
 +   anv_DestroyPipeline(anv_device_to_handle(device),
 +                       device->meta_state.blit.pipeline_2d_src,
 +                       &device->meta_state.alloc);
 +
 + fail_pipeline_1d:
 +   anv_DestroyPipeline(anv_device_to_handle(device),
 +                       device->meta_state.blit.pipeline_1d_src,
 +                       &device->meta_state.alloc);
 +
 + fail_pipeline_layout:
 +   anv_DestroyPipelineLayout(anv_device_to_handle(device),
 +                             device->meta_state.blit.pipeline_layout,
 +                             &device->meta_state.alloc);
 + fail_descriptor_set_layout:
 +   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
 +                                  device->meta_state.blit.ds_layout,
 +                                  &device->meta_state.alloc);
 + fail_render_pass:
 +   anv_DestroyRenderPass(anv_device_to_handle(device),
 +                         device->meta_state.blit.render_pass,
 +                         &device->meta_state.alloc);
 +
 +   ralloc_free(vs.nir);
 +   ralloc_free(fs_1d.nir);
 +   ralloc_free(fs_2d.nir);
 +   ralloc_free(fs_3d.nir);
 + fail:
 +   return result;
 +}
index 4a0bed1,0000000..87c3358
mode 100644,000000..100644
--- /dev/null
@@@ -1,723 -1,0 +1,723 @@@
-    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
 +/*
 + * Copyright © 2016 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include "anv_meta.h"
 +#include "nir/nir_builder.h"
 +
 +static VkFormat
 +vk_format_for_size(int bs)
 +{
 +   /* The choice of UNORM and UINT formats is very intentional here.  Most of
 +    * the time, we want to use a UINT format to avoid any rounding error in
 +    * the blit.  For stencil blits, R8_UINT is required by the hardware.
 +    * (It's the only format allowed in conjunction with W-tiling.)  Also we
 +    * intentionally use the 4-channel formats whenever we can.  This is so
 +    * that, when we do a RGB <-> RGBX copy, the two formats will line up even
 +    * though one of them is 3/4 the size of the other.  The choice of UNORM
 +    * vs. UINT is also very intentional because Haswell doesn't handle 8 or
 +    * 16-bit RGB UINT formats at all so we have to use UNORM there.
 +    * Fortunately, the only time we should ever use two different formats in
 +    * the table below is for RGB -> RGBA blits and so we will never have any
 +    * UNORM/UINT mismatch.
 +    */
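 +   /* e.g. a 4-byte texel (RGBA8 or any packed 32-bit format) is copied
 +    * as R8G8B8A8_UNORM, while a 16-byte texel uses R32G32B32A32_UINT.
 +    */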
 +   switch (bs) {
 +   case 1: return VK_FORMAT_R8_UINT;
 +   case 2: return VK_FORMAT_R8G8_UINT;
 +   case 3: return VK_FORMAT_R8G8B8_UNORM;
 +   case 4: return VK_FORMAT_R8G8B8A8_UNORM;
 +   case 6: return VK_FORMAT_R16G16B16_UNORM;
 +   case 8: return VK_FORMAT_R16G16B16A16_UNORM;
 +   case 12: return VK_FORMAT_R32G32B32_UINT;
 +   case 16: return VK_FORMAT_R32G32B32A32_UINT;
 +   default:
 +      unreachable("Invalid format block size");
 +   }
 +}
 +
 +static void
 +meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer,
 +               struct anv_image_view *src_iview,
 +               VkOffset3D src_offset,
 +               struct anv_image_view *dest_iview,
 +               VkOffset3D dest_offset,
 +               VkExtent3D extent)
 +{
 +   struct anv_device *device = cmd_buffer->device;
 +
 +   struct blit_vb_data {
 +      float pos[2];
 +      float tex_coord[3];
 +   } *vb_data;
 +
 +   unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
 +
 +   struct anv_state vb_state =
 +      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
 +   memset(vb_state.map, 0, sizeof(struct anv_vue_header));
 +   vb_data = vb_state.map + sizeof(struct anv_vue_header);
 +
 +   vb_data[0] = (struct blit_vb_data) {
 +      .pos = {
 +         dest_offset.x + extent.width,
 +         dest_offset.y + extent.height,
 +      },
 +      .tex_coord = {
 +         src_offset.x + extent.width,
 +         src_offset.y + extent.height,
 +         src_offset.z,
 +      },
 +   };
 +
 +   vb_data[1] = (struct blit_vb_data) {
 +      .pos = {
 +         dest_offset.x,
 +         dest_offset.y + extent.height,
 +      },
 +      .tex_coord = {
 +         src_offset.x,
 +         src_offset.y + extent.height,
 +         src_offset.z,
 +      },
 +   };
 +
 +   vb_data[2] = (struct blit_vb_data) {
 +      .pos = {
 +         dest_offset.x,
 +         dest_offset.y,
 +      },
 +      .tex_coord = {
 +         src_offset.x,
 +         src_offset.y,
 +         src_offset.z,
 +      },
 +   };
 +
 +   anv_state_clflush(vb_state);
 +
 +   struct anv_buffer vertex_buffer = {
 +      .device = device,
 +      .size = vb_size,
 +      .bo = &device->dynamic_state_block_pool.bo,
 +      .offset = vb_state.offset,
 +   };
 +
 +   anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
 +      (VkBuffer[]) {
 +         anv_buffer_to_handle(&vertex_buffer),
 +         anv_buffer_to_handle(&vertex_buffer)
 +      },
 +      (VkDeviceSize[]) {
 +         0,
 +         sizeof(struct anv_vue_header),
 +      });
 +
 +   VkDescriptorPool desc_pool;
 +   anv_CreateDescriptorPool(anv_device_to_handle(device),
 +      &(const VkDescriptorPoolCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
 +         .pNext = NULL,
 +         .flags = 0,
 +         .maxSets = 1,
 +         .poolSizeCount = 1,
 +         .pPoolSizes = (VkDescriptorPoolSize[]) {
 +            {
 +               .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 +               .descriptorCount = 1
 +            },
 +         }
 +      }, &cmd_buffer->pool->alloc, &desc_pool);
 +
 +   VkDescriptorSet set;
 +   anv_AllocateDescriptorSets(anv_device_to_handle(device),
 +      &(VkDescriptorSetAllocateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
 +         .descriptorPool = desc_pool,
 +         .descriptorSetCount = 1,
 +         .pSetLayouts = &device->meta_state.blit2d.ds_layout
 +      }, &set);
 +
 +   anv_UpdateDescriptorSets(anv_device_to_handle(device),
 +      1, /* writeCount */
 +      (VkWriteDescriptorSet[]) {
 +         {
 +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 +            .dstSet = set,
 +            .dstBinding = 0,
 +            .dstArrayElement = 0,
 +            .descriptorCount = 1,
 +            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 +            .pImageInfo = (VkDescriptorImageInfo[]) {
 +               {
 +                  .sampler = NULL,
 +                  .imageView = anv_image_view_to_handle(src_iview),
 +                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 +               },
 +            }
 +         }
 +      }, 0, NULL);
 +
 +   VkFramebuffer fb;
 +   anv_CreateFramebuffer(anv_device_to_handle(device),
 +      &(VkFramebufferCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = (VkImageView[]) {
 +            anv_image_view_to_handle(dest_iview),
 +         },
 +         .width = dest_iview->extent.width,
 +         .height = dest_iview->extent.height,
 +         .layers = 1
 +      }, &cmd_buffer->pool->alloc, &fb);
 +
 +   ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
 +      &(VkRenderPassBeginInfo) {
 +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
 +         .renderPass = device->meta_state.blit2d.render_pass,
 +         .framebuffer = fb,
 +         .renderArea = {
 +            .offset = { dest_offset.x, dest_offset.y },
 +            .extent = { extent.width, extent.height },
 +         },
 +         .clearValueCount = 0,
 +         .pClearValues = NULL,
 +      }, VK_SUBPASS_CONTENTS_INLINE);
 +
 +   VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src;
 +
 +   if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
 +      anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
 +                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
 +   }
 +
 +   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
 +                      &(VkViewport) {
 +                        .x = 0.0f,
 +                        .y = 0.0f,
 +                        .width = dest_iview->extent.width,
 +                        .height = dest_iview->extent.height,
 +                        .minDepth = 0.0f,
 +                        .maxDepth = 1.0f,
 +                      });
 +
 +   anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
 +                             VK_PIPELINE_BIND_POINT_GRAPHICS,
 +                             device->meta_state.blit2d.pipeline_layout, 0, 1,
 +                             &set, 0, NULL);
 +
 +   ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
 +
 +   ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
 +
 +   /* At the point where we emit the draw call, all data from the
 +    * descriptor sets, etc. has been used.  We are free to delete it.
 +    */
 +   anv_DestroyDescriptorPool(anv_device_to_handle(device),
 +                             desc_pool, &cmd_buffer->pool->alloc);
 +   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
 +                          &cmd_buffer->pool->alloc);
 +}
 +
 +void
 +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
 +                    struct anv_meta_saved_state *save)
 +{
 +   anv_meta_restore(save, cmd_buffer);
 +}
 +
 +void
 +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
 +                      struct anv_meta_saved_state *save)
 +{
 +   anv_meta_save(save, cmd_buffer,
 +                 (1 << VK_DYNAMIC_STATE_VIEWPORT));
 +}
 +
 +void
 +anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
 +                struct anv_meta_blit2d_surf *src,
 +                struct anv_meta_blit2d_surf *dst,
 +                unsigned num_rects,
 +                struct anv_meta_blit2d_rect *rects)
 +{
 +   VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
 +   VkFormat src_format = vk_format_for_size(src->bs);
 +   VkFormat dst_format = vk_format_for_size(dst->bs);
 +   VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
 +   VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 +
 +   for (unsigned r = 0; r < num_rects; ++r) {
 +
 +      /* Create VkImages */
 +      VkImageCreateInfo image_info = {
 +         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
 +         .imageType = VK_IMAGE_TYPE_2D,
 +         .format = 0, /* TEMPLATE */
 +         .extent = {
 +            .width = 0, /* TEMPLATE */
 +            .height = 0, /* TEMPLATE */
 +            .depth = 1,
 +         },
 +         .mipLevels = 1,
 +         .arrayLayers = 1,
 +         .samples = 1,
 +         .tiling = 0, /* TEMPLATE */
 +         .usage = 0, /* TEMPLATE */
 +      };
 +      struct anv_image_create_info anv_image_info = {
 +         .vk_info = &image_info,
 +         .isl_tiling_flags = 0, /* TEMPLATE */
 +      };
 +
 +      /* The image height is the rect height + src/dst y-offset from the
 +       * tile-aligned base address.
 +       */
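 +      /* e.g. for a Y-tiled source (32-row tiles) with src_y = 70, an
 +       * extra 70 % 32 = 6 rows are added to the image height.
 +       */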
 +      struct isl_tile_info tile_info;
 +
 +      anv_image_info.isl_tiling_flags = 1 << src->tiling;
 +      image_info.tiling = src->tiling == ISL_TILING_LINEAR ?
 +                          VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
 +      image_info.usage = src_usage;
 +      image_info.format = src_format;
 +      isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs,
 +                          &tile_info);
 +      image_info.extent.height = rects[r].height +
 +                                 rects[r].src_y % tile_info.height;
 +      image_info.extent.width = src->pitch / src->bs;
 +      VkImage src_image;
 +      anv_image_create(vk_device, &anv_image_info,
 +                       &cmd_buffer->pool->alloc, &src_image);
 +
 +      anv_image_info.isl_tiling_flags = 1 << dst->tiling;
 +      image_info.tiling = dst->tiling == ISL_TILING_LINEAR ?
 +                          VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
 +      image_info.usage = dst_usage;
 +      image_info.format = dst_format;
 +      isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs,
 +                          &tile_info);
 +      image_info.extent.height = rects[r].height +
 +                                 rects[r].dst_y % tile_info.height;
 +      image_info.extent.width = dst->pitch / dst->bs;
 +      VkImage dst_image;
 +      anv_image_create(vk_device, &anv_image_info,
 +                       &cmd_buffer->pool->alloc, &dst_image);
 +
 +      /* We could use a vk call to bind memory, but that would require
 +       * creating a dummy memory object etc. so there's really no point.
 +       */
 +      anv_image_from_handle(src_image)->bo = src->bo;
 +      anv_image_from_handle(src_image)->offset = src->base_offset;
 +      anv_image_from_handle(dst_image)->bo = dst->bo;
 +      anv_image_from_handle(dst_image)->offset = dst->base_offset;
 +
 +      /* Create VkImageViews */
 +      VkImageViewCreateInfo iview_info = {
 +         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 +         .image = 0, /* TEMPLATE */
 +         .viewType = VK_IMAGE_VIEW_TYPE_2D,
 +         .format = 0, /* TEMPLATE */
 +         .subresourceRange = {
 +            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 +            .baseMipLevel = 0,
 +            .levelCount = 1,
 +            .baseArrayLayer = 0,
 +            .layerCount = 1
 +         },
 +      };
 +      uint32_t img_o = 0;
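 +      /* isl_surf_get_image_intratile_offset_el_xy() splits each (x, y)
 +       * into a tile-aligned surface offset (img_o) used when creating
 +       * the view, plus an element offset within the tile that becomes
 +       * the blit offset.
 +       */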
 +
 +      iview_info.image = src_image;
 +      iview_info.format = src_format;
 +      VkOffset3D src_offset_el = {0};
 +      isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
 +                                                &anv_image_from_handle(src_image)->
 +                                                   color_surface.isl,
 +                                                rects[r].src_x,
 +                                                rects[r].src_y,
 +                                                &img_o,
 +                                                (uint32_t*)&src_offset_el.x,
 +                                                (uint32_t*)&src_offset_el.y);
 +
 +      struct anv_image_view src_iview;
 +      anv_image_view_init(&src_iview, cmd_buffer->device,
 +         &iview_info, cmd_buffer, img_o, src_usage);
 +
 +      iview_info.image = dst_image;
 +      iview_info.format = dst_format;
 +      VkOffset3D dst_offset_el = {0};
 +      isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
 +                                                &anv_image_from_handle(dst_image)->
 +                                                   color_surface.isl,
 +                                                rects[r].dst_x,
 +                                                rects[r].dst_y,
 +                                                &img_o,
 +                                                (uint32_t*)&dst_offset_el.x,
 +                                                (uint32_t*)&dst_offset_el.y);
 +      struct anv_image_view dst_iview;
 +      anv_image_view_init(&dst_iview, cmd_buffer->device,
 +         &iview_info, cmd_buffer, img_o, dst_usage);
 +
 +      /* Perform blit */
 +      meta_emit_blit2d(cmd_buffer,
 +                     &src_iview,
 +                     src_offset_el,
 +                     &dst_iview,
 +                     dst_offset_el,
 +                     (VkExtent3D){rects[r].width, rects[r].height, 1});
 +
 +      anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc);
 +      anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc);
 +   }
 +}
 +
 +
 +static nir_shader *
 +build_nir_vertex_shader(void)
 +{
 +   const struct glsl_type *vec4 = glsl_vec4_type();
 +   nir_builder b;
 +
 +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
 +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
 +
 +   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 +                                              vec4, "a_pos");
 +   pos_in->data.location = VERT_ATTRIB_GENERIC0;
 +   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 +                                               vec4, "gl_Position");
 +   pos_out->data.location = VARYING_SLOT_POS;
 +   nir_copy_var(&b, pos_out, pos_in);
 +
 +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 +                                                  vec4, "a_tex_pos");
 +   tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
 +   nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 +                                                   vec4, "v_tex_pos");
 +   tex_pos_out->data.location = VARYING_SLOT_VAR0;
 +   tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
 +   nir_copy_var(&b, tex_pos_out, tex_pos_in);
 +
 +   return b.shader;
 +}
 +
 +static nir_shader *
 +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
 +{
 +   const struct glsl_type *vec4 = glsl_vec4_type();
 +   const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
 +   nir_builder b;
 +
 +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 +   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
 +
 +   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 +                                                  vec3, "v_tex_pos");
 +   tex_pos_in->data.location = VARYING_SLOT_VAR0;
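 +   /* nir_texop_txf takes unnormalized integer texel coordinates, so the
 +    * interpolated position is converted with f2i before use.
 +    */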
 +   nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
 +
 +   const struct glsl_type *sampler_type =
 +      glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
 +                        glsl_get_base_type(vec4));
 +   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
 +                                               sampler_type, "s_tex");
 +   sampler->data.descriptor_set = 0;
 +   sampler->data.binding = 0;
 +
 +   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
 +   tex->sampler_dim = tex_dim;
 +   tex->op = nir_texop_txf;
 +   tex->src[0].src_type = nir_tex_src_coord;
 +   tex->src[0].src = nir_src_for_ssa(tex_pos);
 +   tex->src[1].src_type = nir_tex_src_lod;
 +   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
 +   tex->dest_type = nir_type_float; /* TODO */
 +   tex->is_array = glsl_sampler_type_is_array(sampler_type);
 +   tex->coord_components = tex_pos->num_components;
 +   tex->texture = nir_deref_var_create(tex, sampler);
 +   tex->sampler = NULL;
 +
++   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
 +   nir_builder_instr_insert(&b, &tex->instr);
 +
 +   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
 +                                                 vec4, "f_color");
 +   color_out->data.location = FRAG_RESULT_DATA0;
 +   nir_store_var(&b, color_out, &tex->dest.ssa, 4);
 +
 +   return b.shader;
 +}
 +
 +void
 +anv_device_finish_meta_blit2d_state(struct anv_device *device)
 +{
 +   anv_DestroyRenderPass(anv_device_to_handle(device),
 +                         device->meta_state.blit2d.render_pass,
 +                         &device->meta_state.alloc);
 +   anv_DestroyPipeline(anv_device_to_handle(device),
 +                       device->meta_state.blit2d.pipeline_2d_src,
 +                       &device->meta_state.alloc);
 +   anv_DestroyPipelineLayout(anv_device_to_handle(device),
 +                             device->meta_state.blit2d.pipeline_layout,
 +                             &device->meta_state.alloc);
 +   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
 +                                  device->meta_state.blit2d.ds_layout,
 +                                  &device->meta_state.alloc);
 +}
 +
 +VkResult
 +anv_device_init_meta_blit2d_state(struct anv_device *device)
 +{
 +   VkResult result;
 +
 +   result = anv_CreateRenderPass(anv_device_to_handle(device),
 +      &(VkRenderPassCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = &(VkAttachmentDescription) {
 +            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
 +            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 +            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
 +            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
 +            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
 +         },
 +         .subpassCount = 1,
 +         .pSubpasses = &(VkSubpassDescription) {
 +            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
 +            .inputAttachmentCount = 0,
 +            .colorAttachmentCount = 1,
 +            .pColorAttachments = &(VkAttachmentReference) {
 +               .attachment = 0,
 +               .layout = VK_IMAGE_LAYOUT_GENERAL,
 +            },
 +            .pResolveAttachments = NULL,
 +            .pDepthStencilAttachment = &(VkAttachmentReference) {
 +               .attachment = VK_ATTACHMENT_UNUSED,
 +               .layout = VK_IMAGE_LAYOUT_GENERAL,
 +            },
 +            .preserveAttachmentCount = 1,
 +            .pPreserveAttachments = (uint32_t[]) { 0 },
 +         },
 +         .dependencyCount = 0,
 +      }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
 +   if (result != VK_SUCCESS)
 +      goto fail;
 +
 +   /* We don't use a vertex shader for blitting, but instead build and pass
 +    * the VUEs directly to the rasterization backend.  However, we do need
 +    * to provide GLSL source for the vertex shader so that the compiler
 +    * does not dead-code our inputs.
 +    */
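 +   /* Concretely: vertex binding 0 below carries the VUE header (per-instance,
 +    * stride 0) and binding 1 carries the per-vertex position and texture
 +    * coordinate consumed by the passthrough shaders above.
 +    */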
 +   struct anv_shader_module vs = {
 +      .nir = build_nir_vertex_shader(),
 +   };
 +
 +   struct anv_shader_module fs_2d = {
 +      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
 +   };
 +
 +   VkPipelineVertexInputStateCreateInfo vi_create_info = {
 +      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
 +      .vertexBindingDescriptionCount = 2,
 +      .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
 +         {
 +            .binding = 0,
 +            .stride = 0,
 +            .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
 +         },
 +         {
 +            .binding = 1,
 +            .stride = 5 * sizeof(float),
 +            .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
 +         },
 +      },
 +      .vertexAttributeDescriptionCount = 3,
 +      .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
 +         {
 +            /* VUE Header */
 +            .location = 0,
 +            .binding = 0,
 +            .format = VK_FORMAT_R32G32B32A32_UINT,
 +            .offset = 0
 +         },
 +         {
 +            /* Position */
 +            .location = 1,
 +            .binding = 1,
 +            .format = VK_FORMAT_R32G32_SFLOAT,
 +            .offset = 0
 +         },
 +         {
 +            /* Texture Coordinate */
 +            .location = 2,
 +            .binding = 1,
 +            .format = VK_FORMAT_R32G32B32_SFLOAT,
 +            .offset = 8
 +         }
 +      }
 +   };
 +
 +   VkDescriptorSetLayoutCreateInfo ds_layout_info = {
 +      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 +      .bindingCount = 1,
 +      .pBindings = (VkDescriptorSetLayoutBinding[]) {
 +         {
 +            .binding = 0,
 +            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 +            .descriptorCount = 1,
 +            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
 +            .pImmutableSamplers = NULL
 +         },
 +      }
 +   };
 +   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
 +                                          &ds_layout_info,
 +                                          &device->meta_state.alloc,
 +                                          &device->meta_state.blit2d.ds_layout);
 +   if (result != VK_SUCCESS)
 +      goto fail_render_pass;
 +
 +   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
 +      &(VkPipelineLayoutCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 +         .setLayoutCount = 1,
 +         .pSetLayouts = &device->meta_state.blit2d.ds_layout,
 +      },
 +      &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout);
 +   if (result != VK_SUCCESS)
 +      goto fail_descriptor_set_layout;
 +
 +   VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
 +      {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 +         .stage = VK_SHADER_STAGE_VERTEX_BIT,
 +         .module = anv_shader_module_to_handle(&vs),
 +         .pName = "main",
 +         .pSpecializationInfo = NULL
 +      }, {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 +         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
 +         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
 +         .pName = "main",
 +         .pSpecializationInfo = NULL
 +      },
 +   };
 +
 +   const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
 +      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 +      .stageCount = ARRAY_SIZE(pipeline_shader_stages),
 +      .pStages = pipeline_shader_stages,
 +      .pVertexInputState = &vi_create_info,
 +      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
 +         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
 +         .primitiveRestartEnable = false,
 +      },
 +      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
 +         .viewportCount = 1,
 +         .scissorCount = 1,
 +      },
 +      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
 +         .rasterizerDiscardEnable = false,
 +         .polygonMode = VK_POLYGON_MODE_FILL,
 +         .cullMode = VK_CULL_MODE_NONE,
 +         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
 +      },
 +      .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
 +         .rasterizationSamples = 1,
 +         .sampleShadingEnable = false,
 +         .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
 +      },
 +      .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = (VkPipelineColorBlendAttachmentState []) {
 +            { .colorWriteMask =
 +                 VK_COLOR_COMPONENT_A_BIT |
 +                 VK_COLOR_COMPONENT_R_BIT |
 +                 VK_COLOR_COMPONENT_G_BIT |
 +                 VK_COLOR_COMPONENT_B_BIT },
 +         }
 +      },
 +      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 +         .dynamicStateCount = 9,
 +         .pDynamicStates = (VkDynamicState[]) {
 +            VK_DYNAMIC_STATE_VIEWPORT,
 +            VK_DYNAMIC_STATE_SCISSOR,
 +            VK_DYNAMIC_STATE_LINE_WIDTH,
 +            VK_DYNAMIC_STATE_DEPTH_BIAS,
 +            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
 +            VK_DYNAMIC_STATE_DEPTH_BOUNDS,
 +            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
 +            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
 +            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
 +         },
 +      },
 +      .flags = 0,
 +      .layout = device->meta_state.blit2d.pipeline_layout,
 +      .renderPass = device->meta_state.blit2d.render_pass,
 +      .subpass = 0,
 +   };
 +
 +   const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
 +      .color_attachment_count = -1,
 +      .use_repclear = false,
 +      .disable_viewport = true,
 +      .disable_scissor = true,
 +      .disable_vs = true,
 +      .use_rectlist = true
 +   };
 +
 +   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
 +   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
 +      VK_NULL_HANDLE,
 +      &vk_pipeline_info, &anv_pipeline_info,
 +      &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src);
 +   if (result != VK_SUCCESS)
 +      goto fail_pipeline_layout;
 +
 +   ralloc_free(vs.nir);
 +   ralloc_free(fs_2d.nir);
 +
 +   return VK_SUCCESS;
 +
 + fail_pipeline_layout:
 +   anv_DestroyPipelineLayout(anv_device_to_handle(device),
 +                             device->meta_state.blit2d.pipeline_layout,
 +                             &device->meta_state.alloc);
 + fail_descriptor_set_layout:
 +   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
 +                                  device->meta_state.blit2d.ds_layout,
 +                                  &device->meta_state.alloc);
 + fail_render_pass:
 +   anv_DestroyRenderPass(anv_device_to_handle(device),
 +                         device->meta_state.blit2d.render_pass,
 +                         &device->meta_state.alloc);
 +
 +   ralloc_free(vs.nir);
 +   ralloc_free(fs_2d.nir);
 + fail:
 +   return result;
 +}
index f50af52,0000000..3e7c7d3
mode 100644,000000..100644
--- /dev/null
@@@ -1,902 -1,0 +1,902 @@@
-       nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex");
 +/*
 + * Copyright © 2016 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include <assert.h>
 +#include <stdbool.h>
 +
 +#include "anv_meta.h"
 +#include "anv_private.h"
 +#include "nir/nir_builder.h"
 +
 +/**
 + * Vertex attributes used by all pipelines.
 + */
 +struct vertex_attrs {
 +   struct anv_vue_header vue_header;
 +   float position[2]; /**< 3DPRIM_RECTLIST */
 +   float tex_position[2];
 +};
 +
 +static void
 +meta_resolve_save(struct anv_meta_saved_state *saved_state,
 +                  struct anv_cmd_buffer *cmd_buffer)
 +{
 +   anv_meta_save(saved_state, cmd_buffer,
 +                 (1 << VK_DYNAMIC_STATE_VIEWPORT) |
 +                 (1 << VK_DYNAMIC_STATE_SCISSOR));
 +
 +   cmd_buffer->state.dynamic.viewport.count = 0;
 +   cmd_buffer->state.dynamic.scissor.count = 0;
 +}
 +
 +static void
 +meta_resolve_restore(struct anv_meta_saved_state *saved_state,
 +                     struct anv_cmd_buffer *cmd_buffer)
 +{
 +   anv_meta_restore(saved_state, cmd_buffer);
 +}
 +
 +static VkPipeline *
 +get_pipeline_h(struct anv_device *device, uint32_t samples)
 +{
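 +   /* One pipeline is kept per power-of-two sample count: samples 2, 4, 8 and
 +    * 16 map to indices 0, 1, 2 and 3, since ffs(samples) - 2 == log2(samples) - 1
 +    * for powers of two.
 +    */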
 +   uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
 +
 +   assert(samples >= 2);
 +   assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));
 +
 +   return &device->meta_state.resolve.pipelines[i];
 +}
 +
 +static nir_shader *
 +build_nir_vs(void)
 +{
 +   const struct glsl_type *vec4 = glsl_vec4_type();
 +
 +   nir_builder b;
 +   nir_variable *a_position;
 +   nir_variable *v_position;
 +   nir_variable *a_tex_position;
 +   nir_variable *v_tex_position;
 +
 +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
 +   b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
 +
 +   a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 +                                    "a_position");
 +   a_position->data.location = VERT_ATTRIB_GENERIC0;
 +
 +   v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
 +                                    "gl_Position");
 +   v_position->data.location = VARYING_SLOT_POS;
 +
 +   a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 +                                    "a_tex_position");
 +   a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
 +
 +   v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
 +                                    "v_tex_position");
 +   v_tex_position->data.location = VARYING_SLOT_VAR0;
 +
 +   nir_copy_var(&b, v_position, a_position);
 +   nir_copy_var(&b, v_tex_position, a_tex_position);
 +
 +   return b.shader;
 +}
 +
 +static nir_shader *
 +build_nir_fs(uint32_t num_samples)
 +{
 +   const struct glsl_type *vec4 = glsl_vec4_type();
 +
 +   const struct glsl_type *sampler2DMS =
 +         glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
 +                           /*is_shadow*/ false,
 +                           /*is_array*/ false,
 +                           GLSL_TYPE_FLOAT);
 +
 +   nir_builder b;
 +   nir_variable *u_tex; /* uniform sampler */
 +   nir_variable *v_position; /* vec4, varying fragment position */
 +   nir_variable *v_tex_position; /* vec4, varying texture coordinate */
 +   nir_variable *f_color; /* vec4, fragment output color */
 +   nir_ssa_def *accum; /* vec4, accumulation of sample values */
 +
 +   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 +   b.shader->info.name = ralloc_asprintf(b.shader,
 +                                         "meta_resolve_fs_samples%02d",
 +                                         num_samples);
 +
 +   u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
 +                                   "u_tex");
 +   u_tex->data.descriptor_set = 0;
 +   u_tex->data.binding = 0;
 +
 +   v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 +                                     "v_position");
 +   v_position->data.location = VARYING_SLOT_POS;
 +   v_position->data.origin_upper_left = true;
 +
 +   v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 +                                    "v_tex_position");
 +   v_tex_position->data.location = VARYING_SLOT_VAR0;
 +
 +   f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
 +                                 "f_color");
 +   f_color->data.location = FRAG_RESULT_DATA0;
 +
 +   accum = nir_imm_vec4(&b, 0, 0, 0, 0);
 +
 +   nir_ssa_def *tex_position_ivec =
 +      nir_f2i(&b, nir_load_var(&b, v_tex_position));
 +
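 +   /* Resolve by averaging: fetch each sample with txf_ms, accumulate the
 +    * results, then divide by the sample count (a simple box filter).
 +    */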
 +   for (uint32_t i = 0; i < num_samples; ++i) {
 +      nir_tex_instr *tex;
 +
 +      tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
 +      tex->texture = nir_deref_var_create(tex, u_tex);
 +      tex->sampler = nir_deref_var_create(tex, u_tex);
 +      tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
 +      tex->op = nir_texop_txf_ms;
 +      tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
 +      tex->src[0].src_type = nir_tex_src_coord;
 +      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
 +      tex->src[1].src_type = nir_tex_src_ms_index;
 +      tex->dest_type = nir_type_float;
 +      tex->is_array = false;
 +      tex->coord_components = 3;
++      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
 +      nir_builder_instr_insert(&b, &tex->instr);
 +
 +      accum = nir_fadd(&b, accum, &tex->dest.ssa);
 +   }
 +
 +   accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
 +   nir_store_var(&b, f_color, accum, /*writemask*/ 4);
 +
 +   return b.shader;
 +}
 +
 +static VkResult
 +create_pass(struct anv_device *device)
 +{
 +   VkResult result;
 +   VkDevice device_h = anv_device_to_handle(device);
 +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
 +
 +   result = anv_CreateRenderPass(device_h,
 +      &(VkRenderPassCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 +         .attachmentCount = 1,
 +         .pAttachments = &(VkAttachmentDescription) {
 +            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
 +            .samples = 1,
 +            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 +            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
 +            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
 +            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
 +         },
 +         .subpassCount = 1,
 +         .pSubpasses = &(VkSubpassDescription) {
 +            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
 +            .inputAttachmentCount = 0,
 +            .colorAttachmentCount = 1,
 +            .pColorAttachments = &(VkAttachmentReference) {
 +               .attachment = 0,
 +               .layout = VK_IMAGE_LAYOUT_GENERAL,
 +            },
 +            .pResolveAttachments = NULL,
 +            .pDepthStencilAttachment = &(VkAttachmentReference) {
 +               .attachment = VK_ATTACHMENT_UNUSED,
 +            },
 +            .preserveAttachmentCount = 0,
 +            .pPreserveAttachments = NULL,
 +         },
 +         .dependencyCount = 0,
 +      },
 +      alloc,
 +      &device->meta_state.resolve.pass);
 +
 +   return result;
 +}
 +
 +static VkResult
 +create_pipeline(struct anv_device *device,
 +                uint32_t num_samples,
 +                VkShaderModule vs_module_h)
 +{
 +   VkResult result;
 +   VkDevice device_h = anv_device_to_handle(device);
 +
 +   struct anv_shader_module fs_module = {
 +      .nir = build_nir_fs(num_samples),
 +   };
 +
 +   if (!fs_module.nir) {
 +      /* XXX: Need more accurate error */
 +      result = VK_ERROR_OUT_OF_HOST_MEMORY;
 +      goto cleanup;
 +   }
 +
 +   result = anv_graphics_pipeline_create(device_h,
 +      VK_NULL_HANDLE,
 +      &(VkGraphicsPipelineCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 +         .stageCount = 2,
 +         .pStages = (VkPipelineShaderStageCreateInfo[]) {
 +            {
 +               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 +               .stage = VK_SHADER_STAGE_VERTEX_BIT,
 +               .module = vs_module_h,
 +               .pName = "main",
 +            },
 +            {
 +               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 +               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
 +               .module = anv_shader_module_to_handle(&fs_module),
 +               .pName = "main",
 +            },
 +         },
 +         .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
 +            .vertexBindingDescriptionCount = 1,
 +            .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
 +               {
 +                  .binding = 0,
 +                  .stride = sizeof(struct vertex_attrs),
 +                  .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
 +               },
 +            },
 +            .vertexAttributeDescriptionCount = 3,
 +            .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
 +               {
 +                  /* VUE Header */
 +                  .location = 0,
 +                  .binding = 0,
 +                  .format = VK_FORMAT_R32G32B32A32_UINT,
 +                  .offset = offsetof(struct vertex_attrs, vue_header),
 +               },
 +               {
 +                  /* Position */
 +                  .location = 1,
 +                  .binding = 0,
 +                  .format = VK_FORMAT_R32G32_SFLOAT,
 +                  .offset = offsetof(struct vertex_attrs, position),
 +               },
 +               {
 +                  /* Texture Coordinate */
 +                  .location = 2,
 +                  .binding = 0,
 +                  .format = VK_FORMAT_R32G32_SFLOAT,
 +                  .offset = offsetof(struct vertex_attrs, tex_position),
 +               },
 +            },
 +         },
 +         .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
 +            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
 +            .primitiveRestartEnable = false,
 +         },
 +         .pViewportState = &(VkPipelineViewportStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
 +            .viewportCount = 1,
 +            .scissorCount = 1,
 +         },
 +         .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
 +            .depthClampEnable = false,
 +            .rasterizerDiscardEnable = false,
 +            .polygonMode = VK_POLYGON_MODE_FILL,
 +            .cullMode = VK_CULL_MODE_NONE,
 +            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
 +         },
 +         .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
 +            .rasterizationSamples = 1,
 +            .sampleShadingEnable = false,
 +            .pSampleMask = (VkSampleMask[]) { 0x1 },
 +            .alphaToCoverageEnable = false,
 +            .alphaToOneEnable = false,
 +         },
 +         .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
 +            .logicOpEnable = false,
 +            .attachmentCount = 1,
 +            .pAttachments = (VkPipelineColorBlendAttachmentState []) {
 +               {
 +                  .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
 +                                    VK_COLOR_COMPONENT_G_BIT |
 +                                    VK_COLOR_COMPONENT_B_BIT |
 +                                    VK_COLOR_COMPONENT_A_BIT,
 +               },
 +            },
 +         },
 +         .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
 +            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 +            .dynamicStateCount = 2,
 +            .pDynamicStates = (VkDynamicState[]) {
 +               VK_DYNAMIC_STATE_VIEWPORT,
 +               VK_DYNAMIC_STATE_SCISSOR,
 +            },
 +         },
 +         .layout = device->meta_state.resolve.pipeline_layout,
 +         .renderPass = device->meta_state.resolve.pass,
 +         .subpass = 0,
 +      },
 +      &(struct anv_graphics_pipeline_create_info) {
 +         .color_attachment_count = -1,
 +         .use_repclear = false,
 +         .disable_viewport = true,
 +         .disable_scissor = true,
 +         .disable_vs = true,
 +         .use_rectlist = true
 +      },
 +      &device->meta_state.alloc,
 +      get_pipeline_h(device, num_samples));
 +   if (result != VK_SUCCESS)
 +      goto cleanup;
 +
 +   goto cleanup;
 +
 +cleanup:
 +   ralloc_free(fs_module.nir);
 +   return result;
 +}
 +
 +void
 +anv_device_finish_meta_resolve_state(struct anv_device *device)
 +{
 +   struct anv_meta_state *state = &device->meta_state;
 +   VkDevice device_h = anv_device_to_handle(device);
 +   VkRenderPass pass_h = device->meta_state.resolve.pass;
 +   VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
 +   VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
 +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
 +
 +   if (pass_h)
 +      ANV_CALL(DestroyRenderPass)(device_h, pass_h,
 +                                  &device->meta_state.alloc);
 +
 +   if (pipeline_layout_h)
 +      ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);
 +
 +   if (ds_layout_h)
 +      ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);
 +
 +   for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
 +      VkPipeline pipeline_h = state->resolve.pipelines[i];
 +
 +      if (pipeline_h) {
 +         ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
 +      }
 +   }
 +}
 +
 +VkResult
 +anv_device_init_meta_resolve_state(struct anv_device *device)
 +{
 +   VkResult res = VK_SUCCESS;
 +   VkDevice device_h = anv_device_to_handle(device);
 +   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
 +
 +   const isl_sample_count_mask_t sample_count_mask =
 +      isl_device_get_sample_counts(&device->isl_dev);
 +
 +   zero(device->meta_state.resolve);
 +
 +   struct anv_shader_module vs_module = { .nir = build_nir_vs() };
 +   if (!vs_module.nir) {
 +      /* XXX: Need more accurate error */
 +      res = VK_ERROR_OUT_OF_HOST_MEMORY;
 +      goto fail;
 +   }
 +
 +   VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);
 +
 +   res = anv_CreateDescriptorSetLayout(device_h,
 +      &(VkDescriptorSetLayoutCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 +         .bindingCount = 1,
 +         .pBindings = (VkDescriptorSetLayoutBinding[]) {
 +            {
 +               .binding = 0,
 +               .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 +               .descriptorCount = 1,
 +               .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
 +            },
 +         },
 +      },
 +      alloc,
 +      &device->meta_state.resolve.ds_layout);
 +   if (res != VK_SUCCESS)
 +      goto fail;
 +
 +   res = anv_CreatePipelineLayout(device_h,
 +      &(VkPipelineLayoutCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 +         .setLayoutCount = 1,
 +         .pSetLayouts = (VkDescriptorSetLayout[]) {
 +            device->meta_state.resolve.ds_layout,
 +         },
 +      },
 +      alloc,
 +      &device->meta_state.resolve.pipeline_layout);
 +   if (res != VK_SUCCESS)
 +      goto fail;
 +
 +   res = create_pass(device);
 +   if (res != VK_SUCCESS)
 +      goto fail;
 +
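 +   /* Build one resolve pipeline per multisample count supported by the
 +    * device (2, 4, 8, 16), using the same index mapping as get_pipeline_h().
 +    */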
 +   for (uint32_t i = 0;
 +        i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {
 +
 +      uint32_t sample_count = 1 << (1 + i);
 +      if (!(sample_count_mask & sample_count))
 +         continue;
 +
 +      res = create_pipeline(device, sample_count, vs_module_h);
 +      if (res != VK_SUCCESS)
 +         goto fail;
 +   }
 +
 +   goto cleanup;
 +
 +fail:
 +   anv_device_finish_meta_resolve_state(device);
 +
 +cleanup:
 +   ralloc_free(vs_module.nir);
 +
 +   return res;
 +}
 +
 +static void
 +emit_resolve(struct anv_cmd_buffer *cmd_buffer,
 +             struct anv_image_view *src_iview,
 +             const VkOffset2D *src_offset,
 +             struct anv_image_view *dest_iview,
 +             const VkOffset2D *dest_offset,
 +             const VkExtent2D *resolve_extent)
 +{
 +   struct anv_device *device = cmd_buffer->device;
 +   VkDevice device_h = anv_device_to_handle(device);
 +   VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
 +   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
 +   const struct anv_image *src_image = src_iview->image;
 +
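 +   /* Three vertices spanning the destination rectangle for 3DPRIM_RECTLIST:
 +    * (x + w, y + h), (x, y + h) and (x, y), with tex_position following the
 +    * same pattern in source coordinates.
 +    */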
 +   const struct vertex_attrs vertex_data[3] = {
 +      {
 +         .vue_header = {0},
 +         .position = {
 +            dest_offset->x + resolve_extent->width,
 +            dest_offset->y + resolve_extent->height,
 +         },
 +         .tex_position = {
 +            src_offset->x + resolve_extent->width,
 +            src_offset->y + resolve_extent->height,
 +         },
 +      },
 +      {
 +         .vue_header = {0},
 +         .position = {
 +            dest_offset->x,
 +            dest_offset->y + resolve_extent->height,
 +         },
 +         .tex_position = {
 +            src_offset->x,
 +            src_offset->y + resolve_extent->height,
 +         },
 +      },
 +      {
 +         .vue_header = {0},
 +         .position = {
 +            dest_offset->x,
 +            dest_offset->y,
 +         },
 +         .tex_position = {
 +            src_offset->x,
 +            src_offset->y,
 +         },
 +      },
 +   };
 +
 +   struct anv_state vertex_mem =
 +      anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
 +                                  sizeof(vertex_data), 16);
 +
 +   struct anv_buffer vertex_buffer = {
 +      .device = device,
 +      .size = sizeof(vertex_data),
 +      .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
 +      .offset = vertex_mem.offset,
 +   };
 +
 +   VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);
 +
 +   anv_CmdBindVertexBuffers(cmd_buffer_h,
 +      /*firstBinding*/ 0,
 +      /*bindingCount*/ 1,
 +      (VkBuffer[]) { vertex_buffer_h },
 +      (VkDeviceSize[]) { 0 });
 +
 +   VkSampler sampler_h;
 +   ANV_CALL(CreateSampler)(device_h,
 +      &(VkSamplerCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 +         .magFilter = VK_FILTER_NEAREST,
 +         .minFilter = VK_FILTER_NEAREST,
 +         .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
 +         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
 +         .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
 +         .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
 +         .mipLodBias = 0.0,
 +         .anisotropyEnable = false,
 +         .compareEnable = false,
 +         .minLod = 0.0,
 +         .maxLod = 0.0,
 +         .unnormalizedCoordinates = false,
 +      },
 +      &cmd_buffer->pool->alloc,
 +      &sampler_h);
 +
 +   VkDescriptorPool desc_pool;
 +   anv_CreateDescriptorPool(anv_device_to_handle(device),
 +      &(const VkDescriptorPoolCreateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
 +         .pNext = NULL,
 +         .flags = 0,
 +         .maxSets = 1,
 +         .poolSizeCount = 1,
 +         .pPoolSizes = (VkDescriptorPoolSize[]) {
 +            {
 +               .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 +               .descriptorCount = 1
 +            },
 +         }
 +      }, &cmd_buffer->pool->alloc, &desc_pool);
 +
 +   VkDescriptorSet desc_set_h;
 +   anv_AllocateDescriptorSets(device_h,
 +      &(VkDescriptorSetAllocateInfo) {
 +         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
 +         .descriptorPool = desc_pool,
 +         .descriptorSetCount = 1,
 +         .pSetLayouts = (VkDescriptorSetLayout[]) {
 +            device->meta_state.resolve.ds_layout,
 +         },
 +      },
 +      &desc_set_h);
 +
 +   anv_UpdateDescriptorSets(device_h,
 +      /*writeCount*/ 1,
 +      (VkWriteDescriptorSet[]) {
 +         {
 +            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 +            .dstSet = desc_set_h,
 +            .dstBinding = 0,
 +            .dstArrayElement = 0,
 +            .descriptorCount = 1,
 +            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 +            .pImageInfo = (VkDescriptorImageInfo[]) {
 +               {
 +                  .sampler = sampler_h,
 +                  .imageView = anv_image_view_to_handle(src_iview),
 +                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 +               },
 +            },
 +         },
 +      },
 +      /*copyCount*/ 0,
 +      /*copies */ NULL);
 +
 +   ANV_CALL(CmdSetViewport)(cmd_buffer_h,
 +      /*firstViewport*/ 0,
 +      /*viewportCount*/ 1,
 +      (VkViewport[]) {
 +         {
 +            .x = 0,
 +            .y = 0,
 +            .width = fb->width,
 +            .height = fb->height,
 +            .minDepth = 0.0,
 +            .maxDepth = 1.0,
 +         },
 +      });
 +
 +   ANV_CALL(CmdSetScissor)(cmd_buffer_h,
 +      /*firstScissor*/ 0,
 +      /*scissorCount*/ 1,
 +      (VkRect2D[]) {
 +         {
 +            .offset = { 0, 0 },
 +            .extent = (VkExtent2D) { fb->width, fb->height },
 +         },
 +      });
 +
 +   VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
 +   ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);
 +
 +   if (cmd_buffer->state.pipeline != pipeline) {
 +      anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
 +                          pipeline_h);
 +   }
 +
 +   anv_CmdBindDescriptorSets(cmd_buffer_h,
 +      VK_PIPELINE_BIND_POINT_GRAPHICS,
 +      device->meta_state.resolve.pipeline_layout,
 +      /*firstSet*/ 0,
 +      /* setCount */ 1,
 +      (VkDescriptorSet[]) {
 +         desc_set_h,
 +      },
 +      /*copyCount*/ 0,
 +      /*copies */ NULL);
 +
 +   ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
 +
 +   /* The draw call has consumed all of the objects created above, so we may
 +    * safely destroy them now.
 +    */
 +   anv_DestroyDescriptorPool(anv_device_to_handle(device),
 +                             desc_pool, &cmd_buffer->pool->alloc);
 +   anv_DestroySampler(device_h, sampler_h,
 +                      &cmd_buffer->pool->alloc);
 +}
 +
 +void anv_CmdResolveImage(
 +    VkCommandBuffer                             cmd_buffer_h,
 +    VkImage                                     src_image_h,
 +    VkImageLayout                               src_image_layout,
 +    VkImage                                     dest_image_h,
 +    VkImageLayout                               dest_image_layout,
 +    uint32_t                                    region_count,
 +    const VkImageResolve*                       regions)
 +{
 +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
 +   ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
 +   ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
 +   struct anv_device *device = cmd_buffer->device;
 +   struct anv_meta_saved_state state;
 +   VkDevice device_h = anv_device_to_handle(device);
 +
 +   meta_resolve_save(&state, cmd_buffer);
 +
 +   assert(src_image->samples > 1);
 +   assert(dest_image->samples == 1);
 +
 +   if (src_image->samples >= 16) {
 +      /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
 +       * glBlitFramebuffer workaround for samples >= 16.
 +       */
 +      anv_finishme("vkCmdResolveImage: need interpolation workaround when "
 +                   "samples >= 16");
 +   }
 +
 +   if (src_image->array_size > 1)
 +      anv_finishme("vkCmdResolveImage: multisample array images");
 +
 +   for (uint32_t r = 0; r < region_count; ++r) {
 +      const VkImageResolve *region = &regions[r];
 +
 +      /* From the Vulkan 1.0 spec:
 +       *
 +       *    - The aspectMask member of srcSubresource and dstSubresource must
 +       *      only contain VK_IMAGE_ASPECT_COLOR_BIT
 +       *
 +       *    - The layerCount member of srcSubresource and dstSubresource must
 +       *      match
 +       */
 +      assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
 +      assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
 +      assert(region->srcSubresource.layerCount ==
 +             region->dstSubresource.layerCount);
 +
 +      const uint32_t src_base_layer =
 +         anv_meta_get_iview_layer(src_image, &region->srcSubresource,
 +                                  &region->srcOffset);
 +
 +      const uint32_t dest_base_layer =
 +         anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
 +                                  &region->dstOffset);
 +
 +      /**
 +       * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
 +       *
 +       *    extent is the size in texels of the source image to resolve in width,
 +       *    height and depth. 1D images use only x and width. 2D images use x, y,
 +       *    width and height. 3D images use x, y, z, width, height and depth.
 +       *
 +       *    srcOffset and dstOffset select the initial x, y, and z offsets in
 +       *    texels of the sub-regions of the source and destination image data.
 +       *    extent is the size in texels of the source image to resolve in width,
 +       *    height and depth. 1D images use only x and width. 2D images use x, y,
 +       *    width and height. 3D images use x, y, z, width, height and depth.
 +       */
 +      const struct VkExtent3D extent =
 +         anv_sanitize_image_extent(src_image->type, region->extent);
 +      const struct VkOffset3D srcOffset =
 +         anv_sanitize_image_offset(src_image->type, region->srcOffset);
 +      const struct VkOffset3D dstOffset =
 +         anv_sanitize_image_offset(dest_image->type, region->dstOffset);
 +
 +
 +      for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
 +           ++layer) {
 +
 +         struct anv_image_view src_iview;
 +         anv_image_view_init(&src_iview, cmd_buffer->device,
 +            &(VkImageViewCreateInfo) {
 +               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 +               .image = src_image_h,
 +               .viewType = anv_meta_get_view_type(src_image),
 +               .format = src_image->format->vk_format,
 +               .subresourceRange = {
 +                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 +                  .baseMipLevel = region->srcSubresource.mipLevel,
 +                  .levelCount = 1,
 +                  .baseArrayLayer = src_base_layer + layer,
 +                  .layerCount = 1,
 +               },
 +            },
 +            cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT);
 +
 +         struct anv_image_view dest_iview;
 +         anv_image_view_init(&dest_iview, cmd_buffer->device,
 +            &(VkImageViewCreateInfo) {
 +               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 +               .image = dest_image_h,
 +               .viewType = anv_meta_get_view_type(dest_image),
 +               .format = dest_image->format->vk_format,
 +               .subresourceRange = {
 +                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 +                  .baseMipLevel = region->dstSubresource.mipLevel,
 +                  .levelCount = 1,
 +                  .baseArrayLayer = dest_base_layer + layer,
 +                  .layerCount = 1,
 +               },
 +            },
 +            cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
 +
 +         VkFramebuffer fb_h;
 +         anv_CreateFramebuffer(device_h,
 +            &(VkFramebufferCreateInfo) {
 +               .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
 +               .attachmentCount = 1,
 +               .pAttachments = (VkImageView[]) {
 +                  anv_image_view_to_handle(&dest_iview),
 +               },
 +               .width = anv_minify(dest_image->extent.width,
 +                                   region->dstSubresource.mipLevel),
 +               .height = anv_minify(dest_image->extent.height,
 +                                    region->dstSubresource.mipLevel),
 +               .layers = 1
 +            },
 +            &cmd_buffer->pool->alloc,
 +            &fb_h);
 +
 +         ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
 +            &(VkRenderPassBeginInfo) {
 +               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
 +               .renderPass = device->meta_state.resolve.pass,
 +               .framebuffer = fb_h,
 +               .renderArea = {
 +                  .offset = {
 +                     dstOffset.x,
 +                     dstOffset.y,
 +                  },
 +                  .extent = {
 +                     extent.width,
 +                     extent.height,
 +                  }
 +               },
 +               .clearValueCount = 0,
 +               .pClearValues = NULL,
 +            },
 +            VK_SUBPASS_CONTENTS_INLINE);
 +
 +         emit_resolve(cmd_buffer,
 +             &src_iview,
 +             &(VkOffset2D) {
 +               .x = srcOffset.x,
 +               .y = srcOffset.y,
 +             },
 +             &dest_iview,
 +             &(VkOffset2D) {
 +               .x = dstOffset.x,
 +               .y = dstOffset.y,
 +             },
 +             &(VkExtent2D) {
 +               .width = extent.width,
 +               .height = extent.height,
 +             });
 +
 +         ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);
 +
 +         anv_DestroyFramebuffer(device_h, fb_h,
 +                                &cmd_buffer->pool->alloc);
 +      }
 +   }
 +
 +   meta_resolve_restore(&state, cmd_buffer);
 +}
 +
 +/**
 + * Emit any needed resolves for the current subpass.
 + */
 +void
 +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
 +{
 +   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
 +   struct anv_subpass *subpass = cmd_buffer->state.subpass;
 +   struct anv_meta_saved_state saved_state;
 +
 +   /* FINISHME(perf): Skip clears for resolve attachments.
 +    *
 +    * From the Vulkan 1.0 spec:
 +    *
 +    *    If the first use of an attachment in a render pass is as a resolve
 +    *    attachment, then the loadOp is effectively ignored as the resolve is
 +    *    guaranteed to overwrite all pixels in the render area.
 +    */
 +
 +   if (!subpass->has_resolve)
 +      return;
 +
 +   meta_resolve_save(&saved_state, cmd_buffer);
 +
 +   for (uint32_t i = 0; i < subpass->color_count; ++i) {
 +      uint32_t src_att = subpass->color_attachments[i];
 +      uint32_t dest_att = subpass->resolve_attachments[i];
 +
 +      if (dest_att == VK_ATTACHMENT_UNUSED)
 +         continue;
 +
 +      struct anv_image_view *src_iview = fb->attachments[src_att];
 +      struct anv_image_view *dest_iview = fb->attachments[dest_att];
 +
 +      struct anv_subpass resolve_subpass = {
 +         .color_count = 1,
 +         .color_attachments = (uint32_t[]) { dest_att },
 +         .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
 +      };
 +
 +      anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
 +
 +      /* Subpass resolves must respect the render area. We can ignore the
 +       * render area here because vkCmdBeginRenderPass set the render area
 +       * with 3DSTATE_DRAWING_RECTANGLE.
 +       *
 +       * XXX(chadv): Does the hardware really respect
 +       * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
 +       */
 +      emit_resolve(cmd_buffer,
 +          src_iview,
 +          &(VkOffset2D) { 0, 0 },
 +          dest_iview,
 +          &(VkOffset2D) { 0, 0 },
 +          &(VkExtent2D) { fb->width, fb->height });
 +   }
 +
 +   cmd_buffer->state.subpass = subpass;
 +   meta_resolve_restore(&saved_state, cmd_buffer);
 +}
index 46bc5d2,0000000..234855c
mode 100644,000000..100644
--- /dev/null
@@@ -1,171 -1,0 +1,172 @@@
-       nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include "anv_nir.h"
 +#include "nir/nir_builder.h"
 +
 +struct apply_dynamic_offsets_state {
 +   nir_shader *shader;
 +   nir_builder builder;
 +
 +   const struct anv_pipeline_layout *layout;
 +
 +   uint32_t indices_start;
 +};
 +
 +static bool
 +apply_dynamic_offsets_block(nir_block *block, void *void_state)
 +{
 +   struct apply_dynamic_offsets_state *state = void_state;
 +   struct anv_descriptor_set_layout *set_layout;
 +
 +   nir_builder *b = &state->builder;
 +
 +   nir_foreach_instr_safe(block, instr) {
 +      if (instr->type != nir_instr_type_intrinsic)
 +         continue;
 +
 +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 +
 +      unsigned block_idx_src;
 +      switch (intrin->intrinsic) {
 +      case nir_intrinsic_load_ubo:
 +      case nir_intrinsic_load_ssbo:
 +         block_idx_src = 0;
 +         break;
 +      case nir_intrinsic_store_ssbo:
 +         block_idx_src = 1;
 +         break;
 +      default:
 +         continue; /* the loop */
 +      }
 +
 +      nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
 +      assert(res_instr->type == nir_instr_type_intrinsic);
 +      nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
 +      assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
 +
 +      unsigned set = res_intrin->const_index[0];
 +      unsigned binding = res_intrin->const_index[1];
 +
 +      set_layout = state->layout->set[set].layout;
 +      if (set_layout->binding[binding].dynamic_offset_index < 0)
 +         continue;
 +
 +      b->cursor = nir_before_instr(&intrin->instr);
 +
 +      /* First, we need to generate the uniform load for the buffer offset */
 +      uint32_t index = state->layout->set[set].dynamic_offset_start +
 +                       set_layout->binding[binding].dynamic_offset_index;
 +
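 +      /* Each dynamic buffer contributes an (offset, range) pair to the push
 +       * constants, 8 bytes apart.  Load both 32-bit values here: the offset
 +       * gets added to the access offset and the range (component 1) feeds
 +       * the bounds check below.
 +       */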
 +      nir_intrinsic_instr *offset_load =
 +         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
 +      offset_load->num_components = 2;
 +      offset_load->const_index[0] = state->indices_start + index * 8;
 +      offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
 +                                                     nir_imm_int(b, 8)));
 +
-                            intrin->num_components, NULL);
++      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, NULL);
 +      nir_builder_instr_insert(b, &offset_load->instr);
 +
 +      nir_src *offset_src = nir_get_io_offset_src(intrin);
 +      nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
 +                                         &offset_load->dest.ssa);
 +
 +      /* In order to avoid out-of-bounds access, we predicate */
 +      nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
 +                                  offset_src->ssa);
 +      nir_if *if_stmt = nir_if_create(b->shader);
 +      if_stmt->condition = nir_src_for_ssa(pred);
 +      nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
 +
 +      nir_instr_remove(&intrin->instr);
 +      *offset_src = nir_src_for_ssa(new_offset);
 +      nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);
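 +      /* The load/store now sits at the end of the then-block, so it only
 +       * executes when the access is in bounds.
 +       */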
 +
 +      if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
 +         /* It's a load, we need a phi node */
 +         nir_phi_instr *phi = nir_phi_instr_create(b->shader);
 +         nir_ssa_dest_init(&phi->instr, &phi->dest,
-             (nir_const_value) { .u = { 0, 0, 0, 0 } });
++                           intrin->num_components,
++                           intrin->dest.ssa.bit_size, NULL);
 +
 +         nir_phi_src *src1 = ralloc(phi, nir_phi_src);
 +         struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
 +         src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
 +         src1->src = nir_src_for_ssa(&intrin->dest.ssa);
 +         exec_list_push_tail(&phi->srcs, &src1->node);
 +
 +         b->cursor = nir_after_cf_list(&if_stmt->else_list);
 +         nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
++            (nir_const_value) { .u32 = { 0, 0, 0, 0 } });
 +
 +         nir_phi_src *src2 = ralloc(phi, nir_phi_src);
 +         struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
 +         src2->pred = exec_node_data(nir_block, enode, cf_node.node);
 +         src2->src = nir_src_for_ssa(zero);
 +         exec_list_push_tail(&phi->srcs, &src2->node);
 +
 +         assert(intrin->dest.is_ssa);
 +         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
 +                                  nir_src_for_ssa(&phi->dest.ssa));
 +
 +         nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
 +      }
 +   }
 +
 +   return true;
 +}
 +
 +void
 +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
 +                              nir_shader *shader,
 +                              struct brw_stage_prog_data *prog_data)
 +{
 +   struct apply_dynamic_offsets_state state = {
 +      .shader = shader,
 +      .layout = pipeline->layout,
 +      .indices_start = shader->num_uniforms,
 +   };
 +
 +   if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
 +      return;
 +
 +   nir_foreach_function(shader, function) {
 +      if (function->impl) {
 +         nir_builder_init(&state.builder, function->impl);
 +         nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
 +         nir_metadata_preserve(function->impl, nir_metadata_block_index |
 +                                               nir_metadata_dominance);
 +      }
 +   }
 +
 +   struct anv_push_constants *null_data = NULL;
 +   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
 +      prog_data->param[i * 2 + shader->num_uniforms / 4] =
 +         (const union gl_constant_value *)&null_data->dynamic[i].offset;
 +      prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] =
 +         (const union gl_constant_value *)&null_data->dynamic[i].range;
 +   }
 +
 +   shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
 +}
index eeb9b97,0000000..ef81afa
mode 100644,000000..100644
--- /dev/null
@@@ -1,387 -1,0 +1,387 @@@
-       block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include "anv_nir.h"
 +#include "program/prog_parameter.h"
 +#include "nir/nir_builder.h"
 +
 +struct apply_pipeline_layout_state {
 +   nir_shader *shader;
 +   nir_builder builder;
 +
 +   struct {
 +      BITSET_WORD *used;
 +      uint8_t *surface_offsets;
 +      uint8_t *sampler_offsets;
 +      uint8_t *image_offsets;
 +   } set[MAX_SETS];
 +};
 +
 +static void
 +add_binding(struct apply_pipeline_layout_state *state,
 +            uint32_t set, uint32_t binding)
 +{
 +   BITSET_SET(state->set[set].used, binding);
 +}
 +
 +static void
 +add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
 +{
 +   add_binding(state, var->data.descriptor_set, var->data.binding);
 +}
 +
 +static bool
 +get_used_bindings_block(nir_block *block, void *void_state)
 +{
 +   struct apply_pipeline_layout_state *state = void_state;
 +
 +   nir_foreach_instr_safe(block, instr) {
 +      switch (instr->type) {
 +      case nir_instr_type_intrinsic: {
 +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 +         switch (intrin->intrinsic) {
 +         case nir_intrinsic_vulkan_resource_index:
 +            add_binding(state, nir_intrinsic_desc_set(intrin),
 +                        nir_intrinsic_binding(intrin));
 +            break;
 +
 +         case nir_intrinsic_image_load:
 +         case nir_intrinsic_image_store:
 +         case nir_intrinsic_image_atomic_add:
 +         case nir_intrinsic_image_atomic_min:
 +         case nir_intrinsic_image_atomic_max:
 +         case nir_intrinsic_image_atomic_and:
 +         case nir_intrinsic_image_atomic_or:
 +         case nir_intrinsic_image_atomic_xor:
 +         case nir_intrinsic_image_atomic_exchange:
 +         case nir_intrinsic_image_atomic_comp_swap:
 +         case nir_intrinsic_image_size:
 +         case nir_intrinsic_image_samples:
 +            add_var_binding(state, intrin->variables[0]->var);
 +            break;
 +
 +         default:
 +            break;
 +         }
 +         break;
 +      }
 +      case nir_instr_type_tex: {
 +         nir_tex_instr *tex = nir_instr_as_tex(instr);
 +         assert(tex->texture);
 +         add_var_binding(state, tex->texture->var);
 +         if (tex->sampler)
 +            add_var_binding(state, tex->sampler->var);
 +         break;
 +      }
 +      default:
 +         continue;
 +      }
 +   }
 +
 +   return true;
 +}
 +
 +static void
 +lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
 +                          struct apply_pipeline_layout_state *state)
 +{
 +   nir_builder *b = &state->builder;
 +
 +   b->cursor = nir_before_instr(&intrin->instr);
 +
 +   uint32_t set = nir_intrinsic_desc_set(intrin);
 +   uint32_t binding = nir_intrinsic_binding(intrin);
 +
 +   uint32_t surface_index = state->set[set].surface_offsets[binding];
 +
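 +   /* Turn the (set, binding) pair into an absolute binding table index: fold
 +    * the array index into an immediate when it is constant, otherwise add it
 +    * to the base surface index at run time.
 +    */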
 +   nir_const_value *const_block_idx =
 +      nir_src_as_const_value(intrin->src[0]);
 +
 +   nir_ssa_def *block_index;
 +   if (const_block_idx) {
++      block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
 +   } else {
 +      block_index = nir_iadd(b, nir_imm_int(b, surface_index),
 +                             nir_ssa_for_src(b, intrin->src[0], 1));
 +   }
 +
 +   assert(intrin->dest.is_ssa);
 +   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
 +   nir_instr_remove(&intrin->instr);
 +}
 +
 +static void
 +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
 +                unsigned *const_index, nir_tex_src_type src_type,
 +                struct apply_pipeline_layout_state *state)
 +{
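 +   /* If the texture or sampler variable is an array, fold the constant part
 +    * of the array index into *const_index and, for indirect indexing, append
 +    * the indirect SSA value as an extra texture/sampler offset source on the
 +    * instruction.
 +    */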
 +   if (deref->deref.child) {
 +      assert(deref->deref.child->deref_type == nir_deref_type_array);
 +      nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
 +
 +      *const_index += deref_array->base_offset;
 +
 +      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
 +         nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
 +                                               tex->num_srcs + 1);
 +
 +         for (unsigned i = 0; i < tex->num_srcs; i++) {
 +            new_srcs[i].src_type = tex->src[i].src_type;
 +            nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
 +         }
 +
 +         ralloc_free(tex->src);
 +         tex->src = new_srcs;
 +
 +         /* Now we can move the indirect index over to being a first-class
 +          * texture source.
 +          */
 +         tex->src[tex->num_srcs].src_type = src_type;
 +         tex->num_srcs++;
 +         assert(deref_array->indirect.is_ssa);
 +         nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
 +                               deref_array->indirect);
 +      }
 +   }
 +}
 +
 +static void
 +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
 +{
 +   if (deref->deref.child == NULL)
 +      return;
 +
 +   nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
 +
 +   if (deref_array->deref_array_type != nir_deref_array_type_indirect)
 +      return;
 +
 +   nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
 +}
 +
 +static void
 +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
 +{
 +   /* No one should have come by and lowered it already */
 +   assert(tex->texture);
 +
 +   unsigned set = tex->texture->var->data.descriptor_set;
 +   unsigned binding = tex->texture->var->data.binding;
 +   tex->texture_index = state->set[set].surface_offsets[binding];
 +   lower_tex_deref(tex, tex->texture, &tex->texture_index,
 +                   nir_tex_src_texture_offset, state);
 +
 +   if (tex->sampler) {
 +      unsigned set = tex->sampler->var->data.descriptor_set;
 +      unsigned binding = tex->sampler->var->data.binding;
 +      tex->sampler_index = state->set[set].sampler_offsets[binding];
 +      lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
 +                      nir_tex_src_sampler_offset, state);
 +   }
 +
 +   /* The backend only ever uses this to mark used surfaces.  We don't care
 +    * about that little optimization, so it just needs to be non-zero.
 +    */
 +   tex->texture_array_size = 1;
 +
 +   cleanup_tex_deref(tex, tex->texture);
 +   if (tex->sampler)
 +      cleanup_tex_deref(tex, tex->sampler);
 +   tex->texture = NULL;
 +   tex->sampler = NULL;
 +}
 +
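 +/* Second pass: rewrite resource-index intrinsics and texture instructions to
 + * use the binding-table offsets stored in the pass state.
 + */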
 +static bool
 +apply_pipeline_layout_block(nir_block *block, void *void_state)
 +{
 +   struct apply_pipeline_layout_state *state = void_state;
 +
 +   nir_foreach_instr_safe(block, instr) {
 +      switch (instr->type) {
 +      case nir_instr_type_intrinsic: {
 +         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 +         if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
 +            lower_res_index_intrinsic(intrin, state);
 +         }
 +         break;
 +      }
 +      case nir_instr_type_tex:
 +         lower_tex(nir_instr_as_tex(instr), state);
 +         break;
 +      default:
 +         continue;
 +      }
 +   }
 +
 +   return true;
 +}
 +
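 +/* Point the next four uniform params at the n given constant values, padding
 + * the remaining components of the vec4 slot with zero.
 + */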
 +static void
 +setup_vec4_uniform_value(const union gl_constant_value **params,
 +                         const union gl_constant_value *values,
 +                         unsigned n)
 +{
 +   static const gl_constant_value zero = { 0 };
 +
 +   for (unsigned i = 0; i < n; ++i)
 +      params[i] = &values[i];
 +
 +   for (unsigned i = n; i < 4; ++i)
 +      params[i] = &zero;
 +}
 +
 +void
 +anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
 +                              nir_shader *shader,
 +                              struct brw_stage_prog_data *prog_data,
 +                              struct anv_pipeline_bind_map *map)
 +{
 +   struct anv_pipeline_layout *layout = pipeline->layout;
 +
 +   struct apply_pipeline_layout_state state = {
 +      .shader = shader,
 +   };
 +
 +   void *mem_ctx = ralloc_context(NULL);
 +
 +   for (unsigned s = 0; s < layout->num_sets; s++) {
 +      const unsigned count = layout->set[s].layout->binding_count;
 +      const unsigned words = BITSET_WORDS(count);
 +      state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
 +      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
 +      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
 +      state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
 +   }
 +
 +   nir_foreach_function(shader, function) {
 +      if (function->impl)
 +         nir_foreach_block(function->impl, get_used_bindings_block, &state);
 +   }
 +
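 +   /* Tally how many binding-table surfaces, samplers and images this stage
 +    * uses before assigning offsets below.
 +    */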
 +   for (uint32_t set = 0; set < layout->num_sets; set++) {
 +      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 +
 +      BITSET_WORD b, _tmp;
 +      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
 +                         set_layout->binding_count) {
 +         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
 +            map->surface_count += set_layout->binding[b].array_size;
 +         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
 +            map->sampler_count += set_layout->binding[b].array_size;
 +         if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
 +            map->image_count += set_layout->binding[b].array_size;
 +      }
 +   }
 +
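 +   /* Assign contiguous binding-table offsets to each used (set, binding)
 +    * pair and record the reverse mapping from surface/sampler slot back to
 +    * its descriptor.
 +    */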
 +   unsigned surface = 0;
 +   unsigned sampler = 0;
 +   unsigned image = 0;
 +   for (uint32_t set = 0; set < layout->num_sets; set++) {
 +      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 +
 +      BITSET_WORD b, _tmp;
 +      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
 +                         set_layout->binding_count) {
 +         unsigned array_size = set_layout->binding[b].array_size;
 +         unsigned set_offset = set_layout->binding[b].descriptor_index;
 +
 +         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
 +            state.set[set].surface_offsets[b] = surface;
 +            for (unsigned i = 0; i < array_size; i++) {
 +               map->surface_to_descriptor[surface + i].set = set;
 +               map->surface_to_descriptor[surface + i].offset = set_offset + i;
 +            }
 +            surface += array_size;
 +         }
 +
 +         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
 +            state.set[set].sampler_offsets[b] = sampler;
 +            for (unsigned i = 0; i < array_size; i++) {
 +               map->sampler_to_descriptor[sampler + i].set = set;
 +               map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
 +            }
 +            sampler += array_size;
 +         }
 +
 +         if (set_layout->binding[b].stage[shader->stage].image_index >= 0) {
 +            state.set[set].image_offsets[b] = image;
 +            image += array_size;
 +         }
 +      }
 +   }
 +
 +   nir_foreach_function(shader, function) {
 +      if (function->impl) {
 +         nir_builder_init(&state.builder, function->impl);
 +         nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
 +         nir_metadata_preserve(function->impl, nir_metadata_block_index |
 +                                               nir_metadata_dominance);
 +      }
 +   }
 +
 +   if (map->image_count > 0) {
 +      assert(map->image_count <= MAX_IMAGES);
 +      nir_foreach_variable(var, &shader->uniforms) {
 +         if (glsl_type_is_image(var->type) ||
 +             (glsl_type_is_array(var->type) &&
 +              glsl_type_is_image(glsl_get_array_element(var->type)))) {
 +            /* Images are represented as uniform push constants and the actual
 +             * information required for reading/writing to/from the image is
 +             * stored in the uniform.
 +             */
 +            unsigned set = var->data.descriptor_set;
 +            unsigned binding = var->data.binding;
 +            unsigned image_index = state.set[set].image_offsets[binding];
 +
 +            var->data.driver_location = shader->num_uniforms +
 +                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
 +         }
 +      }
 +
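 +      /* The param entries below are member addresses taken relative to a
 +       * NULL anv_push_constants pointer, i.e. offsets into the push-constant
 +       * block; null_data is only used for address arithmetic and is never
 +       * actually read through.
 +       */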
 +      struct anv_push_constants *null_data = NULL;
 +      const gl_constant_value **param =
 +         prog_data->param + (shader->num_uniforms / 4);
 +      const struct brw_image_param *image_param = null_data->images;
 +      for (uint32_t i = 0; i < map->image_count; i++) {
 +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
 +            (const union gl_constant_value *)&image_param->surface_idx, 1);
 +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
 +            (const union gl_constant_value *)image_param->offset, 2);
 +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
 +            (const union gl_constant_value *)image_param->size, 3);
 +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
 +            (const union gl_constant_value *)image_param->stride, 4);
 +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
 +            (const union gl_constant_value *)image_param->tiling, 3);
 +         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
 +            (const union gl_constant_value *)image_param->swizzling, 2);
 +
 +         param += BRW_IMAGE_PARAM_SIZE;
 +         image_param++;
 +      }
 +
 +      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
 +   }
 +
 +   ralloc_free(mem_ctx);
 +}
Simple merge
@@@ -2411,82 -2328,6 +2411,82 @@@ fs_visitor::nir_emit_cs_intrinsic(cons
        nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
        break;
  
-          offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
 +   case nir_intrinsic_load_shared: {
 +      assert(devinfo->gen >= 7);
 +
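 +      /* Shared local memory is accessed through a fixed binding-table
 +       * index.
 +       */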
 +      fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
 +
 +      /* Get the offset to read from */
 +      fs_reg offset_reg;
 +      nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
 +      if (const_offset) {
-             offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
++         offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
 +      } else {
 +         offset_reg = vgrf(glsl_type::uint_type);
 +         bld.ADD(offset_reg,
 +                 retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
 +                 brw_imm_ud(instr->const_index[0]));
 +      }
 +
 +      /* Read the vector */
 +      fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
 +                                             1 /* dims */,
 +                                             instr->num_components,
 +                                             BRW_PREDICATE_NONE);
 +      read_result.type = dest.type;
 +      for (int i = 0; i < instr->num_components; i++)
 +         bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
 +
 +      break;
 +   }
 +
 +   case nir_intrinsic_store_shared: {
 +      assert(devinfo->gen >= 7);
 +
 +      /* Block index */
 +      fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
 +
 +      /* Value */
 +      fs_reg val_reg = get_nir_src(instr->src[0]);
 +
 +      /* Writemask */
 +      unsigned writemask = instr->const_index[1];
 +
 +      /* Combine groups of consecutive enabled channels in one write
 +       * message. We use ffs to find the first enabled channel and then ffs on
 +       * the bit-inverse, down-shifted writemask to determine the length of
 +       * the block of enabled bits.
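 +       *
 +       * For example, writemask 0b1101 produces two messages: one covering
 +       * component 0 (length 1) and one covering components 2..3 (length 2).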
 +       */
 +      while (writemask) {
 +         unsigned first_component = ffs(writemask) - 1;
 +         unsigned length = ffs(~(writemask >> first_component)) - 1;
 +         fs_reg offset_reg;
 +
 +         nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
 +         if (const_offset) {
++            offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
 +                                    4 * first_component);
 +         } else {
 +            offset_reg = vgrf(glsl_type::uint_type);
 +            bld.ADD(offset_reg,
 +                    retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
 +                    brw_imm_ud(instr->const_index[0] + 4 * first_component));
 +         }
 +
 +         emit_untyped_write(bld, surf_index, offset_reg,
 +                            offset(val_reg, bld, first_component),
 +                            1 /* dims */, length,
 +                            BRW_PREDICATE_NONE);
 +
 +         /* Clear the bits in the writemask that we just wrote, then try
 +          * again to see if more channels are left.
 +          */
 +         writemask &= (15 << (first_component + length));
 +      }
 +
 +      break;
 +   }
 +
     default:
        nir_emit_intrinsic(bld, instr);
        break;
@@@ -2695,30 -2536,14 +2695,30 @@@ fs_visitor::nir_emit_intrinsic(const fs
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
        if (const_offset) {
           /* Offsets are in bytes but they should always be multiples of 4 */
-          assert(const_offset->u[0] % 4 == 0);
-          src.reg_offset = const_offset->u[0] / 4;
+          assert(const_offset->u32[0] % 4 == 0);
+          src.reg_offset = const_offset->u32[0] / 4;
 +
 +         for (unsigned j = 0; j < instr->num_components; j++) {
 +            bld.MOV(offset(dest, bld, j), offset(src, bld, j));
 +         }
        } else {
 -         src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
 -      }
 +         fs_reg indirect = retype(get_nir_src(instr->src[0]),
 +                                  BRW_REGISTER_TYPE_UD);
  
 -      for (unsigned j = 0; j < instr->num_components; j++) {
 -         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
 +         /* We need to pass a size to the MOV_INDIRECT but we don't want it to
 +          * go past the end of the uniform.  In order to keep the n'th
 +          * component from running past, we subtract off the size of all but
 +          * one component of the vector.
 +          */
 +         assert(instr->const_index[1] >= instr->num_components * 4);
 +         unsigned read_size = instr->const_index[1] -
 +                              (instr->num_components - 1) * 4;
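 +         /* For example, a vec4 (num_components == 4) backed by
 +          * const_index[1] == 32 bytes gives read_size == 32 - 3 * 4 == 20.
 +          */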
 +
 +         for (unsigned j = 0; j < instr->num_components; j++) {
 +            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
 +                     offset(dest, bld, j), offset(src, bld, j),
 +                     indirect, brw_imm_ud(read_size));
 +         }
        }
        break;
     }
Simple merge
@@@ -697,16 -706,14 +697,16 @@@ vec4_visitor::nir_emit_intrinsic(nir_in
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
        if (const_offset) {
           /* Offsets are in bytes but they should always be multiples of 16 */
-          assert(const_offset->u[0] % 16 == 0);
-          src.reg_offset = const_offset->u[0] / 16;
+          assert(const_offset->u32[0] % 16 == 0);
+          src.reg_offset = const_offset->u32[0] / 16;
 +
 +         emit(MOV(dest, src));
        } else {
 -         src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
 -         src.reladdr = new(mem_ctx) src_reg(tmp);
 -      }
 +         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
  
 -      emit(MOV(dest, src));
 +         emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
 +              indirect, brw_imm_ud(instr->const_index[1]));
 +      }
        break;
     }
  
Simple merge