b->cursor = nir_after_instr(&load_var->instr);
- nir_intrinsic_op sysval_op =
- nir_intrinsic_from_system_value(var->data.location);
- nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0);
+ nir_ssa_def *sysval;
+ switch (var->data.location) {
+ case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
+ /* From the GLSL man page for gl_GlobalInvocationID:
+ *
+ * "The value of gl_GlobalInvocationID is equal to
+ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
+ */
+
+ nir_const_value local_size;
- local_size.u[0] = b->shader->info.cs.local_size[0];
- local_size.u[1] = b->shader->info.cs.local_size[1];
- local_size.u[2] = b->shader->info.cs.local_size[2];
++ local_size.u32[0] = b->shader->info.cs.local_size[0];
++ local_size.u32[1] = b->shader->info.cs.local_size[1];
++ local_size.u32[2] = b->shader->info.cs.local_size[2];
+
+ nir_ssa_def *group_id =
+ nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
+ nir_ssa_def *local_id =
+ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
+
+ sysval = nir_iadd(b, nir_imul(b, group_id,
+ nir_build_imm(b, 3, local_size)),
+ local_id);
+ break;
+ }
+
+ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
+ /* From the GLSL man page for gl_LocalInvocationIndex:
+ *
+ * ?The value of gl_LocalInvocationIndex is equal to
+ * gl_LocalInvocationID.z * gl_WorkGroupSize.x *
+ * gl_WorkGroupSize.y + gl_LocalInvocationID.y *
+ * gl_WorkGroupSize.x + gl_LocalInvocationID.x"
+ */
+ nir_ssa_def *local_id =
+ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0);
+
+ unsigned stride_y = b->shader->info.cs.local_size[0];
+ unsigned stride_z = b->shader->info.cs.local_size[0] *
+ b->shader->info.cs.local_size[1];
+
+ sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2),
+ nir_imm_int(b, stride_z)),
+ nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1),
+ nir_imm_int(b, stride_y)),
+ nir_channel(b, local_id, 0)));
+ break;
+ }
+
+ case SYSTEM_VALUE_VERTEX_ID:
+ if (b->shader->options->vertex_id_zero_based) {
+ sysval = nir_iadd(b,
+ nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0),
+ nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0));
+ } else {
+ sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0);
+ }
+ break;
+
+ case SYSTEM_VALUE_INSTANCE_INDEX:
+ sysval = nir_iadd(b,
+ nir_load_system_value(b, nir_intrinsic_load_instance_id, 0),
+ nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
+ break;
+
+ default: {
+ nir_intrinsic_op sysval_op =
+ nir_intrinsic_from_system_value(var->data.location);
+ sysval = nir_load_system_value(b, sysval_op, 0);
+ break;
+ } /* default */
+ }
nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
nir_instr_remove(&load_var->instr);
# Unary floating-point rounding operations.
- unop("ftrunc", tfloat, "truncf(src0)")
- unop("fceil", tfloat, "ceilf(src0)")
- unop("ffloor", tfloat, "floorf(src0)")
- unop("ffract", tfloat, "src0 - floorf(src0)")
- unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+ unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
+ unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
+ unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
+ unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
+ unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
+unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
# Trigonometric operations.
(('imul', a, 1), a),
(('fmul', a, -1.0), ('fneg', a)),
(('imul', a, -1), ('ineg', a)),
- (('ffma', 0.0, a, b), b),
- (('ffma', a, 0.0, b), b),
- (('ffma', a, b, 0.0), ('fmul', a, b)),
+ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+ (('~ffma', 0.0, a, b), b),
+ (('~ffma', a, 0.0, b), b),
+ (('~ffma', a, b, 0.0), ('fmul', a, b)),
(('ffma', a, 1.0, b), ('fadd', a, b)),
(('ffma', 1.0, a, b), ('fadd', a, b)),
- (('flrp', a, b, 0.0), a),
- (('flrp', a, b, 1.0), b),
- (('flrp', a, a, b), a),
- (('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('~flrp', a, b, 0.0), a),
+ (('~flrp', a, b, 1.0), b),
+ (('~flrp', a, a, b), a),
+ (('~flrp', 0.0, a, b), ('fmul', a, b)),
+ (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
- (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
- (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
+ (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
+ (('~fadd', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
- (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
- (('inot', ('flt', a, b)), ('fge', a, b)),
- (('inot', ('fge', a, b)), ('flt', a, b)),
- (('inot', ('feq', a, b)), ('fne', a, b)),
- (('inot', ('fne', a, b)), ('feq', a, b)),
+ (('~inot', ('flt', a, b)), ('fge', a, b)),
+ (('~inot', ('fge', a, b)), ('flt', a, b)),
+ (('~inot', ('feq', a, b)), ('fne', a, b)),
+ (('~inot', ('fne', a, b)), ('feq', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
(('inot', ('ieq', a, b)), ('ine', a, b)),
--- /dev/null
- load->value.u[i] = constant->value.u[i];
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "vtn_private.h"
+#include "nir/nir_vla.h"
+#include "nir/nir_control_flow.h"
+#include "nir/nir_constant_expressions.h"
+
+static struct vtn_ssa_value *
+vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
+{
+ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+ val->type = type;
+
+ if (glsl_type_is_vector_or_scalar(type)) {
+ unsigned num_components = glsl_get_vector_elements(val->type);
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(b->shader, num_components);
+
+ nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr);
+ val->def = &undef->def;
+ } else {
+ unsigned elems = glsl_get_length(val->type);
+ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ if (glsl_type_is_matrix(type)) {
+ const struct glsl_type *elem_type =
+ glsl_vector_type(glsl_get_base_type(type),
+ glsl_get_vector_elements(type));
+
+ for (unsigned i = 0; i < elems; i++)
+ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
+ } else if (glsl_type_is_array(type)) {
+ const struct glsl_type *elem_type = glsl_get_array_element(type);
+ for (unsigned i = 0; i < elems; i++)
+ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
+ } else {
+ for (unsigned i = 0; i < elems; i++) {
+ const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
+ val->elems[i] = vtn_undef_ssa_value(b, elem_type);
+ }
+ }
+ }
+
+ return val;
+}
+
+static struct vtn_ssa_value *
+vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
+ const struct glsl_type *type)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);
+
+ if (entry)
+ return entry->data;
+
+ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+ val->type = type;
+
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ if (glsl_type_is_vector_or_scalar(type)) {
+ unsigned num_components = glsl_get_vector_elements(val->type);
+ nir_load_const_instr *load =
+ nir_load_const_instr_create(b->shader, num_components);
+
+ for (unsigned i = 0; i < num_components; i++)
- load->value.u[j] = constant->value.u[rows * i + j];
++ load->value.u32[i] = constant->value.u[i];
+
+ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
+ val->def = &load->def;
+ } else {
+ assert(glsl_type_is_matrix(type));
+ unsigned rows = glsl_get_vector_elements(val->type);
+ unsigned columns = glsl_get_matrix_columns(val->type);
+ val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
+
+ for (unsigned i = 0; i < columns; i++) {
+ struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
+ col_val->type = glsl_get_column_type(val->type);
+ nir_load_const_instr *load =
+ nir_load_const_instr_create(b->shader, rows);
+
+ for (unsigned j = 0; j < rows; j++)
- src[j].u[k] = c->value.u[k];
++ load->value.u32[j] = constant->value.u[rows * i + j];
+
+ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
+ col_val->def = &load->def;
+
+ val->elems[i] = col_val;
+ }
+ }
+ break;
+
+ case GLSL_TYPE_ARRAY: {
+ unsigned elems = glsl_get_length(val->type);
+ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ const struct glsl_type *elem_type = glsl_get_array_element(val->type);
+ for (unsigned i = 0; i < elems; i++)
+ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
+ elem_type);
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ unsigned elems = glsl_get_length(val->type);
+ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ for (unsigned i = 0; i < elems; i++) {
+ const struct glsl_type *elem_type =
+ glsl_get_struct_field(val->type, i);
+ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
+ elem_type);
+ }
+ break;
+ }
+
+ default:
+ unreachable("bad constant type");
+ }
+
+ return val;
+}
+
+struct vtn_ssa_value *
+vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
+{
+ struct vtn_value *val = vtn_untyped_value(b, value_id);
+ switch (val->value_type) {
+ case vtn_value_type_undef:
+ return vtn_undef_ssa_value(b, val->type->type);
+
+ case vtn_value_type_constant:
+ return vtn_const_ssa_value(b, val->constant, val->const_type);
+
+ case vtn_value_type_ssa:
+ return val->ssa;
+
+ case vtn_value_type_access_chain:
+ /* This is needed for function parameters */
+ return vtn_variable_load(b, val->access_chain);
+
+ default:
+ unreachable("Invalid type for an SSA value");
+ }
+}
+
+static char *
+vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
+ unsigned word_count, unsigned *words_used)
+{
+ char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words));
+ if (words_used) {
+ /* Ammount of space taken by the string (including the null) */
+ unsigned len = strlen(dup) + 1;
+ *words_used = DIV_ROUND_UP(len, sizeof(*words));
+ }
+ return dup;
+}
+
+const uint32_t *
+vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
+ const uint32_t *end, vtn_instruction_handler handler)
+{
+ b->file = NULL;
+ b->line = -1;
+ b->col = -1;
+
+ const uint32_t *w = start;
+ while (w < end) {
+ SpvOp opcode = w[0] & SpvOpCodeMask;
+ unsigned count = w[0] >> SpvWordCountShift;
+ assert(count >= 1 && w + count <= end);
+
+ switch (opcode) {
+ case SpvOpNop:
+ break; /* Do nothing */
+
+ case SpvOpLine:
+ b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
+ b->line = w[2];
+ b->col = w[3];
+ break;
+
+ case SpvOpNoLine:
+ b->file = NULL;
+ b->line = -1;
+ b->col = -1;
+ break;
+
+ default:
+ if (!handler(b, opcode, w, count))
+ return w;
+ break;
+ }
+
+ w += count;
+ }
+ assert(w == end);
+ return w;
+}
+
+static void
+vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ switch (opcode) {
+ case SpvOpExtInstImport: {
+ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
+ if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
+ val->ext_handler = vtn_handle_glsl450_instruction;
+ } else {
+ assert(!"Unsupported extension");
+ }
+ break;
+ }
+
+ case SpvOpExtInst: {
+ struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
+ bool handled = val->ext_handler(b, w[4], w, count);
+ (void)handled;
+ assert(handled);
+ break;
+ }
+
+ default:
+ unreachable("Unhandled opcode");
+ }
+}
+
+static void
+_foreach_decoration_helper(struct vtn_builder *b,
+ struct vtn_value *base_value,
+ int parent_member,
+ struct vtn_value *value,
+ vtn_decoration_foreach_cb cb, void *data)
+{
+ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
+ int member;
+ if (dec->scope == VTN_DEC_DECORATION) {
+ member = parent_member;
+ } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
+ assert(parent_member == -1);
+ member = dec->scope - VTN_DEC_STRUCT_MEMBER0;
+ } else {
+ /* Not a decoration */
+ continue;
+ }
+
+ if (dec->group) {
+ assert(dec->group->value_type == vtn_value_type_decoration_group);
+ _foreach_decoration_helper(b, base_value, member, dec->group,
+ cb, data);
+ } else {
+ cb(b, base_value, member, dec, data);
+ }
+ }
+}
+
+/** Iterates (recursively if needed) over all of the decorations on a value
+ *
+ * This function iterates over all of the decorations applied to a given
+ * value. If it encounters a decoration group, it recurses into the group
+ * and iterates over all of those decorations as well.
+ */
+void
+vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
+ vtn_decoration_foreach_cb cb, void *data)
+{
+ _foreach_decoration_helper(b, value, -1, value, cb, data);
+}
+
+void
+vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
+ vtn_execution_mode_foreach_cb cb, void *data)
+{
+ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
+ if (dec->scope != VTN_DEC_EXECUTION_MODE)
+ continue;
+
+ assert(dec->group == NULL);
+ cb(b, value, dec, data);
+ }
+}
+
+static void
+vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ const uint32_t *w_end = w + count;
+ const uint32_t target = w[1];
+ w += 2;
+
+ switch (opcode) {
+ case SpvOpDecorationGroup:
+ vtn_push_value(b, target, vtn_value_type_decoration_group);
+ break;
+
+ case SpvOpDecorate:
+ case SpvOpMemberDecorate:
+ case SpvOpExecutionMode: {
+ struct vtn_value *val = &b->values[target];
+
+ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
+ switch (opcode) {
+ case SpvOpDecorate:
+ dec->scope = VTN_DEC_DECORATION;
+ break;
+ case SpvOpMemberDecorate:
+ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
+ break;
+ case SpvOpExecutionMode:
+ dec->scope = VTN_DEC_EXECUTION_MODE;
+ break;
+ default:
+ unreachable("Invalid decoration opcode");
+ }
+ dec->decoration = *(w++);
+ dec->literals = w;
+
+ /* Link into the list */
+ dec->next = val->decoration;
+ val->decoration = dec;
+ break;
+ }
+
+ case SpvOpGroupMemberDecorate:
+ case SpvOpGroupDecorate: {
+ struct vtn_value *group =
+ vtn_value(b, target, vtn_value_type_decoration_group);
+
+ for (; w < w_end; w++) {
+ struct vtn_value *val = vtn_untyped_value(b, *w);
+ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
+
+ dec->group = group;
+ if (opcode == SpvOpGroupDecorate) {
+ dec->scope = VTN_DEC_DECORATION;
+ } else {
+ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
+ }
+
+ /* Link into the list */
+ dec->next = val->decoration;
+ val->decoration = dec;
+ }
+ break;
+ }
+
+ default:
+ unreachable("Unhandled opcode");
+ }
+}
+
+struct member_decoration_ctx {
+ unsigned num_fields;
+ struct glsl_struct_field *fields;
+ struct vtn_type *type;
+};
+
+/* does a shallow copy of a vtn_type */
+
+static struct vtn_type *
+vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
+{
+ struct vtn_type *dest = ralloc(b, struct vtn_type);
+ dest->type = src->type;
+ dest->is_builtin = src->is_builtin;
+ if (src->is_builtin)
+ dest->builtin = src->builtin;
+
+ if (!glsl_type_is_scalar(src->type)) {
+ switch (glsl_get_base_type(src->type)) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_ARRAY:
+ dest->row_major = src->row_major;
+ dest->stride = src->stride;
+ dest->array_element = src->array_element;
+ break;
+
+ case GLSL_TYPE_STRUCT: {
+ unsigned elems = glsl_get_length(src->type);
+
+ dest->members = ralloc_array(b, struct vtn_type *, elems);
+ memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *));
+
+ dest->offsets = ralloc_array(b, unsigned, elems);
+ memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned));
+ break;
+ }
+
+ default:
+ unreachable("unhandled type");
+ }
+ }
+
+ return dest;
+}
+
+static struct vtn_type *
+mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
+{
+ type->members[member] = vtn_type_copy(b, type->members[member]);
+ type = type->members[member];
+
+ /* We may have an array of matrices.... Oh, joy! */
+ while (glsl_type_is_array(type->type)) {
+ type->array_element = vtn_type_copy(b, type->array_element);
+ type = type->array_element;
+ }
+
+ assert(glsl_type_is_matrix(type->type));
+
+ return type;
+}
+
+static void
+struct_member_decoration_cb(struct vtn_builder *b,
+ struct vtn_value *val, int member,
+ const struct vtn_decoration *dec, void *void_ctx)
+{
+ struct member_decoration_ctx *ctx = void_ctx;
+
+ if (member < 0)
+ return;
+
+ assert(member < ctx->num_fields);
+
+ switch (dec->decoration) {
+ case SpvDecorationRelaxedPrecision:
+ break; /* FIXME: Do nothing with this for now. */
+ case SpvDecorationNoPerspective:
+ ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ break;
+ case SpvDecorationFlat:
+ ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT;
+ break;
+ case SpvDecorationCentroid:
+ ctx->fields[member].centroid = true;
+ break;
+ case SpvDecorationSample:
+ ctx->fields[member].sample = true;
+ break;
+ case SpvDecorationLocation:
+ ctx->fields[member].location = dec->literals[0];
+ break;
+ case SpvDecorationBuiltIn:
+ ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
+ ctx->type->members[member]->is_builtin = true;
+ ctx->type->members[member]->builtin = dec->literals[0];
+ ctx->type->builtin_block = true;
+ break;
+ case SpvDecorationOffset:
+ ctx->type->offsets[member] = dec->literals[0];
+ break;
+ case SpvDecorationMatrixStride:
+ mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0];
+ break;
+ case SpvDecorationColMajor:
+ break; /* Nothing to do here. Column-major is the default. */
+ case SpvDecorationRowMajor:
+ mutable_matrix_member(b, ctx->type, member)->row_major = true;
+ break;
+ default:
+ unreachable("Unhandled member decoration");
+ }
+}
+
+static void
+type_decoration_cb(struct vtn_builder *b,
+ struct vtn_value *val, int member,
+ const struct vtn_decoration *dec, void *ctx)
+{
+ struct vtn_type *type = val->type;
+
+ if (member != -1)
+ return;
+
+ switch (dec->decoration) {
+ case SpvDecorationArrayStride:
+ type->stride = dec->literals[0];
+ break;
+ case SpvDecorationBlock:
+ type->block = true;
+ break;
+ case SpvDecorationBufferBlock:
+ type->buffer_block = true;
+ break;
+ case SpvDecorationGLSLShared:
+ case SpvDecorationGLSLPacked:
+ /* Ignore these, since we get explicit offsets anyways */
+ break;
+
+ case SpvDecorationStream:
+ assert(dec->literals[0] == 0);
+ break;
+
+ default:
+ unreachable("Unhandled type decoration");
+ }
+}
+
+static unsigned
+translate_image_format(SpvImageFormat format)
+{
+ switch (format) {
+ case SpvImageFormatUnknown: return 0; /* GL_NONE */
+ case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */
+ case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */
+ case SpvImageFormatR32f: return 0x822E; /* GL_R32F */
+ case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */
+ case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */
+ case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */
+ case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */
+ case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */
+ case SpvImageFormatR16f: return 0x822D; /* GL_R16F */
+ case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */
+ case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */
+ case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */
+ case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */
+ case SpvImageFormatR16: return 0x822A; /* GL_R16 */
+ case SpvImageFormatR8: return 0x8229; /* GL_R8 */
+ case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */
+ case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */
+ case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */
+ case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */
+ case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */
+ case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */
+ case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */
+ case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */
+ case SpvImageFormatR32i: return 0x8235; /* GL_R32I */
+ case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */
+ case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */
+ case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */
+ case SpvImageFormatR16i: return 0x8233; /* GL_R16I */
+ case SpvImageFormatR8i: return 0x8231; /* GL_R8I */
+ case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */
+ case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */
+ case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */
+ case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */
+ case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */
+ case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */
+ case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */
+ case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */
+ case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */
+ case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */
+ default:
+ assert(!"Invalid image format");
+ return 0;
+ }
+}
+
+static void
+vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type);
+
+ val->type = rzalloc(b, struct vtn_type);
+ val->type->is_builtin = false;
+ val->type->val = val;
+
+ switch (opcode) {
+ case SpvOpTypeVoid:
+ val->type->type = glsl_void_type();
+ break;
+ case SpvOpTypeBool:
+ val->type->type = glsl_bool_type();
+ break;
+ case SpvOpTypeInt: {
+ const bool signedness = w[3];
+ val->type->type = (signedness ? glsl_int_type() : glsl_uint_type());
+ break;
+ }
+ case SpvOpTypeFloat:
+ val->type->type = glsl_float_type();
+ break;
+
+ case SpvOpTypeVector: {
+ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
+ unsigned elems = w[3];
+
+ assert(glsl_type_is_scalar(base->type));
+ val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
+
+ /* Vectors implicitly have sizeof(base_type) stride. For now, this
+ * is always 4 bytes. This will have to change if we want to start
+ * supporting doubles or half-floats.
+ */
+ val->type->stride = 4;
+ val->type->array_element = base;
+ break;
+ }
+
+ case SpvOpTypeMatrix: {
+ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type;
+ unsigned columns = w[3];
+
+ assert(glsl_type_is_vector(base->type));
+ val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
+ glsl_get_vector_elements(base->type),
+ columns);
+ assert(!glsl_type_is_error(val->type->type));
+ val->type->array_element = base;
+ val->type->row_major = false;
+ val->type->stride = 0;
+ break;
+ }
+
+ case SpvOpTypeRuntimeArray:
+ case SpvOpTypeArray: {
+ struct vtn_type *array_element =
+ vtn_value(b, w[2], vtn_value_type_type)->type;
+
+ unsigned length;
+ if (opcode == SpvOpTypeRuntimeArray) {
+ /* A length of 0 is used to denote unsized arrays */
+ length = 0;
+ } else {
+ length =
+ vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0];
+ }
+
+ val->type->type = glsl_array_type(array_element->type, length);
+ val->type->array_element = array_element;
+ val->type->stride = 0;
+ break;
+ }
+
+ case SpvOpTypeStruct: {
+ unsigned num_fields = count - 2;
+ val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
+ val->type->offsets = ralloc_array(b, unsigned, num_fields);
+
+ NIR_VLA(struct glsl_struct_field, fields, count);
+ for (unsigned i = 0; i < num_fields; i++) {
+ val->type->members[i] =
+ vtn_value(b, w[i + 2], vtn_value_type_type)->type;
+ fields[i] = (struct glsl_struct_field) {
+ .type = val->type->members[i]->type,
+ .name = ralloc_asprintf(b, "field%d", i),
+ .location = -1,
+ };
+ }
+
+ struct member_decoration_ctx ctx = {
+ .num_fields = num_fields,
+ .fields = fields,
+ .type = val->type
+ };
+
+ vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);
+
+ const char *name = val->name ? val->name : "struct";
+
+ val->type->type = glsl_struct_type(fields, num_fields, name);
+ break;
+ }
+
+ case SpvOpTypeFunction: {
+ const struct glsl_type *return_type =
+ vtn_value(b, w[2], vtn_value_type_type)->type->type;
+ NIR_VLA(struct glsl_function_param, params, count - 3);
+ for (unsigned i = 0; i < count - 3; i++) {
+ params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type;
+
+ /* FIXME: */
+ params[i].in = true;
+ params[i].out = true;
+ }
+ val->type->type = glsl_function_type(return_type, params, count - 3);
+ break;
+ }
+
+ case SpvOpTypePointer:
+ /* FIXME: For now, we'll just do the really lame thing and return
+ * the same type. The validator should ensure that the proper number
+ * of dereferences happen
+ */
+ val->type = vtn_value(b, w[3], vtn_value_type_type)->type;
+ break;
+
+ case SpvOpTypeImage: {
+ const struct glsl_type *sampled_type =
+ vtn_value(b, w[2], vtn_value_type_type)->type->type;
+
+ assert(glsl_type_is_vector_or_scalar(sampled_type));
+
+ enum glsl_sampler_dim dim;
+ switch ((SpvDim)w[3]) {
+ case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break;
+ case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break;
+ case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break;
+ case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break;
+ case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break;
+ case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break;
+ default:
+ unreachable("Invalid SPIR-V Sampler dimension");
+ }
+
+ bool is_shadow = w[4];
+ bool is_array = w[5];
+ bool multisampled = w[6];
+ unsigned sampled = w[7];
+ SpvImageFormat format = w[8];
+
+ if (count > 9)
+ val->type->access_qualifier = w[9];
+ else
+ val->type->access_qualifier = SpvAccessQualifierReadWrite;
+
+ if (multisampled) {
+ assert(dim == GLSL_SAMPLER_DIM_2D);
+ dim = GLSL_SAMPLER_DIM_MS;
+ }
+
+ val->type->image_format = translate_image_format(format);
+
+ if (sampled == 1) {
+ val->type->type = glsl_sampler_type(dim, is_shadow, is_array,
+ glsl_get_base_type(sampled_type));
+ } else if (sampled == 2) {
+ assert(format);
+ assert(!is_shadow);
+ val->type->type = glsl_image_type(dim, is_array,
+ glsl_get_base_type(sampled_type));
+ } else {
+ assert(!"We need to know if the image will be sampled");
+ }
+ break;
+ }
+
+ case SpvOpTypeSampledImage:
+ val->type = vtn_value(b, w[2], vtn_value_type_type)->type;
+ break;
+
+ case SpvOpTypeSampler:
+ /* The actual sampler type here doesn't really matter. It gets
+ * thrown away the moment you combine it with an image. What really
+ * matters is that it's a sampler type as opposed to an integer type
+ * so the backend knows what to do.
+ */
+ val->type->type = glsl_bare_sampler_type();
+ break;
+
+ case SpvOpTypeOpaque:
+ case SpvOpTypeEvent:
+ case SpvOpTypeDeviceEvent:
+ case SpvOpTypeReserveId:
+ case SpvOpTypeQueue:
+ case SpvOpTypePipe:
+ default:
+ unreachable("Unhandled opcode");
+ }
+
+ vtn_foreach_decoration(b, val, type_decoration_cb, NULL);
+}
+
+static nir_constant *
+vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type)
+{
+ nir_constant *c = rzalloc(b, nir_constant);
+
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ /* Nothing to do here. It's already initialized to zero */
+ break;
+
+ case GLSL_TYPE_ARRAY:
+ assert(glsl_get_length(type) > 0);
+ c->num_elements = glsl_get_length(type);
+ c->elements = ralloc_array(b, nir_constant *, c->num_elements);
+
+ c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type));
+ for (unsigned i = 1; i < c->num_elements; i++)
+ c->elements[i] = c->elements[0];
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ c->num_elements = glsl_get_length(type);
+ c->elements = ralloc_array(b, nir_constant *, c->num_elements);
+
+ for (unsigned i = 0; i < c->num_elements; i++) {
+ c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i));
+ }
+ break;
+
+ default:
+ unreachable("Invalid type for null constant");
+ }
+
+ return c;
+}
+
+static void
+spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v,
+ int member, const struct vtn_decoration *dec,
+ void *data)
+{
+ assert(member == -1);
+ if (dec->decoration != SpvDecorationSpecId)
+ return;
+
+ uint32_t *const_value = data;
+
+ for (unsigned i = 0; i < b->num_specializations; i++) {
+ if (b->specializations[i].id == dec->literals[0]) {
+ *const_value = b->specializations[i].data;
+ return;
+ }
+ }
+}
+
+static uint32_t
+get_specialization(struct vtn_builder *b, struct vtn_value *val,
+ uint32_t const_value)
+{
+ vtn_foreach_decoration(b, val, spec_constant_deocoration_cb, &const_value);
+ return const_value;
+}
+
+static void
+vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant);
+ val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ val->constant = rzalloc(b, nir_constant);
+ switch (opcode) {
+ case SpvOpConstantTrue:
+ assert(val->const_type == glsl_bool_type());
+ val->constant->value.u[0] = NIR_TRUE;
+ break;
+ case SpvOpConstantFalse:
+ assert(val->const_type == glsl_bool_type());
+ val->constant->value.u[0] = NIR_FALSE;
+ break;
+
+ case SpvOpSpecConstantTrue:
+ case SpvOpSpecConstantFalse: {
+ assert(val->const_type == glsl_bool_type());
+ uint32_t int_val =
+ get_specialization(b, val, (opcode == SpvOpSpecConstantTrue));
+ val->constant->value.u[0] = int_val ? NIR_TRUE : NIR_FALSE;
+ break;
+ }
+
+ case SpvOpConstant:
+ assert(glsl_type_is_scalar(val->const_type));
+ val->constant->value.u[0] = w[3];
+ break;
+ case SpvOpSpecConstant:
+ assert(glsl_type_is_scalar(val->const_type));
+ val->constant->value.u[0] = get_specialization(b, val, w[3]);
+ break;
+ case SpvOpSpecConstantComposite:
+ case SpvOpConstantComposite: {
+ unsigned elem_count = count - 3;
+ nir_constant **elems = ralloc_array(b, nir_constant *, elem_count);
+ for (unsigned i = 0; i < elem_count; i++)
+ elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant;
+
+ switch (glsl_get_base_type(val->const_type)) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ if (glsl_type_is_matrix(val->const_type)) {
+ unsigned rows = glsl_get_vector_elements(val->const_type);
+ assert(glsl_get_matrix_columns(val->const_type) == elem_count);
+ for (unsigned i = 0; i < elem_count; i++)
+ for (unsigned j = 0; j < rows; j++)
+ val->constant->value.u[rows * i + j] = elems[i]->value.u[j];
+ } else {
+ assert(glsl_type_is_vector(val->const_type));
+ assert(glsl_get_vector_elements(val->const_type) == elem_count);
+ for (unsigned i = 0; i < elem_count; i++)
+ val->constant->value.u[i] = elems[i]->value.u[0];
+ }
+ ralloc_free(elems);
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_ARRAY:
+ ralloc_steal(val->constant, elems);
+ val->constant->num_elements = elem_count;
+ val->constant->elements = elems;
+ break;
+
+ default:
+ unreachable("Unsupported type for constants");
+ }
+ break;
+ }
+
+ case SpvOpSpecConstantOp: {
+ SpvOp opcode = get_specialization(b, val, w[3]);
+ switch (opcode) {
+ case SpvOpVectorShuffle: {
+ struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
+ struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
+ unsigned len0 = glsl_get_vector_elements(v0->const_type);
+ unsigned len1 = glsl_get_vector_elements(v1->const_type);
+
+ uint32_t u[8];
+ for (unsigned i = 0; i < len0; i++)
+ u[i] = v0->constant->value.u[i];
+ for (unsigned i = 0; i < len1; i++)
+ u[len0 + i] = v1->constant->value.u[i];
+
+ for (unsigned i = 0; i < count - 6; i++) {
+ uint32_t comp = w[i + 6];
+ if (comp == (uint32_t)-1) {
+ val->constant->value.u[i] = 0xdeadbeef;
+ } else {
+ val->constant->value.u[i] = u[comp];
+ }
+ }
+ return;
+ }
+
+ case SpvOpCompositeExtract:
+ case SpvOpCompositeInsert: {
+ struct vtn_value *comp;
+ unsigned deref_start;
+ struct nir_constant **c;
+ if (opcode == SpvOpCompositeExtract) {
+ comp = vtn_value(b, w[4], vtn_value_type_constant);
+ deref_start = 5;
+ c = &comp->constant;
+ } else {
+ comp = vtn_value(b, w[5], vtn_value_type_constant);
+ deref_start = 6;
+ val->constant = nir_constant_clone(comp->constant,
+ (nir_variable *)b);
+ c = &val->constant;
+ }
+
+ int elem = -1;
+ const struct glsl_type *type = comp->const_type;
+ for (unsigned i = deref_start; i < count; i++) {
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ /* If we hit this granularity, we're picking off an element */
+ if (elem < 0)
+ elem = 0;
+
+ if (glsl_type_is_matrix(type)) {
+ elem += w[i] * glsl_get_vector_elements(type);
+ type = glsl_get_column_type(type);
+ } else {
+ assert(glsl_type_is_vector(type));
+ elem += w[i];
+ type = glsl_scalar_type(glsl_get_base_type(type));
+ }
+ continue;
+
+ case GLSL_TYPE_ARRAY:
+ c = &(*c)->elements[w[i]];
+ type = glsl_get_array_element(type);
+ continue;
+
+ case GLSL_TYPE_STRUCT:
+ c = &(*c)->elements[w[i]];
+ type = glsl_get_struct_field(type, w[i]);
+ continue;
+
+ default:
+ unreachable("Invalid constant type");
+ }
+ }
+
+ if (opcode == SpvOpCompositeExtract) {
+ if (elem == -1) {
+ val->constant = *c;
+ } else {
+ unsigned num_components = glsl_get_vector_elements(type);
+ for (unsigned i = 0; i < num_components; i++)
+ val->constant->value.u[i] = (*c)->value.u[elem + i];
+ }
+ } else {
+ struct vtn_value *insert =
+ vtn_value(b, w[4], vtn_value_type_constant);
+ assert(insert->const_type == type);
+ if (elem == -1) {
+ *c = insert->constant;
+ } else {
+ unsigned num_components = glsl_get_vector_elements(type);
+ for (unsigned i = 0; i < num_components; i++)
+ (*c)->value.u[elem + i] = insert->constant->value.u[i];
+ }
+ }
+ return;
+ }
+
+ default: {
+ bool swap;
+ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
+
+ unsigned num_components = glsl_get_vector_elements(val->const_type);
++ unsigned bit_size =
++ glsl_get_bit_size(glsl_get_base_type(val->const_type));
+
+ nir_const_value src[3];
+ assert(count <= 7);
+ for (unsigned i = 0; i < count - 4; i++) {
+ nir_constant *c =
+ vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
+
+ unsigned j = swap ? 1 - i : i;
++ assert(bit_size == 32);
+ for (unsigned k = 0; k < num_components; k++)
- nir_const_value res = nir_eval_const_opcode(op, num_components, src);
++ src[j].u32[k] = c->value.u[k];
+ }
+
- val->constant->value.u[k] = res.u[k];
++ nir_const_value res = nir_eval_const_opcode(op, num_components,
++ bit_size, src);
+
+ for (unsigned k = 0; k < num_components; k++)
- nir_tex_instr_dest_size(instr), NULL);
++ val->constant->value.u[k] = res.u32[k];
+
+ return;
+ } /* default */
+ }
+ }
+
+ case SpvOpConstantNull:
+ val->constant = vtn_null_constant(b, val->const_type);
+ break;
+
+ case SpvOpConstantSampler:
+ assert(!"OpConstantSampler requires Kernel Capability");
+ break;
+
+ default:
+ unreachable("Unhandled opcode");
+ }
+}
+
+static void
+vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ struct nir_function *callee =
+ vtn_value(b, w[3], vtn_value_type_function)->func->impl->function;
+
+ nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
+ for (unsigned i = 0; i < call->num_params; i++) {
+ unsigned arg_id = w[4 + i];
+ struct vtn_value *arg = vtn_untyped_value(b, arg_id);
+ if (arg->value_type == vtn_value_type_access_chain) {
+ nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain);
+ call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref));
+ } else {
+ struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
+
+ /* Make a temporary to store the argument in */
+ nir_variable *tmp =
+ nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp");
+ call->params[i] = nir_deref_var_create(call, tmp);
+
+ vtn_local_store(b, arg_ssa, call->params[i]);
+ }
+ }
+
+ nir_variable *out_tmp = NULL;
+ if (!glsl_type_is_void(callee->return_type)) {
+ out_tmp = nir_local_variable_create(b->impl, callee->return_type,
+ "out_tmp");
+ call->return_deref = nir_deref_var_create(call, out_tmp);
+ }
+
+ nir_builder_instr_insert(&b->nb, &call->instr);
+
+ if (glsl_type_is_void(callee->return_type)) {
+ vtn_push_value(b, w[2], vtn_value_type_undef);
+ } else {
+ struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ retval->ssa = vtn_local_load(b, call->return_deref);
+ }
+}
+
+struct vtn_ssa_value *
+vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
+{
+ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
+ val->type = type;
+
+ if (!glsl_type_is_vector_or_scalar(type)) {
+ unsigned elems = glsl_get_length(type);
+ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ for (unsigned i = 0; i < elems; i++) {
+ const struct glsl_type *child_type;
+
+ switch (glsl_get_base_type(type)) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ child_type = glsl_get_column_type(type);
+ break;
+ case GLSL_TYPE_ARRAY:
+ child_type = glsl_get_array_element(type);
+ break;
+ case GLSL_TYPE_STRUCT:
+ child_type = glsl_get_struct_field(type, i);
+ break;
+ default:
+ unreachable("unkown base type");
+ }
+
+ val->elems[i] = vtn_create_ssa_value(b, child_type);
+ }
+ }
+
+ return val;
+}
+
+static nir_tex_src
+vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type)
+{
+ nir_tex_src src;
+ src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def);
+ src.src_type = type;
+ return src;
+}
+
+static void
+vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ if (opcode == SpvOpSampledImage) {
+ struct vtn_value *val =
+ vtn_push_value(b, w[2], vtn_value_type_sampled_image);
+ val->sampled_image = ralloc(b, struct vtn_sampled_image);
+ val->sampled_image->image =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ val->sampled_image->sampler =
+ vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain;
+ return;
+ } else if (opcode == SpvOpImage) {
+ struct vtn_value *val =
+ vtn_push_value(b, w[2], vtn_value_type_access_chain);
+ struct vtn_value *src_val = vtn_untyped_value(b, w[3]);
+ if (src_val->value_type == vtn_value_type_sampled_image) {
+ val->access_chain = src_val->sampled_image->image;
+ } else {
+ assert(src_val->value_type == vtn_value_type_access_chain);
+ val->access_chain = src_val->access_chain;
+ }
+ return;
+ }
+
+ struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+
+ struct vtn_sampled_image sampled;
+ struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
+ if (sampled_val->value_type == vtn_value_type_sampled_image) {
+ sampled = *sampled_val->sampled_image;
+ } else {
+ assert(sampled_val->value_type == vtn_value_type_access_chain);
+ sampled.image = NULL;
+ sampled.sampler = sampled_val->access_chain;
+ }
+
+ const struct glsl_type *image_type;
+ if (sampled.image) {
+ image_type = sampled.image->var->var->interface_type;
+ } else {
+ image_type = sampled.sampler->var->var->interface_type;
+ }
+
+ nir_tex_src srcs[8]; /* 8 should be enough */
+ nir_tex_src *p = srcs;
+
+ unsigned idx = 4;
+
+ bool has_coord = false;
+ switch (opcode) {
+ case SpvOpImageSampleImplicitLod:
+ case SpvOpImageSampleExplicitLod:
+ case SpvOpImageSampleDrefImplicitLod:
+ case SpvOpImageSampleDrefExplicitLod:
+ case SpvOpImageSampleProjImplicitLod:
+ case SpvOpImageSampleProjExplicitLod:
+ case SpvOpImageSampleProjDrefImplicitLod:
+ case SpvOpImageSampleProjDrefExplicitLod:
+ case SpvOpImageFetch:
+ case SpvOpImageGather:
+ case SpvOpImageDrefGather:
+ case SpvOpImageQueryLod: {
+ /* All these types have the coordinate as their first real argument */
+ struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
+ has_coord = true;
+ p->src = nir_src_for_ssa(coord->def);
+ p->src_type = nir_tex_src_coord;
+ p++;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ /* These all have an explicit depth value as their next source */
+ switch (opcode) {
+ case SpvOpImageSampleDrefImplicitLod:
+ case SpvOpImageSampleDrefExplicitLod:
+ case SpvOpImageSampleProjDrefImplicitLod:
+ case SpvOpImageSampleProjDrefExplicitLod:
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
+ break;
+ default:
+ break;
+ }
+
+ /* For OpImageQuerySizeLod, we always have an LOD */
+ if (opcode == SpvOpImageQuerySizeLod)
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
+
+ /* Figure out the base texture operation */
+ nir_texop texop;
+ switch (opcode) {
+ case SpvOpImageSampleImplicitLod:
+ case SpvOpImageSampleDrefImplicitLod:
+ case SpvOpImageSampleProjImplicitLod:
+ case SpvOpImageSampleProjDrefImplicitLod:
+ texop = nir_texop_tex;
+ break;
+
+ case SpvOpImageSampleExplicitLod:
+ case SpvOpImageSampleDrefExplicitLod:
+ case SpvOpImageSampleProjExplicitLod:
+ case SpvOpImageSampleProjDrefExplicitLod:
+ texop = nir_texop_txl;
+ break;
+
+ case SpvOpImageFetch:
+ if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {
+ texop = nir_texop_txf_ms;
+ } else {
+ texop = nir_texop_txf;
+ }
+ break;
+
+ case SpvOpImageGather:
+ case SpvOpImageDrefGather:
+ texop = nir_texop_tg4;
+ break;
+
+ case SpvOpImageQuerySizeLod:
+ case SpvOpImageQuerySize:
+ texop = nir_texop_txs;
+ break;
+
+ case SpvOpImageQueryLod:
+ texop = nir_texop_lod;
+ break;
+
+ case SpvOpImageQueryLevels:
+ texop = nir_texop_query_levels;
+ break;
+
+ case SpvOpImageQuerySamples:
+ default:
+ unreachable("Unhandled opcode");
+ }
+
+ /* Now we need to handle some number of optional arguments */
+ if (idx < count) {
+ uint32_t operands = w[idx++];
+
+ if (operands & SpvImageOperandsBiasMask) {
+ assert(texop == nir_texop_tex);
+ texop = nir_texop_txb;
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias);
+ }
+
+ if (operands & SpvImageOperandsLodMask) {
+ assert(texop == nir_texop_txl || texop == nir_texop_txf ||
+ texop == nir_texop_txf_ms || texop == nir_texop_txs);
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
+ }
+
+ if (operands & SpvImageOperandsGradMask) {
+ assert(texop == nir_texop_tex);
+ texop = nir_texop_txd;
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
+ }
+
+ if (operands & SpvImageOperandsOffsetMask ||
+ operands & SpvImageOperandsConstOffsetMask)
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset);
+
+ if (operands & SpvImageOperandsConstOffsetsMask)
+ assert(!"Constant offsets to texture gather not yet implemented");
+
+ if (operands & SpvImageOperandsSampleMask) {
+ assert(texop == nir_texop_txf_ms);
+ texop = nir_texop_txf_ms;
+ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index);
+ }
+ }
+ /* We should have now consumed exactly all of the arguments */
+ assert(idx == count);
+
+ nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs);
+ instr->op = texop;
+
+ memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
+
+ instr->sampler_dim = glsl_get_sampler_dim(image_type);
+ instr->is_array = glsl_sampler_type_is_array(image_type);
+ instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
+ instr->is_new_style_shadow = instr->is_shadow;
+
+ if (has_coord) {
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_BUF:
+ instr->coord_components = 1;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_MS:
+ instr->coord_components = 2;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ instr->coord_components = 3;
+ break;
+ default:
+ assert("Invalid sampler type");
+ }
+
+ if (instr->is_array)
+ instr->coord_components++;
+ } else {
+ instr->coord_components = 0;
+ }
+
+ switch (glsl_get_sampler_result_type(image_type)) {
+ case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
+ case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break;
+ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break;
+ case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break;
+ default:
+ unreachable("Invalid base type for sampler result");
+ }
+
+ nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler);
+ if (sampled.image) {
+ nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image);
+ instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref));
+ } else {
+ instr->texture = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
+ }
+
+ switch (instr->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ case nir_texop_txd:
+ /* These operations require a sampler */
+ instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref));
+ break;
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_txs:
+ case nir_texop_lod:
+ case nir_texop_tg4:
+ case nir_texop_query_levels:
+ case nir_texop_texture_samples:
+ case nir_texop_samples_identical:
+ /* These don't */
+ instr->sampler = NULL;
+ break;
+ }
+
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
- nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
++ nir_tex_instr_dest_size(instr), 32, NULL);
+
+ assert(glsl_get_vector_elements(ret_type->type) ==
+ nir_tex_instr_dest_size(instr));
+
+ val->ssa = vtn_create_ssa_value(b, ret_type->type);
+ val->ssa->def = &instr->dest.ssa;
+
+ nir_builder_instr_insert(&b->nb, &instr->instr);
+}
+
+static nir_ssa_def *
+get_image_coord(struct vtn_builder *b, uint32_t value)
+{
+ struct vtn_ssa_value *coord = vtn_ssa_value(b, value);
+
+ /* The image_load_store intrinsics assume a 4-dim coordinate */
+ unsigned dim = glsl_get_vector_elements(coord->type);
+ unsigned swizzle[4];
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = MIN2(i, dim - 1);
+
+ return nir_swizzle(&b->nb, coord->def, swizzle, 4, false);
+}
+
+static void
+vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ /* Just get this one out of the way */
+ if (opcode == SpvOpImageTexelPointer) {
+ struct vtn_value *val =
+ vtn_push_value(b, w[2], vtn_value_type_image_pointer);
+ val->image = ralloc(b, struct vtn_image_pointer);
+
+ val->image->image =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ val->image->coord = get_image_coord(b, w[4]);
+ val->image->sample = vtn_ssa_value(b, w[5])->def;
+ return;
+ }
+
+ struct vtn_image_pointer image;
+
+ switch (opcode) {
+ case SpvOpAtomicExchange:
+ case SpvOpAtomicCompareExchange:
+ case SpvOpAtomicCompareExchangeWeak:
+ case SpvOpAtomicIIncrement:
+ case SpvOpAtomicIDecrement:
+ case SpvOpAtomicIAdd:
+ case SpvOpAtomicISub:
+ case SpvOpAtomicSMin:
+ case SpvOpAtomicUMin:
+ case SpvOpAtomicSMax:
+ case SpvOpAtomicUMax:
+ case SpvOpAtomicAnd:
+ case SpvOpAtomicOr:
+ case SpvOpAtomicXor:
+ image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
+ break;
+
+ case SpvOpImageQuerySize:
+ image.image =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ image.coord = NULL;
+ image.sample = NULL;
+ break;
+
+ case SpvOpImageRead:
+ image.image =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ image.coord = get_image_coord(b, w[4]);
+
+ if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) {
+ assert(w[5] == SpvImageOperandsSampleMask);
+ image.sample = vtn_ssa_value(b, w[6])->def;
+ } else {
+ image.sample = nir_ssa_undef(&b->nb, 1);
+ }
+ break;
+
+ case SpvOpImageWrite:
+ image.image =
+ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
+ image.coord = get_image_coord(b, w[2]);
+
+ /* texel = w[3] */
+
+ if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) {
+ assert(w[4] == SpvImageOperandsSampleMask);
+ image.sample = vtn_ssa_value(b, w[5])->def;
+ } else {
+ image.sample = nir_ssa_undef(&b->nb, 1);
+ }
+ break;
+
+ default:
+ unreachable("Invalid image opcode");
+ }
+
+ nir_intrinsic_op op;
+ switch (opcode) {
+#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break;
+ OP(ImageQuerySize, size)
+ OP(ImageRead, load)
+ OP(ImageWrite, store)
+ OP(AtomicExchange, atomic_exchange)
+ OP(AtomicCompareExchange, atomic_comp_swap)
+ OP(AtomicIIncrement, atomic_add)
+ OP(AtomicIDecrement, atomic_add)
+ OP(AtomicIAdd, atomic_add)
+ OP(AtomicISub, atomic_add)
+ OP(AtomicSMin, atomic_min)
+ OP(AtomicUMin, atomic_min)
+ OP(AtomicSMax, atomic_max)
+ OP(AtomicUMax, atomic_max)
+ OP(AtomicAnd, atomic_and)
+ OP(AtomicOr, atomic_or)
+ OP(AtomicXor, atomic_xor)
+#undef OP
+ default:
+ unreachable("Invalid image opcode");
+ }
+
+ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
+
+ nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image);
+ intrin->variables[0] =
+ nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref));
+
+ /* ImageQuerySize doesn't take any extra parameters */
+ if (opcode != SpvOpImageQuerySize) {
+ /* The image coordinate is always 4 components but we may not have that
+ * many. Swizzle to compensate.
+ */
+ unsigned swiz[4];
+ for (unsigned i = 0; i < 4; i++)
+ swiz[i] = i < image.coord->num_components ? i : 0;
+ intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord,
+ swiz, 4, false));
+ intrin->src[1] = nir_src_for_ssa(image.sample);
+ }
+
+ switch (opcode) {
+ case SpvOpImageQuerySize:
+ case SpvOpImageRead:
+ break;
+ case SpvOpImageWrite:
+ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
+ break;
+ case SpvOpAtomicIIncrement:
+ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
+ break;
+ case SpvOpAtomicIDecrement:
+ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
+ break;
+
+ case SpvOpAtomicExchange:
+ case SpvOpAtomicIAdd:
+ case SpvOpAtomicSMin:
+ case SpvOpAtomicUMin:
+ case SpvOpAtomicSMax:
+ case SpvOpAtomicUMax:
+ case SpvOpAtomicAnd:
+ case SpvOpAtomicOr:
+ case SpvOpAtomicXor:
+ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ break;
+
+ case SpvOpAtomicCompareExchange:
+ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
+ intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ break;
+
+ case SpvOpAtomicISub:
+ intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
+ break;
+
+ default:
+ unreachable("Invalid image opcode");
+ }
+
+ if (opcode != SpvOpImageWrite) {
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
- nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
++ nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL);
+
+ nir_builder_instr_insert(&b->nb, &intrin->instr);
+
+ /* The image intrinsics always return 4 channels but we may not want
+ * that many. Emit a mov to trim it down.
+ */
+ unsigned swiz[4] = {0, 1, 2, 3};
+ val->ssa = vtn_create_ssa_value(b, type->type);
+ val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz,
+ glsl_get_vector_elements(type->type), false);
+ } else {
+ nir_builder_instr_insert(&b->nb, &intrin->instr);
+ }
+}
+
+static nir_intrinsic_op
+get_ssbo_nir_atomic_op(SpvOp opcode)
+{
+ switch (opcode) {
+#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N;
+ OP(AtomicExchange, atomic_exchange)
+ OP(AtomicCompareExchange, atomic_comp_swap)
+ OP(AtomicIIncrement, atomic_add)
+ OP(AtomicIDecrement, atomic_add)
+ OP(AtomicIAdd, atomic_add)
+ OP(AtomicISub, atomic_add)
+ OP(AtomicSMin, atomic_imin)
+ OP(AtomicUMin, atomic_umin)
+ OP(AtomicSMax, atomic_imax)
+ OP(AtomicUMax, atomic_umax)
+ OP(AtomicAnd, atomic_and)
+ OP(AtomicOr, atomic_or)
+ OP(AtomicXor, atomic_xor)
+#undef OP
+ default:
+ unreachable("Invalid SSBO atomic");
+ }
+}
+
+static nir_intrinsic_op
+get_shared_nir_atomic_op(SpvOp opcode)
+{
+ switch (opcode) {
+#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N;
+ OP(AtomicExchange, atomic_exchange)
+ OP(AtomicCompareExchange, atomic_comp_swap)
+ OP(AtomicIIncrement, atomic_add)
+ OP(AtomicIDecrement, atomic_add)
+ OP(AtomicIAdd, atomic_add)
+ OP(AtomicISub, atomic_add)
+ OP(AtomicSMin, atomic_imin)
+ OP(AtomicUMin, atomic_umin)
+ OP(AtomicSMax, atomic_imax)
+ OP(AtomicUMax, atomic_umax)
+ OP(AtomicAnd, atomic_and)
+ OP(AtomicOr, atomic_or)
+ OP(AtomicXor, atomic_xor)
+#undef OP
+ default:
+ unreachable("Invalid shared atomic");
+ }
+}
+
+static void
+fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, nir_src *src)
+{
+ switch (opcode) {
+ case SpvOpAtomicIIncrement:
+ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1));
+ break;
+
+ case SpvOpAtomicIDecrement:
+ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1));
+ break;
+
+ case SpvOpAtomicISub:
+ src[0] =
+ nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def));
+ break;
+
+ case SpvOpAtomicCompareExchange:
+ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
+ src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+ break;
+ /* Fall through */
+
+ case SpvOpAtomicExchange:
+ case SpvOpAtomicIAdd:
+ case SpvOpAtomicSMin:
+ case SpvOpAtomicUMin:
+ case SpvOpAtomicSMax:
+ case SpvOpAtomicUMax:
+ case SpvOpAtomicAnd:
+ case SpvOpAtomicOr:
+ case SpvOpAtomicXor:
+ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ break;
+
+ default:
+ unreachable("Invalid SPIR-V atomic");
+ }
+}
+
+static void
+vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ struct vtn_access_chain *chain =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ nir_intrinsic_instr *atomic;
+
+ /*
+ SpvScope scope = w[4];
+ SpvMemorySemanticsMask semantics = w[5];
+ */
+
+ if (chain->var->mode == vtn_variable_mode_workgroup) {
+ nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref;
+ nir_intrinsic_op op = get_shared_nir_atomic_op(opcode);
+ atomic = nir_intrinsic_instr_create(b->nb.shader, op);
+ atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref));
+ fill_common_atomic_sources(b, opcode, w, &atomic->src[0]);
+ } else {
+ assert(chain->var->mode == vtn_variable_mode_ssbo);
+ struct vtn_type *type;
+ nir_ssa_def *offset, *index;
+ offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false);
+
+ nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
+
+ atomic = nir_intrinsic_instr_create(b->nb.shader, op);
+ atomic->src[0] = nir_src_for_ssa(index);
+ atomic->src[1] = nir_src_for_ssa(offset);
+ fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
+ }
+
- create_vec(nir_shader *shader, unsigned num_components)
++ nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
+
+ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ val->ssa = rzalloc(b, struct vtn_ssa_value);
+ val->ssa->def = &atomic->dest.ssa;
+ val->ssa->type = type->type;
+
+ nir_builder_instr_insert(&b->nb, &atomic->instr);
+}
+
+static nir_alu_instr *
- nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
++create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size)
+{
+ nir_op op;
+ switch (num_components) {
+ case 1: op = nir_op_fmov; break;
+ case 2: op = nir_op_vec2; break;
+ case 3: op = nir_op_vec3; break;
+ case 4: op = nir_op_vec4; break;
+ default: unreachable("bad vector size");
+ }
+
+ nir_alu_instr *vec = nir_alu_instr_create(shader, op);
- glsl_get_matrix_columns(src->type));
++ nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
++ bit_size, NULL);
+ vec->dest.write_mask = (1 << num_components) - 1;
+
+ return vec;
+}
+
+struct vtn_ssa_value *
+vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
+{
+ if (src->transposed)
+ return src->transposed;
+
+ struct vtn_ssa_value *dest =
+ vtn_create_ssa_value(b, glsl_transposed_type(src->type));
+
+ for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
+ nir_alu_instr *vec = create_vec(b->shader,
- nir_alu_instr *vec = create_vec(b->shader, src->num_components);
++ glsl_get_matrix_columns(src->type),
++ glsl_get_bit_size(glsl_get_base_type(src->type)));
+ if (glsl_type_is_vector_or_scalar(src->type)) {
+ vec->src[0].src = nir_src_for_ssa(src->def);
+ vec->src[0].swizzle[0] = i;
+ } else {
+ for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) {
+ vec->src[j].src = nir_src_for_ssa(src->elems[j]->def);
+ vec->src[j].swizzle[0] = i;
+ }
+ }
+ nir_builder_instr_insert(&b->nb, &vec->instr);
+ dest->elems[i]->def = &vec->dest.dest.ssa;
+ }
+
+ dest->transposed = src;
+
+ return dest;
+}
+
+nir_ssa_def *
+vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index)
+{
+ unsigned swiz[4] = { index };
+ return nir_swizzle(&b->nb, src, swiz, 1, true);
+}
+
+nir_ssa_def *
+vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
+ unsigned index)
+{
- nir_alu_instr *vec = create_vec(b->shader, num_components);
++ nir_alu_instr *vec = create_vec(b->shader, src->num_components,
++ src->bit_size);
+
+ for (unsigned i = 0; i < src->num_components; i++) {
+ if (i == index) {
+ vec->src[i].src = nir_src_for_ssa(insert);
+ } else {
+ vec->src[i].src = nir_src_for_ssa(src);
+ vec->src[i].swizzle[0] = i;
+ }
+ }
+
+ nir_builder_instr_insert(&b->nb, &vec->instr);
+
+ return &vec->dest.dest.ssa;
+}
+
+nir_ssa_def *
+vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
+ nir_ssa_def *index)
+{
+ nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
+ for (unsigned i = 1; i < src->num_components; i++)
+ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+ vtn_vector_extract(b, src, i), dest);
+
+ return dest;
+}
+
+nir_ssa_def *
+vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
+ nir_ssa_def *insert, nir_ssa_def *index)
+{
+ nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0);
+ for (unsigned i = 1; i < src->num_components; i++)
+ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+ vtn_vector_insert(b, src, insert, i), dest);
+
+ return dest;
+}
+
+static nir_ssa_def *
+vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
+ nir_ssa_def *src0, nir_ssa_def *src1,
+ const uint32_t *indices)
+{
- nir_alu_instr *vec = create_vec(b->shader, num_components);
++ nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size);
+
+ nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
+ nir_builder_instr_insert(&b->nb, &undef->instr);
+
+ for (unsigned i = 0; i < num_components; i++) {
+ uint32_t index = indices[i];
+ if (index == 0xffffffff) {
+ vec->src[i].src = nir_src_for_ssa(&undef->def);
+ } else if (index < src0->num_components) {
+ vec->src[i].src = nir_src_for_ssa(src0);
+ vec->src[i].swizzle[0] = index;
+ } else {
+ vec->src[i].src = nir_src_for_ssa(src1);
+ vec->src[i].swizzle[0] = index - src0->num_components;
+ }
+ }
+
+ nir_builder_instr_insert(&b->nb, &vec->instr);
+
+ return &vec->dest.dest.ssa;
+}
+
+/*
+ * Concatentates a number of vectors/scalars together to produce a vector
+ */
+static nir_ssa_def *
+vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
+ unsigned num_srcs, nir_ssa_def **srcs)
+{
++ nir_alu_instr *vec = create_vec(b->shader, num_components,
++ srcs[0]->bit_size);
+
+ unsigned dest_idx = 0;
+ for (unsigned i = 0; i < num_srcs; i++) {
+ nir_ssa_def *src = srcs[i];
+ for (unsigned j = 0; j < src->num_components; j++) {
+ vec->src[dest_idx].src = nir_src_for_ssa(src);
+ vec->src[dest_idx].swizzle[0] = j;
+ dest_idx++;
+ }
+ }
+
+ nir_builder_instr_insert(&b->nb, &vec->instr);
+
+ return &vec->dest.dest.ssa;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src)
+{
+ struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value);
+ dest->type = src->type;
+
+ if (glsl_type_is_vector_or_scalar(src->type)) {
+ dest->def = src->def;
+ } else {
+ unsigned elems = glsl_get_length(src->type);
+
+ dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems);
+ for (unsigned i = 0; i < elems; i++)
+ dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]);
+ }
+
+ return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
+ struct vtn_ssa_value *insert, const uint32_t *indices,
+ unsigned num_indices)
+{
+ struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
+
+ struct vtn_ssa_value *cur = dest;
+ unsigned i;
+ for (i = 0; i < num_indices - 1; i++) {
+ cur = cur->elems[indices[i]];
+ }
+
+ if (glsl_type_is_vector_or_scalar(cur->type)) {
+ /* According to the SPIR-V spec, OpCompositeInsert may work down to
+ * the component granularity. In that case, the last index will be
+ * the index to insert the scalar into the vector.
+ */
+
+ cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]);
+ } else {
+ cur->elems[indices[i]] = insert;
+ }
+
+ return dest;
+}
+
+static struct vtn_ssa_value *
+vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src,
+ const uint32_t *indices, unsigned num_indices)
+{
+ struct vtn_ssa_value *cur = src;
+ for (unsigned i = 0; i < num_indices; i++) {
+ if (glsl_type_is_vector_or_scalar(cur->type)) {
+ assert(i == num_indices - 1);
+ /* According to the SPIR-V spec, OpCompositeExtract may work down to
+ * the component granularity. The last index will be the index of the
+ * vector to extract.
+ */
+
+ struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value);
+ ret->type = glsl_scalar_type(glsl_get_base_type(cur->type));
+ ret->def = vtn_vector_extract(b, cur->def, indices[i]);
+ return ret;
+ } else {
+ cur = cur->elems[indices[i]];
+ }
+ }
+
+ return cur;
+}
+
+static void
+vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ const struct glsl_type *type =
+ vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ val->ssa = vtn_create_ssa_value(b, type);
+
+ switch (opcode) {
+ case SpvOpVectorExtractDynamic:
+ val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def,
+ vtn_ssa_value(b, w[4])->def);
+ break;
+
+ case SpvOpVectorInsertDynamic:
+ val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def,
+ vtn_ssa_value(b, w[4])->def,
+ vtn_ssa_value(b, w[5])->def);
+ break;
+
+ case SpvOpVectorShuffle:
+ val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type),
+ vtn_ssa_value(b, w[3])->def,
+ vtn_ssa_value(b, w[4])->def,
+ w + 5);
+ break;
+
+ case SpvOpCompositeConstruct: {
+ unsigned elems = count - 3;
+ if (glsl_type_is_vector_or_scalar(type)) {
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < elems; i++)
+ srcs[i] = vtn_ssa_value(b, w[3 + i])->def;
+ val->ssa->def =
+ vtn_vector_construct(b, glsl_get_vector_elements(type),
+ elems, srcs);
+ } else {
+ val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
+ for (unsigned i = 0; i < elems; i++)
+ val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]);
+ }
+ break;
+ }
+ case SpvOpCompositeExtract:
+ val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]),
+ w + 4, count - 4);
+ break;
+
+ case SpvOpCompositeInsert:
+ val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
+ vtn_ssa_value(b, w[3]),
+ w + 5, count - 5);
+ break;
+
+ case SpvOpCopyObject:
+ val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
+ break;
+
+ default:
+ unreachable("unknown composite operation");
+ }
+}
+
+static void
+vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ nir_intrinsic_op intrinsic_op;
+ switch (opcode) {
+ case SpvOpEmitVertex:
+ case SpvOpEmitStreamVertex:
+ intrinsic_op = nir_intrinsic_emit_vertex;
+ break;
+ case SpvOpEndPrimitive:
+ case SpvOpEndStreamPrimitive:
+ intrinsic_op = nir_intrinsic_end_primitive;
+ break;
+ case SpvOpMemoryBarrier:
+ intrinsic_op = nir_intrinsic_memory_barrier;
+ break;
+ case SpvOpControlBarrier:
+ intrinsic_op = nir_intrinsic_barrier;
+ break;
+ default:
+ unreachable("unknown barrier instruction");
+ }
+
+ nir_intrinsic_instr *intrin =
+ nir_intrinsic_instr_create(b->shader, intrinsic_op);
+
+ if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
+ nir_intrinsic_set_stream_id(intrin, w[1]);
+
+ nir_builder_instr_insert(&b->nb, &intrin->instr);
+}
+
+static unsigned
+gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
+{
+ switch (mode) {
+ case SpvExecutionModeInputPoints:
+ case SpvExecutionModeOutputPoints:
+ return 0; /* GL_POINTS */
+ case SpvExecutionModeInputLines:
+ return 1; /* GL_LINES */
+ case SpvExecutionModeInputLinesAdjacency:
+ return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */
+ case SpvExecutionModeTriangles:
+ return 4; /* GL_TRIANGLES */
+ case SpvExecutionModeInputTrianglesAdjacency:
+ return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */
+ case SpvExecutionModeQuads:
+ return 7; /* GL_QUADS */
+ case SpvExecutionModeIsolines:
+ return 0x8E7A; /* GL_ISOLINES */
+ case SpvExecutionModeOutputLineStrip:
+ return 3; /* GL_LINE_STRIP */
+ case SpvExecutionModeOutputTriangleStrip:
+ return 5; /* GL_TRIANGLE_STRIP */
+ default:
+ assert(!"Invalid primitive type");
+ return 4;
+ }
+}
+
+static unsigned
+vertices_in_from_spv_execution_mode(SpvExecutionMode mode)
+{
+ switch (mode) {
+ case SpvExecutionModeInputPoints:
+ return 1;
+ case SpvExecutionModeInputLines:
+ return 2;
+ case SpvExecutionModeInputLinesAdjacency:
+ return 4;
+ case SpvExecutionModeTriangles:
+ return 3;
+ case SpvExecutionModeInputTrianglesAdjacency:
+ return 6;
+ default:
+ assert(!"Invalid GS input mode");
+ return 0;
+ }
+}
+
+static gl_shader_stage
+stage_for_execution_model(SpvExecutionModel model)
+{
+ switch (model) {
+ case SpvExecutionModelVertex:
+ return MESA_SHADER_VERTEX;
+ case SpvExecutionModelTessellationControl:
+ return MESA_SHADER_TESS_CTRL;
+ case SpvExecutionModelTessellationEvaluation:
+ return MESA_SHADER_TESS_EVAL;
+ case SpvExecutionModelGeometry:
+ return MESA_SHADER_GEOMETRY;
+ case SpvExecutionModelFragment:
+ return MESA_SHADER_FRAGMENT;
+ case SpvExecutionModelGLCompute:
+ return MESA_SHADER_COMPUTE;
+ default:
+ unreachable("Unsupported execution model");
+ }
+}
+
+static bool
+vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ switch (opcode) {
+ case SpvOpSource:
+ case SpvOpSourceExtension:
+ case SpvOpSourceContinued:
+ case SpvOpExtension:
+ /* Unhandled, but these are for debug so that's ok. */
+ break;
+
+ case SpvOpCapability: {
+ SpvCapability cap = w[1];
+ switch (cap) {
+ case SpvCapabilityMatrix:
+ case SpvCapabilityShader:
+ case SpvCapabilityGeometry:
+ case SpvCapabilityTessellationPointSize:
+ case SpvCapabilityGeometryPointSize:
+ case SpvCapabilityUniformBufferArrayDynamicIndexing:
+ case SpvCapabilitySampledImageArrayDynamicIndexing:
+ case SpvCapabilityStorageBufferArrayDynamicIndexing:
+ case SpvCapabilityStorageImageArrayDynamicIndexing:
+ case SpvCapabilityImageRect:
+ case SpvCapabilitySampledRect:
+ case SpvCapabilitySampled1D:
+ case SpvCapabilityImage1D:
+ case SpvCapabilitySampledCubeArray:
+ case SpvCapabilitySampledBuffer:
+ case SpvCapabilityImageBuffer:
+ case SpvCapabilityImageQuery:
+ break;
+ case SpvCapabilityClipDistance:
+ case SpvCapabilityCullDistance:
+ case SpvCapabilityGeometryStreams:
+ fprintf(stderr, "WARNING: Unsupported SPIR-V Capability\n");
+ break;
+ default:
+ assert(!"Unsupported capability");
+ }
+ break;
+ }
+
+ case SpvOpExtInstImport:
+ vtn_handle_extension(b, opcode, w, count);
+ break;
+
+ case SpvOpMemoryModel:
+ assert(w[1] == SpvAddressingModelLogical);
+ assert(w[2] == SpvMemoryModelGLSL450);
+ break;
+
+ case SpvOpEntryPoint: {
+ struct vtn_value *entry_point = &b->values[w[2]];
+ /* Let this be a name label regardless */
+ unsigned name_words;
+ entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words);
+
+ if (strcmp(entry_point->name, b->entry_point_name) != 0 ||
+ stage_for_execution_model(w[1]) != b->entry_point_stage)
+ break;
+
+ assert(b->entry_point == NULL);
+ b->entry_point = entry_point;
+ break;
+ }
+
+ case SpvOpString:
+ vtn_push_value(b, w[1], vtn_value_type_string)->str =
+ vtn_string_literal(b, &w[2], count - 2, NULL);
+ break;
+
+ case SpvOpName:
+ b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL);
+ break;
+
+ case SpvOpMemberName:
+ /* TODO */
+ break;
+
+ case SpvOpExecutionMode:
+ case SpvOpDecorationGroup:
+ case SpvOpDecorate:
+ case SpvOpMemberDecorate:
+ case SpvOpGroupDecorate:
+ case SpvOpGroupMemberDecorate:
+ vtn_handle_decoration(b, opcode, w, count);
+ break;
+
+ default:
+ return false; /* End of preamble */
+ }
+
+ return true;
+}
+
+static void
+vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point,
+ const struct vtn_decoration *mode, void *data)
+{
+ assert(b->entry_point == entry_point);
+
+ switch(mode->exec_mode) {
+ case SpvExecutionModeOriginUpperLeft:
+ case SpvExecutionModeOriginLowerLeft:
+ b->origin_upper_left =
+ (mode->exec_mode == SpvExecutionModeOriginUpperLeft);
+ break;
+
+ case SpvExecutionModeEarlyFragmentTests:
+ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ b->shader->info.fs.early_fragment_tests = true;
+ break;
+
+ case SpvExecutionModeInvocations:
+ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
+ break;
+
+ case SpvExecutionModeDepthReplacing:
+ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+ break;
+ case SpvExecutionModeDepthGreater:
+ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
+ break;
+ case SpvExecutionModeDepthLess:
+ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
+ break;
+ case SpvExecutionModeDepthUnchanged:
+ assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+ break;
+
+ case SpvExecutionModeLocalSize:
+ assert(b->shader->stage == MESA_SHADER_COMPUTE);
+ b->shader->info.cs.local_size[0] = mode->literals[0];
+ b->shader->info.cs.local_size[1] = mode->literals[1];
+ b->shader->info.cs.local_size[2] = mode->literals[2];
+ break;
+ case SpvExecutionModeLocalSizeHint:
+ break; /* Nothing do do with this */
+
+ case SpvExecutionModeOutputVertices:
+ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ b->shader->info.gs.vertices_out = mode->literals[0];
+ break;
+
+ case SpvExecutionModeInputPoints:
+ case SpvExecutionModeInputLines:
+ case SpvExecutionModeInputLinesAdjacency:
+ case SpvExecutionModeTriangles:
+ case SpvExecutionModeInputTrianglesAdjacency:
+ case SpvExecutionModeQuads:
+ case SpvExecutionModeIsolines:
+ if (b->shader->stage == MESA_SHADER_GEOMETRY) {
+ b->shader->info.gs.vertices_in =
+ vertices_in_from_spv_execution_mode(mode->exec_mode);
+ } else {
+ assert(!"Tesselation shaders not yet supported");
+ }
+ break;
+
+ case SpvExecutionModeOutputPoints:
+ case SpvExecutionModeOutputLineStrip:
+ case SpvExecutionModeOutputTriangleStrip:
+ assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ b->shader->info.gs.output_primitive =
+ gl_primitive_from_spv_execution_mode(mode->exec_mode);
+ break;
+
+ case SpvExecutionModeSpacingEqual:
+ case SpvExecutionModeSpacingFractionalEven:
+ case SpvExecutionModeSpacingFractionalOdd:
+ case SpvExecutionModeVertexOrderCw:
+ case SpvExecutionModeVertexOrderCcw:
+ case SpvExecutionModePointMode:
+ assert(!"TODO: Add tessellation metadata");
+ break;
+
+ case SpvExecutionModePixelCenterInteger:
+ case SpvExecutionModeXfb:
+ assert(!"Unhandled execution mode");
+ break;
+
+ case SpvExecutionModeVecTypeHint:
+ case SpvExecutionModeContractionOff:
+ break; /* OpenCL */
+ }
+}
+
+static bool
+vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ switch (opcode) {
+ case SpvOpSource:
+ case SpvOpSourceContinued:
+ case SpvOpSourceExtension:
+ case SpvOpExtension:
+ case SpvOpCapability:
+ case SpvOpExtInstImport:
+ case SpvOpMemoryModel:
+ case SpvOpEntryPoint:
+ case SpvOpExecutionMode:
+ case SpvOpString:
+ case SpvOpName:
+ case SpvOpMemberName:
+ case SpvOpDecorationGroup:
+ case SpvOpDecorate:
+ case SpvOpMemberDecorate:
+ case SpvOpGroupDecorate:
+ case SpvOpGroupMemberDecorate:
+ assert(!"Invalid opcode types and variables section");
+ break;
+
+ case SpvOpTypeVoid:
+ case SpvOpTypeBool:
+ case SpvOpTypeInt:
+ case SpvOpTypeFloat:
+ case SpvOpTypeVector:
+ case SpvOpTypeMatrix:
+ case SpvOpTypeImage:
+ case SpvOpTypeSampler:
+ case SpvOpTypeSampledImage:
+ case SpvOpTypeArray:
+ case SpvOpTypeRuntimeArray:
+ case SpvOpTypeStruct:
+ case SpvOpTypeOpaque:
+ case SpvOpTypePointer:
+ case SpvOpTypeFunction:
+ case SpvOpTypeEvent:
+ case SpvOpTypeDeviceEvent:
+ case SpvOpTypeReserveId:
+ case SpvOpTypeQueue:
+ case SpvOpTypePipe:
+ vtn_handle_type(b, opcode, w, count);
+ break;
+
+ case SpvOpConstantTrue:
+ case SpvOpConstantFalse:
+ case SpvOpConstant:
+ case SpvOpConstantComposite:
+ case SpvOpConstantSampler:
+ case SpvOpConstantNull:
+ case SpvOpSpecConstantTrue:
+ case SpvOpSpecConstantFalse:
+ case SpvOpSpecConstant:
+ case SpvOpSpecConstantComposite:
+ case SpvOpSpecConstantOp:
+ vtn_handle_constant(b, opcode, w, count);
+ break;
+
+ case SpvOpVariable:
+ vtn_handle_variables(b, opcode, w, count);
+ break;
+
+ default:
+ return false; /* End of preamble */
+ }
+
+ return true;
+}
+
+static bool
+vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ switch (opcode) {
+ case SpvOpLabel:
+ break;
+
+ case SpvOpLoopMerge:
+ case SpvOpSelectionMerge:
+ /* This is handled by cfg pre-pass and walk_blocks */
+ break;
+
+ case SpvOpUndef: {
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
+ val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+ break;
+ }
+
+ case SpvOpExtInst:
+ vtn_handle_extension(b, opcode, w, count);
+ break;
+
+ case SpvOpVariable:
+ case SpvOpLoad:
+ case SpvOpStore:
+ case SpvOpCopyMemory:
+ case SpvOpCopyMemorySized:
+ case SpvOpAccessChain:
+ case SpvOpInBoundsAccessChain:
+ case SpvOpArrayLength:
+ vtn_handle_variables(b, opcode, w, count);
+ break;
+
+ case SpvOpFunctionCall:
+ vtn_handle_function_call(b, opcode, w, count);
+ break;
+
+ case SpvOpSampledImage:
+ case SpvOpImage:
+ case SpvOpImageSampleImplicitLod:
+ case SpvOpImageSampleExplicitLod:
+ case SpvOpImageSampleDrefImplicitLod:
+ case SpvOpImageSampleDrefExplicitLod:
+ case SpvOpImageSampleProjImplicitLod:
+ case SpvOpImageSampleProjExplicitLod:
+ case SpvOpImageSampleProjDrefImplicitLod:
+ case SpvOpImageSampleProjDrefExplicitLod:
+ case SpvOpImageFetch:
+ case SpvOpImageGather:
+ case SpvOpImageDrefGather:
+ case SpvOpImageQuerySizeLod:
+ case SpvOpImageQueryLod:
+ case SpvOpImageQueryLevels:
+ case SpvOpImageQuerySamples:
+ vtn_handle_texture(b, opcode, w, count);
+ break;
+
+ case SpvOpImageRead:
+ case SpvOpImageWrite:
+ case SpvOpImageTexelPointer:
+ vtn_handle_image(b, opcode, w, count);
+ break;
+
+ case SpvOpImageQuerySize: {
+ struct vtn_access_chain *image =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+ if (glsl_type_is_image(image->var->var->interface_type)) {
+ vtn_handle_image(b, opcode, w, count);
+ } else {
+ vtn_handle_texture(b, opcode, w, count);
+ }
+ break;
+ }
+
+ case SpvOpAtomicExchange:
+ case SpvOpAtomicCompareExchange:
+ case SpvOpAtomicCompareExchangeWeak:
+ case SpvOpAtomicIIncrement:
+ case SpvOpAtomicIDecrement:
+ case SpvOpAtomicIAdd:
+ case SpvOpAtomicISub:
+ case SpvOpAtomicSMin:
+ case SpvOpAtomicUMin:
+ case SpvOpAtomicSMax:
+ case SpvOpAtomicUMax:
+ case SpvOpAtomicAnd:
+ case SpvOpAtomicOr:
+ case SpvOpAtomicXor: {
+ struct vtn_value *pointer = vtn_untyped_value(b, w[3]);
+ if (pointer->value_type == vtn_value_type_image_pointer) {
+ vtn_handle_image(b, opcode, w, count);
+ } else {
+ assert(pointer->value_type == vtn_value_type_access_chain);
+ vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count);
+ }
+ break;
+ }
+
+ case SpvOpSNegate:
+ case SpvOpFNegate:
+ case SpvOpNot:
+ case SpvOpAny:
+ case SpvOpAll:
+ case SpvOpConvertFToU:
+ case SpvOpConvertFToS:
+ case SpvOpConvertSToF:
+ case SpvOpConvertUToF:
+ case SpvOpUConvert:
+ case SpvOpSConvert:
+ case SpvOpFConvert:
+ case SpvOpQuantizeToF16:
+ case SpvOpConvertPtrToU:
+ case SpvOpConvertUToPtr:
+ case SpvOpPtrCastToGeneric:
+ case SpvOpGenericCastToPtr:
+ case SpvOpBitcast:
+ case SpvOpIsNan:
+ case SpvOpIsInf:
+ case SpvOpIsFinite:
+ case SpvOpIsNormal:
+ case SpvOpSignBitSet:
+ case SpvOpLessOrGreater:
+ case SpvOpOrdered:
+ case SpvOpUnordered:
+ case SpvOpIAdd:
+ case SpvOpFAdd:
+ case SpvOpISub:
+ case SpvOpFSub:
+ case SpvOpIMul:
+ case SpvOpFMul:
+ case SpvOpUDiv:
+ case SpvOpSDiv:
+ case SpvOpFDiv:
+ case SpvOpUMod:
+ case SpvOpSRem:
+ case SpvOpSMod:
+ case SpvOpFRem:
+ case SpvOpFMod:
+ case SpvOpVectorTimesScalar:
+ case SpvOpDot:
+ case SpvOpIAddCarry:
+ case SpvOpISubBorrow:
+ case SpvOpUMulExtended:
+ case SpvOpSMulExtended:
+ case SpvOpShiftRightLogical:
+ case SpvOpShiftRightArithmetic:
+ case SpvOpShiftLeftLogical:
+ case SpvOpLogicalEqual:
+ case SpvOpLogicalNotEqual:
+ case SpvOpLogicalOr:
+ case SpvOpLogicalAnd:
+ case SpvOpLogicalNot:
+ case SpvOpBitwiseOr:
+ case SpvOpBitwiseXor:
+ case SpvOpBitwiseAnd:
+ case SpvOpSelect:
+ case SpvOpIEqual:
+ case SpvOpFOrdEqual:
+ case SpvOpFUnordEqual:
+ case SpvOpINotEqual:
+ case SpvOpFOrdNotEqual:
+ case SpvOpFUnordNotEqual:
+ case SpvOpULessThan:
+ case SpvOpSLessThan:
+ case SpvOpFOrdLessThan:
+ case SpvOpFUnordLessThan:
+ case SpvOpUGreaterThan:
+ case SpvOpSGreaterThan:
+ case SpvOpFOrdGreaterThan:
+ case SpvOpFUnordGreaterThan:
+ case SpvOpULessThanEqual:
+ case SpvOpSLessThanEqual:
+ case SpvOpFOrdLessThanEqual:
+ case SpvOpFUnordLessThanEqual:
+ case SpvOpUGreaterThanEqual:
+ case SpvOpSGreaterThanEqual:
+ case SpvOpFOrdGreaterThanEqual:
+ case SpvOpFUnordGreaterThanEqual:
+ case SpvOpDPdx:
+ case SpvOpDPdy:
+ case SpvOpFwidth:
+ case SpvOpDPdxFine:
+ case SpvOpDPdyFine:
+ case SpvOpFwidthFine:
+ case SpvOpDPdxCoarse:
+ case SpvOpDPdyCoarse:
+ case SpvOpFwidthCoarse:
+ case SpvOpBitFieldInsert:
+ case SpvOpBitFieldSExtract:
+ case SpvOpBitFieldUExtract:
+ case SpvOpBitReverse:
+ case SpvOpBitCount:
+ case SpvOpTranspose:
+ case SpvOpOuterProduct:
+ case SpvOpMatrixTimesScalar:
+ case SpvOpVectorTimesMatrix:
+ case SpvOpMatrixTimesVector:
+ case SpvOpMatrixTimesMatrix:
+ vtn_handle_alu(b, opcode, w, count);
+ break;
+
+ case SpvOpVectorExtractDynamic:
+ case SpvOpVectorInsertDynamic:
+ case SpvOpVectorShuffle:
+ case SpvOpCompositeConstruct:
+ case SpvOpCompositeExtract:
+ case SpvOpCompositeInsert:
+ case SpvOpCopyObject:
+ vtn_handle_composite(b, opcode, w, count);
+ break;
+
+ case SpvOpEmitVertex:
+ case SpvOpEndPrimitive:
+ case SpvOpEmitStreamVertex:
+ case SpvOpEndStreamPrimitive:
+ case SpvOpControlBarrier:
+ case SpvOpMemoryBarrier:
+ vtn_handle_barrier(b, opcode, w, count);
+ break;
+
+ default:
+ unreachable("Unhandled opcode");
+ }
+
+ return true;
+}
+
+nir_function *
+spirv_to_nir(const uint32_t *words, size_t word_count,
+ struct nir_spirv_specialization *spec, unsigned num_spec,
+ gl_shader_stage stage, const char *entry_point_name,
+ const nir_shader_compiler_options *options)
+{
+ const uint32_t *word_end = words + word_count;
+
+ /* Handle the SPIR-V header (first 4 dwords) */
+ assert(word_count > 5);
+
+ assert(words[0] == SpvMagicNumber);
+ assert(words[1] >= 0x10000);
+ /* words[2] == generator magic */
+ unsigned value_id_bound = words[3];
+ assert(words[4] == 0);
+
+ words+= 5;
+
+ /* Initialize the stn_builder object */
+ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder);
+ b->value_id_bound = value_id_bound;
+ b->values = rzalloc_array(b, struct vtn_value, value_id_bound);
+ exec_list_make_empty(&b->functions);
+ b->entry_point_stage = stage;
+ b->entry_point_name = entry_point_name;
+
+ /* Handle all the preamble instructions */
+ words = vtn_foreach_instruction(b, words, word_end,
+ vtn_handle_preamble_instruction);
+
+ if (b->entry_point == NULL) {
+ assert(!"Entry point not found");
+ ralloc_free(b);
+ return NULL;
+ }
+
+ b->shader = nir_shader_create(NULL, stage, options);
+
+ /* Parse execution modes */
+ vtn_foreach_execution_mode(b, b->entry_point,
+ vtn_handle_execution_mode, NULL);
+
+ b->specializations = spec;
+ b->num_specializations = num_spec;
+
+ /* Handle all variable, type, and constant instructions */
+ words = vtn_foreach_instruction(b, words, word_end,
+ vtn_handle_variable_or_type_instruction);
+
+ vtn_build_cfg(b, words, word_end);
+
+ foreach_list_typed(struct vtn_function, func, node, &b->functions) {
+ b->impl = func->impl;
+ b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ vtn_function_emit(b, func, vtn_handle_body_instruction);
+ }
+
+ assert(b->entry_point->value_type == vtn_value_type_function);
+ nir_function *entry_point = b->entry_point->func->impl->function;
+ assert(entry_point);
+
+ ralloc_free(b);
+
+ return entry_point;
+}
--- /dev/null
- glsl_get_vector_elements(val->ssa->type), val->name);
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "vtn_private.h"
+#include "GLSL.std.450.h"
+
+#define M_PIf ((float) M_PI)
+#define M_PI_2f ((float) M_PI_2)
+#define M_PI_4f ((float) M_PI_4)
+
+static nir_ssa_def *
+build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
+{
+ unsigned swiz[4] = {1, 0, 0, 0};
+ nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
+ return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
+}
+
+static nir_ssa_def *
+build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
+{
+ unsigned yzx[4] = {1, 2, 0, 0};
+ unsigned zxy[4] = {2, 0, 1, 0};
+
+ nir_ssa_def *prod0 =
+ nir_fmul(b, col[0],
+ nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
+ nir_swizzle(b, col[2], zxy, 3, true)));
+ nir_ssa_def *prod1 =
+ nir_fmul(b, col[0],
+ nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
+ nir_swizzle(b, col[2], yzx, 3, true)));
+
+ nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
+
+ return nir_fadd(b, nir_channel(b, diff, 0),
+ nir_fadd(b, nir_channel(b, diff, 1),
+ nir_channel(b, diff, 2)));
+}
+
+static nir_ssa_def *
+build_mat4_det(nir_builder *b, nir_ssa_def **col)
+{
+ nir_ssa_def *subdet[4];
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned swiz[3];
+ for (unsigned j = 0; j < 3; j++)
+ swiz[j] = j + (j >= i);
+
+ nir_ssa_def *subcol[3];
+ subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
+ subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
+ subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
+
+ subdet[i] = build_mat3_det(b, subcol);
+ }
+
+ nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
+
+ return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
+ nir_channel(b, prod, 1)),
+ nir_fsub(b, nir_channel(b, prod, 2),
+ nir_channel(b, prod, 3)));
+}
+
+static nir_ssa_def *
+build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
+{
+ unsigned size = glsl_get_vector_elements(src->type);
+
+ nir_ssa_def *cols[4];
+ for (unsigned i = 0; i < size; i++)
+ cols[i] = src->elems[i]->def;
+
+ switch(size) {
+ case 2: return build_mat2_det(&b->nb, cols);
+ case 3: return build_mat3_det(&b->nb, cols);
+ case 4: return build_mat4_det(&b->nb, cols);
+ default:
+ unreachable("Invalid matrix size");
+ }
+}
+
+/* Computes the determinate of the submatrix given by taking src and
+ * removing the specified row and column.
+ */
+static nir_ssa_def *
+build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
+ unsigned size, unsigned row, unsigned col)
+{
+ assert(row < size && col < size);
+ if (size == 2) {
+ return nir_channel(b, src->elems[1 - col]->def, 1 - row);
+ } else {
+ /* Swizzle to get all but the specified row */
+ unsigned swiz[3];
+ for (unsigned j = 0; j < 3; j++)
+ swiz[j] = j + (j >= row);
+
+ /* Grab all but the specified column */
+ nir_ssa_def *subcol[3];
+ for (unsigned j = 0; j < size; j++) {
+ if (j != col) {
+ subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
+ swiz, size - 1, true);
+ }
+ }
+
+ if (size == 3) {
+ return build_mat2_det(b, subcol);
+ } else {
+ assert(size == 4);
+ return build_mat3_det(b, subcol);
+ }
+ }
+}
+
+static struct vtn_ssa_value *
+matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
+{
+ nir_ssa_def *adj_col[4];
+ unsigned size = glsl_get_vector_elements(src->type);
+
+ /* Build up an adjugate matrix */
+ for (unsigned c = 0; c < size; c++) {
+ nir_ssa_def *elem[4];
+ for (unsigned r = 0; r < size; r++) {
+ elem[r] = build_mat_subdet(&b->nb, src, size, c, r);
+
+ if ((r + c) % 2)
+ elem[r] = nir_fneg(&b->nb, elem[r]);
+ }
+
+ adj_col[c] = nir_vec(&b->nb, elem, size);
+ }
+
+ nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));
+
+ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
+ for (unsigned i = 0; i < size; i++)
+ val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);
+
+ return val;
+}
+
+static nir_ssa_def*
+build_length(nir_builder *b, nir_ssa_def *vec)
+{
+ switch (vec->num_components) {
+ case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
+ case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
+ case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
+ case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
+ default:
+ unreachable("Invalid number of components");
+ }
+}
+
+static inline nir_ssa_def *
+build_fclamp(nir_builder *b,
+ nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
+{
+ return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
+}
+
+/**
+ * Return e^x.
+ */
+static nir_ssa_def *
+build_exp(nir_builder *b, nir_ssa_def *x)
+{
+ return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
+}
+
+/**
+ * Return ln(x) - the natural logarithm of x.
+ */
+static nir_ssa_def *
+build_log(nir_builder *b, nir_ssa_def *x)
+{
+ return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
+}
+
+/**
+ * Approximate asin(x) by the formula:
+ * asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
+ *
+ * which is correct to first order at x=0 and x=±1 regardless of the p
+ * coefficients but can be made second-order correct at both ends by selecting
+ * the fit coefficients appropriately. Different p coefficients can be used
+ * in the asin and acos implementation to minimize some relative error metric
+ * in each case.
+ */
+static nir_ssa_def *
+build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
+{
+ nir_ssa_def *abs_x = nir_fabs(b, x);
+ return nir_fmul(b, nir_fsign(b, x),
+ nir_fsub(b, nir_imm_float(b, M_PI_2f),
+ nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
+ nir_fadd(b, nir_imm_float(b, M_PI_2f),
+ nir_fmul(b, abs_x,
+ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
+ nir_fmul(b, abs_x,
+ nir_fadd(b, nir_imm_float(b, p0),
+ nir_fmul(b, abs_x,
+ nir_imm_float(b, p1))))))))));
+}
+
+/**
+ * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
+ */
+static nir_ssa_def *
+build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
+{
+ nir_ssa_def *accum = xs[0];
+
+ for (int i = 1; i < terms; i++)
+ accum = nir_fadd(b, accum, xs[i]);
+
+ return accum;
+}
+
+static nir_ssa_def *
+build_atan(nir_builder *b, nir_ssa_def *y_over_x)
+{
+ nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
+ nir_ssa_def *one = nir_imm_float(b, 1.0f);
+
+ /*
+ * range-reduction, first step:
+ *
+ * / y_over_x if |y_over_x| <= 1.0;
+ * x = <
+ * \ 1.0 / y_over_x otherwise
+ */
+ nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
+ nir_fmax(b, abs_y_over_x, one));
+
+ /*
+ * approximate atan by evaluating polynomial:
+ *
+ * x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
+ * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
+ * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
+ */
+ nir_ssa_def *x_2 = nir_fmul(b, x, x);
+ nir_ssa_def *x_3 = nir_fmul(b, x_2, x);
+ nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2);
+ nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2);
+ nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
+ nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
+
+ nir_ssa_def *polynomial_terms[] = {
+ nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
+ nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
+ nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
+ nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
+ nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
+ nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
+ };
+
+ nir_ssa_def *tmp =
+ build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
+
+ /* range-reduction fixup */
+ tmp = nir_fadd(b, tmp,
+ nir_fmul(b,
+ nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
+ nir_fadd(b, nir_fmul(b, tmp,
+ nir_imm_float(b, -2.0f)),
+ nir_imm_float(b, M_PI_2f))));
+
+ /* sign fixup */
+ return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
+}
+
+static nir_ssa_def *
+build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
+{
+ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
+
+ /* If |x| >= 1.0e-8 * |y|: */
+ nir_ssa_def *condition =
+ nir_fge(b, nir_fabs(b, x),
+ nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
+
+ /* Then...call atan(y/x) and fix it up: */
+ nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
+ nir_ssa_def *r_then =
+ nir_bcsel(b, nir_flt(b, x, zero),
+ nir_fadd(b, atan1,
+ nir_bcsel(b, nir_fge(b, y, zero),
+ nir_imm_float(b, M_PIf),
+ nir_imm_float(b, -M_PIf))),
+ atan1);
+
+ /* Else... */
+ nir_ssa_def *r_else =
+ nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
+
+ return nir_bcsel(b, condition, r_then, r_else);
+}
+
+static nir_ssa_def *
+build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
+{
+ nir_ssa_def *abs_x = nir_fabs(b, x);
+ nir_ssa_def *zero = nir_imm_float(b, 0.0f);
+
+ /* Single-precision floating-point values are stored as
+ * 1 sign bit;
+ * 8 exponent bits;
+ * 23 mantissa bits.
+ *
+ * An exponent shift of 23 will shift the mantissa out, leaving only the
+ * exponent and sign bit (which itself may be zero, if the absolute value
+ * was taken before the bitcast and shift.
+ */
+ nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
+ nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
+
+ nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
+
+ /* Exponent of floating-point values in the range [0.5, 1.0). */
+ nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
+
+ nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
+
+ *exponent =
+ nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
+ nir_bcsel(b, is_not_zero, exponent_bias, zero));
+
+ return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
+ nir_bcsel(b, is_not_zero, exponent_value, zero));
+}
+
+static void
+handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
+ const uint32_t *w, unsigned count)
+{
+ struct nir_builder *nb = &b->nb;
+ const struct glsl_type *dest_type =
+ vtn_value(b, w[1], vtn_value_type_type)->type->type;
+
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ val->ssa = vtn_create_ssa_value(b, dest_type);
+
+ /* Collect the various SSA sources */
+ unsigned num_inputs = count - 5;
+ nir_ssa_def *src[3];
+ for (unsigned i = 0; i < num_inputs; i++)
+ src[i] = vtn_ssa_value(b, w[i + 5])->def;
+
+ nir_op op;
+ switch (entrypoint) {
+ case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */
+ case GLSLstd450RoundEven: op = nir_op_fround_even; break;
+ case GLSLstd450Trunc: op = nir_op_ftrunc; break;
+ case GLSLstd450FAbs: op = nir_op_fabs; break;
+ case GLSLstd450SAbs: op = nir_op_iabs; break;
+ case GLSLstd450FSign: op = nir_op_fsign; break;
+ case GLSLstd450SSign: op = nir_op_isign; break;
+ case GLSLstd450Floor: op = nir_op_ffloor; break;
+ case GLSLstd450Ceil: op = nir_op_fceil; break;
+ case GLSLstd450Fract: op = nir_op_ffract; break;
+ case GLSLstd450Radians:
+ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
+ return;
+ case GLSLstd450Degrees:
+ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
+ return;
+ case GLSLstd450Sin: op = nir_op_fsin; break;
+ case GLSLstd450Cos: op = nir_op_fcos; break;
+ case GLSLstd450Tan:
+ val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
+ nir_fcos(nb, src[0]));
+ return;
+ case GLSLstd450Pow: op = nir_op_fpow; break;
+ case GLSLstd450Exp2: op = nir_op_fexp2; break;
+ case GLSLstd450Log2: op = nir_op_flog2; break;
+ case GLSLstd450Sqrt: op = nir_op_fsqrt; break;
+ case GLSLstd450InverseSqrt: op = nir_op_frsq; break;
+
+ case GLSLstd450Modf: {
+ nir_ssa_def *sign = nir_fsign(nb, src[0]);
+ nir_ssa_def *abs = nir_fabs(nb, src[0]);
+ val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
+ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
+ nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
+ return;
+ }
+
+ case GLSLstd450ModfStruct: {
+ nir_ssa_def *sign = nir_fsign(nb, src[0]);
+ nir_ssa_def *abs = nir_fabs(nb, src[0]);
+ assert(glsl_type_is_struct(val->ssa->type));
+ val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
+ val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
+ return;
+ }
+
+ case GLSLstd450FMin: op = nir_op_fmin; break;
+ case GLSLstd450UMin: op = nir_op_umin; break;
+ case GLSLstd450SMin: op = nir_op_imin; break;
+ case GLSLstd450FMax: op = nir_op_fmax; break;
+ case GLSLstd450UMax: op = nir_op_umax; break;
+ case GLSLstd450SMax: op = nir_op_imax; break;
+ case GLSLstd450FMix: op = nir_op_flrp; break;
+ case GLSLstd450Step:
+ val->ssa->def = nir_sge(nb, src[1], src[0]);
+ return;
+
+ case GLSLstd450Fma: op = nir_op_ffma; break;
+ case GLSLstd450Ldexp: op = nir_op_ldexp; break;
+
+ /* Packing/Unpacking functions */
+ case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break;
+ case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break;
+ case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break;
+ case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break;
+ case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break;
+ case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break;
+ case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break;
+ case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break;
+ case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break;
+ case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break;
+
+ case GLSLstd450Length:
+ val->ssa->def = build_length(nb, src[0]);
+ return;
+ case GLSLstd450Distance:
+ val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
+ return;
+ case GLSLstd450Normalize:
+ val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
+ return;
+
+ case GLSLstd450Exp:
+ val->ssa->def = build_exp(nb, src[0]);
+ return;
+
+ case GLSLstd450Log:
+ val->ssa->def = build_log(nb, src[0]);
+ return;
+
+ case GLSLstd450FClamp:
+ val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
+ return;
+ case GLSLstd450UClamp:
+ val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
+ return;
+ case GLSLstd450SClamp:
+ val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
+ return;
+
+ case GLSLstd450Cross: {
+ unsigned yzx[4] = { 1, 2, 0, 0 };
+ unsigned zxy[4] = { 2, 0, 1, 0 };
+ val->ssa->def =
+ nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
+ nir_swizzle(nb, src[1], zxy, 3, true)),
+ nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
+ nir_swizzle(nb, src[1], yzx, 3, true)));
+ return;
+ }
+
+ case GLSLstd450SmoothStep: {
+ /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
+ nir_ssa_def *t =
+ build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
+ nir_fsub(nb, src[1], src[0])),
+ nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
+ /* result = t * t * (3 - 2 * t) */
+ val->ssa->def =
+ nir_fmul(nb, t, nir_fmul(nb, t,
+ nir_fsub(nb, nir_imm_float(nb, 3.0),
+ nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
+ return;
+ }
+
+ case GLSLstd450FaceForward:
+ val->ssa->def =
+ nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
+ nir_imm_float(nb, 0.0)),
+ src[0], nir_fneg(nb, src[0]));
+ return;
+
+ case GLSLstd450Reflect:
+ /* I - 2 * dot(N, I) * N */
+ val->ssa->def =
+ nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
+ nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
+ src[1])));
+ return;
+
+ case GLSLstd450Refract: {
+ nir_ssa_def *I = src[0];
+ nir_ssa_def *N = src[1];
+ nir_ssa_def *eta = src[2];
+ nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
+ nir_ssa_def *one = nir_imm_float(nb, 1.0);
+ nir_ssa_def *zero = nir_imm_float(nb, 0.0);
+ /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
+ nir_ssa_def *k =
+ nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
+ nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
+ nir_ssa_def *result =
+ nir_fsub(nb, nir_fmul(nb, eta, I),
+ nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
+ nir_fsqrt(nb, k)), N));
+ /* XXX: bcsel, or if statement? */
+ val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
+ return;
+ }
+
+ case GLSLstd450Sinh:
+ /* 0.5 * (e^x - e^(-x)) */
+ val->ssa->def =
+ nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ nir_fsub(nb, build_exp(nb, src[0]),
+ build_exp(nb, nir_fneg(nb, src[0]))));
+ return;
+
+ case GLSLstd450Cosh:
+ /* 0.5 * (e^x + e^(-x)) */
+ val->ssa->def =
+ nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ nir_fadd(nb, build_exp(nb, src[0]),
+ build_exp(nb, nir_fneg(nb, src[0]))));
+ return;
+
+ case GLSLstd450Tanh:
+ /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
+ val->ssa->def =
+ nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ nir_fsub(nb, build_exp(nb, src[0]),
+ build_exp(nb, nir_fneg(nb, src[0])))),
+ nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ nir_fadd(nb, build_exp(nb, src[0]),
+ build_exp(nb, nir_fneg(nb, src[0])))));
+ return;
+
+ case GLSLstd450Asinh:
+ val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
+ build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
+ nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
+ nir_imm_float(nb, 1.0f))))));
+ return;
+ case GLSLstd450Acosh:
+ val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
+ nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
+ nir_imm_float(nb, 1.0f)))));
+ return;
+ case GLSLstd450Atanh: {
+ nir_ssa_def *one = nir_imm_float(nb, 1.0);
+ val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
+ build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
+ nir_fsub(nb, one, src[0]))));
+ return;
+ }
+
+ case GLSLstd450FindILsb: op = nir_op_find_lsb; break;
+ case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break;
+ case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break;
+
+ case GLSLstd450Asin:
+ val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
+ return;
+
+ case GLSLstd450Acos:
+ val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f),
+ build_asin(nb, src[0], 0.08132463, -0.02363318));
+ return;
+
+ case GLSLstd450Atan:
+ val->ssa->def = build_atan(nb, src[0]);
+ return;
+
+ case GLSLstd450Atan2:
+ val->ssa->def = build_atan2(nb, src[0], src[1]);
+ return;
+
+ case GLSLstd450Frexp: {
+ nir_ssa_def *exponent;
+ val->ssa->def = build_frexp(nb, src[0], &exponent);
+ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
+ return;
+ }
+
+ case GLSLstd450FrexpStruct: {
+ assert(glsl_type_is_struct(val->ssa->type));
+ val->ssa->elems[0]->def = build_frexp(nb, src[0],
+ &val->ssa->elems[1]->def);
+ return;
+ }
+
+ case GLSLstd450PackDouble2x32:
+ case GLSLstd450UnpackDouble2x32:
+ default:
+ unreachable("Unhandled opcode");
+ }
+
+ nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
++ glsl_get_vector_elements(val->ssa->type),
++ glsl_get_bit_size(glsl_get_base_type(val->ssa->type)),
++ val->name);
+ instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
+ val->ssa->def = &instr->dest.dest.ssa;
+
+ for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
+ instr->src[i].src = nir_src_for_ssa(src[i]);
+
+ nir_builder_instr_insert(nb, &instr->instr);
+}
+
+bool
+vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
+ const uint32_t *w, unsigned count)
+{
+ switch ((enum GLSLstd450)ext_opcode) {
+ case GLSLstd450Determinant: {
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ val->ssa = rzalloc(b, struct vtn_ssa_value);
+ val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
+ val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
+ break;
+ }
+
+ case GLSLstd450MatrixInverse: {
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
+ break;
+ }
+
+ case GLSLstd450InterpolateAtCentroid:
+ case GLSLstd450InterpolateAtSample:
+ case GLSLstd450InterpolateAtOffset:
+ unreachable("Unhandled opcode");
+
+ default:
+ handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
+ }
+
+ return true;
+}
--- /dev/null
- intrin->num_components, NULL);
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "vtn_private.h"
+
+static struct vtn_access_chain *
+vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old,
+ unsigned new_ids)
+{
+ struct vtn_access_chain *chain;
+
+ unsigned new_len = old->length + new_ids;
+ chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0]));
+
+ chain->var = old->var;
+ chain->length = new_len;
+
+ for (unsigned i = 0; i < old->length; i++)
+ chain->link[i] = old->link[i];
+
+ return chain;
+}
+
+static nir_ssa_def *
+vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
+ unsigned stride)
+{
+ assert(stride > 0);
+ if (link.mode == vtn_access_mode_literal) {
+ return nir_imm_int(&b->nb, link.id * stride);
+ } else if (stride == 1) {
+ return vtn_ssa_value(b, link.id)->def;
+ } else {
+ return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def,
+ nir_imm_int(&b->nb, stride));
+ }
+}
+
+static struct vtn_type *
+vtn_access_chain_tail_type(struct vtn_builder *b,
+ struct vtn_access_chain *chain)
+{
+ struct vtn_type *type = chain->var->type;
+ for (unsigned i = 0; i < chain->length; i++) {
+ if (glsl_type_is_struct(type->type)) {
+ assert(chain->link[i].mode == vtn_access_mode_literal);
+ type = type->members[chain->link[i].id];
+ } else {
+ type = type->array_element;
+ }
+ }
+ return type;
+}
+
+/* Crawls a chain of array derefs and rewrites the types so that the
+ * lengths stay the same but the terminal type is the one given by
+ * tail_type. This is useful for split structures.
+ */
+static void
+rewrite_deref_types(nir_deref *deref, const struct glsl_type *type)
+{
+ deref->type = type;
+ if (deref->child) {
+ assert(deref->child->deref_type == nir_deref_type_array);
+ assert(glsl_type_is_array(deref->type));
+ rewrite_deref_types(deref->child, glsl_get_array_element(type));
+ }
+}
+
+nir_deref_var *
+vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
+{
+ nir_deref_var *deref_var;
+ if (chain->var->var) {
+ deref_var = nir_deref_var_create(b, chain->var->var);
+ } else {
+ assert(chain->var->members);
+ /* Create the deref_var manually. It will get filled out later. */
+ deref_var = rzalloc(b, nir_deref_var);
+ deref_var->deref.deref_type = nir_deref_type_var;
+ }
+
+ struct vtn_type *deref_type = chain->var->type;
+ nir_deref *tail = &deref_var->deref;
+ nir_variable **members = chain->var->members;
+
+ for (unsigned i = 0; i < chain->length; i++) {
+ enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_ARRAY: {
+ deref_type = deref_type->array_element;
+
+ nir_deref_array *deref_arr = nir_deref_array_create(b);
+ deref_arr->deref.type = deref_type->type;
+
+ if (chain->link[i].mode == vtn_access_mode_literal) {
+ deref_arr->deref_array_type = nir_deref_array_type_direct;
+ deref_arr->base_offset = chain->link[i].id;
+ } else {
+ assert(chain->link[i].mode == vtn_access_mode_id);
+ deref_arr->deref_array_type = nir_deref_array_type_indirect;
+ deref_arr->base_offset = 0;
+ deref_arr->indirect =
+ nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def);
+ }
+ tail->child = &deref_arr->deref;
+ tail = tail->child;
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ assert(chain->link[i].mode == vtn_access_mode_literal);
+ unsigned idx = chain->link[i].id;
+ deref_type = deref_type->members[idx];
+ if (members) {
+ /* This is a pre-split structure. */
+ deref_var->var = members[idx];
+ rewrite_deref_types(&deref_var->deref, members[idx]->type);
+ assert(tail->type == deref_type->type);
+ members = NULL;
+ } else {
+ nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx);
+ deref_struct->deref.type = deref_type->type;
+ tail->child = &deref_struct->deref;
+ tail = tail->child;
+ }
+ break;
+ }
+ default:
+ unreachable("Invalid type for deref");
+ }
+ }
+
+ assert(members == NULL);
+ return deref_var;
+}
+
+static void
+_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
+ nir_deref *tail, struct vtn_ssa_value *inout)
+{
+ /* The deref tail may contain a deref to select a component of a vector (in
+ * other words, it might not be an actual tail) so we have to save it away
+ * here since we overwrite it later.
+ */
+ nir_deref *old_child = tail->child;
+
+ if (glsl_type_is_vector_or_scalar(tail->type)) {
+ /* Terminate the deref chain in case there is one more link to pick
+ * off a component of the vector.
+ */
+ tail->child = NULL;
+
+ nir_intrinsic_op op = load ? nir_intrinsic_load_var :
+ nir_intrinsic_store_var;
+
+ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op);
+ intrin->variables[0] =
+ nir_deref_as_var(nir_copy_deref(intrin, &deref->deref));
+ intrin->num_components = glsl_get_vector_elements(tail->type);
+
+ if (load) {
+ nir_ssa_dest_init(&intrin->instr, &intrin->dest,
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++ intrin->num_components,
++ glsl_get_bit_size(glsl_get_base_type(tail->type)),
++ NULL);
+ inout->def = &intrin->dest.ssa;
+ } else {
+ nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
+ intrin->src[0] = nir_src_for_ssa(inout->def);
+ }
+
+ nir_builder_instr_insert(&b->nb, &intrin->instr);
+ } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY ||
+ glsl_type_is_matrix(tail->type)) {
+ unsigned elems = glsl_get_length(tail->type);
+ nir_deref_array *deref_arr = nir_deref_array_create(b);
+ deref_arr->deref_array_type = nir_deref_array_type_direct;
+ deref_arr->deref.type = glsl_get_array_element(tail->type);
+ tail->child = &deref_arr->deref;
+ for (unsigned i = 0; i < elems; i++) {
+ deref_arr->base_offset = i;
+ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
+ }
+ } else {
+ assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT);
+ unsigned elems = glsl_get_length(tail->type);
+ nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0);
+ tail->child = &deref_struct->deref;
+ for (unsigned i = 0; i < elems; i++) {
+ deref_struct->index = i;
+ deref_struct->deref.type = glsl_get_struct_field(tail->type, i);
+ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]);
+ }
+ }
+
+ tail->child = old_child;
+}
+
+nir_deref_var *
+vtn_nir_deref(struct vtn_builder *b, uint32_t id)
+{
+ struct vtn_access_chain *chain =
+ vtn_value(b, id, vtn_value_type_access_chain)->access_chain;
+
+ return vtn_access_chain_to_deref(b, chain);
+}
+
+/*
+ * Gets the NIR-level deref tail, which may have as a child an array deref
+ * selecting which component due to OpAccessChain supporting per-component
+ * indexing in SPIR-V.
+ */
+static nir_deref *
+get_deref_tail(nir_deref_var *deref)
+{
+ nir_deref *cur = &deref->deref;
+ while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child)
+ cur = cur->child;
+
+ return cur;
+}
+
+struct vtn_ssa_value *
+vtn_local_load(struct vtn_builder *b, nir_deref_var *src)
+{
+ nir_deref *src_tail = get_deref_tail(src);
+ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type);
+ _vtn_local_load_store(b, true, src, src_tail, val);
+
+ if (src_tail->child) {
+ nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child);
+ assert(vec_deref->deref.child == NULL);
+ val->type = vec_deref->deref.type;
+ if (vec_deref->deref_array_type == nir_deref_array_type_direct)
+ val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset);
+ else
+ val->def = vtn_vector_extract_dynamic(b, val->def,
+ vec_deref->indirect.ssa);
+ }
+
+ return val;
+}
+
+void
+vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+ nir_deref_var *dest)
+{
+ nir_deref *dest_tail = get_deref_tail(dest);
+
+ if (dest_tail->child) {
+ struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
+ _vtn_local_load_store(b, true, dest, dest_tail, val);
+ nir_deref_array *deref = nir_deref_as_array(dest_tail->child);
+ assert(deref->deref.child == NULL);
+ if (deref->deref_array_type == nir_deref_array_type_direct)
+ val->def = vtn_vector_insert(b, val->def, src->def,
+ deref->base_offset);
+ else
+ val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
+ deref->indirect.ssa);
+ _vtn_local_load_store(b, false, dest, dest_tail, val);
+ } else {
+ _vtn_local_load_store(b, false, dest, dest_tail, src);
+ }
+}
+
+static nir_ssa_def *
+get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
+ struct vtn_type **type, unsigned *chain_idx)
+{
+ /* Push constants have no explicit binding */
+ if (chain->var->mode == vtn_variable_mode_push_constant) {
+ *chain_idx = 0;
+ *type = chain->var->type;
+ return NULL;
+ }
+
+ nir_ssa_def *array_index;
+ if (glsl_type_is_array(chain->var->type->type)) {
+ assert(chain->length > 0);
+ array_index = vtn_access_link_as_ssa(b, chain->link[0], 1);
+ *chain_idx = 1;
+ *type = chain->var->type->array_element;
+ } else {
+ array_index = nir_imm_int(&b->nb, 0);
+ *chain_idx = 0;
+ *type = chain->var->type;
+ }
+
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(b->nb.shader,
+ nir_intrinsic_vulkan_resource_index);
+ instr->src[0] = nir_src_for_ssa(array_index);
+ nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
+ nir_intrinsic_set_binding(instr, chain->var->binding);
+
- instr->num_components, NULL);
++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
+ nir_builder_instr_insert(&b->nb, &instr->instr);
+
+ return &instr->dest.ssa;
+}
+
+nir_ssa_def *
+vtn_access_chain_to_offset(struct vtn_builder *b,
+ struct vtn_access_chain *chain,
+ nir_ssa_def **index_out, struct vtn_type **type_out,
+ unsigned *end_idx_out, bool stop_at_matrix)
+{
+ unsigned idx = 0;
+ struct vtn_type *type;
+ *index_out = get_vulkan_resource_index(b, chain, &type, &idx);
+
+ nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
+ for (; idx < chain->length; idx++) {
+ enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ /* Some users may not want matrix or vector derefs */
+ if (stop_at_matrix)
+ goto end;
+ /* Fall through */
+
+ case GLSL_TYPE_ARRAY:
+ offset = nir_iadd(&b->nb, offset,
+ vtn_access_link_as_ssa(b, chain->link[idx],
+ type->stride));
+
+ type = type->array_element;
+ break;
+
+ case GLSL_TYPE_STRUCT: {
+ assert(chain->link[idx].mode == vtn_access_mode_literal);
+ unsigned member = chain->link[idx].id;
+ offset = nir_iadd(&b->nb, offset,
+ nir_imm_int(&b->nb, type->offsets[member]));
+ type = type->members[member];
+ break;
+ }
+
+ default:
+ unreachable("Invalid type for deref");
+ }
+ }
+
+end:
+ *type_out = type;
+ if (end_idx_out)
+ *end_idx_out = idx;
+
+ return offset;
+}
+
+static void
+_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
+ nir_ssa_def *index, nir_ssa_def *offset,
+ struct vtn_ssa_value **inout, const struct glsl_type *type)
+{
+ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
+ instr->num_components = glsl_get_vector_elements(type);
+
+ int src = 0;
+ if (!load) {
+ nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
+ instr->src[src++] = nir_src_for_ssa((*inout)->def);
+ }
+
+ /* We set the base and size for push constant load to the entire push
+ * constant block for now.
+ */
+ if (op == nir_intrinsic_load_push_constant) {
+ nir_intrinsic_set_base(instr, 0);
+ nir_intrinsic_set_range(instr, 128);
+ }
+
+ if (index)
+ instr->src[src++] = nir_src_for_ssa(index);
+
+ instr->src[src++] = nir_src_for_ssa(offset);
+
+ if (load) {
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
++ instr->num_components,
++ glsl_get_bit_size(glsl_get_base_type(type)), NULL);
+ (*inout)->def = &instr->dest.ssa;
+ }
+
+ nir_builder_instr_insert(&b->nb, &instr->instr);
+
+ if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
+ (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
+}
+
+static void
+_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
+ nir_ssa_def *index, nir_ssa_def *offset,
+ struct vtn_access_chain *chain, unsigned chain_idx,
+ struct vtn_type *type, struct vtn_ssa_value **inout)
+{
+ if (chain && chain_idx >= chain->length)
+ chain = NULL;
+
+ if (load && chain == NULL && *inout == NULL)
+ *inout = vtn_create_ssa_value(b, type->type);
+
+ enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ /* This is where things get interesting. At this point, we've hit
+ * a vector, a scalar, or a matrix.
+ */
+ if (glsl_type_is_matrix(type->type)) {
+ if (chain == NULL) {
+ /* Loading the whole matrix */
+ struct vtn_ssa_value *transpose;
+ unsigned num_ops, vec_width;
+ if (type->row_major) {
+ num_ops = glsl_get_vector_elements(type->type);
+ vec_width = glsl_get_matrix_columns(type->type);
+ if (load) {
+ const struct glsl_type *transpose_type =
+ glsl_matrix_type(base_type, vec_width, num_ops);
+ *inout = vtn_create_ssa_value(b, transpose_type);
+ } else {
+ transpose = vtn_ssa_transpose(b, *inout);
+ inout = &transpose;
+ }
+ } else {
+ num_ops = glsl_get_matrix_columns(type->type);
+ vec_width = glsl_get_vector_elements(type->type);
+ }
+
+ for (unsigned i = 0; i < num_ops; i++) {
+ nir_ssa_def *elem_offset =
+ nir_iadd(&b->nb, offset,
+ nir_imm_int(&b->nb, i * type->stride));
+ _vtn_load_store_tail(b, op, load, index, elem_offset,
+ &(*inout)->elems[i],
+ glsl_vector_type(base_type, vec_width));
+ }
+
+ if (load && type->row_major)
+ *inout = vtn_ssa_transpose(b, *inout);
+ } else if (type->row_major) {
+ /* Row-major but with an access chiain. */
+ nir_ssa_def *col_offset =
+ vtn_access_link_as_ssa(b, chain->link[chain_idx],
+ type->array_element->stride);
+ offset = nir_iadd(&b->nb, offset, col_offset);
+
+ if (chain_idx + 1 < chain->length) {
+ /* Picking off a single element */
+ nir_ssa_def *row_offset =
+ vtn_access_link_as_ssa(b, chain->link[chain_idx + 1],
+ type->stride);
+ offset = nir_iadd(&b->nb, offset, row_offset);
+ if (load)
+ *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
+ _vtn_load_store_tail(b, op, load, index, offset, inout,
+ glsl_scalar_type(base_type));
+ } else {
+ /* Grabbing a column; picking one element off each row */
+ unsigned num_comps = glsl_get_vector_elements(type->type);
+ const struct glsl_type *column_type =
+ glsl_get_column_type(type->type);
+
+ nir_ssa_def *comps[4];
+ for (unsigned i = 0; i < num_comps; i++) {
+ nir_ssa_def *elem_offset =
+ nir_iadd(&b->nb, offset,
+ nir_imm_int(&b->nb, i * type->stride));
+
+ struct vtn_ssa_value *comp, temp_val;
+ if (!load) {
+ temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
+ temp_val.type = glsl_scalar_type(base_type);
+ }
+ comp = &temp_val;
+ _vtn_load_store_tail(b, op, load, index, elem_offset,
+ &comp, glsl_scalar_type(base_type));
+ comps[i] = comp->def;
+ }
+
+ if (load) {
+ if (*inout == NULL)
+ *inout = vtn_create_ssa_value(b, column_type);
+
+ (*inout)->def = nir_vec(&b->nb, comps, num_comps);
+ }
+ }
+ } else {
+ /* Column-major with a deref. Fall through to array case. */
+ nir_ssa_def *col_offset =
+ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
+ offset = nir_iadd(&b->nb, offset, col_offset);
+
+ _vtn_block_load_store(b, op, load, index, offset,
+ chain, chain_idx + 1,
+ type->array_element, inout);
+ }
+ } else if (chain == NULL) {
+ /* Single whole vector */
+ assert(glsl_type_is_vector_or_scalar(type->type));
+ _vtn_load_store_tail(b, op, load, index, offset, inout, type->type);
+ } else {
+ /* Single component of a vector. Fall through to array case. */
+ nir_ssa_def *elem_offset =
+ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride);
+ offset = nir_iadd(&b->nb, offset, elem_offset);
+
+ _vtn_block_load_store(b, op, load, index, offset, NULL, 0,
+ type->array_element, inout);
+ }
+ return;
+
+ case GLSL_TYPE_ARRAY: {
+ unsigned elems = glsl_get_length(type->type);
+ for (unsigned i = 0; i < elems; i++) {
+ nir_ssa_def *elem_off =
+ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
+ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
+ type->array_element, &(*inout)->elems[i]);
+ }
+ return;
+ }
+
+ case GLSL_TYPE_STRUCT: {
+ unsigned elems = glsl_get_length(type->type);
+ for (unsigned i = 0; i < elems; i++) {
+ nir_ssa_def *elem_off =
+ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
+ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0,
+ type->members[i], &(*inout)->elems[i]);
+ }
+ return;
+ }
+
+ default:
+ unreachable("Invalid block member type");
+ }
+}
+
+static struct vtn_ssa_value *
+vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src)
+{
+ nir_intrinsic_op op;
+ switch (src->var->mode) {
+ case vtn_variable_mode_ubo:
+ op = nir_intrinsic_load_ubo;
+ break;
+ case vtn_variable_mode_ssbo:
+ op = nir_intrinsic_load_ssbo;
+ break;
+ case vtn_variable_mode_push_constant:
+ op = nir_intrinsic_load_push_constant;
+ break;
+ default:
+ assert(!"Invalid block variable mode");
+ }
+
+ nir_ssa_def *offset, *index = NULL;
+ struct vtn_type *type;
+ unsigned chain_idx;
+ offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true);
+
+ struct vtn_ssa_value *value = NULL;
+ _vtn_block_load_store(b, op, true, index, offset,
+ src, chain_idx, type, &value);
+ return value;
+}
+
+static void
+vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+ struct vtn_access_chain *dst)
+{
+ nir_ssa_def *offset, *index = NULL;
+ struct vtn_type *type;
+ unsigned chain_idx;
+ offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true);
+
+ _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset,
+ dst, chain_idx, type, &src);
+}
+
+static bool
+vtn_variable_is_external_block(struct vtn_variable *var)
+{
+ return var->mode == vtn_variable_mode_ssbo ||
+ var->mode == vtn_variable_mode_ubo ||
+ var->mode == vtn_variable_mode_push_constant;
+}
+
+static void
+_vtn_variable_load_store(struct vtn_builder *b, bool load,
+ struct vtn_access_chain *chain,
+ struct vtn_type *tail_type,
+ struct vtn_ssa_value **inout)
+{
+ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ /* At this point, we have a scalar, vector, or matrix so we know that
+ * there cannot be any structure splitting still in the way. By
+ * stopping at the matrix level rather than the vector level, we
+ * ensure that matrices get loaded in the optimal way even if they
+ * are storred row-major in a UBO.
+ */
+ if (load) {
+ *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain));
+ } else {
+ vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain));
+ }
+ return;
+
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT: {
+ struct vtn_access_chain *new_chain =
+ vtn_access_chain_extend(b, chain, 1);
+ new_chain->link[chain->length].mode = vtn_access_mode_literal;
+ unsigned elems = glsl_get_length(tail_type->type);
+ if (load) {
+ assert(*inout == NULL);
+ *inout = rzalloc(b, struct vtn_ssa_value);
+ (*inout)->type = tail_type->type;
+ (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems);
+ }
+ for (unsigned i = 0; i < elems; i++) {
+ new_chain->link[chain->length].id = i;
+ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
+ tail_type->array_element : tail_type->members[i];
+ _vtn_variable_load_store(b, load, new_chain, elem_type,
+ &(*inout)->elems[i]);
+ }
+ return;
+ }
+
+ default:
+ unreachable("Invalid access chain type");
+ }
+}
+
+struct vtn_ssa_value *
+vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src)
+{
+ if (vtn_variable_is_external_block(src->var)) {
+ return vtn_block_load(b, src);
+ } else {
+ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
+ struct vtn_ssa_value *val = NULL;
+ _vtn_variable_load_store(b, true, src, tail_type, &val);
+ return val;
+ }
+}
+
+void
+vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
+ struct vtn_access_chain *dest)
+{
+ if (vtn_variable_is_external_block(dest->var)) {
+ assert(dest->var->mode == vtn_variable_mode_ssbo);
+ vtn_block_store(b, src, dest);
+ } else {
+ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest);
+ _vtn_variable_load_store(b, false, dest, tail_type, &src);
+ }
+}
+
+static void
+_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
+ struct vtn_access_chain *src, struct vtn_type *tail_type)
+{
+ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ /* At this point, we have a scalar, vector, or matrix so we know that
+ * there cannot be any structure splitting still in the way. By
+ * stopping at the matrix level rather than the vector level, we
+ * ensure that matrices get loaded in the optimal way even if they
+ * are storred row-major in a UBO.
+ */
+ vtn_variable_store(b, vtn_variable_load(b, src), dest);
+ return;
+
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT: {
+ struct vtn_access_chain *new_src, *new_dest;
+ new_src = vtn_access_chain_extend(b, src, 1);
+ new_dest = vtn_access_chain_extend(b, dest, 1);
+ new_src->link[src->length].mode = vtn_access_mode_literal;
+ new_dest->link[dest->length].mode = vtn_access_mode_literal;
+ unsigned elems = glsl_get_length(tail_type->type);
+ for (unsigned i = 0; i < elems; i++) {
+ new_src->link[src->length].id = i;
+ new_dest->link[dest->length].id = i;
+ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
+ tail_type->array_element : tail_type->members[i];
+ _vtn_variable_copy(b, new_dest, new_src, elem_type);
+ }
+ return;
+ }
+
+ default:
+ unreachable("Invalid access chain type");
+ }
+}
+
+static void
+vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest,
+ struct vtn_access_chain *src)
+{
+ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src);
+ assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type);
+
+ /* TODO: At some point, we should add a special-case for when we can
+ * just emit a copy_var intrinsic.
+ */
+ _vtn_variable_copy(b, dest, src, tail_type);
+}
+
+static void
+set_mode_system_value(nir_variable_mode *mode)
+{
+ assert(*mode == nir_var_system_value || *mode == nir_var_shader_in);
+ *mode = nir_var_system_value;
+}
+
+static void
+vtn_get_builtin_location(struct vtn_builder *b,
+ SpvBuiltIn builtin, int *location,
+ nir_variable_mode *mode)
+{
+ switch (builtin) {
+ case SpvBuiltInPosition:
+ *location = VARYING_SLOT_POS;
+ break;
+ case SpvBuiltInPointSize:
+ *location = VARYING_SLOT_PSIZ;
+ break;
+ case SpvBuiltInClipDistance:
+ *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */
+ break;
+ case SpvBuiltInCullDistance:
+ /* XXX figure this out */
+ break;
+ case SpvBuiltInVertexIndex:
+ *location = SYSTEM_VALUE_VERTEX_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInVertexId:
+ /* Vulkan defines VertexID to be zero-based and reserves the new
+ * builtin keyword VertexIndex to indicate the non-zero-based value.
+ */
+ *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInInstanceIndex:
+ *location = SYSTEM_VALUE_INSTANCE_INDEX;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInInstanceId:
+ *location = SYSTEM_VALUE_INSTANCE_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInPrimitiveId:
+ *location = VARYING_SLOT_PRIMITIVE_ID;
+ *mode = nir_var_shader_out;
+ break;
+ case SpvBuiltInInvocationId:
+ *location = SYSTEM_VALUE_INVOCATION_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInLayer:
+ *location = VARYING_SLOT_LAYER;
+ *mode = nir_var_shader_out;
+ break;
+ case SpvBuiltInViewportIndex:
+ *location = VARYING_SLOT_VIEWPORT;
+ if (b->shader->stage == MESA_SHADER_GEOMETRY)
+ *mode = nir_var_shader_out;
+ else if (b->shader->stage == MESA_SHADER_FRAGMENT)
+ *mode = nir_var_shader_in;
+ else
+ unreachable("invalid stage for SpvBuiltInViewportIndex");
+ break;
+ case SpvBuiltInTessLevelOuter:
+ case SpvBuiltInTessLevelInner:
+ case SpvBuiltInTessCoord:
+ case SpvBuiltInPatchVertices:
+ unreachable("no tessellation support");
+ case SpvBuiltInFragCoord:
+ *location = VARYING_SLOT_POS;
+ assert(*mode == nir_var_shader_in);
+ break;
+ case SpvBuiltInPointCoord:
+ *location = VARYING_SLOT_PNTC;
+ assert(*mode == nir_var_shader_in);
+ break;
+ case SpvBuiltInFrontFacing:
+ *location = VARYING_SLOT_FACE;
+ assert(*mode == nir_var_shader_in);
+ break;
+ case SpvBuiltInSampleId:
+ *location = SYSTEM_VALUE_SAMPLE_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInSamplePosition:
+ *location = SYSTEM_VALUE_SAMPLE_POS;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInSampleMask:
+ *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInFragDepth:
+ *location = FRAG_RESULT_DEPTH;
+ assert(*mode == nir_var_shader_out);
+ break;
+ case SpvBuiltInNumWorkgroups:
+ *location = SYSTEM_VALUE_NUM_WORK_GROUPS;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInWorkgroupSize:
+ /* This should already be handled */
+ unreachable("unsupported builtin");
+ break;
+ case SpvBuiltInWorkgroupId:
+ *location = SYSTEM_VALUE_WORK_GROUP_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInLocalInvocationId:
+ *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInLocalInvocationIndex:
+ *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInGlobalInvocationId:
+ *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
+ set_mode_system_value(mode);
+ break;
+ case SpvBuiltInHelperInvocation:
+ default:
+ unreachable("unsupported builtin");
+ }
+}
+
+static void
+var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
+ const struct vtn_decoration *dec, void *void_var)
+{
+ struct vtn_variable *vtn_var = void_var;
+
+ /* Handle decorations that apply to a vtn_variable as a whole */
+ switch (dec->decoration) {
+ case SpvDecorationBinding:
+ vtn_var->binding = dec->literals[0];
+ return;
+ case SpvDecorationDescriptorSet:
+ vtn_var->descriptor_set = dec->literals[0];
+ return;
+
+ case SpvDecorationLocation: {
+ unsigned location = dec->literals[0];
+ bool is_vertex_input;
+ if (b->shader->stage == MESA_SHADER_FRAGMENT &&
+ vtn_var->mode == vtn_variable_mode_output) {
+ is_vertex_input = false;
+ location += FRAG_RESULT_DATA0;
+ } else if (b->shader->stage == MESA_SHADER_VERTEX &&
+ vtn_var->mode == vtn_variable_mode_input) {
+ is_vertex_input = true;
+ location += VERT_ATTRIB_GENERIC0;
+ } else if (vtn_var->mode == vtn_variable_mode_input ||
+ vtn_var->mode == vtn_variable_mode_output) {
+ is_vertex_input = false;
+ location += VARYING_SLOT_VAR0;
+ } else {
+ assert(!"Location must be on input or output variable");
+ }
+
+ if (vtn_var->var) {
+ vtn_var->var->data.location = location;
+ vtn_var->var->data.explicit_location = true;
+ } else {
+ assert(vtn_var->members);
+ unsigned length = glsl_get_length(vtn_var->type->type);
+ for (unsigned i = 0; i < length; i++) {
+ vtn_var->members[i]->data.location = location;
+ vtn_var->members[i]->data.explicit_location = true;
+ location +=
+ glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
+ is_vertex_input);
+ }
+ }
+ return;
+ }
+
+ default:
+ break;
+ }
+
+ /* Now we handle decorations that apply to a particular nir_variable */
+ nir_variable *nir_var = vtn_var->var;
+ if (val->value_type == vtn_value_type_access_chain) {
+ assert(val->access_chain->length == 0);
+ assert(val->access_chain->var == void_var);
+ assert(member == -1);
+ } else {
+ assert(val->value_type == vtn_value_type_type);
+ if (member != -1)
+ nir_var = vtn_var->members[member];
+ }
+
+ if (nir_var == NULL)
+ return;
+
+ switch (dec->decoration) {
+ case SpvDecorationRelaxedPrecision:
+ break; /* FIXME: Do nothing with this for now. */
+ case SpvDecorationNoPerspective:
+ nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ break;
+ case SpvDecorationFlat:
+ nir_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ break;
+ case SpvDecorationCentroid:
+ nir_var->data.centroid = true;
+ break;
+ case SpvDecorationSample:
+ nir_var->data.sample = true;
+ break;
+ case SpvDecorationInvariant:
+ nir_var->data.invariant = true;
+ break;
+ case SpvDecorationConstant:
+ assert(nir_var->constant_initializer != NULL);
+ nir_var->data.read_only = true;
+ break;
+ case SpvDecorationNonWritable:
+ nir_var->data.read_only = true;
+ break;
+ case SpvDecorationComponent:
+ nir_var->data.location_frac = dec->literals[0];
+ break;
+ case SpvDecorationIndex:
+ nir_var->data.explicit_index = true;
+ nir_var->data.index = dec->literals[0];
+ break;
+ case SpvDecorationBuiltIn: {
+ SpvBuiltIn builtin = dec->literals[0];
+
+ if (builtin == SpvBuiltInWorkgroupSize) {
+ /* This shouldn't be a builtin. It's actually a constant. */
+ nir_var->data.mode = nir_var_global;
+ nir_var->data.read_only = true;
+
+ nir_constant *c = rzalloc(nir_var, nir_constant);
+ c->value.u[0] = b->shader->info.cs.local_size[0];
+ c->value.u[1] = b->shader->info.cs.local_size[1];
+ c->value.u[2] = b->shader->info.cs.local_size[2];
+ nir_var->constant_initializer = c;
+ break;
+ }
+
+ nir_variable_mode mode = nir_var->data.mode;
+ vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode);
+ nir_var->data.explicit_location = true;
+ nir_var->data.mode = mode;
+
+ if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition)
+ nir_var->data.origin_upper_left = b->origin_upper_left;
+ break;
+ }
+ case SpvDecorationRowMajor:
+ case SpvDecorationColMajor:
+ case SpvDecorationGLSLShared:
+ case SpvDecorationPatch:
+ case SpvDecorationRestrict:
+ case SpvDecorationAliased:
+ case SpvDecorationVolatile:
+ case SpvDecorationCoherent:
+ case SpvDecorationNonReadable:
+ case SpvDecorationUniform:
+ /* This is really nice but we have no use for it right now. */
+ case SpvDecorationCPacked:
+ case SpvDecorationSaturatedConversion:
+ case SpvDecorationStream:
+ case SpvDecorationOffset:
+ case SpvDecorationXfbBuffer:
+ case SpvDecorationFuncParamAttr:
+ case SpvDecorationFPRoundingMode:
+ case SpvDecorationFPFastMathMode:
+ case SpvDecorationLinkageAttributes:
+ case SpvDecorationSpecId:
+ break;
+ default:
+ unreachable("Unhandled variable decoration");
+ }
+}
+
+/* Tries to compute the size of an interface block based on the strides and
+ * offsets that are provided to us in the SPIR-V source.
+ */
+static unsigned
+vtn_type_block_size(struct vtn_type *type)
+{
+ enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_DOUBLE: {
+ unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
+ glsl_get_matrix_columns(type->type);
+ if (cols > 1) {
+ assert(type->stride > 0);
+ return type->stride * cols;
+ } else if (base_type == GLSL_TYPE_DOUBLE) {
+ return glsl_get_vector_elements(type->type) * 8;
+ } else {
+ return glsl_get_vector_elements(type->type) * 4;
+ }
+ }
+
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE: {
+ unsigned size = 0;
+ unsigned num_fields = glsl_get_length(type->type);
+ for (unsigned f = 0; f < num_fields; f++) {
+ unsigned field_end = type->offsets[f] +
+ vtn_type_block_size(type->members[f]);
+ size = MAX2(size, field_end);
+ }
+ return size;
+ }
+
+ case GLSL_TYPE_ARRAY:
+ assert(type->stride > 0);
+ assert(glsl_get_length(type->type) > 0);
+ return type->stride * glsl_get_length(type->type);
+
+ default:
+ assert(!"Invalid block type");
+ return 0;
+ }
+}
+
+void
+vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
+ const uint32_t *w, unsigned count)
+{
+ switch (opcode) {
+ case SpvOpVariable: {
+ struct vtn_variable *var = rzalloc(b, struct vtn_variable);
+ var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+
+ var->chain.var = var;
+ var->chain.length = 0;
+
+ struct vtn_value *val =
+ vtn_push_value(b, w[2], vtn_value_type_access_chain);
+ val->access_chain = &var->chain;
+
+ struct vtn_type *without_array = var->type;
+ while(glsl_type_is_array(without_array->type))
+ without_array = without_array->array_element;
+
+ nir_variable_mode nir_mode;
+ switch ((SpvStorageClass)w[3]) {
+ case SpvStorageClassUniform:
+ case SpvStorageClassUniformConstant:
+ if (without_array->block) {
+ var->mode = vtn_variable_mode_ubo;
+ b->shader->info.num_ubos++;
+ } else if (without_array->buffer_block) {
+ var->mode = vtn_variable_mode_ssbo;
+ b->shader->info.num_ssbos++;
+ } else if (glsl_type_is_image(without_array->type)) {
+ var->mode = vtn_variable_mode_image;
+ nir_mode = nir_var_uniform;
+ b->shader->info.num_images++;
+ } else if (glsl_type_is_sampler(without_array->type)) {
+ var->mode = vtn_variable_mode_sampler;
+ nir_mode = nir_var_uniform;
+ b->shader->info.num_textures++;
+ } else {
+ assert(!"Invalid uniform variable type");
+ }
+ break;
+ case SpvStorageClassPushConstant:
+ var->mode = vtn_variable_mode_push_constant;
+ assert(b->shader->num_uniforms == 0);
+ b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
+ break;
+ case SpvStorageClassInput:
+ var->mode = vtn_variable_mode_input;
+ nir_mode = nir_var_shader_in;
+ break;
+ case SpvStorageClassOutput:
+ var->mode = vtn_variable_mode_output;
+ nir_mode = nir_var_shader_out;
+ break;
+ case SpvStorageClassPrivate:
+ var->mode = vtn_variable_mode_global;
+ nir_mode = nir_var_global;
+ break;
+ case SpvStorageClassFunction:
+ var->mode = vtn_variable_mode_local;
+ nir_mode = nir_var_local;
+ break;
+ case SpvStorageClassWorkgroup:
+ var->mode = vtn_variable_mode_workgroup;
+ nir_mode = nir_var_shared;
+ break;
+ case SpvStorageClassCrossWorkgroup:
+ case SpvStorageClassGeneric:
+ case SpvStorageClassAtomicCounter:
+ default:
+ unreachable("Unhandled variable storage class");
+ }
+
+ switch (var->mode) {
+ case vtn_variable_mode_local:
+ case vtn_variable_mode_global:
+ case vtn_variable_mode_image:
+ case vtn_variable_mode_sampler:
+ case vtn_variable_mode_workgroup:
+ /* For these, we create the variable normally */
+ var->var = rzalloc(b->shader, nir_variable);
+ var->var->name = ralloc_strdup(var->var, val->name);
+ var->var->type = var->type->type;
+ var->var->data.mode = nir_mode;
+
+ switch (var->mode) {
+ case vtn_variable_mode_image:
+ case vtn_variable_mode_sampler:
+ var->var->interface_type = without_array->type;
+ break;
+ default:
+ var->var->interface_type = NULL;
+ break;
+ }
+ break;
+
+ case vtn_variable_mode_input:
+ case vtn_variable_mode_output: {
+ /* For inputs and outputs, we immediately split structures. This
+ * is for a couple of reasons. For one, builtins may all come in
+ * a struct and we really want those split out into separate
+ * variables. For another, interpolation qualifiers can be
+ * applied to members of the top-level struct ane we need to be
+ * able to preserve that information.
+ */
+
+ int array_length = -1;
+ struct vtn_type *interface_type = var->type;
+ if (b->shader->stage == MESA_SHADER_GEOMETRY &&
+ glsl_type_is_array(var->type->type)) {
+ /* In Geometry shaders (and some tessellation), inputs come
+ * in per-vertex arrays. However, some builtins come in
+ * non-per-vertex, hence the need for the is_array check. In
+ * any case, there are no non-builtin arrays allowed so this
+ * check should be sufficient.
+ */
+ interface_type = var->type->array_element;
+ array_length = glsl_get_length(var->type->type);
+ }
+
+ if (glsl_type_is_struct(interface_type->type)) {
+ /* It's a struct. Split it. */
+ unsigned num_members = glsl_get_length(interface_type->type);
+ var->members = ralloc_array(b, nir_variable *, num_members);
+
+ for (unsigned i = 0; i < num_members; i++) {
+ const struct glsl_type *mtype = interface_type->members[i]->type;
+ if (array_length >= 0)
+ mtype = glsl_array_type(mtype, array_length);
+
+ var->members[i] = rzalloc(b->shader, nir_variable);
+ var->members[i]->name =
+ ralloc_asprintf(var->members[i], "%s.%d", val->name, i);
+ var->members[i]->type = mtype;
+ var->members[i]->interface_type =
+ interface_type->members[i]->type;
+ var->members[i]->data.mode = nir_mode;
+ }
+ } else {
+ var->var = rzalloc(b->shader, nir_variable);
+ var->var->name = ralloc_strdup(var->var, val->name);
+ var->var->type = var->type->type;
+ var->var->interface_type = interface_type->type;
+ var->var->data.mode = nir_mode;
+ }
+
+ /* For inputs and outputs, we need to grab locations and builtin
+ * information from the interface type.
+ */
+ vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var);
+ break;
+
+ case vtn_variable_mode_param:
+ unreachable("Not created through OpVariable");
+ }
+
+ case vtn_variable_mode_ubo:
+ case vtn_variable_mode_ssbo:
+ case vtn_variable_mode_push_constant:
+ /* These don't need actual variables. */
+ break;
+ }
+
+ if (count > 4) {
+ assert(count == 5);
+ nir_constant *constant =
+ vtn_value(b, w[4], vtn_value_type_constant)->constant;
+ var->var->constant_initializer =
+ nir_constant_clone(constant, var->var);
+ }
+
+ vtn_foreach_decoration(b, val, var_decoration_cb, var);
+
+ if (var->mode == vtn_variable_mode_image ||
+ var->mode == vtn_variable_mode_sampler) {
+ /* XXX: We still need the binding information in the nir_variable
+ * for these. We should fix that.
+ */
+ var->var->data.binding = var->binding;
+ var->var->data.descriptor_set = var->descriptor_set;
+
+ if (var->mode == vtn_variable_mode_image)
+ var->var->data.image.format = without_array->image_format;
+ }
+
+ if (var->mode == vtn_variable_mode_local) {
+ assert(var->members == NULL && var->var != NULL);
+ nir_function_impl_add_variable(b->impl, var->var);
+ } else if (var->var) {
+ nir_shader_add_variable(b->shader, var->var);
+ } else if (var->members) {
+ unsigned count = glsl_get_length(without_array->type);
+ for (unsigned i = 0; i < count; i++) {
+ assert(var->members[i]->data.mode != nir_var_local);
+ nir_shader_add_variable(b->shader, var->members[i]);
+ }
+ } else {
+ assert(var->mode == vtn_variable_mode_ubo ||
+ var->mode == vtn_variable_mode_ssbo ||
+ var->mode == vtn_variable_mode_push_constant);
+ }
+ break;
+ }
+
+ case SpvOpAccessChain:
+ case SpvOpInBoundsAccessChain: {
+ struct vtn_access_chain *base, *chain;
+ struct vtn_value *base_val = vtn_untyped_value(b, w[3]);
+ if (base_val->value_type == vtn_value_type_sampled_image) {
+ /* This is rather insane. SPIR-V allows you to use OpSampledImage
+ * to combine an array of images with a single sampler to get an
+ * array of sampled images that all share the same sampler.
+ * Fortunately, this means that we can more-or-less ignore the
+ * sampler when crawling the access chain, but it does leave us
+ * with this rather awkward little special-case.
+ */
+ base = base_val->sampled_image->image;
+ } else {
+ assert(base_val->value_type == vtn_value_type_access_chain);
+ base = base_val->access_chain;
+ }
+
+ chain = vtn_access_chain_extend(b, base, count - 4);
+
+ unsigned idx = base->length;
+ for (int i = 4; i < count; i++) {
+ struct vtn_value *link_val = vtn_untyped_value(b, w[i]);
+ if (link_val->value_type == vtn_value_type_constant) {
+ chain->link[idx].mode = vtn_access_mode_literal;
+ chain->link[idx].id = link_val->constant->value.u[0];
+ } else {
+ chain->link[idx].mode = vtn_access_mode_id;
+ chain->link[idx].id = w[i];
+ }
+ idx++;
+ }
+
+ if (base_val->value_type == vtn_value_type_sampled_image) {
+ struct vtn_value *val =
+ vtn_push_value(b, w[2], vtn_value_type_sampled_image);
+ val->sampled_image = ralloc(b, struct vtn_sampled_image);
+ val->sampled_image->image = chain;
+ val->sampled_image->sampler = base_val->sampled_image->sampler;
+ } else {
+ struct vtn_value *val =
+ vtn_push_value(b, w[2], vtn_value_type_access_chain);
+ val->access_chain = chain;
+ }
+ break;
+ }
+
+ case SpvOpCopyMemory: {
+ struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain);
+ struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain);
+
+ vtn_variable_copy(b, dest->access_chain, src->access_chain);
+ break;
+ }
+
+ case SpvOpLoad: {
+ struct vtn_access_chain *src =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+
+ if (src->var->mode == vtn_variable_mode_image ||
+ src->var->mode == vtn_variable_mode_sampler) {
+ vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src;
+ return;
+ }
+
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ val->ssa = vtn_variable_load(b, src);
+ break;
+ }
+
+ case SpvOpStore: {
+ struct vtn_access_chain *dest =
+ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
+ struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
+ vtn_variable_store(b, src, dest);
+ break;
+ }
+
+ case SpvOpArrayLength: {
+ struct vtn_access_chain *chain =
+ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
+
+ const uint32_t offset = chain->var->type->offsets[w[4]];
+ const uint32_t stride = chain->var->type->members[w[4]]->stride;
+
+ unsigned chain_idx;
+ struct vtn_type *type;
+ nir_ssa_def *index =
+ get_vulkan_resource_index(b, chain, &type, &chain_idx);
+
+ nir_intrinsic_instr *instr =
+ nir_intrinsic_instr_create(b->nb.shader,
+ nir_intrinsic_get_buffer_size);
+ instr->src[0] = nir_src_for_ssa(index);
++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
+ nir_builder_instr_insert(&b->nb, &instr->instr);
+ nir_ssa_def *buf_size = &instr->dest.ssa;
+
+ /* array_length = max(buffer_size - offset, 0) / stride */
+ nir_ssa_def *array_length =
+ nir_idiv(&b->nb,
+ nir_imax(&b->nb,
+ nir_isub(&b->nb,
+ buf_size,
+ nir_imm_int(&b->nb, offset)),
+ nir_imm_int(&b->nb, 0u)),
+ nir_imm_int(&b->nb, stride));
+
+ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+ val->ssa = vtn_create_ssa_value(b, glsl_uint_type());
+ val->ssa->def = array_length;
+ break;
+ }
+
+ case SpvOpCopyMemorySized:
+ default:
+ unreachable("Unhandled opcode");
+ }
+}
--- /dev/null
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_meta.h"
+#include "nir/nir_builder.h"
+
+struct blit_region {
+ VkOffset3D src_offset;
+ VkExtent3D src_extent;
+ VkOffset3D dest_offset;
+ VkExtent3D dest_extent;
+};
+
+static nir_shader *
+build_nir_vertex_shader(void)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "a_pos");
+ pos_in->data.location = VERT_ATTRIB_GENERIC0;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+ nir_copy_var(&b, pos_out, pos_in);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "a_tex_pos");
+ tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
+ nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "v_tex_pos");
+ tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+ nir_copy_var(&b, tex_pos_out, tex_pos_in);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
+ (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+ glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->dest_type = nir_type_float; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+ tex->texture = nir_deref_var_create(tex, sampler);
+ tex->sampler = nir_deref_var_create(tex, sampler);
+
++ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+
+ return b.shader;
+}
+
+static void
+meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_saved_state *saved_state)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT));
+}
+
+static void
+meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image *src_image,
+ struct anv_image_view *src_iview,
+ VkOffset3D src_offset,
+ VkExtent3D src_extent,
+ struct anv_image *dest_image,
+ struct anv_image_view *dest_iview,
+ VkOffset3D dest_offset,
+ VkExtent3D dest_extent,
+ VkFilter blit_filter)
+{
+ struct anv_device *device = cmd_buffer->device;
+
+ struct blit_vb_data {
+ float pos[2];
+ float tex_coord[3];
+ } *vb_data;
+
+ assert(src_image->samples == dest_image->samples);
+
+ unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+ struct anv_state vb_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+ vb_data[0] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x + dest_extent.width,
+ dest_offset.y + dest_extent.height,
+ },
+ .tex_coord = {
+ (float)(src_offset.x + src_extent.width)
+ / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height)
+ / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ vb_data[1] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y + dest_extent.height,
+ },
+ .tex_coord = {
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)(src_offset.y + src_extent.height) /
+ (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ vb_data[2] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y,
+ },
+ .tex_coord = {
+ (float)src_offset.x / (float)src_iview->extent.width,
+ (float)src_offset.y / (float)src_iview->extent.height,
+ (float)src_offset.z / (float)src_iview->extent.depth,
+ },
+ };
+
+ anv_state_clflush(vb_state);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = vb_size,
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = vb_state.offset,
+ };
+
+ anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ (VkBuffer[]) {
+ anv_buffer_to_handle(&vertex_buffer),
+ anv_buffer_to_handle(&vertex_buffer)
+ },
+ (VkDeviceSize[]) {
+ 0,
+ sizeof(struct anv_vue_header),
+ });
+
+ VkSampler sampler;
+ ANV_CALL(CreateSampler)(anv_device_to_handle(device),
+ &(VkSamplerCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = blit_filter,
+ .minFilter = blit_filter,
+ }, &cmd_buffer->pool->alloc, &sampler);
+
+ VkDescriptorPool desc_pool;
+ anv_CreateDescriptorPool(anv_device_to_handle(device),
+ &(const VkDescriptorPoolCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = NULL,
+ .flags = 0,
+ .maxSets = 1,
+ .poolSizeCount = 1,
+ .pPoolSizes = (VkDescriptorPoolSize[]) {
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1
+ },
+ }
+ }, &cmd_buffer->pool->alloc, &desc_pool);
+
+ VkDescriptorSet set;
+ anv_AllocateDescriptorSets(anv_device_to_handle(device),
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = desc_pool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout
+ }, &set);
+
+ anv_UpdateDescriptorSets(anv_device_to_handle(device),
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
+ VkFramebuffer fb;
+ anv_CreateFramebuffer(anv_device_to_handle(device),
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(dest_iview),
+ },
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .layers = 1
+ }, &cmd_buffer->pool->alloc, &fb);
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.render_pass,
+ .framebuffer = fb,
+ .renderArea = {
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { dest_extent.width, dest_extent.height },
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ }, VK_SUBPASS_CONTENTS_INLINE);
+
+ VkPipeline pipeline;
+
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = device->meta_state.blit.pipeline_1d_src;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = device->meta_state.blit.pipeline_2d_src;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = device->meta_state.blit.pipeline_3d_src;
+ break;
+ default:
+ unreachable(!"bad VkImageType");
+ }
+
+ if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
+ anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ }
+
+ anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport) {
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ });
+
+ anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit.pipeline_layout, 0, 1,
+ &set, 0, NULL);
+
+ ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ anv_DestroyDescriptorPool(anv_device_to_handle(device),
+ desc_pool, &cmd_buffer->pool->alloc);
+ anv_DestroySampler(anv_device_to_handle(device), sampler,
+ &cmd_buffer->pool->alloc);
+ anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+ &cmd_buffer->pool->alloc);
+}
+
+static void
+meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_meta_saved_state *saved_state)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+void anv_CmdBlitImage(
+ VkCommandBuffer commandBuffer,
+ VkImage srcImage,
+ VkImageLayout srcImageLayout,
+ VkImage destImage,
+ VkImageLayout destImageLayout,
+ uint32_t regionCount,
+ const VkImageBlit* pRegions,
+ VkFilter filter)
+
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+ ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+ struct anv_meta_saved_state saved_state;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdBlitImage must not be used for multisampled source or
+ * destination images. Use vkCmdResolveImage for this purpose.
+ */
+ assert(src_image->samples == 1);
+ assert(dest_image->samples == 1);
+
+ meta_prepare_blit(cmd_buffer, &saved_state);
+
+ for (unsigned r = 0; r < regionCount; r++) {
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = pRegions[r].srcSubresource.aspectMask,
+ .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT);
+
+ const VkOffset3D dest_offset = {
+ .x = pRegions[r].dstOffsets[0].x,
+ .y = pRegions[r].dstOffsets[0].y,
+ .z = 0,
+ };
+
+ if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
+ pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
+ pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
+ pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
+ anv_finishme("FINISHME: Allow flipping in blits");
+
+ const VkExtent3D dest_extent = {
+ .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
+ .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
+ };
+
+ const VkExtent3D src_extent = {
+ .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
+ .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
+ };
+
+ const uint32_t dest_array_slice =
+ anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
+ &pRegions[r].dstOffsets[0]);
+
+ if (pRegions[r].srcSubresource.layerCount > 1)
+ anv_finishme("FINISHME: copy multiple array layers");
+
+ if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
+ pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
+ anv_finishme("FINISHME: copy multiple depth layers");
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = destImage,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dest_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_array_slice,
+ .layerCount = 1
+ },
+ },
+ cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+
+ meta_emit_blit(cmd_buffer,
+ src_image, &src_iview,
+ pRegions[r].srcOffsets[0], src_extent,
+ dest_image, &dest_iview,
+ dest_offset, dest_extent,
+ filter);
+ }
+
+ meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void
+anv_device_finish_meta_blit_state(struct anv_device *device)
+{
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit.render_pass,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_1d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_2d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_3d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_layout,
+ &device->meta_state.alloc);
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit.ds_layout,
+ &device->meta_state.alloc);
+}
+
+VkResult
+anv_device_init_meta_blit_state(struct anv_device *device)
+{
+ VkResult result;
+
+ result = anv_CreateRenderPass(anv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* We don't use a vertex shader for blitting, but instead build and pass
+ * the VUEs directly to the rasterization backend. However, we do need
+ * to provide GLSL source for the vertex shader so that the compiler
+ * does not dead-code our inputs.
+ */
+ struct anv_shader_module vs = {
+ .nir = build_nir_vertex_shader(),
+ };
+
+ struct anv_shader_module fs_1d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
+ };
+
+ struct anv_shader_module fs_2d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
+ };
+
+ struct anv_shader_module fs_3d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
+ };
+
+ VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 2,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = 0,
+ .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
+ },
+ {
+ .binding = 1,
+ .stride = 5 * sizeof(float),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = 0
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = 0
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32B32_SFLOAT,
+ .offset = 8
+ }
+ }
+ };
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL
+ },
+ }
+ };
+ result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+ &ds_layout_info,
+ &device->meta_state.alloc,
+ &device->meta_state.blit.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail_render_pass;
+
+ result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
+ if (result != VK_SUCCESS)
+ goto fail_descriptor_set_layout;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = anv_shader_module_to_handle(&vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ }, {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ },
+ };
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = &vi_create_info,
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ { .colorWriteMask =
+ VK_COLOR_COMPONENT_A_BIT |
+ VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT },
+ }
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit.pipeline_layout,
+ .renderPass = device->meta_state.blit.render_pass,
+ .subpass = 0,
+ };
+
+ const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ };
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_layout;
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_1d;
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_2d;
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_1d.nir);
+ ralloc_free(fs_2d.nir);
+ ralloc_free(fs_3d.nir);
+
+ return VK_SUCCESS;
+
+ fail_pipeline_2d:
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_2d_src,
+ &device->meta_state.alloc);
+
+ fail_pipeline_1d:
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_1d_src,
+ &device->meta_state.alloc);
+
+ fail_pipeline_layout:
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit.pipeline_layout,
+ &device->meta_state.alloc);
+ fail_descriptor_set_layout:
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit.ds_layout,
+ &device->meta_state.alloc);
+ fail_render_pass:
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit.render_pass,
+ &device->meta_state.alloc);
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_1d.nir);
+ ralloc_free(fs_2d.nir);
+ ralloc_free(fs_3d.nir);
+ fail:
+ return result;
+}
--- /dev/null
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_meta.h"
+#include "nir/nir_builder.h"
+
+static VkFormat
+vk_format_for_size(int bs)
+{
+ /* The choice of UNORM and UINT formats is very intentional here. Most of
+ * the time, we want to use a UINT format to avoid any rounding error in
+ * the blit. For stencil blits, R8_UINT is required by the hardware.
+ * (It's the only format allowed in conjunction with W-tiling.) Also we
+ * intentionally use the 4-channel formats whenever we can. This is so
+ * that, when we do a RGB <-> RGBX copy, the two formats will line up even
+ * though one of them is 3/4 the size of the other. The choice of UNORM
+ * vs. UINT is also very intentional because Haswell doesn't handle 8 or
+ * 16-bit RGB UINT formats at all so we have to use UNORM there.
+ * Fortunately, the only time we should ever use two different formats in
+ * the table below is for RGB -> RGBA blits and so we will never have any
+ * UNORM/UINT mismatch.
+ */
+ switch (bs) {
+ case 1: return VK_FORMAT_R8_UINT;
+ case 2: return VK_FORMAT_R8G8_UINT;
+ case 3: return VK_FORMAT_R8G8B8_UNORM;
+ case 4: return VK_FORMAT_R8G8B8A8_UNORM;
+ case 6: return VK_FORMAT_R16G16B16_UNORM;
+ case 8: return VK_FORMAT_R16G16B16A16_UNORM;
+ case 12: return VK_FORMAT_R32G32B32_UINT;
+ case 16: return VK_FORMAT_R32G32B32A32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
+}
+
+static void
+meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image_view *src_iview,
+ VkOffset3D src_offset,
+ struct anv_image_view *dest_iview,
+ VkOffset3D dest_offset,
+ VkExtent3D extent)
+{
+ struct anv_device *device = cmd_buffer->device;
+
+ struct blit_vb_data {
+ float pos[2];
+ float tex_coord[3];
+ } *vb_data;
+
+ unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+ struct anv_state vb_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+ vb_data[0] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x + extent.width,
+ dest_offset.y + extent.height,
+ },
+ .tex_coord = {
+ src_offset.x + extent.width,
+ src_offset.y + extent.height,
+ src_offset.z,
+ },
+ };
+
+ vb_data[1] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y + extent.height,
+ },
+ .tex_coord = {
+ src_offset.x,
+ src_offset.y + extent.height,
+ src_offset.z,
+ },
+ };
+
+ vb_data[2] = (struct blit_vb_data) {
+ .pos = {
+ dest_offset.x,
+ dest_offset.y,
+ },
+ .tex_coord = {
+ src_offset.x,
+ src_offset.y,
+ src_offset.z,
+ },
+ };
+
+ anv_state_clflush(vb_state);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = vb_size,
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = vb_state.offset,
+ };
+
+ anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ (VkBuffer[]) {
+ anv_buffer_to_handle(&vertex_buffer),
+ anv_buffer_to_handle(&vertex_buffer)
+ },
+ (VkDeviceSize[]) {
+ 0,
+ sizeof(struct anv_vue_header),
+ });
+
+ VkDescriptorPool desc_pool;
+ anv_CreateDescriptorPool(anv_device_to_handle(device),
+ &(const VkDescriptorPoolCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = NULL,
+ .flags = 0,
+ .maxSets = 1,
+ .poolSizeCount = 1,
+ .pPoolSizes = (VkDescriptorPoolSize[]) {
+ {
+ .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1
+ },
+ }
+ }, &cmd_buffer->pool->alloc, &desc_pool);
+
+ VkDescriptorSet set;
+ anv_AllocateDescriptorSets(anv_device_to_handle(device),
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = desc_pool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &device->meta_state.blit2d.ds_layout
+ }, &set);
+
+ anv_UpdateDescriptorSets(anv_device_to_handle(device),
+ 1, /* writeCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = set,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = NULL,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ }, 0, NULL);
+
+ VkFramebuffer fb;
+ anv_CreateFramebuffer(anv_device_to_handle(device),
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(dest_iview),
+ },
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .layers = 1
+ }, &cmd_buffer->pool->alloc, &fb);
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d.render_pass,
+ .framebuffer = fb,
+ .renderArea = {
+ .offset = { dest_offset.x, dest_offset.y },
+ .extent = { extent.width, extent.height },
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ }, VK_SUBPASS_CONTENTS_INLINE);
+
+ VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src;
+
+ if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
+ anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ }
+
+ anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport) {
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = dest_iview->extent.width,
+ .height = dest_iview->extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ });
+
+ anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d.pipeline_layout, 0, 1,
+ &set, 0, NULL);
+
+ ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ anv_DestroyDescriptorPool(anv_device_to_handle(device),
+ desc_pool, &cmd_buffer->pool->alloc);
+ anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+ &cmd_buffer->pool->alloc);
+}
+
+void
+anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_saved_state *save)
+{
+ anv_meta_restore(save, cmd_buffer);
+}
+
+void
+anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_saved_state *save)
+{
+ anv_meta_save(save, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT));
+}
+
+void
+anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_blit2d_surf *src,
+ struct anv_meta_blit2d_surf *dst,
+ unsigned num_rects,
+ struct anv_meta_blit2d_rect *rects)
+{
+ VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+ VkFormat src_format = vk_format_for_size(src->bs);
+ VkFormat dst_format = vk_format_for_size(dst->bs);
+ VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
+ VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+
+ /* Create VkImages */
+ VkImageCreateInfo image_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = 0, /* TEMPLATE */
+ .extent = {
+ .width = 0, /* TEMPLATE */
+ .height = 0, /* TEMPLATE */
+ .depth = 1,
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = 1,
+ .tiling = 0, /* TEMPLATE */
+ .usage = 0, /* TEMPLATE */
+ };
+ struct anv_image_create_info anv_image_info = {
+ .vk_info = &image_info,
+ .isl_tiling_flags = 0, /* TEMPLATE */
+ };
+
+ /* The image height is the rect height + src/dst y-offset from the
+ * tile-aligned base address.
+ */
+ struct isl_tile_info tile_info;
+
+ anv_image_info.isl_tiling_flags = 1 << src->tiling;
+ image_info.tiling = src->tiling == ISL_TILING_LINEAR ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ image_info.usage = src_usage;
+ image_info.format = src_format,
+ isl_tiling_get_info(&cmd_buffer->device->isl_dev, src->tiling, src->bs,
+ &tile_info);
+ image_info.extent.height = rects[r].height +
+ rects[r].src_y % tile_info.height;
+ image_info.extent.width = src->pitch / src->bs;
+ VkImage src_image;
+ anv_image_create(vk_device, &anv_image_info,
+ &cmd_buffer->pool->alloc, &src_image);
+
+ anv_image_info.isl_tiling_flags = 1 << dst->tiling;
+ image_info.tiling = dst->tiling == ISL_TILING_LINEAR ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ image_info.usage = dst_usage;
+ image_info.format = dst_format,
+ isl_tiling_get_info(&cmd_buffer->device->isl_dev, dst->tiling, dst->bs,
+ &tile_info);
+ image_info.extent.height = rects[r].height +
+ rects[r].dst_y % tile_info.height;
+ image_info.extent.width = dst->pitch / dst->bs;
+ VkImage dst_image;
+ anv_image_create(vk_device, &anv_image_info,
+ &cmd_buffer->pool->alloc, &dst_image);
+
+ /* We could use a vk call to bind memory, but that would require
+ * creating a dummy memory object etc. so there's really no point.
+ */
+ anv_image_from_handle(src_image)->bo = src->bo;
+ anv_image_from_handle(src_image)->offset = src->base_offset;
+ anv_image_from_handle(dst_image)->bo = dst->bo;
+ anv_image_from_handle(dst_image)->offset = dst->base_offset;
+
+ /* Create VkImageViews */
+ VkImageViewCreateInfo iview_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = 0, /* TEMPLATE */
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = 0, /* TEMPLATE */
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1
+ },
+ };
+ uint32_t img_o = 0;
+
+ iview_info.image = src_image;
+ iview_info.format = src_format;
+ VkOffset3D src_offset_el = {0};
+ isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
+ &anv_image_from_handle(src_image)->
+ color_surface.isl,
+ rects[r].src_x,
+ rects[r].src_y,
+ &img_o,
+ (uint32_t*)&src_offset_el.x,
+ (uint32_t*)&src_offset_el.y);
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &iview_info, cmd_buffer, img_o, src_usage);
+
+ iview_info.image = dst_image;
+ iview_info.format = dst_format;
+ VkOffset3D dst_offset_el = {0};
+ isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
+ &anv_image_from_handle(dst_image)->
+ color_surface.isl,
+ rects[r].dst_x,
+ rects[r].dst_y,
+ &img_o,
+ (uint32_t*)&dst_offset_el.x,
+ (uint32_t*)&dst_offset_el.y);
+ struct anv_image_view dst_iview;
+ anv_image_view_init(&dst_iview, cmd_buffer->device,
+ &iview_info, cmd_buffer, img_o, dst_usage);
+
+ /* Perform blit */
+ meta_emit_blit2d(cmd_buffer,
+ &src_iview,
+ src_offset_el,
+ &dst_iview,
+ dst_offset_el,
+ (VkExtent3D){rects[r].width, rects[r].height, 1});
+
+ anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc);
+ anv_DestroyImage(vk_device, dst_image, &cmd_buffer->pool->alloc);
+ }
+}
+
+
+static nir_shader *
+build_nir_vertex_shader(void)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+
+ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "a_pos");
+ pos_in->data.location = VERT_ATTRIB_GENERIC0;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+ nir_copy_var(&b, pos_out, pos_in);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec4, "a_tex_pos");
+ tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
+ nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "v_tex_pos");
+ tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+ nir_copy_var(&b, tex_pos_out, tex_pos_in);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec3, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+ glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+ sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->dest_type = nir_type_float; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+ tex->texture = nir_deref_var_create(tex, sampler);
+ tex->sampler = NULL;
+
++ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+
+ return b.shader;
+}
+
+void
+anv_device_finish_meta_blit2d_state(struct anv_device *device)
+{
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit2d.render_pass,
+ &device->meta_state.alloc);
+ anv_DestroyPipeline(anv_device_to_handle(device),
+ device->meta_state.blit2d.pipeline_2d_src,
+ &device->meta_state.alloc);
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit2d.pipeline_layout,
+ &device->meta_state.alloc);
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit2d.ds_layout,
+ &device->meta_state.alloc);
+}
+
+VkResult
+anv_device_init_meta_blit2d_state(struct anv_device *device)
+{
+ VkResult result;
+
+ result = anv_CreateRenderPass(anv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* We don't use a vertex shader for blitting, but instead build and pass
+ * the VUEs directly to the rasterization backend. However, we do need
+ * to provide GLSL source for the vertex shader so that the compiler
+ * does not dead-code our inputs.
+ */
+ struct anv_shader_module vs = {
+ .nir = build_nir_vertex_shader(),
+ };
+
+ struct anv_shader_module fs_2d = {
+ .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
+ };
+
+ VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 2,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = 0,
+ .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
+ },
+ {
+ .binding = 1,
+ .stride = 5 * sizeof(float),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = 0
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = 0
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 1,
+ .format = VK_FORMAT_R32G32B32_SFLOAT,
+ .offset = 8
+ }
+ }
+ };
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL
+ },
+ }
+ };
+ result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+ &ds_layout_info,
+ &device->meta_state.alloc,
+ &device->meta_state.blit2d.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail_render_pass;
+
+ result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit2d.ds_layout,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout);
+ if (result != VK_SUCCESS)
+ goto fail_descriptor_set_layout;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = anv_shader_module_to_handle(&vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ }, {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+ .pName = "main",
+ .pSpecializationInfo = NULL
+ },
+ };
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = &vi_create_info,
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ { .colorWriteMask =
+ VK_COLOR_COMPONENT_A_BIT |
+ VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT },
+ }
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d.pipeline_layout,
+ .renderPass = device->meta_state.blit2d.render_pass,
+ .subpass = 0,
+ };
+
+ const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ };
+
+ pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
+ result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+ VK_NULL_HANDLE,
+ &vk_pipeline_info, &anv_pipeline_info,
+ &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src);
+ if (result != VK_SUCCESS)
+ goto fail_pipeline_layout;
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_2d.nir);
+
+ return VK_SUCCESS;
+
+ fail_pipeline_layout:
+ anv_DestroyPipelineLayout(anv_device_to_handle(device),
+ device->meta_state.blit2d.pipeline_layout,
+ &device->meta_state.alloc);
+ fail_descriptor_set_layout:
+ anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+ device->meta_state.blit2d.ds_layout,
+ &device->meta_state.alloc);
+ fail_render_pass:
+ anv_DestroyRenderPass(anv_device_to_handle(device),
+ device->meta_state.blit2d.render_pass,
+ &device->meta_state.alloc);
+
+ ralloc_free(vs.nir);
+ ralloc_free(fs_2d.nir);
+ fail:
+ return result;
+}
--- /dev/null
- nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex");
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "anv_meta.h"
+#include "anv_private.h"
+#include "nir/nir_builder.h"
+
+/**
+ * Vertex attributes used by all pipelines.
+ */
+struct vertex_attrs {
+ struct anv_vue_header vue_header;
+ float position[2]; /**< 3DPRIM_RECTLIST */
+ float tex_position[2];
+};
+
+static void
+meta_resolve_save(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_save(saved_state, cmd_buffer,
+ (1 << VK_DYNAMIC_STATE_VIEWPORT) |
+ (1 << VK_DYNAMIC_STATE_SCISSOR));
+
+ cmd_buffer->state.dynamic.viewport.count = 0;
+ cmd_buffer->state.dynamic.scissor.count = 0;
+}
+
+static void
+meta_resolve_restore(struct anv_meta_saved_state *saved_state,
+ struct anv_cmd_buffer *cmd_buffer)
+{
+ anv_meta_restore(saved_state, cmd_buffer);
+}
+
+static VkPipeline *
+get_pipeline_h(struct anv_device *device, uint32_t samples)
+{
+ uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
+
+ assert(samples >= 2);
+ assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));
+
+ return &device->meta_state.resolve.pipelines[i];
+}
+
+static nir_shader *
+build_nir_vs(void)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+
+ nir_builder b;
+ nir_variable *a_position;
+ nir_variable *v_position;
+ nir_variable *a_tex_position;
+ nir_variable *v_tex_position;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
+
+ a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_position");
+ a_position->data.location = VERT_ATTRIB_GENERIC0;
+
+ v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "gl_Position");
+ v_position->data.location = VARYING_SLOT_POS;
+
+ a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "a_tex_position");
+ a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
+ nir_copy_var(&b, v_position, a_position);
+ nir_copy_var(&b, v_tex_position, a_tex_position);
+
+ return b.shader;
+}
+
+static nir_shader *
+build_nir_fs(uint32_t num_samples)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+
+ const struct glsl_type *sampler2DMS =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+ /*is_shadow*/ false,
+ /*is_array*/ false,
+ GLSL_TYPE_FLOAT);
+
+ nir_builder b;
+ nir_variable *u_tex; /* uniform sampler */
+ nir_variable *v_position; /* vec4, varying fragment position */
+ nir_variable *v_tex_position; /* vec4, varying texture coordinate */
+ nir_variable *f_color; /* vec4, fragment output color */
+ nir_ssa_def *accum; /* vec4, accumulation of sample values */
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_asprintf(b.shader,
+ "meta_resolve_fs_samples%02d",
+ num_samples);
+
+ u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
+ "u_tex");
+ u_tex->data.descriptor_set = 0;
+ u_tex->data.binding = 0;
+
+ v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_position");
+ v_position->data.location = VARYING_SLOT_POS;
+ v_position->data.origin_upper_left = true;
+
+ v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
+ "v_tex_position");
+ v_tex_position->data.location = VARYING_SLOT_VAR0;
+
+ f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+ "f_color");
+ f_color->data.location = FRAG_RESULT_DATA0;
+
+ accum = nir_imm_vec4(&b, 0, 0, 0, 0);
+
+ nir_ssa_def *tex_position_ivec =
+ nir_f2i(&b, nir_load_var(&b, v_tex_position));
+
+ for (uint32_t i = 0; i < num_samples; ++i) {
+ nir_tex_instr *tex;
+
+ tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
+ tex->texture = nir_deref_var_create(tex, u_tex);
+ tex->sampler = nir_deref_var_create(tex, u_tex);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->dest_type = nir_type_float;
+ tex->is_array = false;
+ tex->coord_components = 3;
++ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ accum = nir_fadd(&b, accum, &tex->dest.ssa);
+ }
+
+ accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
+ nir_store_var(&b, f_color, accum, /*writemask*/ 4);
+
+ return b.shader;
+}
+
+static VkResult
+create_pass(struct anv_device *device)
+{
+ VkResult result;
+ VkDevice device_h = anv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ result = anv_CreateRenderPass(device_h,
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+ .samples = 1,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 0,
+ },
+ alloc,
+ &device->meta_state.resolve.pass);
+
+ return result;
+}
+
+static VkResult
+create_pipeline(struct anv_device *device,
+ uint32_t num_samples,
+ VkShaderModule vs_module_h)
+{
+ VkResult result;
+ VkDevice device_h = anv_device_to_handle(device);
+
+ struct anv_shader_module fs_module = {
+ .nir = build_nir_fs(num_samples),
+ };
+
+ if (!fs_module.nir) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ result = anv_graphics_pipeline_create(device_h,
+ VK_NULL_HANDLE,
+ &(VkGraphicsPipelineCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = (VkPipelineShaderStageCreateInfo[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = anv_shader_module_to_handle(&fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+ {
+ .binding = 0,
+ .stride = sizeof(struct vertex_attrs),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+ },
+ },
+ .vertexAttributeDescriptionCount = 3,
+ .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+ {
+ /* VUE Header */
+ .location = 0,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32B32A32_UINT,
+ .offset = offsetof(struct vertex_attrs, vue_header),
+ },
+ {
+ /* Position */
+ .location = 1,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, position),
+ },
+ {
+ /* Texture Coordinate */
+ .location = 2,
+ .binding = 0,
+ .format = VK_FORMAT_R32G32_SFLOAT,
+ .offset = offsetof(struct vertex_attrs, tex_position),
+ },
+ },
+ },
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]) { 0x1 },
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
+ },
+ },
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = device->meta_state.resolve.pipeline_layout,
+ .renderPass = device->meta_state.resolve.pass,
+ .subpass = 0,
+ },
+ &(struct anv_graphics_pipeline_create_info) {
+ .color_attachment_count = -1,
+ .use_repclear = false,
+ .disable_viewport = true,
+ .disable_scissor = true,
+ .disable_vs = true,
+ .use_rectlist = true
+ },
+ &device->meta_state.alloc,
+ get_pipeline_h(device, num_samples));
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
+
+cleanup:
+ ralloc_free(fs_module.nir);
+ return result;
+}
+
+void
+anv_device_finish_meta_resolve_state(struct anv_device *device)
+{
+ struct anv_meta_state *state = &device->meta_state;
+ VkDevice device_h = anv_device_to_handle(device);
+ VkRenderPass pass_h = device->meta_state.resolve.pass;
+ VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
+ VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ if (pass_h)
+ ANV_CALL(DestroyRenderPass)(device_h, pass_h,
+ &device->meta_state.alloc);
+
+ if (pipeline_layout_h)
+ ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);
+
+ if (ds_layout_h)
+ ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
+ VkPipeline pipeline_h = state->resolve.pipelines[i];
+
+ if (pipeline_h) {
+ ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
+ }
+ }
+}
+
+VkResult
+anv_device_init_meta_resolve_state(struct anv_device *device)
+{
+ VkResult res = VK_SUCCESS;
+ VkDevice device_h = anv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+
+ const isl_sample_count_mask_t sample_count_mask =
+ isl_device_get_sample_counts(&device->isl_dev);
+
+ zero(device->meta_state.resolve);
+
+ struct anv_shader_module vs_module = { .nir = build_nir_vs() };
+ if (!vs_module.nir) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);
+
+ res = anv_CreateDescriptorSetLayout(device_h,
+ &(VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ },
+ },
+ },
+ alloc,
+ &device->meta_state.resolve.ds_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = anv_CreatePipelineLayout(device_h,
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = (VkDescriptorSetLayout[]) {
+ device->meta_state.resolve.ds_layout,
+ },
+ },
+ alloc,
+ &device->meta_state.resolve.pipeline_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pass(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0;
+ i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {
+
+ uint32_t sample_count = 1 << (1 + i);
+ if (!(sample_count_mask & sample_count))
+ continue;
+
+ res = create_pipeline(device, sample_count, vs_module_h);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ goto cleanup;
+
+fail:
+ anv_device_finish_meta_resolve_state(device);
+
+cleanup:
+ ralloc_free(vs_module.nir);
+
+ return res;
+}
+
+static void
+emit_resolve(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_image_view *src_iview,
+ const VkOffset2D *src_offset,
+ struct anv_image_view *dest_iview,
+ const VkOffset2D *dest_offset,
+ const VkExtent2D *resolve_extent)
+{
+ struct anv_device *device = cmd_buffer->device;
+ VkDevice device_h = anv_device_to_handle(device);
+ VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
+ const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct anv_image *src_image = src_iview->image;
+
+ const struct vertex_attrs vertex_data[3] = {
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x + resolve_extent->width,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x + resolve_extent->width,
+ src_offset->y + resolve_extent->height,
+ },
+ },
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x,
+ dest_offset->y + resolve_extent->height,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y + resolve_extent->height,
+ },
+ },
+ {
+ .vue_header = {0},
+ .position = {
+ dest_offset->x,
+ dest_offset->y,
+ },
+ .tex_position = {
+ src_offset->x,
+ src_offset->y,
+ },
+ },
+ };
+
+ struct anv_state vertex_mem =
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
+ sizeof(vertex_data), 16);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = sizeof(vertex_data),
+ .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
+ .offset = vertex_mem.offset,
+ };
+
+ VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);
+
+ anv_CmdBindVertexBuffers(cmd_buffer_h,
+ /*firstBinding*/ 0,
+ /*bindingCount*/ 1,
+ (VkBuffer[]) { vertex_buffer_h },
+ (VkDeviceSize[]) { 0 });
+
+ VkSampler sampler_h;
+ ANV_CALL(CreateSampler)(device_h,
+ &(VkSamplerCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = VK_FILTER_NEAREST,
+ .minFilter = VK_FILTER_NEAREST,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .mipLodBias = 0.0,
+ .anisotropyEnable = false,
+ .compareEnable = false,
+ .minLod = 0.0,
+ .maxLod = 0.0,
+ .unnormalizedCoordinates = false,
+ },
+ &cmd_buffer->pool->alloc,
+ &sampler_h);
+
+ VkDescriptorPool desc_pool;
+ anv_CreateDescriptorPool(anv_device_to_handle(device),
+ &(const VkDescriptorPoolCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .pNext = NULL,
+ .flags = 0,
+ .maxSets = 1,
+ .poolSizeCount = 1,
+ .pPoolSizes = (VkDescriptorPoolSize[]) {
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1
+ },
+ }
+ }, &cmd_buffer->pool->alloc, &desc_pool);
+
+ VkDescriptorSet desc_set_h;
+ anv_AllocateDescriptorSets(device_h,
+ &(VkDescriptorSetAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = desc_pool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = (VkDescriptorSetLayout[]) {
+ device->meta_state.resolve.ds_layout,
+ },
+ },
+ &desc_set_h);
+
+ anv_UpdateDescriptorSets(device_h,
+ /*writeCount*/ 1,
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = desc_set_h,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = sampler_h,
+ .imageView = anv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ },
+ },
+ },
+ /*copyCount*/ 0,
+ /*copies */ NULL);
+
+ ANV_CALL(CmdSetViewport)(cmd_buffer_h,
+ /*firstViewport*/ 0,
+ /*viewportCount*/ 1,
+ (VkViewport[]) {
+ {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+ .minDepth = 0.0,
+ .maxDepth = 1.0,
+ },
+ });
+
+ ANV_CALL(CmdSetScissor)(cmd_buffer_h,
+ /*firstScissor*/ 0,
+ /*scissorCount*/ 1,
+ (VkRect2D[]) {
+ {
+ .offset = { 0, 0 },
+ .extent = (VkExtent2D) { fb->width, fb->height },
+ },
+ });
+
+ VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
+ ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);
+
+ if (cmd_buffer->state.pipeline != pipeline) {
+ anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_h);
+ }
+
+ anv_CmdBindDescriptorSets(cmd_buffer_h,
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.resolve.pipeline_layout,
+ /*firstSet*/ 0,
+ /* setCount */ 1,
+ (VkDescriptorSet[]) {
+ desc_set_h,
+ },
+ /*copyCount*/ 0,
+ /*copies */ NULL);
+
+ ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
+
+ /* All objects below are consumed by the draw call. We may safely destroy
+ * them.
+ */
+ anv_DestroyDescriptorPool(anv_device_to_handle(device),
+ desc_pool, &cmd_buffer->pool->alloc);
+ anv_DestroySampler(device_h, sampler_h,
+ &cmd_buffer->pool->alloc);
+}
+
+void anv_CmdResolveImage(
+ VkCommandBuffer cmd_buffer_h,
+ VkImage src_image_h,
+ VkImageLayout src_image_layout,
+ VkImage dest_image_h,
+ VkImageLayout dest_image_layout,
+ uint32_t region_count,
+ const VkImageResolve* regions)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
+ ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
+ ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_meta_saved_state state;
+ VkDevice device_h = anv_device_to_handle(device);
+
+ meta_resolve_save(&state, cmd_buffer);
+
+ assert(src_image->samples > 1);
+ assert(dest_image->samples == 1);
+
+ if (src_image->samples >= 16) {
+ /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
+ * glBlitFramebuffer workaround for samples >= 16.
+ */
+ anv_finishme("vkCmdResolveImage: need interpolation workaround when "
+ "samples >= 16");
+ }
+
+ if (src_image->array_size > 1)
+ anv_finishme("vkCmdResolveImage: multisample array images");
+
+ for (uint32_t r = 0; r < region_count; ++r) {
+ const VkImageResolve *region = ®ions[r];
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * - The aspectMask member of srcSubresource and dstSubresource must
+ * only contain VK_IMAGE_ASPECT_COLOR_BIT
+ *
+ * - The layerCount member of srcSubresource and dstSubresource must
+ * match
+ */
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount ==
+ region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ anv_meta_get_iview_layer(src_image, ®ion->srcSubresource,
+ ®ion->srcOffset);
+
+ const uint32_t dest_base_layer =
+ anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource,
+ ®ion->dstOffset);
+
+ /**
+ * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
+ *
+ * extent is the size in texels of the source image to resolve in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ * srcOffset and dstOffset select the initial x, y, and z offsets in
+ * texels of the sub-regions of the source and destination image data.
+ * extent is the size in texels of the source image to resolve in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ */
+ const struct VkExtent3D extent =
+ anv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D srcOffset =
+ anv_sanitize_image_offset(src_image->type, region->srcOffset);
+ const struct VkOffset3D dstOffset =
+ anv_sanitize_image_offset(dest_image->type, region->dstOffset);
+
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
+ ++layer) {
+
+ struct anv_image_view src_iview;
+ anv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = src_image_h,
+ .viewType = anv_meta_get_view_type(src_image),
+ .format = src_image->format->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0, VK_IMAGE_USAGE_SAMPLED_BIT);
+
+ struct anv_image_view dest_iview;
+ anv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = dest_image_h,
+ .viewType = anv_meta_get_view_type(dest_image),
+ .format = dest_image->format->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ cmd_buffer, 0, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+
+ VkFramebuffer fb_h;
+ anv_CreateFramebuffer(device_h,
+ &(VkFramebufferCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]) {
+ anv_image_view_to_handle(&dest_iview),
+ },
+ .width = anv_minify(dest_image->extent.width,
+ region->dstSubresource.mipLevel),
+ .height = anv_minify(dest_image->extent.height,
+ region->dstSubresource.mipLevel),
+ .layers = 1
+ },
+ &cmd_buffer->pool->alloc,
+ &fb_h);
+
+ ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.resolve.pass,
+ .framebuffer = fb_h,
+ .renderArea = {
+ .offset = {
+ dstOffset.x,
+ dstOffset.y,
+ },
+ .extent = {
+ extent.width,
+ extent.height,
+ }
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ emit_resolve(cmd_buffer,
+ &src_iview,
+ &(VkOffset2D) {
+ .x = srcOffset.x,
+ .y = srcOffset.y,
+ },
+ &dest_iview,
+ &(VkOffset2D) {
+ .x = dstOffset.x,
+ .y = dstOffset.y,
+ },
+ &(VkExtent2D) {
+ .width = extent.width,
+ .height = extent.height,
+ });
+
+ ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);
+
+ anv_DestroyFramebuffer(device_h, fb_h,
+ &cmd_buffer->pool->alloc);
+ }
+ }
+
+ meta_resolve_restore(&state, cmd_buffer);
+}
+
+/**
+ * Emit any needed resolves for the current subpass.
+ */
+void
+anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ struct anv_subpass *subpass = cmd_buffer->state.subpass;
+ struct anv_meta_saved_state saved_state;
+
+ /* FINISHME(perf): Skip clears for resolve attachments.
+ *
+ * From the Vulkan 1.0 spec:
+ *
+ * If the first use of an attachment in a render pass is as a resolve
+ * attachment, then the loadOp is effectively ignored as the resolve is
+ * guaranteed to overwrite all pixels in the render area.
+ */
+
+ if (!subpass->has_resolve)
+ return;
+
+ meta_resolve_save(&saved_state, cmd_buffer);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ uint32_t src_att = subpass->color_attachments[i];
+ uint32_t dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct anv_image_view *src_iview = fb->attachments[src_att];
+ struct anv_image_view *dest_iview = fb->attachments[dest_att];
+
+ struct anv_subpass resolve_subpass = {
+ .color_count = 1,
+ .color_attachments = (uint32_t[]) { dest_att },
+ .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
+ };
+
+ anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ /* Subpass resolves must respect the render area. We can ignore the
+ * render area here because vkCmdBeginRenderPass set the render area
+ * with 3DSTATE_DRAWING_RECTANGLE.
+ *
+ * XXX(chadv): Does the hardware really respect
+ * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
+ */
+ emit_resolve(cmd_buffer,
+ src_iview,
+ &(VkOffset2D) { 0, 0 },
+ dest_iview,
+ &(VkOffset2D) { 0, 0 },
+ &(VkExtent2D) { fb->width, fb->height });
+ }
+
+ cmd_buffer->state.subpass = subpass;
+ meta_resolve_restore(&saved_state, cmd_buffer);
+}
--- /dev/null
- nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "nir/nir_builder.h"
+
+struct apply_dynamic_offsets_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ const struct anv_pipeline_layout *layout;
+
+ uint32_t indices_start;
+};
+
+static bool
+apply_dynamic_offsets_block(nir_block *block, void *void_state)
+{
+ struct apply_dynamic_offsets_state *state = void_state;
+ struct anv_descriptor_set_layout *set_layout;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ unsigned block_idx_src;
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ssbo:
+ block_idx_src = 0;
+ break;
+ case nir_intrinsic_store_ssbo:
+ block_idx_src = 1;
+ break;
+ default:
+ continue; /* the loop */
+ }
+
+ nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
+ assert(res_instr->type == nir_instr_type_intrinsic);
+ nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
+ assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+ unsigned set = res_intrin->const_index[0];
+ unsigned binding = res_intrin->const_index[1];
+
+ set_layout = state->layout->set[set].layout;
+ if (set_layout->binding[binding].dynamic_offset_index < 0)
+ continue;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ /* First, we need to generate the uniform load for the buffer offset */
+ uint32_t index = state->layout->set[set].dynamic_offset_start +
+ set_layout->binding[binding].dynamic_offset_index;
+
+ nir_intrinsic_instr *offset_load =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
+ offset_load->num_components = 2;
+ offset_load->const_index[0] = state->indices_start + index * 8;
+ offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
+ nir_imm_int(b, 8)));
+
- intrin->num_components, NULL);
++ nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, NULL);
+ nir_builder_instr_insert(b, &offset_load->instr);
+
+ nir_src *offset_src = nir_get_io_offset_src(intrin);
+ nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
+ &offset_load->dest.ssa);
+
+ /* In order to avoid out-of-bounds access, we predicate */
+ nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
+ offset_src->ssa);
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(pred);
+ nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+
+ nir_instr_remove(&intrin->instr);
+ *offset_src = nir_src_for_ssa(new_offset);
+ nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);
+
+ if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
+ /* It's a load, we need a phi node */
+ nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+ nir_ssa_dest_init(&phi->instr, &phi->dest,
- (nir_const_value) { .u = { 0, 0, 0, 0 } });
++ intrin->num_components,
++ intrin->dest.ssa.bit_size, NULL);
+
+ nir_phi_src *src1 = ralloc(phi, nir_phi_src);
+ struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
+ src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
+ src1->src = nir_src_for_ssa(&intrin->dest.ssa);
+ exec_list_push_tail(&phi->srcs, &src1->node);
+
+ b->cursor = nir_after_cf_list(&if_stmt->else_list);
+ nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
++ (nir_const_value) { .u32 = { 0, 0, 0, 0 } });
+
+ nir_phi_src *src2 = ralloc(phi, nir_phi_src);
+ struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
+ src2->pred = exec_node_data(nir_block, enode, cf_node.node);
+ src2->src = nir_src_for_ssa(zero);
+ exec_list_push_tail(&phi->srcs, &src2->node);
+
+ assert(intrin->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&phi->dest.ssa));
+
+ nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
+ }
+ }
+
+ return true;
+}
+
+void
+anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct apply_dynamic_offsets_state state = {
+ .shader = shader,
+ .layout = pipeline->layout,
+ .indices_start = shader->num_uniforms,
+ };
+
+ if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
+ return;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_builder_init(&state.builder, function->impl);
+ nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
+ struct anv_push_constants *null_data = NULL;
+ for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
+ prog_data->param[i * 2 + shader->num_uniforms / 4] =
+ (const union gl_constant_value *)&null_data->dynamic[i].offset;
+ prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] =
+ (const union gl_constant_value *)&null_data->dynamic[i].range;
+ }
+
+ shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
+}
--- /dev/null
- block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]);
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "program/prog_parameter.h"
+#include "nir/nir_builder.h"
+
+struct apply_pipeline_layout_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ struct {
+ BITSET_WORD *used;
+ uint8_t *surface_offsets;
+ uint8_t *sampler_offsets;
+ uint8_t *image_offsets;
+ } set[MAX_SETS];
+};
+
+static void
+add_binding(struct apply_pipeline_layout_state *state,
+ uint32_t set, uint32_t binding)
+{
+ BITSET_SET(state->set[set].used, binding);
+}
+
+static void
+add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
+{
+ add_binding(state, var->data.descriptor_set, var->data.binding);
+}
+
+static bool
+get_used_bindings_block(nir_block *block, void *void_state)
+{
+ struct apply_pipeline_layout_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_vulkan_resource_index:
+ add_binding(state, nir_intrinsic_desc_set(intrin),
+ nir_intrinsic_binding(intrin));
+ break;
+
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_size:
+ case nir_intrinsic_image_samples:
+ add_var_binding(state, intrin->variables[0]->var);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ assert(tex->texture);
+ add_var_binding(state, tex->texture->var);
+ if (tex->sampler)
+ add_var_binding(state, tex->sampler->var);
+ break;
+ }
+ default:
+ continue;
+ }
+ }
+
+ return true;
+}
+
+static void
+lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
+ struct apply_pipeline_layout_state *state)
+{
+ nir_builder *b = &state->builder;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ uint32_t set = nir_intrinsic_desc_set(intrin);
+ uint32_t binding = nir_intrinsic_binding(intrin);
+
+ uint32_t surface_index = state->set[set].surface_offsets[binding];
+
+ nir_const_value *const_block_idx =
+ nir_src_as_const_value(intrin->src[0]);
+
+ nir_ssa_def *block_index;
+ if (const_block_idx) {
++ block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
+ } else {
+ block_index = nir_iadd(b, nir_imm_int(b, surface_index),
+ nir_ssa_for_src(b, intrin->src[0], 1));
+ }
+
+ assert(intrin->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
+ nir_instr_remove(&intrin->instr);
+}
+
+static void
+lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
+ unsigned *const_index, nir_tex_src_type src_type,
+ struct apply_pipeline_layout_state *state)
+{
+ if (deref->deref.child) {
+ assert(deref->deref.child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+
+ *const_index += deref_array->base_offset;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
+ tex->num_srcs + 1);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ new_srcs[i].src_type = tex->src[i].src_type;
+ nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
+ }
+
+ ralloc_free(tex->src);
+ tex->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
+ tex->src[tex->num_srcs].src_type = src_type;
+ tex->num_srcs++;
+ assert(deref_array->indirect.is_ssa);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
+ deref_array->indirect);
+ }
+ }
+}
+
+static void
+cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
+{
+ if (deref->deref.child == NULL)
+ return;
+
+ nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+
+ if (deref_array->deref_array_type != nir_deref_array_type_indirect)
+ return;
+
+ nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
+}
+
+static void
+lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+{
+ /* No one should have come by and lowered it already */
+ assert(tex->texture);
+
+ unsigned set = tex->texture->var->data.descriptor_set;
+ unsigned binding = tex->texture->var->data.binding;
+ tex->texture_index = state->set[set].surface_offsets[binding];
+ lower_tex_deref(tex, tex->texture, &tex->texture_index,
+ nir_tex_src_texture_offset, state);
+
+ if (tex->sampler) {
+ unsigned set = tex->sampler->var->data.descriptor_set;
+ unsigned binding = tex->sampler->var->data.binding;
+ tex->sampler_index = state->set[set].sampler_offsets[binding];
+ lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
+ nir_tex_src_sampler_offset, state);
+ }
+
+ /* The backend only ever uses this to mark used surfaces. We don't care
+ * about that little optimization so it just needs to be non-zero.
+ */
+ tex->texture_array_size = 1;
+
+ cleanup_tex_deref(tex, tex->texture);
+ if (tex->sampler)
+ cleanup_tex_deref(tex, tex->sampler);
+ tex->texture = NULL;
+ tex->sampler = NULL;
+}
+
+static bool
+apply_pipeline_layout_block(nir_block *block, void *void_state)
+{
+ struct apply_pipeline_layout_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
+ lower_res_index_intrinsic(intrin, state);
+ }
+ break;
+ }
+ case nir_instr_type_tex:
+ lower_tex(nir_instr_as_tex(instr), state);
+ break;
+ default:
+ continue;
+ }
+ }
+
+ return true;
+}
+
+static void
+setup_vec4_uniform_value(const union gl_constant_value **params,
+ const union gl_constant_value *values,
+ unsigned n)
+{
+ static const gl_constant_value zero = { 0 };
+
+ for (unsigned i = 0; i < n; ++i)
+ params[i] = &values[i];
+
+ for (unsigned i = n; i < 4; ++i)
+ params[i] = &zero;
+}
+
+void
+anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline_bind_map *map)
+{
+ struct anv_pipeline_layout *layout = pipeline->layout;
+
+ struct apply_pipeline_layout_state state = {
+ .shader = shader,
+ };
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ for (unsigned s = 0; s < layout->num_sets; s++) {
+ const unsigned count = layout->set[s].layout->binding_count;
+ const unsigned words = BITSET_WORDS(count);
+ state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
+ state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+ state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+ state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+ }
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_foreach_block(function->impl, get_used_bindings_block, &state);
+ }
+
+ for (uint32_t set = 0; set < layout->num_sets; set++) {
+ struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+
+ BITSET_WORD b, _tmp;
+ BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
+ set_layout->binding_count) {
+ if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
+ map->surface_count += set_layout->binding[b].array_size;
+ if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
+ map->sampler_count += set_layout->binding[b].array_size;
+ if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
+ map->image_count += set_layout->binding[b].array_size;
+ }
+ }
+
+ unsigned surface = 0;
+ unsigned sampler = 0;
+ unsigned image = 0;
+ for (uint32_t set = 0; set < layout->num_sets; set++) {
+ struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+
+ BITSET_WORD b, _tmp;
+ BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
+ set_layout->binding_count) {
+ unsigned array_size = set_layout->binding[b].array_size;
+ unsigned set_offset = set_layout->binding[b].descriptor_index;
+
+ if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
+ state.set[set].surface_offsets[b] = surface;
+ for (unsigned i = 0; i < array_size; i++) {
+ map->surface_to_descriptor[surface + i].set = set;
+ map->surface_to_descriptor[surface + i].offset = set_offset + i;
+ }
+ surface += array_size;
+ }
+
+ if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
+ state.set[set].sampler_offsets[b] = sampler;
+ for (unsigned i = 0; i < array_size; i++) {
+ map->sampler_to_descriptor[sampler + i].set = set;
+ map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
+ }
+ sampler += array_size;
+ }
+
+ if (set_layout->binding[b].stage[shader->stage].image_index >= 0) {
+ state.set[set].image_offsets[b] = image;
+ image += array_size;
+ }
+ }
+ }
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ nir_builder_init(&state.builder, function->impl);
+ nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
+ if (map->image_count > 0) {
+ assert(map->image_count <= MAX_IMAGES);
+ nir_foreach_variable(var, &shader->uniforms) {
+ if (glsl_type_is_image(var->type) ||
+ (glsl_type_is_array(var->type) &&
+ glsl_type_is_image(glsl_get_array_element(var->type)))) {
+ /* Images are represented as uniform push constants and the actual
+ * information required for reading/writing to/from the image is
+ * storred in the uniform.
+ */
+ unsigned set = var->data.descriptor_set;
+ unsigned binding = var->data.binding;
+ unsigned image_index = state.set[set].image_offsets[binding];
+
+ var->data.driver_location = shader->num_uniforms +
+ image_index * BRW_IMAGE_PARAM_SIZE * 4;
+ }
+ }
+
+ struct anv_push_constants *null_data = NULL;
+ const gl_constant_value **param =
+ prog_data->param + (shader->num_uniforms / 4);
+ const struct brw_image_param *image_param = null_data->images;
+ for (uint32_t i = 0; i < map->image_count; i++) {
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ (const union gl_constant_value *)&image_param->surface_idx, 1);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ (const union gl_constant_value *)image_param->offset, 2);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ (const union gl_constant_value *)image_param->size, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ (const union gl_constant_value *)image_param->stride, 4);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ (const union gl_constant_value *)image_param->tiling, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ (const union gl_constant_value *)image_param->swizzling, 2);
+
+ param += BRW_IMAGE_PARAM_SIZE;
+ image_param ++;
+ }
+
+ shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+ }
+
+ ralloc_free(mem_ctx);
+}
nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
break;
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
+ case nir_intrinsic_load_shared: {
+ assert(devinfo->gen >= 7);
+
+ fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+ /* Get the offset to read from */
+ fs_reg offset_reg;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
++ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
+ /* Read the vector */
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */,
+ instr->num_components,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ for (int i = 0; i < instr->num_components; i++)
+ bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
+
+ break;
+ }
+
+ case nir_intrinsic_store_shared: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index */
+ fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+ /* Value */
+ fs_reg val_reg = get_nir_src(instr->src[0]);
+
+ /* Writemask */
+ unsigned writemask = instr->const_index[1];
+
+ /* Combine groups of consecutive enabled channels in one write
+ * message. We use ffs to find the first enabled channel and then ffs on
+ * the bit-inverse, down-shifted writemask to determine the length of
+ * the block of enabled bits.
+ */
+ while (writemask) {
+ unsigned first_component = ffs(writemask) - 1;
+ unsigned length = ffs(~(writemask >> first_component)) - 1;
+ fs_reg offset_reg;
+
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset) {
++ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
+ 4 * first_component);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0] + 4 * first_component));
+ }
+
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, first_component),
+ 1 /* dims */, length,
+ BRW_PREDICATE_NONE);
+
+ /* Clear the bits in the writemask that we just wrote, then try
+ * again to see if more channels are left.
+ */
+ writemask &= (15 << (first_component + length));
+ }
+
+ break;
+ }
+
default:
nir_emit_intrinsic(bld, instr);
break;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 4 */
- assert(const_offset->u[0] % 4 == 0);
- src.reg_offset = const_offset->u[0] / 4;
+ assert(const_offset->u32[0] % 4 == 0);
+ src.reg_offset = const_offset->u32[0] / 4;
+
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ }
} else {
- src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
- }
+ fs_reg indirect = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
- for (unsigned j = 0; j < instr->num_components; j++) {
- bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ /* We need to pass a size to the MOV_INDIRECT but we don't want it to
+ * go past the end of the uniform. In order to keep the n'th
+ * component from running past, we subtract off the size of all but
+ * one component of the vector.
+ */
+ assert(instr->const_index[1] >= instr->num_components * 4);
+ unsigned read_size = instr->const_index[1] -
+ (instr->num_components - 1) * 4;
+
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+ offset(dest, bld, j), offset(src, bld, j),
+ indirect, brw_imm_ud(read_size));
+ }
}
break;
}
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 16 */
- assert(const_offset->u[0] % 16 == 0);
- src.reg_offset = const_offset->u[0] / 16;
+ assert(const_offset->u32[0] % 16 == 0);
+ src.reg_offset = const_offset->u32[0] / 16;
+
+ emit(MOV(dest, src));
} else {
- src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
- src.reladdr = new(mem_ctx) src_reg(tmp);
- }
+ src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
- emit(MOV(dest, src));
+ emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
+ indirect, brw_imm_ud(instr->const_index[1]));
+ }
break;
}