OSDN Git Service

radeonsi: add shader code generation for tessellation
author: Marek Olšák <marek.olsak@amd.com>
Sun, 22 Feb 2015 14:09:35 +0000 (15:09 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 22 Jul 2015 22:59:32 +0000 (00:59 +0200)
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeon/radeon_llvm_emit.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 25580b6..973d6ed 100644 (file)
@@ -62,6 +62,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
 
        switch (type) {
        case TGSI_PROCESSOR_VERTEX:
+       case TGSI_PROCESSOR_TESS_CTRL:
+       case TGSI_PROCESSOR_TESS_EVAL:
                llvm_type = RADEON_LLVM_SHADER_VS;
                break;
        case TGSI_PROCESSOR_GEOMETRY:
index 9b03a53..cddd9a0 100644 (file)
@@ -71,12 +71,17 @@ struct si_shader_context
        int param_streamout_write_index;
        int param_streamout_offset[4];
        int param_vertex_id;
+       int param_rel_auto_id;
        int param_instance_id;
+       int param_tes_u;
+       int param_tes_v;
+       int param_tes_rel_patch_id;
+       int param_tes_patch_id;
        int param_es2gs_offset;
        LLVMTargetMachineRef tm;
        LLVMValueRef const_md;
        LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
-       LLVMValueRef ddxy_lds;
+       LLVMValueRef lds;
        LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
        LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
        LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
@@ -133,6 +138,14 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
                assert(index <= 63-4);
                return 4 + index;
 
+       /* patch indices are completely separate and thus start from 0 */
+       case TGSI_SEMANTIC_TESSOUTER:
+               return 0;
+       case TGSI_SEMANTIC_TESSINNER:
+               return 1;
+       case TGSI_SEMANTIC_PATCH:
+               return 2 + index;
+
        default:
                /* Don't fail here. The result of this function is only used
                 * for LS, TCS, TES, and GS, where legacy GL semantics can't
@@ -210,6 +223,136 @@ static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
        return value;
 }
 
+/**
+ * Return the relative patch ID (the "RelPatchID" used by the LDS layout)
+ * for the shader being compiled.
+ *
+ * TCS: unpacked from SI_PARAM_REL_IDS with arguments (0, 8).
+ * TES: read from the function parameter recorded in param_tes_rel_patch_id.
+ * Any other shader type asserts and yields NULL.
+ */
+static LLVMValueRef get_rel_patch_id(struct si_shader_context *si_shader_ctx)
+{
+       if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+               return unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 0, 8);
+
+       if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL)
+               return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                   si_shader_ctx->param_tes_rel_patch_id);
+
+       /* Only tessellation stages have a relative patch ID. */
+       assert(0);
+       return NULL;
+}
+
+/* Tessellation shaders pass outputs to the next shader using LDS.
+ *
+ * LS outputs = TCS inputs
+ * TCS outputs = TES inputs
+ *
+ * The LDS layout is:
+ * - TCS inputs for patch 0
+ * - TCS inputs for patch 1
+ * - TCS inputs for patch 2            = get_tcs_in_current_patch_offset (if RelPatchID==2)
+ * - ...
+ * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
+ * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
+ * - TCS outputs for patch 1
+ * - Per-patch TCS outputs for patch 1
+ * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
+ * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
+ * - ...
+ *
+ * All three shaders VS(LS), TCS, TES share the same LDS space.
+ */
+
+/**
+ * Return the stride between consecutive patches of TCS inputs.
+ *
+ * The value comes from the LS output layout when compiling a vertex shader
+ * and from the TCS input layout when compiling a TCS; both are unpacked with
+ * arguments (0, 13). Other shader types assert and yield NULL.
+ */
+static LLVMValueRef
+get_tcs_in_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+       switch (si_shader_ctx->type) {
+       case TGSI_PROCESSOR_VERTEX:
+               return unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+
+       case TGSI_PROCESSOR_TESS_CTRL:
+               return unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+
+       default:
+               assert(0);
+               return NULL;
+       }
+}
+
+/**
+ * Return the stride between consecutive patches of TCS outputs, unpacked
+ * from SI_PARAM_TCS_OUT_LAYOUT with arguments (0, 13).
+ */
+static LLVMValueRef
+get_tcs_out_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+       LLVMValueRef out_patch_stride =
+               unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+
+       return out_patch_stride;
+}
+
+/**
+ * Return the LDS offset of the TCS outputs of patch 0.
+ *
+ * The packed value is unpacked from SI_PARAM_TCS_OUT_OFFSETS with arguments
+ * (0, 16) and then multiplied by 4.
+ */
+static LLVMValueRef
+get_tcs_out_patch0_offset(struct si_shader_context *si_shader_ctx)
+{
+       struct lp_build_context *uint_bld =
+               &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+       LLVMValueRef packed_offset =
+               unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_OFFSETS, 0, 16);
+
+       return lp_build_mul_imm(uint_bld, packed_offset, 4);
+}
+
+/**
+ * Return the LDS offset of the per-patch TCS outputs of patch 0.
+ *
+ * The packed value is unpacked from SI_PARAM_TCS_OUT_OFFSETS with arguments
+ * (16, 16) and then multiplied by 4.
+ */
+static LLVMValueRef
+get_tcs_out_patch0_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+       struct lp_build_context *uint_bld =
+               &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+       LLVMValueRef packed_offset =
+               unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_OFFSETS, 16, 16);
+
+       return lp_build_mul_imm(uint_bld, packed_offset, 4);
+}
+
+/**
+ * Return the LDS offset of the TCS inputs of the current patch:
+ * input patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_in_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       LLVMValueRef in_stride, patch;
+
+       /* The stride is fetched before the patch ID, as in the original order. */
+       in_stride = get_tcs_in_patch_stride(si_shader_ctx);
+       patch = get_rel_patch_id(si_shader_ctx);
+
+       return LLVMBuildMul(gallivm->builder, in_stride, patch, "");
+}
+
+/**
+ * Return the LDS offset of the TCS outputs of the current patch:
+ * patch 0 output offset + output patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_out_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       LLVMValueRef first_patch, out_stride, patch, patch_delta;
+
+       first_patch = get_tcs_out_patch0_offset(si_shader_ctx);
+       out_stride = get_tcs_out_patch_stride(si_shader_ctx);
+       patch = get_rel_patch_id(si_shader_ctx);
+
+       patch_delta = LLVMBuildMul(gallivm->builder, out_stride, patch, "");
+       return LLVMBuildAdd(gallivm->builder, first_patch, patch_delta, "");
+}
+
+/**
+ * Return the LDS offset of the per-patch TCS outputs of the current patch:
+ * patch 0 per-patch data offset + output patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_out_current_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       LLVMValueRef first_patch_data, out_stride, patch, patch_delta;
+
+       first_patch_data = get_tcs_out_patch0_patch_data_offset(si_shader_ctx);
+       out_stride = get_tcs_out_patch_stride(si_shader_ctx);
+       patch = get_rel_patch_id(si_shader_ctx);
+
+       patch_delta = LLVMBuildMul(gallivm->builder, out_stride, patch, "");
+       return LLVMBuildAdd(gallivm->builder, first_patch_data, patch_delta, "");
+}
+
+/**
+ * Build an indexed store using LLVMBuildGEP + LLVMBuildStore.
+ * The GEP uses the index pair {0, index} on base_ptr, and "value" is
+ * stored through the resulting pointer.
+ */
+static void build_indexed_store(struct si_shader_context *si_shader_ctx,
+                               LLVMValueRef base_ptr, LLVMValueRef index,
+                               LLVMValueRef value)
+{
+       struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef gep_indices[2] = { bld_base->uint_bld.zero, index };
+       LLVMValueRef elem_ptr;
+
+       elem_ptr = LLVMBuildGEP(gallivm->builder, base_ptr, gep_indices, 2, "");
+       LLVMBuildStore(gallivm->builder, value, elem_ptr);
+}
+
 /**
  * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
  * It's equivalent to doing a load from &base_ptr[index].
@@ -337,6 +480,12 @@ static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
                return bld_base->uint_bld.zero;
 
        switch (si_shader_ctx->type) {
+       case TGSI_PROCESSOR_TESS_CTRL:
+               return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                   SI_PARAM_PATCH_ID);
+       case TGSI_PROCESSOR_TESS_EVAL:
+               return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                   si_shader_ctx->param_tes_patch_id);
        case TGSI_PROCESSOR_GEOMETRY:
                return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
                                    SI_PARAM_PRIMITIVE_ID);
@@ -346,6 +495,278 @@ static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
        }
 }
 
+/**
+ * Compute the effective index for a tgsi_ind_register.
+ *
+ * The address register selected by ind->Index / ind->Swizzle is loaded and
+ * the constant rel_index is added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+                                      const struct tgsi_ind_register *ind,
+                                      int rel_index)
+{
+       struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+       LLVMValueRef addr_reg_ptr, addr_reg_val, const_offset;
+
+       addr_reg_ptr = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+       addr_reg_val = LLVMBuildLoad(gallivm->builder, addr_reg_ptr, "");
+       const_offset = lp_build_const_int32(gallivm, rel_index);
+
+       return LLVMBuildAdd(gallivm->builder, addr_reg_val, const_offset, "");
+}
+
+/**
+ * Calculate a dword address given an input or output register and a stride.
+ *
+ * \param dst               destination register; only read when src is NULL
+ * \param src               source register; takes precedence over dst when
+ *                          non-NULL
+ * \param vertex_dw_stride  multiplied by the dimension index when the
+ *                          register is 2-dimensional; not used otherwise
+ *                          (callers in this file pass NULL in that case)
+ * \param base_addr         starting address the computed offsets are added to
+ * \return base_addr plus the dimension offset, the indirect offset (if any)
+ *         and 4 * the unique index of the register's semantic; NULL (after
+ *         an assertion) if the register file is neither INPUT nor OUTPUT
+ */
+static LLVMValueRef get_dw_address(struct si_shader_context *si_shader_ctx,
+                                  const struct tgsi_full_dst_register *dst,
+                                  const struct tgsi_full_src_register *src,
+                                  LLVMValueRef vertex_dw_stride,
+                                  LLVMValueRef base_addr)
+{
+       struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+       struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+       ubyte *name, *index, *array_first;
+       int first, param;
+       struct tgsi_full_dst_register reg;
+
+       /* Set the register description. The address computation is the same
+        * for sources and destinations. Only the fields read below are
+        * copied from a source register. */
+       if (src) {
+               reg.Register.File = src->Register.File;
+               reg.Register.Index = src->Register.Index;
+               reg.Register.Indirect = src->Register.Indirect;
+               reg.Register.Dimension = src->Register.Dimension;
+               reg.Indirect = src->Indirect;
+               reg.Dimension = src->Dimension;
+               reg.DimIndirect = src->DimIndirect;
+       } else
+               reg = *dst;
+
+       /* If the register is 2-dimensional (e.g. an array of vertices
+        * in a primitive), calculate the base address of the vertex. */
+       if (reg.Register.Dimension) {
+               /* NOTE: this local shadows the outer "index" array pointer
+                * for the duration of this block. */
+               LLVMValueRef index;
+
+               if (reg.Dimension.Indirect)
+                       index = get_indirect_index(si_shader_ctx, &reg.DimIndirect,
+                                                  reg.Dimension.Index);
+               else
+                       index = lp_build_const_int32(gallivm, reg.Dimension.Index);
+
+               /* base_addr += index * vertex_dw_stride */
+               base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+                                        LLVMBuildMul(gallivm->builder, index,
+                                                     vertex_dw_stride, ""), "");
+       }
+
+       /* Get information about the register. */
+       if (reg.Register.File == TGSI_FILE_INPUT) {
+               name = info->input_semantic_name;
+               index = info->input_semantic_index;
+               array_first = info->input_array_first;
+       } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
+               name = info->output_semantic_name;
+               index = info->output_semantic_index;
+               array_first = info->output_array_first;
+       } else {
+               assert(0);
+               return NULL;
+       }
+
+       if (reg.Register.Indirect) {
+               /* Add the relative address of the element. */
+               LLVMValueRef ind_index;
+
+               /* With an array ID, indexing is relative to the first
+                * register of that array; otherwise it is relative to the
+                * register itself (constant part of the index is 0). */
+               if (reg.Indirect.ArrayID)
+                       first = array_first[reg.Indirect.ArrayID];
+               else
+                       first = reg.Register.Index;
+
+               ind_index = get_indirect_index(si_shader_ctx, &reg.Indirect,
+                                          reg.Register.Index - first);
+
+               /* Each element is 4 dwords apart. */
+               base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+                                   LLVMBuildMul(gallivm->builder, ind_index,
+                                                lp_build_const_int32(gallivm, 4), ""), "");
+
+               /* The slot is derived from the semantic of register "first". */
+               param = si_shader_io_get_unique_index(name[first], index[first]);
+       } else {
+               param = si_shader_io_get_unique_index(name[reg.Register.Index],
+                                                     index[reg.Register.Index]);
+       }
+
+       /* Add the base address of the element. */
+       return LLVMBuildAdd(gallivm->builder, base_addr,
+                           lp_build_const_int32(gallivm, param * 4), "");
+}
+
+/**
+ * Read a value from LDS.
+ *
+ * \param type         TGSI type the loaded dword is bitcast to
+ * \param swizzle      dword offset added to dw_addr (typically 0..3);
+ *                     ~0 loads channels 0..TGSI_NUM_CHANNELS-1 and gathers
+ *                     them into one vector
+ * \param dw_addr      address in dwords
+ */
+static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
+                            enum tgsi_opcode_type type, unsigned swizzle,
+                            LLVMValueRef dw_addr)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef chan_addr, dword;
+
+       if (swizzle == ~0) {
+               LLVMValueRef vec[TGSI_NUM_CHANNELS];
+               unsigned c = 0;
+
+               /* Load every channel separately through the scalar path. */
+               while (c < TGSI_NUM_CHANNELS) {
+                       vec[c] = lds_load(bld_base, type, c, dw_addr);
+                       c++;
+               }
+
+               return lp_build_gather_values(gallivm, vec, TGSI_NUM_CHANNELS);
+       }
+
+       chan_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+                                lp_build_const_int32(gallivm, swizzle));
+       dword = build_indexed_load(ctx, ctx->lds, chan_addr);
+
+       return LLVMBuildBitCast(gallivm->builder, dword,
+                               tgsi2llvmtype(bld_base, type), "");
+}
+
+/**
+ * Write a value to LDS.
+ *
+ * \param swizzle      dword offset added to dw_addr (typically 0..3)
+ * \param dw_addr      address in dwords
+ * \param value                value to store; it is bitcast to i32 first
+ */
+static void lds_store(struct lp_build_tgsi_context * bld_base,
+                     unsigned swizzle, LLVMValueRef dw_addr,
+                     LLVMValueRef value)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef chan_addr, dword;
+
+       chan_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+                                lp_build_const_int32(gallivm, swizzle));
+       dword = LLVMBuildBitCast(gallivm->builder, value,
+                                LLVMInt32TypeInContext(gallivm->context), "");
+
+       build_indexed_store(ctx, ctx->lds, chan_addr, dword);
+}
+
+/**
+ * Fetch a TCS input from LDS.
+ *
+ * The address starts at the current patch's TCS input offset; the per-vertex
+ * dword stride is unpacked from SI_PARAM_TCS_IN_LAYOUT with arguments (13, 8).
+ */
+static LLVMValueRef fetch_input_tcs(
+       struct lp_build_tgsi_context *bld_base,
+       const struct tgsi_full_src_register *reg,
+       enum tgsi_opcode_type type, unsigned swizzle)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMValueRef vertex_stride =
+               unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+       LLVMValueRef patch_base = get_tcs_in_current_patch_offset(ctx);
+       LLVMValueRef addr =
+               get_dw_address(ctx, NULL, reg, vertex_stride, patch_base);
+
+       return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Fetch a TCS output that the TCS reads back.
+ *
+ * Directly addressed TESSOUTER/TESSINNER outputs are read from the local
+ * temp "output" register. Everything else is loaded from LDS: registers
+ * with a dimension from the current patch's TCS outputs, registers without
+ * one from its per-patch outputs.
+ */
+static LLVMValueRef fetch_output_tcs(
+               struct lp_build_tgsi_context *bld_base,
+               const struct tgsi_full_src_register *reg,
+               enum tgsi_opcode_type type, unsigned swizzle)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct tgsi_shader_info *info = &ctx->shader->selector->info;
+       unsigned semantic = info->output_semantic_name[reg->Register.Index];
+       int is_tess_level = semantic == TGSI_SEMANTIC_TESSOUTER ||
+                           semantic == TGSI_SEMANTIC_TESSINNER;
+       LLVMValueRef addr, vertex_stride;
+
+       if (is_tess_level && !reg->Register.Indirect)
+               return radeon_llvm_emit_fetch(bld_base, reg, type, swizzle);
+
+       if (!reg->Register.Dimension) {
+               /* Per-patch output. */
+               addr = get_tcs_out_current_patch_data_offset(ctx);
+               addr = get_dw_address(ctx, NULL, reg, NULL, addr);
+       } else {
+               /* Per-vertex output. */
+               vertex_stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+               addr = get_tcs_out_current_patch_offset(ctx);
+               addr = get_dw_address(ctx, NULL, reg, vertex_stride, addr);
+       }
+
+       return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Fetch a TES input from LDS (TCS outputs = TES inputs).
+ *
+ * Registers with a dimension are read from the current patch's TCS outputs;
+ * registers without one are read from its per-patch outputs.
+ */
+static LLVMValueRef fetch_input_tes(
+       struct lp_build_tgsi_context *bld_base,
+       const struct tgsi_full_src_register *reg,
+       enum tgsi_opcode_type type, unsigned swizzle)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMValueRef addr, vertex_stride;
+
+       if (!reg->Register.Dimension) {
+               /* Per-patch input. */
+               addr = get_tcs_out_current_patch_data_offset(ctx);
+               addr = get_dw_address(ctx, NULL, reg, NULL, addr);
+       } else {
+               /* Per-vertex input. */
+               vertex_stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+               addr = get_tcs_out_current_patch_offset(ctx);
+               addr = get_dw_address(ctx, NULL, reg, vertex_stride, addr);
+       }
+
+       return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Store callback for TCS instructions.
+ *
+ * Writes to TGSI_FILE_OUTPUT registers are stored to LDS, one dword per
+ * enabled destination channel. Writes to other register files, and writes
+ * whose first value is still an LLVM vector, are forwarded unchanged to
+ * radeon_llvm_emit_store().
+ *
+ * \param inst  instruction being emitted; only inst->Dst[0] is examined
+ * \param info  opcode info, passed through to radeon_llvm_emit_store()
+ * \param dst   per-channel values to store
+ */
+static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
+                            const struct tgsi_full_instruction * inst,
+                            const struct tgsi_opcode_info * info,
+                            LLVMValueRef dst[4])
+{
+       struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+       struct si_shader *shader = si_shader_ctx->shader;
+       struct tgsi_shader_info *sinfo = &shader->selector->info;
+       const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+       unsigned chan_index;
+       LLVMValueRef dw_addr, stride;
+
+       /* Only handle per-patch and per-vertex outputs here.
+        * Vectors will be lowered to scalars and this function will be called again.
+        */
+       if (reg->Register.File != TGSI_FILE_OUTPUT ||
+           (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
+               radeon_llvm_emit_store(bld_base, inst, info, dst);
+               return;
+       }
+
+       /* Write tessellation levels to "output" temp registers.
+        * Also write them to LDS as per-patch outputs (below).
+        */
+       if (!reg->Register.Indirect &&
+           (sinfo->output_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_TESSINNER ||
+             sinfo->output_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_TESSOUTER))
+               radeon_llvm_emit_store(bld_base, inst, info, dst);
+
+       /* Registers with a dimension go to the current patch's TCS outputs;
+        * registers without one go to its per-patch outputs. */
+       if (reg->Register.Dimension) {
+               stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+               dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+               dw_addr = get_dw_address(si_shader_ctx, reg, NULL, stride, dw_addr);
+       } else {
+               dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+               dw_addr = get_dw_address(si_shader_ctx, reg, NULL, NULL, dw_addr);
+       }
+
+       /* Store each channel enabled in the destination write mask. */
+       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+               LLVMValueRef value = dst[chan_index];
+
+               /* The saturate modifier is applied before the LDS store. */
+               if (inst->Instruction.Saturate)
+                       value = radeon_llvm_saturate(bld_base, value);
+
+               lds_store(bld_base, chan_index, dw_addr, value);
+       }
+}
+
 static LLVMValueRef fetch_input_gs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
@@ -398,7 +819,7 @@ static LLVMValueRef fetch_input_gs(
        args[1] = vtx_offset;
        args[2] = lp_build_const_int32(gallivm,
                                       (get_param_index(semantic_name, semantic_index,
-                                                       shader->selector->gs_used_inputs) * 4 +
+                                                       shader->selector->inputs_read) * 4 +
                                        swizzle) * 256);
        args[3] = uint->zero;
        args[4] = uint->one;  /* OFFEN */
@@ -616,6 +1037,7 @@ static void declare_system_value(
 {
        struct si_shader_context *si_shader_ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
+       struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
        struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
        struct gallivm_state *gallivm = &radeon_bld->gallivm;
        LLVMValueRef value = 0;
@@ -645,8 +1067,13 @@ static void declare_system_value(
                break;
 
        case TGSI_SEMANTIC_INVOCATIONID:
-               value = LLVMGetParam(radeon_bld->main_fn,
-                                    SI_PARAM_GS_INSTANCE_ID);
+               if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+                       value = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+               else if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY)
+                       value = LLVMGetParam(radeon_bld->main_fn,
+                                            SI_PARAM_GS_INSTANCE_ID);
+               else
+                       assert(!"INVOCATIONID not implemented");
                break;
 
        case TGSI_SEMANTIC_SAMPLEID:
@@ -683,6 +1110,48 @@ static void declare_system_value(
                        value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
                break;
 
+       case TGSI_SEMANTIC_TESSCOORD:
+       {
+               LLVMValueRef coord[4] = {
+                       LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_u),
+                       LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_v),
+                       bld->zero,
+                       bld->zero
+               };
+
+               /* For triangles, the vector should be (u, v, 1-u-v). */
+               if (si_shader_ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
+                   PIPE_PRIM_TRIANGLES)
+                       coord[2] = lp_build_sub(bld, bld->one,
+                                               lp_build_add(bld, coord[0], coord[1]));
+
+               value = lp_build_gather_values(gallivm, coord, 4);
+               break;
+       }
+
+       case TGSI_SEMANTIC_VERTICESIN:
+               value = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
+               break;
+
+       case TGSI_SEMANTIC_TESSINNER:
+       case TGSI_SEMANTIC_TESSOUTER:
+       {
+               LLVMValueRef dw_addr;
+               int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
+
+               dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+               dw_addr = LLVMBuildAdd(gallivm->builder, dw_addr,
+                                      lp_build_const_int32(gallivm, param * 4), "");
+
+               value = lds_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
+                                ~0, dw_addr);
+               break;
+       }
+
+       case TGSI_SEMANTIC_PRIMID:
+               value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
+               break;
+
        default:
                assert(!"unknown system value");
                return;
@@ -1338,6 +1807,134 @@ handle_semantic:
        }
 }
 
+/**
+ * Store the tessellation factors of one output to the tess factor buffer.
+ *
+ * The buffer descriptor is loaded from the SI_PARAM_RW_BUFFERS array at
+ * index SI_RING_TESS_FACTOR. Each patch occupies "stride" dwords, so the
+ * byte offset of the current patch is rel_patch_id * 4 * stride. Outer
+ * factors are stored at the start of the patch slot, inner factors directly
+ * after them (byte offset outer_comps * 4).
+ *
+ * \param name     TGSI_SEMANTIC_TESSOUTER or TGSI_SEMANTIC_TESSINNER;
+ *                 anything else asserts and returns without storing
+ * \param out_ptr  pointers to the 4 channels of the output register
+ */
+static void si_write_tess_factors(struct si_shader_context *si_shader_ctx,
+                                 unsigned name, LLVMValueRef *out_ptr)
+{
+       struct si_shader *shader = si_shader_ctx->shader;
+       struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
+       LLVMValueRef output, out[4];
+       unsigned stride, outer_comps, inner_comps, i;
+
+       if (name != TGSI_SEMANTIC_TESSOUTER &&
+           name != TGSI_SEMANTIC_TESSINNER) {
+               assert(0);
+               return;
+       }
+
+       /* Number of outer/inner factors and dwords per patch for each
+        * primitive mode. */
+       switch (shader->key.tcs.prim_mode) {
+       case PIPE_PRIM_LINES:
+               stride = 2;
+               outer_comps = 2;
+               inner_comps = 0;
+               break;
+       case PIPE_PRIM_TRIANGLES:
+               stride = 4;
+               outer_comps = 3;
+               inner_comps = 1;
+               break;
+       case PIPE_PRIM_QUADS:
+               stride = 6;
+               outer_comps = 4;
+               inner_comps = 2;
+               break;
+       default:
+               /* Bail out: with assertions compiled out, falling through
+                * would use stride/outer_comps/inner_comps uninitialized. */
+               assert(0);
+               return;
+       }
+
+       /* Load the outputs as i32. */
+       for (i = 0; i < 4; i++)
+               out[i] = LLVMBuildBitCast(gallivm->builder,
+                               LLVMBuildLoad(gallivm->builder, out_ptr[i], ""),
+                               bld_base->uint_bld.elem_type, "");
+
+       /* Convert the outputs to vectors. Outer factors are gathered into a
+        * power-of-two sized vector; a single inner factor stays scalar. */
+       if (name == TGSI_SEMANTIC_TESSOUTER)
+               output = lp_build_gather_values(gallivm, out,
+                                               util_next_power_of_two(outer_comps));
+       else if (inner_comps > 1)
+               output = lp_build_gather_values(gallivm, out, inner_comps);
+       else if (inner_comps == 1)
+               output = out[0];
+       else
+               return; /* no inner factors for this primitive mode */
+
+       /* Get the buffer. */
+       rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                 SI_PARAM_RW_BUFFERS);
+       buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
+                       lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));
+
+       /* Get offsets. */
+       tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                              SI_PARAM_TESS_FACTOR_OFFSET);
+       rel_patch_id = get_rel_patch_id(si_shader_ctx);
+       byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
+                                 lp_build_const_int32(gallivm, 4 * stride), "");
+
+       /* Store the output. */
+       if (name == TGSI_SEMANTIC_TESSOUTER) {
+               build_tbuffer_store_dwords(si_shader_ctx, buffer, output,
+                                          outer_comps, byteoffset, tf_base, 0);
+       } else if (inner_comps) {
+               build_tbuffer_store_dwords(si_shader_ctx, buffer, output,
+                                          inner_comps, byteoffset, tf_base,
+                                          outer_comps * 4);
+       }
+}
+
+/**
+ * LS epilogue: copy every shader output to LDS.
+ *
+ * The vertex base address is param_rel_auto_id * the per-vertex dword stride
+ * (unpacked from SI_PARAM_LS_OUT_LAYOUT with arguments (13, 8)). Each output
+ * is placed at 4 * its unique I/O index and all 4 channels are stored.
+ */
+static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct tgsi_shader_info *info = &ctx->shader->selector->info;
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef rel_vertex, stride_dw, vertex_base;
+       unsigned out, comp;
+
+       rel_vertex = LLVMGetParam(ctx->radeon_bld.main_fn,
+                                 ctx->param_rel_auto_id);
+       stride_dw = unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+       vertex_base = LLVMBuildMul(gallivm->builder, rel_vertex, stride_dw, "");
+
+       /* Write outputs to LDS. The next shader (TCS aka HS) will read
+        * its inputs from it. */
+       for (out = 0; out < info->num_outputs; out++) {
+               LLVMValueRef *chan_ptrs = ctx->radeon_bld.soa.outputs[out];
+               int slot = si_shader_io_get_unique_index(
+                               info->output_semantic_name[out],
+                               info->output_semantic_index[out]);
+               LLVMValueRef slot_offset = lp_build_const_int32(gallivm, slot * 4);
+               LLVMValueRef out_addr = LLVMBuildAdd(gallivm->builder, vertex_base,
+                                                    slot_offset, "");
+
+               for (comp = 0; comp < 4; comp++) {
+                       LLVMValueRef val = LLVMBuildLoad(gallivm->builder,
+                                                        chan_ptrs[comp], "");
+
+                       lds_store(bld_base, comp, out_addr, val);
+               }
+       }
+}
+
+/**
+ * TCS epilogue: pass every TESSINNER/TESSOUTER output to
+ * si_write_tess_factors(). Outputs with other semantics are skipped.
+ */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct tgsi_shader_info *info = &ctx->shader->selector->info;
+       unsigned out;
+
+       /* Only write tessellation factors. Other outputs have already been
+        * written to LDS by instructions. */
+       for (out = 0; out < info->num_outputs; out++) {
+               unsigned semantic = info->output_semantic_name[out];
+
+               if (semantic != TGSI_SEMANTIC_TESSINNER &&
+                   semantic != TGSI_SEMANTIC_TESSOUTER)
+                       continue;
+
+               si_write_tess_factors(ctx, semantic,
+                                     ctx->radeon_bld.soa.outputs[out]);
+       }
+}
+
 static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
 {
        struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
@@ -1347,6 +1944,9 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
        LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
        LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
                                            si_shader_ctx->param_es2gs_offset);
+       uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
+                                          es->key.tes.es_enabled_outputs :
+                                          es->key.vs.es_enabled_outputs;
        unsigned chan;
        int i;
 
@@ -1361,7 +1961,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
 
                param_index = get_param_index(info->output_semantic_name[i],
                                              info->output_semantic_index[i],
-                                             es->key.vs.gs_used_inputs);
+                                             enabled_outputs);
                if (param_index < 0)
                        continue;
 
@@ -2201,19 +2801,19 @@ static void si_llvm_emit_ddxy(
        indices[0] = bld_base->uint_bld.zero;
        indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
                                     NULL, 0, LLVMReadNoneAttribute);
-       store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+       store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
                                 indices, 2, "");
 
        indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
                                  lp_build_const_int32(gallivm, 0xfffffffc), "");
-       load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+       load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
                                 indices, 2, "");
 
        indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
                                  lp_build_const_int32(gallivm,
                                                       opcode == TGSI_OPCODE_DDX ? 1 : 2),
                                  "");
-       load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+       load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
                                 indices, 2, "");
 
        for (c = 0; c < 4; ++c) {
@@ -2432,6 +3032,9 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
                if (shader->key.vs.as_es) {
                        params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+               } else if (shader->key.vs.as_ls) {
+                       params[SI_PARAM_LS_OUT_LAYOUT] = i32;
+                       num_params = SI_PARAM_LS_OUT_LAYOUT+1;
                } else {
                        if (shader->is_gs_copy_shader) {
                                last_array_pointer = SI_PARAM_CONST;
@@ -2447,11 +3050,44 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
                /* VGPRs */
                params[si_shader_ctx->param_vertex_id = num_params++] = i32;
-               params[num_params++] = i32; /* unused*/
+               params[si_shader_ctx->param_rel_auto_id = num_params++] = i32;
                params[num_params++] = i32; /* unused */
                params[si_shader_ctx->param_instance_id = num_params++] = i32;
                break;
 
+       case TGSI_PROCESSOR_TESS_CTRL:
+               params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+               params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+               params[SI_PARAM_TCS_IN_LAYOUT] = i32;
+               params[SI_PARAM_TESS_FACTOR_OFFSET] = i32;
+               last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+
+               /* VGPRs */
+               params[SI_PARAM_PATCH_ID] = i32;
+               params[SI_PARAM_REL_IDS] = i32;
+               num_params = SI_PARAM_REL_IDS+1;
+               break;
+
+       case TGSI_PROCESSOR_TESS_EVAL:
+               params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+               params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+               num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
+
+               if (shader->key.tes.as_es) {
+                       params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+               } else {
+                       declare_streamout_params(si_shader_ctx, &shader->selector->so,
+                                                params, i32, &num_params);
+               }
+               last_sgpr = num_params - 1;
+
+               /* VGPRs */
+               params[si_shader_ctx->param_tes_u = num_params++] = f32;
+               params[si_shader_ctx->param_tes_v = num_params++] = f32;
+               params[si_shader_ctx->param_tes_rel_patch_id = num_params++] = i32;
+               params[si_shader_ctx->param_tes_patch_id = num_params++] = i32;
+               break;
+
        case TGSI_PROCESSOR_GEOMETRY:
                params[SI_PARAM_GS2VS_OFFSET] = i32;
                params[SI_PARAM_GS_WAVE_ID] = i32;
@@ -2519,11 +3155,30 @@ static void create_function(struct si_shader_context *si_shader_ctx)
        if (bld_base->info &&
            (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
-               si_shader_ctx->ddxy_lds =
+               si_shader_ctx->lds =
                        LLVMAddGlobalInAddressSpace(gallivm->module,
                                                    LLVMArrayType(i32, 64),
                                                    "ddxy_lds",
                                                    LOCAL_ADDR_SPACE);
+
+       if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
+           si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
+           si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
+               /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+               unsigned vertex_data_dw_size = 32*32*4;
+               unsigned patch_data_dw_size = 32*4;
+               /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+               unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+               unsigned lds_dwords = patch_dw_size;
+
+               /* The actual size is computed outside of the shader to reduce
+                * the number of shader variants. */
+               si_shader_ctx->lds =
+                       LLVMAddGlobalInAddressSpace(gallivm->module,
+                                                   LLVMArrayType(i32, lds_dwords),
+                                                   "tess_lds",
+                                                   LOCAL_ADDR_SPACE);
+       }
 }
 
 static void preload_constants(struct si_shader_context *si_shader_ctx)
@@ -2600,9 +3255,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
        struct gallivm_state * gallivm = bld_base->base.gallivm;
        unsigned i;
 
-       if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
-           si_shader_ctx->shader->key.vs.as_es ||
-           !si_shader_ctx->shader->selector->so.num_outputs)
+       /* Streamout can only be used if the shader is compiled as VS. */
+       if (!si_shader_ctx->shader->selector->so.num_outputs ||
+           (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+            (si_shader_ctx->shader->key.vs.as_es ||
+             si_shader_ctx->shader->key.vs.as_ls)) ||
+           (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+            si_shader_ctx->shader->key.tes.as_es))
                return;
 
        LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -2633,6 +3292,8 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
 
        if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
             si_shader_ctx->shader->key.vs.as_es) ||
+           (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+            si_shader_ctx->shader->key.tes.as_es) ||
            si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
                LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
 
@@ -2893,9 +3554,21 @@ static void si_dump_key(unsigned shader, union si_shader_key *key)
                fprintf(stderr, "}\n");
 
                if (key->vs.as_es)
-                       fprintf(stderr, "  gs_used_inputs = 0x%"PRIx64"\n",
-                               key->vs.gs_used_inputs);
+                       fprintf(stderr, "  es_enabled_outputs = 0x%"PRIx64"\n",
+                               key->vs.es_enabled_outputs);
                fprintf(stderr, "  as_es = %u\n", key->vs.as_es);
+               fprintf(stderr, "  as_ls = %u\n", key->vs.as_ls);
+               break;
+
+       case PIPE_SHADER_TESS_CTRL:
+               fprintf(stderr, "  prim_mode = %u\n", key->tcs.prim_mode);
+               break;
+
+       case PIPE_SHADER_TESS_EVAL:
+               if (key->tes.as_es)
+                       fprintf(stderr, "  es_enabled_outputs = 0x%"PRIx64"\n",
+                               key->tes.es_enabled_outputs);
+               fprintf(stderr, "  as_es = %u\n", key->tes.as_es);
                break;
 
        case PIPE_SHADER_GEOMETRY:
@@ -2995,11 +3668,25 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
        switch (si_shader_ctx.type) {
        case TGSI_PROCESSOR_VERTEX:
                si_shader_ctx.radeon_bld.load_input = declare_input_vs;
-               if (shader->key.vs.as_es) {
+               if (shader->key.vs.as_ls)
+                       bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+               else if (shader->key.vs.as_es)
                        bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
-               } else {
+               else
+                       bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+               break;
+       case TGSI_PROCESSOR_TESS_CTRL:
+               bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+               bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
+               bld_base->emit_store = store_output_tcs;
+               bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+               break;
+       case TGSI_PROCESSOR_TESS_EVAL:
+               bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
+               if (shader->key.tes.as_es)
+                       bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
+               else
                        bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
-               }
                break;
        case TGSI_PROCESSOR_GEOMETRY:
                bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
index be3e98a..2cba96f 100644 (file)
  *      Christian König <christian.koenig@amd.com>
  */
 
+/* How linking tessellation shader inputs and outputs works.
+ *
+ * Inputs and outputs between shaders are stored in a buffer. This buffer
+ * lives in LDS (typical case for tessellation), but it can also live
+ * in memory. Each input or output has a fixed location within a vertex.
+ * The highest used input or output determines the stride between vertices.
+ *
+ * Since tessellation is only enabled in the OpenGL core profile,
+ * only these semantics are valid for per-vertex data:
+ *
+ *   Name             Location
+ *
+ *   POSITION         0
+ *   PSIZE            1
+ *   CLIPDIST0..1     2..3
+ *   CULLDIST0..1     (not implemented)
+ *   GENERIC0..31     4..35
+ *
+ * For example, a shader only writing GENERIC0 has the output stride of 5.
+ *
+ * Only these semantics are valid for per-patch data:
+ *
+ *   Name             Location
+ *
+ *   TESSOUTER        0
+ *   TESSINNER        1
+ *   PATCH0..29       2..31
+ *
+ * That's how independent shaders agree on input and output locations.
+ * The si_shader_io_get_unique_index function assigns the locations.
+ *
+ * Other required information for calculating the input and output addresses
+ * like the vertex stride, the patch stride, and the offsets where per-vertex
+ * and per-patch data start, is passed to the shader via user data SGPRs.
+ * The offsets and strides are calculated at draw time and aren't available
+ * at compile time.
+ *
+ * The same approach should be used for linking ES->GS in the future.
+ */
+
 #ifndef SI_SHADER_H
 #define SI_SHADER_H
 
@@ -43,9 +83,16 @@ struct radeon_shader_reloc;
 #define SI_SGPR_VERTEX_BUFFER  8  /* VS only */
 #define SI_SGPR_BASE_VERTEX    10 /* VS only */
 #define SI_SGPR_START_INSTANCE 11 /* VS only */
+#define SI_SGPR_LS_OUT_LAYOUT  12 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS        8  /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 9  /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT  10 /* TCS only */
 #define SI_SGPR_ALPHA_REF      8  /* PS only */
 
 #define SI_VS_NUM_USER_SGPR    12
+#define SI_LS_NUM_USER_SGPR    13
+#define SI_TCS_NUM_USER_SGPR   11
+#define SI_TES_NUM_USER_SGPR   10
 #define SI_GS_NUM_USER_SGPR    8
 #define SI_GSCOPY_NUM_USER_SGPR        4
 #define SI_PS_NUM_USER_SGPR    9
@@ -62,6 +109,31 @@ struct radeon_shader_reloc;
 #define SI_PARAM_START_INSTANCE        6
 /* the other VS parameters are assigned dynamically */
 
+/* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ *   [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32 + 32*32
+ */
+#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+
+/* Layout of TCS outputs / TES inputs:
+ *   [0:12] = stride between output patches in dwords = num_outputs * num_vertices * 4, max = 32*32*4
+ *   [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
+ *   [26:31] = gl_PatchVerticesIn, max = 32
+ */
+#define SI_PARAM_TCS_OUT_LAYOUT        5 /* for TCS & TES */
+
+/* Layout of LS outputs / TCS inputs
+ *   [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
+ *   [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+ *
+ * Both parameters below carry this same bitfield; only the parameter
+ * index differs between the TCS and the LS (a VS compiled as LS).
+ */
+#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 7 /* LS only; same contents as TCS_IN_LAYOUT, different parameter index */
+
+/* TCS only parameters. */
+#define SI_PARAM_TESS_FACTOR_OFFSET 7
+#define SI_PARAM_PATCH_ID      8
+#define SI_PARAM_REL_IDS       9
+
 /* GS only parameters */
 #define SI_PARAM_GS2VS_OFFSET  4
 #define SI_PARAM_GS_WAVE_ID    5
@@ -113,9 +185,24 @@ struct si_shader_selector {
        unsigned        gs_output_prim;
        unsigned        gs_max_out_vertices;
        unsigned        gs_num_invocations;
-       uint64_t        gs_used_inputs; /* mask of "get_unique_index" bits */
+
+       /* masks of "get_unique_index" bits */
+       uint64_t        inputs_read;
+       uint64_t        outputs_written;
+       uint32_t        patch_outputs_written;
 };
 
+/* Valid shader configurations:
+ *
+ * API shaders       VS | TCS | TES | GS |pass| PS
+ * are compiled as:     |     |     |    |thru|
+ *                      |     |     |    |    |
+ * Only VS & PS:     VS | --  | --  | -- | -- | PS
+ * With GS:          ES | --  | --  | GS | VS | PS
+ * With Tessel.:     LS | HS  | VS  | -- | -- | PS
+ * With both:        LS | HS  | ES  | GS | VS | PS
+ */
+
 union si_shader_key {
        struct {
                unsigned        export_16bpc:8;
@@ -128,11 +215,23 @@ union si_shader_key {
        } ps;
        struct {
                unsigned        instance_divisors[SI_NUM_VERTEX_BUFFERS];
-               /* The mask of "get_unique_index" bits, needed for ES,
-                * it describes how the ES->GS ring buffer is laid out. */
-               uint64_t        gs_used_inputs;
-               unsigned        as_es:1;
+               /* Mask of "get_unique_index" bits - which outputs are read
+                * by the next stage (needed by ES).
+                * This describes how outputs are laid out in memory. */
+               uint64_t        es_enabled_outputs;
+               unsigned        as_es:1; /* export shader */
+               unsigned        as_ls:1; /* local shader */
        } vs;
+       struct {
+               unsigned        prim_mode:3;
+       } tcs; /* tessellation control shader */
+       struct {
+               /* Mask of "get_unique_index" bits - which outputs are read
+                * by the next stage (needed by ES).
+                * This describes how outputs are laid out in memory. */
+               uint64_t        es_enabled_outputs;
+               unsigned        as_es:1; /* export shader */
+       } tes; /* tessellation evaluation shader */
 };
 
 struct si_shader {
index 6174dad..af001b3 100644 (file)
@@ -142,8 +142,9 @@ struct si_shader_data {
  * Ring buffers:        0..1
  * Streamout buffers:   2..5
  */
-#define SI_RING_ESGS           0
-#define SI_RING_GSVS           1
+#define SI_RING_TESS_FACTOR    0 /* for HS (TCS)  */
+#define SI_RING_ESGS           0 /* for ES, GS */
+#define SI_RING_GSVS           1 /* for GS, VS */
 #define SI_NUM_RING_BUFFERS    2
 #define SI_SO_BUF_OFFSET       SI_NUM_RING_BUFFERS
 #define SI_NUM_RW_BUFFERS      (SI_SO_BUF_OFFSET + 4)
index 3eec217..2a0ff10 100644 (file)
@@ -112,7 +112,7 @@ static void si_shader_gs(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
 
        si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-                      util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
+                      util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
        si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
 
        si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -351,9 +351,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                                key->vs.instance_divisors[i] =
                                        sctx->vertex_elements->elements[i].instance_divisor;
 
-               if (sctx->gs_shader) {
+               if (sctx->tes_shader)
+                       key->vs.as_ls = 1;
+               else if (sctx->gs_shader) {
                        key->vs.as_es = 1;
-                       key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
+                       key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
+               }
+               break;
+       case PIPE_SHADER_TESS_CTRL:
+               key->tcs.prim_mode =
+                       sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+               break;
+       case PIPE_SHADER_TESS_EVAL:
+               if (sctx->gs_shader) {
+                       key->tes.as_es = 1;
+                       key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
                }
                break;
        case PIPE_SHADER_GEOMETRY:
@@ -487,10 +499,31 @@ static void *si_create_shader_state(struct pipe_context *ctx,
                        case TGSI_SEMANTIC_PRIMID:
                                break;
                        default:
-                               sel->gs_used_inputs |=
+                               sel->inputs_read |=
                                        1llu << si_shader_io_get_unique_index(name, index);
                        }
                }
+               break;
+
+       case PIPE_SHADER_VERTEX:
+       case PIPE_SHADER_TESS_CTRL:
+               for (i = 0; i < sel->info.num_outputs; i++) {
+                       unsigned name = sel->info.output_semantic_name[i];
+                       unsigned index = sel->info.output_semantic_index[i];
+
+                       switch (name) {
+                       case TGSI_SEMANTIC_TESSINNER:
+                       case TGSI_SEMANTIC_TESSOUTER:
+                       case TGSI_SEMANTIC_PATCH:
+                               sel->patch_outputs_written |=
+                                       1llu << si_shader_io_get_unique_index(name, index);
+                               break;
+                       default:
+                               sel->outputs_written |=
+                                       1llu << si_shader_io_get_unique_index(name, index);
+                       }
+               }
+               break;
        }
 
        if (sscreen->b.debug_flags & DBG_PRECOMPILE)