From fc6caef4cb67fb13642c5ebccee53019d1764df6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 14:40:51 +1000 Subject: [PATCH] r600g: evergreen interpolation support. On evergreen, interpolation has moved into the fragment shader, with the interpolation parameters being passed via GPRs and LDS entries. This works out the number of interps required and reserves GPR/LDS storage for them, it also correctly routes face/position values which aren't interpolated from the vertex shader. Also if we notice nothing is to be interpolated we always set up perspective interpolation for one value otherwise the GPU appears to lock up. This fixes about 15 piglit tests on evergreen. --- src/gallium/drivers/r600/evergreen_state.c | 74 ++++++++++++++++++----- src/gallium/drivers/r600/r600_shader.c | 95 ++++++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_shader.h | 2 + 3 files changed, 149 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 542df11db66..935496c04af 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1529,23 +1529,39 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + int ninterp = 0; + boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; + unsigned spi_baryc_cntl; /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, 
rshader, i)); + /* evergreen NUM_INTERP only contains values interpolated into the LDS, + POSITION goes via GPRs from the SC so isn't counted */ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; + pos_index = i; + else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + else { + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR || + rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + ninterp++; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + have_perspective = TRUE; + if (rshader->input[i].centroid) + have_centroid = TRUE; + } if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { tmp |= S_028644_PT_SPRITE_TEX(1); @@ -1568,7 +1584,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; @@ -1580,19 +1597,49 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 2; } - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); + if (ninterp == 0) { + ninterp = 1; + have_perspective = TRUE; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | + 
S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | + S_0286CC_LINEAR_GRADIENT_ENA(have_linear); spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1); + if (pos_index != -1) { + spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); spi_input_z |= 1; } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + spi_baryc_cntl = 0; + if (have_perspective) + spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | + S_0286E0_PERSP_CENTROID_ENA(have_centroid); + if (have_linear) + spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); + spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, + 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, + R_0286E0_SPI_BARYC_CNTL, + spi_baryc_cntl, + 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); r600_pipe_state_add_reg(rstate, @@ -1607,11 +1654,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0286E0_SPI_BARYC_CNTL, - S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1), - 0xFFFFFFFF, NULL); if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ diff --git 
a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index b53d4780719..94c9cbd9234 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -357,6 +357,11 @@ struct r600_shader_ctx { u32 *literals; u32 nliterals; u32 max_driver_temp_used; + /* needed for evergreen interpolation */ + boolean input_centroid; + boolean input_linear; + boolean input_perspective; + int num_interp_gpr; }; struct r600_shader_tgsi_instruction { @@ -404,10 +409,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; struct r600_bc_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = 0; + + if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + ij_index = 0; + if (ctx->shader->input[input].centroid) + ij_index++; + } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { + ij_index = 0; + /* if we have perspective add one */ + if (ctx->input_perspective) { + ij_index++; + /* if we have perspective centroid */ + if (ctx->input_centroid) + ij_index++; + } + if (ctx->shader->input[input].centroid) + ij_index++; + } + + /* work out gpr and base_chan from index */ + gpr = ij_index / 2; + base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -418,13 +446,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { - alu.dst.sel = ctx->shader->input[gpr].gpr; + alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; } alu.dst.chan = i % 4; - alu.src[0].chan = (1 - (i % 2)); - alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; + + alu.src[0].sel = gpr; + alu.src[0].chan = (base_chan - (i % 2)); + + alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + 
ctx->shader->input[input].lds_pos; alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) @@ -474,7 +505,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { /* turn input into interpolate on EG */ - evergreen_interp_alu(ctx, i); + if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { + if (ctx->shader->input[i].interpolate > 0) { + ctx->shader->input[i].lds_pos = ctx->shader->nlds++; + evergreen_interp_alu(ctx, i); + } + } } break; case TGSI_FILE_OUTPUT: @@ -501,6 +537,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +/* + * for evergreen we need to scan the shader to find the number of GPRs we need to + * reserve for interpolation. + * + * we need to know if we are going to emit + * any centroid inputs + * if perspective and linear are required +*/ +static int evergreen_gpr_count(struct r600_shader_ctx *ctx) +{ + int i; + int num_baryc; + + ctx->input_linear = FALSE; + ctx->input_perspective = FALSE; + ctx->input_centroid = FALSE; + ctx->num_interp_gpr = 1; + + /* any centroid inputs */ + for (i = 0; i < ctx->info.num_inputs; i++) { + /* skip position/face */ + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + continue; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) + ctx->input_linear = TRUE; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) + ctx->input_perspective = TRUE; + if (ctx->info.input_centroid[i]) + ctx->input_centroid = TRUE; + } + + num_baryc = 0; + /* ignoring sample for now */ + if (ctx->input_perspective) + num_baryc++; + if (ctx->input_linear) + num_baryc++; + if (ctx->input_centroid) + num_baryc *= 2; + + ctx->num_interp_gpr += (num_baryc + 1) >> 1; + + /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ + return ctx->num_interp_gpr; +} + int r600_shader_from_tgsi(const struct 
tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -547,7 +630,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_INPUT] = 1; } if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { - ctx.file_offset[TGSI_FILE_INPUT] = 1; + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_count[TGSI_FILE_INPUT]; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index a341cca0836..f8bc5951395 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -32,6 +32,7 @@ struct r600_shader_io { int sid; unsigned interpolate; boolean centroid; + unsigned lds_pos; /* for evergreen */ }; struct r600_shader { @@ -40,6 +41,7 @@ struct r600_shader { boolean flat_shade; unsigned ninput; unsigned noutput; + unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; -- 2.11.0