From 63503f284863530d628f26bea27f2390aca518e5 Mon Sep 17 00:00:00 2001 From: Markus Amsler Date: Sat, 3 May 2008 12:55:45 -0400 Subject: [PATCH] r300: Set correct VAP_CNTL per vertex program. adapted from Markus' patch on bug 15386 with updates for non-TCL and R500. --- src/mesa/drivers/dri/r300/r300_ioctl.c | 32 +++++++++- src/mesa/drivers/dri/r300/r300_state.c | 111 ++++++++++++++++++++++----------- 2 files changed, 105 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index d38ee9003d2..ffcde7ff354 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -187,6 +187,7 @@ static void r300EmitClearState(GLcontext * ctx) drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; int is_r500 = 0; + GLuint vap_cntl; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; @@ -206,11 +207,9 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, vir[0]); reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); if (!has_tcl) - /*e32(0x22030003);*/ e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - /*e32(0x21030003);*/ e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); @@ -426,6 +425,35 @@ static void r300EmitClearState(GLcontext * ctx) } if (has_tcl) { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + + R300_STATECHANGE(rmesa, vap_cntl); + reg_start(R300_VAP_CNTL, 0); + e32(vap_cntl); + + if (has_tcl) { R300_STATECHANGE(r300, pvs); reg_start(R300_VAP_PVS_CNTL_1, 2); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 21b5ac969b3..10002e3c4f8 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1811,10 +1811,70 @@ static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, } } +#define MIN3(a,b,c) ((a)<(b) ? MIN2(a, c): MIN2(b, c)) + +static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count) +{ + int vtx_mem_size; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + int pvs_num_slots; + int pvs_num_cntrls; + + /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. + * See r500 docs 6.5.2 */ + reg_start(R300_VAP_PVS_WAITIDLE, 0); + e32(0x00000000); + + /* avoid division by zero */ + if (input_count == 0) input_count = 1; + if (output_count == 0) output_count = 1; + if (temp_count == 0) temp_count = 1; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vtx_mem_size = 128; + else + vtx_mem_size = 72; + + pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count); + pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); + + R300_STATECHANGE(rmesa, vap_cntl); + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->hw.vap_cntl.cmd[1] = + (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | + (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; + } else + /* not sure about non-tcl */ + rmesa->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + rmesa->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + rmesa->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + rmesa->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + rmesa->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + +} + static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) { struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); GLuint o_reg = 0; + GLuint i_reg = 0; int i; int inst_count = 0; int param_count = 0; @@ -1827,6 +1887,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); program_end += 4; + i_reg++; } } @@ -1836,6 +1897,8 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) &(prog->program)); inst_count = (prog->program.length / 4) - 1; + r300VapCntl(rmesa, i_reg, o_reg, 0); + R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | @@ -1849,6 +1912,15 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); } +static int bit_count (int x) +{ + x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); + x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); + x = (x >> 16) + (x & 0xffff); + x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); + return (x >> 8) + (x & 0x00ff); +} + static void r300SetupRealVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -1870,6 +1942,9 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; + r300VapCntl(rmesa, bit_count(prog->key.InputsRead), + bit_count(prog->key.OutputsWritten), prog->num_temporaries); + R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | @@ -1903,13 +1978,6 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) r300SetupDefaultVertexProgram(rmesa); } - - /* FIXME: This is done for vertex shader fragments, but also needs to be - * done for vap_pvs, so I leave it as a reminder. */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif } /** @@ -2011,35 +2079,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - /* setup the VAP */ - /* for tcl, PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted - * dynamically. PVS_NUM_FPUS is fixed based on asic - */ - if (has_tcl) { - r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (12 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r300->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; - } else - r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (5 << R300_VF_MAX_VTX_NUM_SHIFT)); - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - r300->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) - r300->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); - else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) - r300->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - r300->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); - else - r300->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); - r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA -- 2.11.0