OSDN Git Service

r600/eg: workaround bug with tess shader and dynamic GPRs.
authorDave Airlie <airlied@redhat.com>
Mon, 30 Nov 2015 05:22:06 +0000 (15:22 +1000)
committerDave Airlie <airlied@redhat.com>
Sun, 6 Dec 2015 23:59:01 +0000 (09:59 +1000)
When using tessellation on eg/ni chipsets, we must disable
dynamic GPRs to workaround a hw bug where the GPU hangs
when too many things get queued.

This implements something like the r600 code to emit
the transition between static and dynamic GPRs, and to
statically allocate GPRs when tessellation is enabled.

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state_common.c

index a3e198c..ef6de8c 100644 (file)
@@ -432,6 +432,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
         */
        r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
 
+       /* emit config state */
+       if (ctx->b.chip_class == EVERGREEN)
+               r600_emit_atom(ctx, &ctx->config_state.atom);
+
        ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
        r600_flush_emit(ctx);
 
@@ -791,7 +795,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
 
        /* Config Registers */
        if (ctx->b.chip_class < CAYMAN)
-               evergreen_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
+               evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family,
                                           ctx->screen->b.info.drm_minor);
        else
                cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
index 79cdd7c..229baab 100644 (file)
@@ -869,6 +869,33 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
                                                    tex->width0, tex->height0, 0);
 }
 
+static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+       struct r600_config_state *a = (struct r600_config_state*)atom;
+
+       radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
+       if (a->dyn_gpr_enabled) {
+               radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
+               radeon_emit(cs, 0);
+               radeon_emit(cs, 0);
+       } else {
+               radeon_emit(cs, a->sq_gpr_resource_mgmt_1);
+               radeon_emit(cs, a->sq_gpr_resource_mgmt_2);
+               radeon_emit(cs, a->sq_gpr_resource_mgmt_3);
+       }
+       radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8));
+       if (a->dyn_gpr_enabled) {
+               radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+                                      S_028838_PS_GPRS(0x1e) |
+                                      S_028838_VS_GPRS(0x1e) |
+                                      S_028838_GS_GPRS(0x1e) |
+                                      S_028838_ES_GPRS(0x1e) |
+                                      S_028838_HS_GPRS(0x1e) |
+                                      S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+       }
+}
+
 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
@@ -2553,10 +2580,10 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
        eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF);
 }
 
-void evergreen_init_common_regs(struct r600_command_buffer *cb,
-       enum chip_class ctx_chip_class,
-       enum radeon_family ctx_family,
-       int ctx_drm_minor)
+void evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb,
+                               enum chip_class ctx_chip_class,
+                               enum radeon_family ctx_family,
+                               int ctx_drm_minor)
 {
        int ps_prio;
        int vs_prio;
@@ -2567,31 +2594,23 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
        int cs_prio;
        int ls_prio;
 
-       int num_ps_gprs;
-       int num_vs_gprs;
-       int num_gs_gprs;
-       int num_es_gprs;
-       int num_hs_gprs;
-       int num_ls_gprs;
-       int num_temp_gprs;
-
        unsigned tmp;
 
        ps_prio = 0;
        vs_prio = 1;
        gs_prio = 2;
        es_prio = 3;
-       hs_prio = 0;
-       ls_prio = 0;
+       hs_prio = 3;
+       ls_prio = 3;
        cs_prio = 0;
 
-       num_ps_gprs = 93;
-       num_vs_gprs = 46;
-       num_temp_gprs = 4;
-       num_gs_gprs = 31;
-       num_es_gprs = 31;
-       num_hs_gprs = 23;
-       num_ls_gprs = 23;
+       rctx->default_gprs[R600_HW_STAGE_PS] = 93;
+       rctx->default_gprs[R600_HW_STAGE_VS] = 46;
+       rctx->r6xx_num_clause_temp_gprs = 4;
+       rctx->default_gprs[R600_HW_STAGE_GS] = 31;
+       rctx->default_gprs[R600_HW_STAGE_ES] = 31;
+       rctx->default_gprs[EG_HW_STAGE_HS] = 23;
+       rctx->default_gprs[EG_HW_STAGE_LS] = 23;
 
        tmp = 0;
        switch (ctx_family) {
@@ -2614,40 +2633,12 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
        tmp |= S_008C00_GS_PRIO(gs_prio);
        tmp |= S_008C00_ES_PRIO(es_prio);
 
-       /* enable dynamic GPR resource management */
-       if (ctx_drm_minor >= 7) {
-               r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
-               r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-               /* always set temp clauses */
-               r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-               r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
-               r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
-               r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
-               r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
-               r600_store_context_reg(cb, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
-                                       S_028838_PS_GPRS(0x1e) |
-                                       S_028838_VS_GPRS(0x1e) |
-                                       S_028838_GS_GPRS(0x1e) |
-                                       S_028838_ES_GPRS(0x1e) |
-                                       S_028838_HS_GPRS(0x1e) |
-                                       S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
-       } else {
-               r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 4);
-               r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
-
-               tmp = S_008C04_NUM_PS_GPRS(num_ps_gprs);
-               tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-               tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-               r600_store_value(cb, tmp); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */
-
-               tmp = S_008C08_NUM_GS_GPRS(num_gs_gprs);
-               tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-               r600_store_value(cb, tmp); /* R_008C08_SQ_GPR_RESOURCE_MGMT_2 */
+       r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1);
+       r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
 
-               tmp = S_008C0C_NUM_HS_GPRS(num_hs_gprs);
-               tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
-               r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */
-       }
+       r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
+       r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
+       r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
 
        /* The cs checker requires this register to be set. */
        r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
@@ -2694,7 +2685,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
        r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
        r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 
-       evergreen_init_common_regs(cb, rctx->b.chip_class,
+       evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
                                   rctx->b.family, rctx->screen->b.info.drm_minor);
 
        family = rctx->b.family;
@@ -3693,7 +3684,11 @@ void evergreen_init_state_functions(struct r600_context *rctx)
         * or piglit regression).
         * !!!
         */
-
+       if (rctx->b.chip_class == EVERGREEN) {
+               r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11);
+               if (rctx->screen->b.info.drm_minor >= 7)
+                       rctx->config_state.dyn_gpr_enabled = true;
+       }
        r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
        /* shader const */
        r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
@@ -3921,3 +3916,122 @@ void evergreen_set_lds_alloc(struct r600_context *rctx,
 {
        radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
 }
+
+/* on evergreen if you are running tessellation you need to disable dynamic
+   GPRs to workaround a hardware bug.*/
+bool evergreen_adjust_gprs(struct r600_context *rctx)
+{
+       unsigned num_gprs[EG_NUM_HW_STAGES];
+       unsigned def_gprs[EG_NUM_HW_STAGES];
+       unsigned cur_gprs[EG_NUM_HW_STAGES];
+       unsigned new_gprs[EG_NUM_HW_STAGES];
+       unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
+       unsigned max_gprs;
+       unsigned i;
+       unsigned total_gprs;
+       unsigned tmp[3];
+       bool rework = false, set_default = false, set_dirty = false;
+       max_gprs = 0;
+       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+               def_gprs[i] = rctx->default_gprs[i];
+               max_gprs += def_gprs[i];
+       }
+       max_gprs += def_num_clause_temp_gprs * 2;
+
+       /* if we have no TESS and dyn gpr is enabled then do nothing. */
+       if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader || rctx->screen->b.info.drm_minor < 7) {
+               if (rctx->config_state.dyn_gpr_enabled)
+                       return true;
+
+               /* transition back to dyn gpr enabled state */
+               rctx->config_state.dyn_gpr_enabled = true;
+               r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
+               rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+               return true;
+       }
+
+
+       /* gather required shader gprs */
+       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+               if (rctx->hw_shader_stages[i].shader)
+                       num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr;
+               else
+                       num_gprs[i] = 0;
+       }
+
+       cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+       cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
+       cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+       cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
+       cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
+       cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
+
+       total_gprs = 0;
+       for (i = 0; i < EG_NUM_HW_STAGES; i++)  {
+               new_gprs[i] = num_gprs[i];
+               total_gprs += num_gprs[i];
+       }
+
+       if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs)))
+               return false;
+
+       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+               if (new_gprs[i] > cur_gprs[i]) {
+                       rework = true;
+                       break;
+               }
+       }
+
+       if (rctx->config_state.dyn_gpr_enabled) {
+               set_dirty = true;
+               rctx->config_state.dyn_gpr_enabled = false;
+       }
+
+       if (rework) {
+               set_default = true;
+               for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+                       if (new_gprs[i] > def_gprs[i])
+                               set_default = false;
+               }
+
+               if (set_default) {
+                       for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+                               new_gprs[i] = def_gprs[i];
+                       }
+               } else {
+                       unsigned ps_value = max_gprs;
+
+                       ps_value -= (def_num_clause_temp_gprs * 2);
+                       for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++)
+                               ps_value -= new_gprs[i];
+
+                       new_gprs[R600_HW_STAGE_PS] = ps_value;
+               }
+
+               tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
+                       S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
+                       S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);
+
+               tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
+                       S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);
+
+               tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) |
+                       S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]);
+
+               if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] ||
+                   rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] ||
+                   rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) {
+                       rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0];
+                       rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1];
+                       rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2];
+                       set_dirty = true;
+               }
+       }
+
+
+       if (set_dirty) {
+               r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
+               rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+       }
+       return true;
+}
index b7845b5..90b99e8 100644 (file)
@@ -310,7 +310,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
        ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
        ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
        r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
-       if (ctx->b.chip_class < EVERGREEN) {
+       if (ctx->b.chip_class <= EVERGREEN) {
                r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
        }
        r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
index f1651df..795fb9a 100644 (file)
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 51
+#define R600_NUM_ATOMS 52
 
 #define R600_MAX_VIEWPORTS 16
 
@@ -206,6 +206,8 @@ struct r600_config_state {
        struct r600_atom atom;
        unsigned sq_gpr_resource_mgmt_1;
        unsigned sq_gpr_resource_mgmt_2;
+       unsigned sq_gpr_resource_mgmt_3;
+       bool dyn_gpr_enabled;
 };
 
 struct r600_stencil_ref
@@ -441,6 +443,7 @@ struct r600_context {
        boolean                         has_vertex_cache;
        boolean                         keep_tiling_flags;
        unsigned                        default_gprs[EG_NUM_HW_STAGES];
+       unsigned                        current_gprs[EG_NUM_HW_STAGES];
        unsigned                        r6xx_num_clause_temp_gprs;
 
        /* Miscellaneous state objects. */
@@ -608,7 +611,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                     const struct pipe_sampler_view *state,
                                     unsigned width0, unsigned height0,
                                     unsigned force_level);
-void evergreen_init_common_regs(struct r600_command_buffer *cb,
+void evergreen_init_common_regs(struct r600_context *ctx,
+                               struct r600_command_buffer *cb,
                                enum chip_class ctx_chip_class,
                                enum radeon_family ctx_family,
                                int ctx_drm_minor);
@@ -639,7 +643,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
 void evergreen_init_color_surface_rat(struct r600_context *rctx,
                                        struct r600_surface *surf);
 void evergreen_update_db_shader_control(struct r600_context * rctx);
-
+bool evergreen_adjust_gprs(struct r600_context *rctx);
 /* r600_blit.c */
 void r600_init_blit_functions(struct r600_context *rctx);
 void r600_decompress_depth_textures(struct r600_context *rctx,
index 1a18e69..6a66634 100644 (file)
@@ -1624,6 +1624,13 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                }
        }
 
+       if (rctx->b.chip_class == EVERGREEN) {
+               if (!evergreen_adjust_gprs(rctx)) {
+                       /* discard rendering */
+                       return false;
+               }
+       }
+
        blend_disable = (rctx->dual_src_blend &&
                        rctx->ps_shader->current->nr_ps_color_outputs < 2);