OSDN Git Service

radeonsi: avoid redundant CB and DB register updates
author: Marek Olšák <marek.olsak@amd.com>
Sat, 29 Aug 2015 00:32:13 +0000 (02:32 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 1 Sep 2015 19:51:14 +0000 (21:51 +0200)
The main idea is to avoid setting CB_COLORi_INFO = 0 for i>0 repeatedly
when those colorbuffers aren't used. This is mainly for glamor.

Same for DB. Z_INFO and STENCIL_INFO need to be cleared only once.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c

index 22a0950..08b2f64 100644 (file)
@@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 
        if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
                evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
-                                             &buffers, color);
+                                             &buffers, NULL, color);
                if (!buffers)
                        return; /* all buffers have been fast cleared */
        }
index 29db1cc..d22c230 100644 (file)
@@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
-                                  unsigned *buffers,
+                                  unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color);
 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
 void r600_init_context_texture_functions(struct r600_common_context *rctx);
index 5469691..89f18fb 100644 (file)
@@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
-                                  unsigned *buffers,
+                                  unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color)
 {
        int i;
@@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   tex->cmask.offset, tex->cmask.size, 0, true);
 
                tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+               if (dirty_cbufs)
+                       *dirty_cbufs |= 1 << i;
                rctx->set_atom_dirty(rctx, fb_state, true);
                *buffers &= ~clear_bit;
        }
index c28b2a8..d1486bd 100644 (file)
@@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
 
        if (buffers & PIPE_CLEAR_COLOR) {
-               evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
-                                             &buffers, color);
+               evergreen_do_fast_color_clear(&sctx->b, fb,
+                                             &sctx->framebuffer.atom, &buffers,
+                                             &sctx->framebuffer.dirty_cbufs,
+                                             color);
                if (!buffers)
                        return; /* all buffers have been fast cleared */
        }
@@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                }
 
                zstex->depth_clear_value = depth;
+               sctx->framebuffer.dirty_zsbuf = true;
                si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
                sctx->db_depth_clear = true;
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
index 2381b6c..5613781 100644 (file)
@@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx)
        /* The CS initialization should be emitted before everything else. */
        si_pm4_emit(ctx, ctx->init_config);
 
-       si_mark_atom_dirty(ctx, &ctx->clip_regs);
+       ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
+       ctx->framebuffer.dirty_zsbuf = true;
        si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+
+       si_mark_atom_dirty(ctx, &ctx->clip_regs);
        si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
        si_mark_atom_dirty(ctx, &ctx->msaa_config);
        si_mark_atom_dirty(ctx, &ctx->db_render_state);
index 47ad619..9be4aa7 100644 (file)
@@ -127,6 +127,8 @@ struct si_framebuffer {
        unsigned                        cb0_is_integer;
        unsigned                        compressed_cb_mask;
        unsigned                        export_16bpc;
+       unsigned                        dirty_cbufs;
+       bool                            dirty_zsbuf;
 };
 
 struct si_scissors {
index 88964e1..3c25048 100644 (file)
@@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                         SI_CONTEXT_INV_TC_L2 |
                         SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
 
+       /* Take the maximum of the old and new count. If the new count is lower,
+        * dirtying is needed to disable the unbound colorbuffers.
+        */
+       sctx->framebuffer.dirty_cbufs |=
+               (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
+       sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
+
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
        sctx->framebuffer.export_16bpc = 0;
@@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
        /* Colorbuffers. */
        for (i = 0; i < nr_cbufs; i++) {
+               if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
+                       continue;
+
                cb = (struct r600_surface*)state->cbufs[i];
                if (!cb) {
                        r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
@@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                        radeon_emit(cs, 0);     /* R_028C94_CB_COLOR0_DCC_BASE */
        }
        /* set CB_COLOR1_INFO for possible dual-src blending */
-       if (i == 1 && state->cbufs[0]) {
+       if (i == 1 && state->cbufs[0] &&
+           sctx->framebuffer.dirty_cbufs & (1 << 0)) {
                r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
                                       cb->cb_color_info | tex->cb_color_info);
                i++;
        }
-       for (; i < 8 ; i++) {
-               r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
-       }
+       for (; i < 8 ; i++)
+               if (sctx->framebuffer.dirty_cbufs & (1 << i))
+                       r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
        /* ZS buffer. */
-       if (state->zsbuf) {
+       if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
                struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
                struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
@@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
                r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                       zb->pa_su_poly_offset_db_fmt_cntl);
-       } else {
+       } else if (sctx->framebuffer.dirty_zsbuf) {
                r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
                radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
                radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
@@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
        r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
                               S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+
+       sctx->framebuffer.dirty_cbufs = 0;
+       sctx->framebuffer.dirty_zsbuf = false;
 }
 
 static void si_emit_msaa_sample_locs(struct si_context *sctx,