OSDN Git Service

freedreno: gmem bypass
author Rob Clark <robclark@freedesktop.org>
Mon, 27 May 2013 00:36:35 +0000 (20:36 -0400)
committer Rob Clark <robclark@freedesktop.org>
Sat, 8 Jun 2013 17:15:51 +0000 (13:15 -0400)
The GPU (at least a3xx, but I think also a2xx) can render directly to
memory, bypassing tiling, although it can't do this if blend, depth,
or a few other features of the pipeline are enabled.  This direct
memory mode can be faster for some sorts of operations, such as simple
blits.  In particular, this significantly speeds up XA by avoiding the
need to pull the entire dest pixmap into GMEM, render tiles, and write
it all back out again.  This should also speed up resource copy-region
and blit.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/a3xx/fd3_zsa.c
src/gallium/drivers/freedreno/freedreno_context.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.c
src/gallium/drivers/freedreno/freedreno_gmem.c
src/gallium/drivers/freedreno/freedreno_state.h

index 16ec959..1cb170a 100644 (file)
@@ -47,8 +47,15 @@ static void
 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
                struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
 {
+       enum a3xx_tile_mode tile_mode;
        unsigned i;
 
+       if (bin_w) {
+               tile_mode = TILE_32X32;
+       } else {
+               tile_mode = LINEAR;
+       }
+
        for (i = 0; i < 4; i++) {
                enum a3xx_color_fmt format = 0;
                enum a3xx_color_swap swap = WZYX;
@@ -58,23 +65,32 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
 
                if (i < nr_bufs) {
                        struct pipe_surface *psurf = bufs[i];
-                       struct fd_resource *res = fd_resource(psurf->texture);
 
+                       res = fd_resource(psurf->texture);
                        format = fd3_pipe2color(psurf->format);
                        swap = fd3_pipe2swap(psurf->format);
-                       stride = bin_w * res->cpp;
 
-                       if (bases) {
-                               base = bases[i] * res->cpp;
+                       if (bin_w) {
+                               stride = bin_w * res->cpp;
+
+                               if (bases) {
+                                       base = bases[i] * res->cpp;
+                               }
+                       } else {
+                               stride = res->pitch * res->cpp;
                        }
                }
 
                OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
                OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+                               A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
                                A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
-                               A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE_32X32) |
                                A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
-               OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
+               if (bin_w || (i >= nr_bufs)) {
+                       OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
+               } else {
+                       OUT_RELOCS(ring, res->bo, 0, 0, -1);
+               }
 
                OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
                OUT_RING(ring, A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(format));
@@ -381,6 +397,42 @@ update_vsc_pipe(struct fd_context *ctx)
        }
 }
 
+/* emit state for rendering directly to system memory (GMEM bypass), untiled: */
+static void
+fd3_emit_sysmem_prep(struct fd_context *ctx)
+{
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture);
+       struct fd_ringbuffer *ring = ctx->ring;
+
+       fd3_emit_restore(ctx);
+
+       OUT_PKT0(ring, REG_A3XX_RB_WINDOW_SIZE, 1);
+       OUT_RING(ring, A3XX_RB_WINDOW_SIZE_WIDTH(pfb->width) |
+                       A3XX_RB_WINDOW_SIZE_HEIGHT(pfb->height));
+
+       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
+
+       fd3_emit_rbrc_tile_state(ring,
+                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch));
+
+       /* no tiles here: zero window offset, scissor spans the whole framebuffer: */
+       OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
+       OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(0) |
+                       A3XX_PA_SC_WINDOW_OFFSET_Y(0));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+       OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+                       A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+       OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
+                       A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
+
+       OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+}
+
 /* before first tile */
 static void
 fd3_emit_tile_init(struct fd_context *ctx)
@@ -478,6 +530,7 @@ fd3_gmem_init(struct pipe_context *pctx)
 {
        struct fd_context *ctx = fd_context(pctx);
 
+       ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
        ctx->emit_tile_init = fd3_emit_tile_init;
        ctx->emit_tile_prep = fd3_emit_tile_prep;
        ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
index 857ab8f..7603465 100644 (file)
@@ -94,7 +94,5 @@ fd3_zsa_state_create(struct pipe_context *pctx,
                // TODO alpha_ref and alpha_test_enable??
        }
 
-       so->rb_render_control |= 0x2000;  /* ??? */
-
        return so;
 }
index 0f16568..44d525b 100644 (file)
@@ -83,6 +83,8 @@ fd_context_render(struct pipe_context *pctx)
 
        ctx->needs_flush = false;
        ctx->cleared = ctx->restore = ctx->resolve = 0;
+       ctx->gmem_reason = 0;
+       ctx->num_draws = 0;
 
        fd_resource(pfb->cbufs[0]->texture)->dirty = false;
        if (pfb->zsbuf)
index a6133c0..5475931 100644 (file)
@@ -117,6 +117,23 @@ struct fd_context {
 
        bool needs_flush;
 
+       /* To decide whether to render to system memory, keep track of the
+        * number of draws, and whether any of them require multisample,
+        * depth_test (or depth write), stencil_test, blending, and
+        * color_logic_op (since those functions are disabled when
+        * bypassing GMEM).
+        */
+       enum {
+               FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01,
+               FD_GMEM_DEPTH_ENABLED        = 0x02,
+               FD_GMEM_STENCIL_ENABLED      = 0x04,
+
+               FD_GMEM_MSAA_ENABLED         = 0x08,
+               FD_GMEM_BLEND_ENABLED        = 0x10,
+               FD_GMEM_LOGICOP_ENABLED      = 0x20,
+       } gmem_reason;
+       unsigned num_draws;
+
        struct fd_ringbuffer *ring;
        struct fd_ringmarker *draw_start, *draw_end;
 
@@ -186,6 +203,9 @@ struct fd_context {
        void (*emit_tile_gmem2mem)(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
                        uint32_t bin_w, uint32_t bin_h);
 
+       /* optional, for GMEM bypass: */
+       void (*emit_sysmem_prep)(struct fd_context *ctx);
+
        /* draw: */
        void (*draw)(struct fd_context *pctx, const struct pipe_draw_info *info);
        void (*clear)(struct fd_context *ctx, unsigned buffers,
index 2b7c168..dbdf573 100644 (file)
@@ -114,7 +114,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 {
        struct fd_context *ctx = fd_context(pctx);
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-       unsigned buffers;
+       unsigned i, buffers = 0;
 
        /* if we supported transform feedback, we'd have to disable this: */
        if (((ctx->scissor.maxx - ctx->scissor.minx) *
@@ -124,19 +124,40 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
        ctx->needs_flush = true;
 
-       fd_resource(pfb->cbufs[0]->texture)->dirty = true;
+       /*
+        * Figure out the buffers/features we need:
+        */
 
-       /* figure out the buffers we need: */
-       buffers = FD_BUFFER_COLOR;
        if (fd_depth_enabled(ctx)) {
                buffers |= FD_BUFFER_DEPTH;
                fd_resource(pfb->zsbuf->texture)->dirty = true;
+               ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
        }
+
        if (fd_stencil_enabled(ctx)) {
                buffers |= FD_BUFFER_STENCIL;
                fd_resource(pfb->zsbuf->texture)->dirty = true;
+               ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
        }
 
+       if (fd_logicop_enabled(ctx))
+               ctx->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;
+
+       for (i = 0; i < pfb->nr_cbufs; i++) {
+               struct pipe_resource *surf = pfb->cbufs[i]->texture;
+
+               fd_resource(surf)->dirty = true;
+               buffers |= FD_BUFFER_COLOR;
+
+               if (surf->nr_samples > 1)
+                       ctx->gmem_reason |= FD_GMEM_MSAA_ENABLED;
+
+               if (fd_blend_enabled(ctx, i))
+                       ctx->gmem_reason |= FD_GMEM_BLEND_ENABLED;
+       }
+
+       ctx->num_draws++;
+
        /* any buffers that haven't been cleared, we need to restore: */
        ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
        /* and any buffers used, need to be resolved: */
@@ -165,8 +186,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
        if (buffers & PIPE_CLEAR_COLOR)
                fd_resource(pfb->cbufs[0]->texture)->dirty = true;
 
-       if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
+       if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                fd_resource(pfb->zsbuf->texture)->dirty = true;
+               ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
+       }
 
        DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil,
                        util_format_name(pfb->cbufs[0]->format),
index 856e441..12633bd 100644 (file)
@@ -128,23 +128,11 @@ calculate_tiles(struct fd_context *ctx)
        gmem->height = height;
 }
 
-
-void
-fd_gmem_render_tiles(struct pipe_context *pctx)
+static void
+render_tiles(struct fd_context *ctx)
 {
-       struct fd_context *ctx = fd_context(pctx);
-       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
        struct fd_gmem_stateobj *gmem = &ctx->gmem;
-       uint32_t i, timestamp, yoff = 0;
-
-       calculate_tiles(ctx);
-
-       DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
-                       util_format_name(pfb->cbufs[0]->format),
-                       pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
-
-       /* mark the end of the clear/draw cmds before emitting per-tile cmds: */
-       fd_ringmarker_mark(ctx->draw_end);
+       uint32_t i, yoff = 0;
 
        yoff= gmem->miny;
 
@@ -184,6 +172,50 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 
                yoff += bh;
        }
+}
+
+static void
+render_sysmem(struct fd_context *ctx)
+{
+       ctx->emit_sysmem_prep(ctx);
+
+       /* one IB to the recorded draw cmds (draw_start..draw_end), no per-tile loop: */
+       OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+}
+
+void
+fd_gmem_render_tiles(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       uint32_t timestamp = 0;
+       bool sysmem = false;
+
+       if (ctx->emit_sysmem_prep) {
+               if (ctx->cleared || ctx->gmem_reason || (ctx->num_draws > 5)) {
+                       DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
+                               ctx->cleared, ctx->gmem_reason, ctx->num_draws);
+               } else {
+                       sysmem = true;
+               }
+       }
+
+       /* mark the end of the clear/draw cmds before emitting per-tile cmds: */
+       fd_ringmarker_mark(ctx->draw_end);
+
+       if (sysmem) {
+               DBG("rendering sysmem (%s/%s)",
+                       util_format_name(pfb->cbufs[0]->format),
+                       pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+               render_sysmem(ctx);
+       } else {
+               struct fd_gmem_stateobj *gmem = &ctx->gmem;
+               DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
+                       util_format_name(pfb->cbufs[0]->format),
+                       pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+               calculate_tiles(ctx);
+               render_tiles(ctx);
+       }
 
        /* GPU executes starting from tile cmds, which IB back to draw cmds: */
        fd_ringmarker_flush(ctx->draw_end);
index c966bdc..859299b 100644 (file)
 
 static inline bool fd_depth_enabled(struct fd_context *ctx)
 {
-       return ctx->zsa->depth.enabled;
+       return ctx->zsa && ctx->zsa->depth.enabled;
 }
 
 static inline bool fd_stencil_enabled(struct fd_context *ctx)
 {
-       return ctx->zsa->stencil[0].enabled;
+       return ctx->zsa && ctx->zsa->stencil[0].enabled;
+}
+
+static inline bool fd_logicop_enabled(struct fd_context *ctx)
+{
+       return ctx->blend && ctx->blend->logicop_enable;
+}
+
+static inline bool fd_blend_enabled(struct fd_context *ctx, unsigned n)
+{
+       return ctx->blend && ctx->blend->rt[n].blend_enable;
 }
 
 void fd_state_init(struct pipe_context *pctx);