OSDN Git Service

[intel] Clean up cliprect handling in intel drivers.
author Eric Anholt <eric@anholt.net>
Wed, 9 Jan 2008 20:33:39 +0000 (12:33 -0800)
committer Eric Anholt <eric@anholt.net>
Wed, 9 Jan 2008 22:41:40 +0000 (14:41 -0800)
In particular, batch buffers are no longer flushed when switching from
CLIPRECTS to NO_CLIPRECTS or vice versa, and 965 just uses DRM cliprect
handling for primitives instead of trying to sneak in its own to avoid the
DRM stuff. The disadvantage is that we will re-execute state updates per
cliprect, but the advantage is that we will be able to accumulate larger
batch buffers; frequent batch flushing was proving to be a major overhead.

src/mesa/drivers/dri/i915/intel_tris.c
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/intel/intel_batchbuffer.c
src/mesa/drivers/dri/intel/intel_batchbuffer.h
src/mesa/drivers/dri/intel/intel_buffers.c

index 6ccb502..8d27e3c 100644 (file)
@@ -129,11 +129,11 @@ void
 intelWrapInlinePrimitive(struct intel_context *intel)
 {
    GLuint prim = intel->prim.primitive;
-   GLuint batchflags = intel->batch->flags;
+   GLuint cliprects_enable = intel->batch->cliprects_enable;
 
    intel_flush_inline_primitive(intel);
    intel_batchbuffer_flush(intel->batch);
-   intelStartInlinePrimitive(intel, prim, batchflags);  /* ??? */
+   intelStartInlinePrimitive(intel, prim, cliprects_enable);  /* ??? */
 }
 
 GLuint *
index 72ed7cd..9225748 100644 (file)
@@ -123,25 +123,6 @@ static GLuint trim(GLenum prim, GLuint length)
 }
 
 
-static void brw_emit_cliprect( struct brw_context *brw, 
-                              const drm_clip_rect_t *rect )
-{
-   struct brw_drawrect bdr;
-
-   bdr.header.opcode = CMD_DRAW_RECT;
-   bdr.header.length = sizeof(bdr)/4 - 2;
-   bdr.xmin = rect->x1;
-   bdr.xmax = rect->x2 - 1;
-   bdr.ymin = rect->y1;
-   bdr.ymax = rect->y2 - 1;
-   bdr.xorg = brw->intel.drawX;
-   bdr.yorg = brw->intel.drawY;
-
-   intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr), 
-                          INTEL_BATCH_NO_CLIPRECTS);
-}
-
-
 static void brw_emit_prim( struct brw_context *brw, 
                           const struct _mesa_prim *prim )
 
@@ -165,8 +146,8 @@ static void brw_emit_prim( struct brw_context *brw,
    prim_packet.base_vert_location = 0;
 
    if (prim_packet.verts_per_instance) {
-      intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), 
-                             INTEL_BATCH_NO_CLIPRECTS);
+      intel_batchbuffer_data( brw->intel.batch, &prim_packet,
+                             sizeof(prim_packet), INTEL_BATCH_CLIPRECTS);
    }
 }
 
@@ -270,7 +251,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    struct intel_context *intel = intel_context(ctx);
    struct brw_context *brw = brw_context(ctx);
    GLboolean retval = GL_FALSE;
-   GLuint i, j;
+   GLuint i;
 
    if (ctx->NewState)
       _mesa_update_state( ctx );
@@ -320,31 +301,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
         goto out;
       }
 
-      /* For single cliprect, state is already emitted: 
-       */
-      if (brw->intel.numClipRects == 1) {
-        for (i = 0; i < nr_prims; i++) {
-           brw_emit_prim(brw, &prim[i]);   
-        }
-      }
-      else {
-        /* Otherwise, explicitly do the cliprects at this point:
-         */
-          GLuint nprims = 0;
-        for (j = 0; j < brw->intel.numClipRects; j++) {
-           brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);
-
-           /* Emit prims to batchbuffer: 
-            */
-           for (i = 0; i < nr_prims; i++) {
-              brw_emit_prim(brw, &prim[i]);   
-
-          if (++nprims == VBO_MAX_PRIM) {
-              intel_batchbuffer_flush(brw->intel.batch);
-              nprims = 0;
-          }
-           }
-        }
+      for (i = 0; i < nr_prims; i++) {
+        brw_emit_prim(brw, &prim[i]);
       }
 
       retval = GL_TRUE;
index cbd6d72..7ad9a1a 100644 (file)
@@ -87,6 +87,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
    batch->ptr = batch->map;
    batch->dirty_state = ~0;
    batch->id = batch->intel->batch_id++;
+   batch->cliprects_enable = INTEL_BATCH_NO_CLIPRECTS;
 }
 
 struct intel_batchbuffer *
@@ -124,8 +125,7 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
  */
 static void
 do_flush_locked(struct intel_batchbuffer *batch,
-               GLuint used,
-               GLboolean ignore_cliprects, GLboolean allow_unlock)
+               GLuint used, GLboolean allow_unlock)
 {
    struct intel_context *intel = batch->intel;
    void *start;
@@ -136,28 +136,33 @@ do_flush_locked(struct intel_batchbuffer *batch,
 
    batch->map = NULL;
    batch->ptr = NULL;
-   batch->flags = 0;
 
    /* Throw away non-effective packets.  Won't work once we have
     * hardware contexts which would preserve statechanges beyond a
     * single buffer.
     */
 
-   if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
+   if (!(intel->numClipRects == 0 &&
+        batch->cliprects_enable == INTEL_BATCH_CLIPRECTS)) {
       if (intel->ttm == GL_TRUE) {
         intel_exec_ioctl(batch->intel,
-                         used, ignore_cliprects, allow_unlock,
+                         used,
+                         batch->cliprects_enable == INTEL_BATCH_NO_CLIPRECTS,
+                         allow_unlock,
                          start, count, &batch->last_fence);
       } else {
         intel_batch_ioctl(batch->intel,
                           batch->buf->offset,
-                          used, ignore_cliprects, allow_unlock);
+                          used,
+                          batch->cliprects_enable == INTEL_BATCH_NO_CLIPRECTS,
+                          allow_unlock);
       }
    }
       
    dri_post_submit(batch->buf, &batch->last_fence);
 
-   if (intel->numClipRects == 0 && !ignore_cliprects) {
+   if (intel->numClipRects == 0 &&
+       batch->cliprects_enable == INTEL_BATCH_CLIPRECTS) {
       if (allow_unlock) {
         /* If we are not doing any actual user-visible rendering,
          * do a sched_yield to keep the app from pegging the cpu while
@@ -212,9 +217,8 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
    if (!was_locked)
       LOCK_HARDWARE(intel);
 
-   do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS),
-                  GL_FALSE);
-     
+   do_flush_locked(batch, used, GL_FALSE);
+
    if (!was_locked)
       UNLOCK_HARDWARE(intel);
 
@@ -258,10 +262,11 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                       const void *data, GLuint bytes, GLuint flags)
+                       const void *data, GLuint bytes,
+                      enum cliprects_enable cliprects_enable)
 {
    assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, flags);
+   intel_batchbuffer_require_space(batch, bytes, cliprects_enable);
    __memcpy(batch->ptr, data, bytes);
    batch->ptr += bytes;
 }
index 1aa86ae..5b6e0a1 100644 (file)
@@ -10,8 +10,10 @@ struct intel_context;
 #define BATCH_SZ 16384
 #define BATCH_RESERVED 16
 
-#define INTEL_BATCH_NO_CLIPRECTS 0x1
-#define INTEL_BATCH_CLIPRECTS    0x2
+enum cliprects_enable {
+   INTEL_BATCH_CLIPRECTS = 0,
+   INTEL_BATCH_NO_CLIPRECTS = 1
+};
 
 struct intel_batchbuffer
 {
@@ -19,11 +21,12 @@ struct intel_batchbuffer
 
    dri_bo *buf;
    dri_fence *last_fence;
-   GLuint flags;
 
    GLubyte *map;
    GLubyte *ptr;
 
+   enum cliprects_enable cliprects_enable;
+
    GLuint size;
 
    GLuint dirty_state;
@@ -48,7 +51,8 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
  * intel_buffer_dword() calls.
  */
 void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                            const void *data, GLuint bytes, GLuint flags);
+                            const void *data, GLuint bytes,
+                           enum cliprects_enable cliprects_enable);
 
 void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
                                      GLuint bytes);
@@ -80,29 +84,37 @@ intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
 
 static INLINE void
 intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
-                                GLuint sz, GLuint flags)
+                                GLuint sz,
+                               enum cliprects_enable cliprects_enable)
 {
    assert(sz < batch->size - 8);
-   if (intel_batchbuffer_space(batch) < sz ||
-       (batch->flags != 0 && flags != 0 && batch->flags != flags))
+   if (intel_batchbuffer_space(batch) < sz)
       intel_batchbuffer_flush(batch);
 
-   batch->flags |= flags;
+   /* Upgrade the buffer to being looped over per cliprect if this batch
+    * emit needs it.  The code used to emit a batch whenever the
+    * cliprects_enable was changed, but reducing the overhead of frequent
+    * batch flushing is more important than reducing state parsing,
+    * particularly as we move towards private backbuffers and the number
+    * of cliprects always being 1 except at swap.
+    */
+   if (cliprects_enable == INTEL_BATCH_CLIPRECTS)
+      batch->cliprects_enable = INTEL_BATCH_CLIPRECTS;
 }
 
 /* Here are the crusty old macros, to be removed:
  */
 #define BATCH_LOCALS
 
-#define BEGIN_BATCH(n, flags) do {                             \
-   intel_batchbuffer_require_space(intel->batch, (n)*4, flags);        \
+#define BEGIN_BATCH(n, cliprects_enable) do {                          \
+   intel_batchbuffer_require_space(intel->batch, (n)*4, cliprects_enable); \
 } while (0)
 
 #define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
 
-#define OUT_RELOC(buf, flags, delta) do {                              \
+#define OUT_RELOC(buf, cliprects_enable, delta) do {                   \
    assert((delta) >= 0);                                               \
-   intel_batchbuffer_emit_reloc(intel->batch, buf, flags, delta);      \
+   intel_batchbuffer_emit_reloc(intel->batch, buf, cliprects_enable, delta); \
 } while (0)
 
 #define ADVANCE_BATCH() do { } while(0)
index 2ea8d68..73872a9 100644 (file)
@@ -135,6 +135,9 @@ intel_readbuf_region(struct intel_context *intel)
 static void
 intelSetRenderbufferClipRects(struct intel_context *intel)
 {
+   /* flush batch since pClipRects may change */
+   intel_batchbuffer_flush(intel->batch);
+
    assert(intel->ctx.DrawBuffer->Width > 0);
    assert(intel->ctx.DrawBuffer->Height > 0);
    intel->fboRect.x1 = 0;
@@ -160,6 +163,9 @@ intelSetFrontClipRects(struct intel_context *intel)
    if (!dPriv)
       return;
 
+   /* flush batch since pClipRects may change */
+   intel_batchbuffer_flush(intel->batch);
+
    intel->numClipRects = dPriv->numClipRects;
    intel->pClipRects = dPriv->pClipRects;
    intel->drawX = dPriv->x;
@@ -179,6 +185,9 @@ intelSetBackClipRects(struct intel_context *intel)
    if (!dPriv)
       return;
 
+   /* flush batch since pClipRects may change */
+   intel_batchbuffer_flush(intel->batch);
+
    intel_fb = dPriv->driverPrivate;
 
    if (intel_fb->pf_active || dPriv->numBackClipRects == 0) {