uint64_t size;
};
-static bool radeon_bo_is_busy(struct radeon_bo *bo)
+static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
struct drm_radeon_gem_busy args = {0};

args.handle = bo->handle;
return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
&args, sizeof(args)) != 0;
}

-static void radeon_bo_wait_idle(struct radeon_bo *bo)
+static bool radeon_bo_is_busy(struct radeon_bo *bo)
+{
+ unsigned num_idle;
+ bool busy = false;
+
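+ /* Only real buffers have a GEM handle; a suballocated slab entry has
+ * bo->handle == 0 and must check the fences of the submissions it
+ * participated in. */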
+ if (bo->handle)
+ return radeon_real_bo_is_busy(bo);
+
+ pipe_mutex_lock(bo->rws->bo_fence_lock);
+ for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
+ if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
+ busy = true;
+ break;
+ }
+ radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
+ }
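+ /* Fences [0, num_idle) have signalled and were released above;
+ * compact the remaining fences to the front of the array. */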
+ memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
+ (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
+ bo->u.slab.num_fences -= num_idle;
+ pipe_mutex_unlock(bo->rws->bo_fence_lock);
+
+ return busy;
+}
+
+static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
struct drm_radeon_gem_wait_idle args = {0};

args.handle = bo->handle;
while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
&args, sizeof(args)) == -EBUSY);
}

+static void radeon_bo_wait_idle(struct radeon_bo *bo)
+{
+ if (bo->handle) {
+ radeon_real_bo_wait_idle(bo);
+ } else {
+ pipe_mutex_lock(bo->rws->bo_fence_lock);
+ while (bo->u.slab.num_fences) {
+ struct radeon_bo *fence = NULL;
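+ /* Take a reference so the fence stays valid while the lock is dropped. */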
+ radeon_bo_reference(&fence, bo->u.slab.fences[0]);
+ pipe_mutex_unlock(bo->rws->bo_fence_lock);
+
+ /* Wait without holding the fence lock. */
+ radeon_real_bo_wait_idle(fence);
+
+ pipe_mutex_lock(bo->rws->bo_fence_lock);
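+ /* The fence list may have changed while the lock was dropped; only
+ * pop the head if it is still the fence we waited on. */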
+ if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
+ radeon_bo_reference(&bo->u.slab.fences[0], NULL);
+ memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
+ (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
+ bo->u.slab.num_fences--;
+ }
+ radeon_bo_reference(&fence, NULL);
+ }
+ pipe_mutex_unlock(bo->rws->bo_fence_lock);
+ }
+}
+
static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
enum radeon_bo_usage usage)
{
for (i = 0; i < csc->num_relocs; i++)
p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
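+ /* Slab buffers had num_active_ioctls raised at flush time as well
+ * (see radeon_drm_cs_flush); balance it here like the relocs above. */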
+ for (i = 0; i < csc->num_slab_buffers; i++)
+ p_atomic_dec(&csc->slab_buffers[i].bo->num_active_ioctls);
radeon_cs_context_cleanup(csc);
}
util_queue_job_wait(&cs->flush_completed);
}
+/* Add the given fence to a slab buffer fence list.
+ *
+ * There is a potential race condition when a bo participates in submissions on
+ * two or more threads simultaneously. Since we do not know which of the
+ * submissions will be sent to the GPU first, we have to keep the fences
+ * of all submissions.
+ *
+ * However, fences that belong to submissions that have already returned from
+ * their respective ioctl do not have to be kept, because we know that they
+ * will signal earlier.
+ */
+static void radeon_bo_slab_fence(struct radeon_bo *bo, struct radeon_bo *fence)
+{
+ unsigned dst;
+
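+ /* The fence must belong to a submission that is still in flight;
+ * a zero reference count would mean its ioctl has already returned
+ * (see the comment above this function). */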
+ assert(fence->num_cs_references);
+
+ /* Cleanup older fences */
+ dst = 0;
+ for (unsigned src = 0; src < bo->u.slab.num_fences; ++src) {
+ if (bo->u.slab.fences[src]->num_cs_references) {
+ bo->u.slab.fences[dst] = bo->u.slab.fences[src];
+ dst++;
+ } else {
+ radeon_bo_reference(&bo->u.slab.fences[src], NULL);
+ }
+ }
+ bo->u.slab.num_fences = dst;
+
+ /* Check available space for the new fence */
+ if (bo->u.slab.num_fences >= bo->u.slab.max_fences) {
+ unsigned new_max_fences = bo->u.slab.max_fences + 1;
+ struct radeon_bo **new_fences = REALLOC(bo->u.slab.fences,
+ bo->u.slab.max_fences * sizeof(*new_fences),
+ new_max_fences * sizeof(*new_fences));
+ if (!new_fences) {
+ fprintf(stderr, "radeon_bo_slab_fence: allocation failure, dropping fence\n");
+ return;
+ }
+
+ bo->u.slab.fences = new_fences;
+ bo->u.slab.max_fences = new_max_fences;
+ }
+
+ /* Add the new fence */
+ bo->u.slab.fences[bo->u.slab.num_fences] = NULL;
+ radeon_bo_reference(&bo->u.slab.fences[bo->u.slab.num_fences], fence);
+ bo->u.slab.num_fences++;
+}
+
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
- struct pipe_fence_handle **fence)
+ struct pipe_fence_handle **pfence)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_cs_context *tmp;
fprintf(stderr, "radeon: command stream overflowed\n");
}
- if (fence) {
- if (cs->next_fence) {
- radeon_fence_reference(fence, cs->next_fence);
- } else {
- radeon_fence_reference(fence, NULL);
- *fence = radeon_cs_create_fence(rcs);
- }
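+ /* A fence is needed even when the caller does not request one, so
+ * that the slab buffers of this submission can be fenced. */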
+ if (pfence || cs->csc->num_slab_buffers) {
+ struct pipe_fence_handle *fence;
+
+ if (cs->next_fence) {
+ fence = cs->next_fence;
+ cs->next_fence = NULL;
+ } else {
+ fence = radeon_cs_create_fence(rcs);
+ }
+
+ if (pfence)
+ radeon_fence_reference(pfence, fence);
+
+ pipe_mutex_lock(cs->ws->bo_fence_lock);
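+ /* Under bo_fence_lock, attach the fence to each slab buffer of this
+ * submission and mark the buffer as having an ioctl in flight. */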
+ for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
+ struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
+ p_atomic_inc(&bo->num_active_ioctls);
+ radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
+ }
+ pipe_mutex_unlock(cs->ws->bo_fence_lock);
+
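+ /* Drop the local reference; the slab fence lists and *pfence (if set)
+ * hold their own references now. */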
+ radeon_fence_reference(&fence, NULL);
+ } else {
+ radeon_fence_reference(&cs->next_fence, NULL);
}
- radeon_fence_reference(&cs->next_fence, NULL);
radeon_drm_cs_sync_flush(rcs);