OSDN Git Service

drm/vc4: Expose dma-buf fences for V3D rendering.
authorEric Anholt <eric@anholt.net>
Wed, 12 Apr 2017 19:12:02 +0000 (12:12 -0700)
committerEric Anholt <eric@anholt.net>
Thu, 13 Apr 2017 18:00:28 +0000 (11:00 -0700)
This is needed for proper synchronization with display on another DRM
device (pl111 or tinydrm) with buffers produced by vc4 V3D.  Fixes the
new igt vc4_dmabuf_poll testcase, and rendering of one of the glmark2
desktop tests on pl111+vc4.

This doesn't yet introduce waits on another device's fences before
vc4's rendering/display, because I don't have testcases for them.

v2: Reuse dma_fence_free(), retitle commit message to clarify that
    it's not a full dma-buf fencing implementation yet.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170412191202.22740-6-eric@anholt.net
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/vc4/Makefile
drivers/gpu/drm/vc4/vc4_bo.c
drivers/gpu/drm/vc4/vc4_drv.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_fence.c [new file with mode: 0644]
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_irq.c

index 61f45d1..ab687fb 100644 (file)
@@ -9,6 +9,7 @@ vc4-y := \
        vc4_drv.o \
        vc4_dpi.o \
        vc4_dsi.o \
+       vc4_fence.o \
        vc4_kms.o \
        vc4_gem.o \
        vc4_hdmi.o \
index af29432..80b2f9e 100644 (file)
@@ -19,6 +19,8 @@
  * rendering can return quickly.
  */
 
+#include <linux/dma-buf.h>
+
 #include "vc4_drv.h"
 #include "uapi/drm/vc4_drm.h"
 
@@ -88,6 +90,10 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
 
        vc4->bo_stats.num_allocated--;
        vc4->bo_stats.size_allocated -= obj->size;
+
+       if (bo->resv == &bo->_resv)
+               reservation_object_fini(bo->resv);
+
        drm_gem_cma_free_object(obj);
 }
 
@@ -244,8 +250,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
                        return ERR_PTR(-ENOMEM);
                }
        }
+       bo = to_vc4_bo(&cma_obj->base);
 
-       return to_vc4_bo(&cma_obj->base);
+       bo->resv = &bo->_resv;
+       reservation_object_init(bo->resv);
+
+       return bo;
 }
 
 int vc4_dumb_create(struct drm_file *file_priv,
@@ -369,6 +379,13 @@ static void vc4_bo_cache_time_timer(unsigned long data)
        schedule_work(&vc4->bo_cache.time_work);
 }
 
+struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj)
+{
+       struct vc4_bo *bo = to_vc4_bo(obj);
+
+       return bo->resv;
+}
+
 struct dma_buf *
 vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
 {
@@ -440,6 +457,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj)
        return drm_gem_cma_prime_vmap(obj);
 }
 
+struct drm_gem_object *
+vc4_prime_import_sg_table(struct drm_device *dev,
+                         struct dma_buf_attachment *attach,
+                         struct sg_table *sgt)
+{
+       struct drm_gem_object *obj;
+       struct vc4_bo *bo;
+
+       obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
+       if (IS_ERR(obj))
+               return obj;
+
+       bo = to_vc4_bo(obj);
+       bo->resv = attach->dmabuf->resv;
+
+       return obj;
+}
+
 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv)
 {
index 61e674b..92fb9a4 100644 (file)
@@ -168,8 +168,9 @@ static struct drm_driver vc4_drm_driver = {
        .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
        .gem_prime_import = drm_gem_prime_import,
        .gem_prime_export = vc4_prime_export,
+       .gem_prime_res_obj = vc4_prime_res_obj,
        .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
-       .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+       .gem_prime_import_sg_table = vc4_prime_import_sg_table,
        .gem_prime_vmap = vc4_prime_vmap,
        .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
        .gem_prime_mmap = vc4_prime_mmap,
index dffce62..81d2bc0 100644 (file)
@@ -8,7 +8,9 @@
 
 #include "drmP.h"
 #include "drm_gem_cma_helper.h"
+
 
+#include <linux/reservation.h>
 #include <drm/drm_encoder.h>
 
 struct vc4_dev {
@@ -56,6 +58,8 @@ struct vc4_dev {
        /* Protects bo_cache and the BO stats. */
        struct mutex bo_lock;
 
+       uint64_t dma_fence_context;
+
        /* Sequence number for the last job queued in bin_job_list.
         * Starts at 0 (no jobs emitted).
         */
@@ -150,6 +154,10 @@ struct vc4_bo {
         * DRM_IOCTL_VC4_CREATE_SHADER_BO.
         */
        struct vc4_validated_shader_info *validated_shader;
+
+       /* normally (resv == &_resv) except for imported bo's */
+       struct reservation_object *resv;
+       struct reservation_object _resv;
 };
 
 static inline struct vc4_bo *
@@ -158,6 +166,19 @@ to_vc4_bo(struct drm_gem_object *bo)
        return (struct vc4_bo *)bo;
 }
 
+struct vc4_fence {
+       struct dma_fence base;
+       struct drm_device *dev;
+       /* vc4 seqno for signaled() test */
+       uint64_t seqno;
+};
+
+static inline struct vc4_fence *
+to_vc4_fence(struct dma_fence *fence)
+{
+       return (struct vc4_fence *)fence;
+}
+
 struct vc4_seqno_cb {
        struct work_struct work;
        uint64_t seqno;
@@ -230,6 +251,8 @@ struct vc4_exec_info {
        /* Latest write_seqno of any BO that binning depends on. */
        uint64_t bin_dep_seqno;
 
+       struct dma_fence *fence;
+
        /* Last current addresses the hardware was processing when the
         * hangcheck timer checked on us.
         */
@@ -436,7 +459,11 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
                             struct drm_file *file_priv);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
+                                                struct dma_buf_attachment *attach,
+                                                struct sg_table *sgt);
 void *vc4_prime_vmap(struct drm_gem_object *obj);
 void vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
@@ -468,6 +495,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused);
 extern struct platform_driver vc4_dsi_driver;
 int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
 
+/* vc4_fence.c */
+extern const struct dma_fence_ops vc4_fence_ops;
+
 /* vc4_gem.c */
 void vc4_gem_init(struct drm_device *dev);
 void vc4_gem_destroy(struct drm_device *dev);
diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c
new file mode 100644 (file)
index 0000000..dbf5a5a
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_drv.h"
+
+static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
+{
+       return "vc4";
+}
+
+static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
+{
+       return "vc4-v3d";
+}
+
+static bool vc4_fence_enable_signaling(struct dma_fence *fence)
+{
+       return true;
+}
+
+static bool vc4_fence_signaled(struct dma_fence *fence)
+{
+       struct vc4_fence *f = to_vc4_fence(fence);
+       struct vc4_dev *vc4 = to_vc4_dev(f->dev);
+
+       return vc4->finished_seqno >= f->seqno;
+}
+
+const struct dma_fence_ops vc4_fence_ops = {
+       .get_driver_name = vc4_fence_get_driver_name,
+       .get_timeline_name = vc4_fence_get_timeline_name,
+       .enable_signaling = vc4_fence_enable_signaling,
+       .signaled = vc4_fence_signaled,
+       .wait = dma_fence_default_wait,
+       .release = dma_fence_free,
+};
index e9c381c..a1a0104 100644 (file)
@@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
        for (i = 0; i < exec->bo_count; i++) {
                bo = to_vc4_bo(&exec->bo[i]->base);
                bo->seqno = seqno;
+
+               reservation_object_add_shared_fence(bo->resv, exec->fence);
        }
 
        list_for_each_entry(bo, &exec->unref_list, unref_head) {
@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
        for (i = 0; i < exec->rcl_write_bo_count; i++) {
                bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
                bo->write_seqno = seqno;
+
+               reservation_object_add_excl_fence(bo->resv, exec->fence);
+       }
+}
+
+static void
+vc4_unlock_bo_reservations(struct drm_device *dev,
+                          struct vc4_exec_info *exec,
+                          struct ww_acquire_ctx *acquire_ctx)
+{
+       int i;
+
+       for (i = 0; i < exec->bo_count; i++) {
+               struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+               ww_mutex_unlock(&bo->resv->lock);
        }
+
+       ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list).  They're entirely private
+ * to vc4, so we don't attach dma-buf fences to them.
+ */
+static int
+vc4_lock_bo_reservations(struct drm_device *dev,
+                        struct vc4_exec_info *exec,
+                        struct ww_acquire_ctx *acquire_ctx)
+{
+       int contended_lock = -1;
+       int i, ret;
+       struct vc4_bo *bo;
+
+       ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+       if (contended_lock != -1) {
+               bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+               ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+                                                      acquire_ctx);
+               if (ret) {
+                       ww_acquire_done(acquire_ctx);
+                       return ret;
+               }
+       }
+
+       for (i = 0; i < exec->bo_count; i++) {
+               if (i == contended_lock)
+                       continue;
+
+               bo = to_vc4_bo(&exec->bo[i]->base);
+
+               ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+               if (ret) {
+                       int j;
+
+                       for (j = 0; j < i; j++) {
+                               bo = to_vc4_bo(&exec->bo[j]->base);
+                               ww_mutex_unlock(&bo->resv->lock);
+                       }
+
+                       if (contended_lock != -1 && contended_lock >= i) {
+                               bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+
+                               ww_mutex_unlock(&bo->resv->lock);
+                       }
+
+                       if (ret == -EDEADLK) {
+                               contended_lock = i;
+                               goto retry;
+                       }
+
+                       ww_acquire_done(acquire_ctx);
+                       return ret;
+               }
+       }
+
+       ww_acquire_done(acquire_ctx);
+
+       /* Reserve space for our shared (read-only) fence references,
+        * before we commit the CL to the hardware.
+        */
+       for (i = 0; i < exec->bo_count; i++) {
+               bo = to_vc4_bo(&exec->bo[i]->base);
+
+               ret = reservation_object_reserve_shared(bo->resv);
+               if (ret) {
+                       vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+                       return ret;
+               }
+       }
+
+       return 0;
 }
 
 /* Queues a struct vc4_exec_info for execution.  If no job is
@@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
  * then bump the end address.  That's a change for a later date,
  * though.
  */
-static void
-vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+static int
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
+                struct ww_acquire_ctx *acquire_ctx)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint64_t seqno;
        unsigned long irqflags;
+       struct vc4_fence *fence;
+
+       fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+       if (!fence)
+               return -ENOMEM;
+       fence->dev = dev;
 
        spin_lock_irqsave(&vc4->job_lock, irqflags);
 
        seqno = ++vc4->emit_seqno;
        exec->seqno = seqno;
+
+       dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
+                      vc4->dma_fence_context, exec->seqno);
+       fence->seqno = exec->seqno;
+       exec->fence = &fence->base;
+
        vc4_update_bo_seqnos(exec, seqno);
 
+       vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+
        list_add_tail(&exec->head, &vc4->bin_job_list);
 
        /* If no job was executing, kick ours off.  Otherwise, it'll
@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
        }
 
        spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+       return 0;
 }
 
 /**
@@ -707,6 +822,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        unsigned i;
 
+       /* If we got force-completed because of GPU reset rather than
+        * through our IRQ handler, signal the fence now.
+        */
+       if (exec->fence)
+               dma_fence_signal(exec->fence);
+
        if (exec->bo) {
                for (i = 0; i < exec->bo_count; i++)
                        drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
@@ -874,6 +995,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct drm_vc4_submit_cl *args = data;
        struct vc4_exec_info *exec;
+       struct ww_acquire_ctx acquire_ctx;
        int ret = 0;
 
        if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
@@ -916,12 +1038,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
        if (ret)
                goto fail;
 
+       ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+       if (ret)
+               goto fail;
+
        /* Clear this out of the struct we'll be putting in the queue,
         * since it's part of our stack.
         */
        exec->args = NULL;
 
-       vc4_queue_submit(dev, exec);
+       ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+       if (ret)
+               goto fail;
 
        /* Return the seqno for our job. */
        args->seqno = vc4->emit_seqno;
@@ -939,6 +1067,8 @@ vc4_gem_init(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+       vc4->dma_fence_context = dma_fence_context_alloc(1);
+
        INIT_LIST_HEAD(&vc4->bin_job_list);
        INIT_LIST_HEAD(&vc4->render_job_list);
        INIT_LIST_HEAD(&vc4->job_done_list);
index cdc6e67..1384af9 100644 (file)
@@ -142,6 +142,10 @@ vc4_irq_finish_render_job(struct drm_device *dev)
 
        vc4->finished_seqno++;
        list_move_tail(&exec->head, &vc4->job_done_list);
+       if (exec->fence) {
+               dma_fence_signal_locked(exec->fence);
+               exec->fence = NULL;
+       }
        vc4_submit_next_render_job(dev);
 
        wake_up_all(&vc4->job_wait_queue);