From 716ab859c3f250601fbeb878b0849954cc6139f0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 23 Jul 2018 10:46:12 -0400 Subject: [PATCH] freedreno/msm: "stateobj" support Adds support for "state object" cmdstream buffers which can be constructed once, and re-used many times. This enables the use for CP_SET_DRAW_STATE packets on newer hardware, to lower the CPU overhead. Signed-off-by: Rob Clark --- freedreno/freedreno_ringbuffer.c | 3 +- freedreno/msm/msm_ringbuffer.c | 89 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/freedreno/freedreno_ringbuffer.c b/freedreno/freedreno_ringbuffer.c index a0be2dbb..eb6cd0ca 100644 --- a/freedreno/freedreno_ringbuffer.c +++ b/freedreno/freedreno_ringbuffer.c @@ -71,7 +71,8 @@ fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) void fd_ringbuffer_del(struct fd_ringbuffer *ring) { - fd_ringbuffer_reset(ring); + if (!(ring->flags & FD_RINGBUFFER_OBJECT)) + fd_ringbuffer_reset(ring); ring->funcs->destroy(ring); } diff --git a/freedreno/msm/msm_ringbuffer.c b/freedreno/msm/msm_ringbuffer.c index 156c23c6..ec4b30fe 100644 --- a/freedreno/msm/msm_ringbuffer.c +++ b/freedreno/msm/msm_ringbuffer.c @@ -50,6 +50,8 @@ struct msm_cmd { struct msm_ringbuffer { struct fd_ringbuffer base; + atomic_t refcnt; + /* submit ioctl related tables: * Note that bos and cmds are tracked by the parent ringbuffer, since * that is global to the submit ioctl call. The reloc's table is tracked @@ -95,6 +97,9 @@ static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x) return (struct msm_ringbuffer *)x; } +static void msm_ringbuffer_unref(struct fd_ringbuffer *ring); +static void msm_ringbuffer_ref(struct fd_ringbuffer *ring); + #define INIT_SIZE 0x1000 static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; @@ -307,6 +312,8 @@ static void flush_reset(struct fd_ringbuffer *ring) /* for each of the cmd buffers, clear their reloc's: */ for (i = 0; i < msm_ring->submit.nr_cmds; i++) { struct msm_cmd *target_cmd = msm_ring->cmds[i]; + if (target_cmd->ring->flags & FD_RINGBUFFER_OBJECT) + continue; target_cmd->nr_relocs = 0; } @@ -369,6 +376,31 @@ static void dump_submit(struct msm_ringbuffer *msm_ring) } } +static struct drm_msm_gem_submit_reloc * +handle_stateobj_relocs(struct fd_ringbuffer *parent, struct fd_ringbuffer *stateobj, + struct drm_msm_gem_submit_reloc *orig_relocs, unsigned nr_relocs) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(stateobj); + struct drm_msm_gem_submit_reloc *relocs = malloc(nr_relocs * sizeof(*relocs)); + unsigned i; + + for (i = 0; i < nr_relocs; i++) { + unsigned idx = orig_relocs[i].reloc_idx; + struct fd_bo *bo = msm_ring->bos[idx]; + unsigned flags = 0; + + if (msm_ring->submit.bos[idx].flags & MSM_SUBMIT_BO_READ) + flags |= FD_RELOC_READ; + if (msm_ring->submit.bos[idx].flags & MSM_SUBMIT_BO_WRITE) + flags |= FD_RELOC_WRITE; + + relocs[i] = orig_relocs[i]; + relocs[i].reloc_idx = bo2idx(parent, bo, flags); + } + + return relocs; +} + static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start, int in_fence_fd, int *out_fence_fd) { @@ -380,6 +412,8 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start uint32_t i; int ret; + assert(!ring->parent); + if (in_fence_fd != -1) { req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; req.fence_fd = in_fence_fd; @@ -397,8 +431,22 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start struct msm_cmd *msm_cmd = msm_ring->cmds[i]; uint32_t a = find_next_reloc_idx(msm_cmd, 0, cmd->submit_offset); uint32_t b = find_next_reloc_idx(msm_cmd, a, cmd->submit_offset + cmd->size); - cmd->relocs = VOID2U64(&msm_cmd->relocs[a]); - cmd->nr_relocs = (b > a) ? b - a : 0; + struct drm_msm_gem_submit_reloc *relocs = &msm_cmd->relocs[a]; + unsigned nr_relocs = (b > a) ? b - a : 0; + + /* for reusable stateobjs, the reloc table has reloc_idx that + * points into it's own private bos table, rather than the global + * bos table used for the submit, so we need to add the stateobj's + * bos to the global table and construct new relocs table with + * corresponding reloc_idx + */ + if (msm_cmd->ring->flags & FD_RINGBUFFER_OBJECT) { + relocs = handle_stateobj_relocs(ring, msm_cmd->ring, + relocs, nr_relocs); + } + + cmd->relocs = VOID2U64(relocs); + cmd->nr_relocs = nr_relocs; } /* needs to be after get_cmd() as that could create bos/cmds table: */ @@ -426,6 +474,16 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start } } + /* free dynamically constructed stateobj relocs tables: */ + for (i = 0; i < msm_ring->submit.nr_cmds; i++) { + struct drm_msm_gem_submit_cmd *cmd = &msm_ring->submit.cmds[i]; + struct msm_cmd *msm_cmd = msm_ring->cmds[i]; + if (msm_cmd->ring->flags & FD_RINGBUFFER_OBJECT) { + msm_ringbuffer_unref(msm_cmd->ring); + free(U642VOID(cmd->relocs)); + } + } + flush_reset(ring); return ret; @@ -518,7 +576,8 @@ static uint32_t msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, */ size = cmd->size; } else { - get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF); + struct fd_ringbuffer *parent = ring->parent ? ring->parent : ring; + get_cmd(parent, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF); } msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ @@ -527,6 +586,14 @@ static uint32_t msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, .offset = submit_offset, }); + /* Unlike traditional ringbuffers which are deleted as a set (after + * being flushed), mesa can't really guarantee that a stateobj isn't + * destroyed after emitted but before flush, so we must hold a ref: + */ + if (target->flags & FD_RINGBUFFER_OBJECT) { + msm_ringbuffer_ref(target); + } + return size; } @@ -535,10 +602,13 @@ static uint32_t msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) return to_msm_ringbuffer(ring)->cmd_count; } -static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring) +static void msm_ringbuffer_unref(struct fd_ringbuffer *ring) { struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + if (!atomic_dec_and_test(&msm_ring->refcnt)) + return; + flush_reset(ring); delete_cmds(msm_ring); @@ -549,6 +619,12 @@ static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring) free(msm_ring); } +static void msm_ringbuffer_ref(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + atomic_inc(&msm_ring->refcnt); +} + static const struct fd_ringbuffer_funcs funcs = { .hostptr = msm_ringbuffer_hostptr, .flush = msm_ringbuffer_flush, @@ -557,7 +633,7 @@ static const struct fd_ringbuffer_funcs funcs = { .emit_reloc = msm_ringbuffer_emit_reloc, .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, .cmd_count = msm_ringbuffer_cmd_count, - .destroy = msm_ringbuffer_destroy, + .destroy = msm_ringbuffer_unref, }; drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe, @@ -566,8 +642,6 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe, struct msm_ringbuffer *msm_ring; struct fd_ringbuffer *ring; - assert(!flags); - msm_ring = calloc(1, sizeof(*msm_ring)); if (!msm_ring) { ERROR_MSG("allocation failed"); @@ -582,6 +656,7 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe, list_inithead(&msm_ring->cmd_list); msm_ring->seqno = ++to_msm_device(pipe->dev)->ring_cnt; + atomic_set(&msm_ring->refcnt, 1); ring = &msm_ring->base; ring->funcs = &funcs; -- 2.11.0