OSDN Git Service

drm/msm: Add kernel side submit profiling and tracing
author Jordan Crouse <jcrouse@codeaurora.org>
Wed, 28 Jun 2017 15:55:09 +0000 (09:55 -0600)
committer Jordan Crouse <jcrouse@codeaurora.org>
Mon, 3 Jul 2017 19:40:26 +0000 (13:40 -0600)
Record the GPU always-on timer value at the start and end of a
submission on the ringbuffer. Since the timer runs at a constant
19.2 MHz this is a handy way of tracking how long each
submission takes.

The timer values are recorded in the memptrs. Each ringbuffer is
given a circular list of 128 entries to store the event ticks;
this should be enough to avoid running out of room even when the
ring is completely full of submissions.

Add trace events for the user to track when submissions are
queued, submitted to the ringbuffer and retired. The submitted
trace point shows the GPU ticks and the current kernel time at
submit time (as read by the CPU) and the retired trace event shows
the GPU ticks at submission start/end as read by the GPU. Taken
together these two events can provide a pretty close match between
the current GPU time and the kernel time which is handy for tracing
tools that try to match up the various kernel events with one
another.

Change-Id: Ic0dedbadbcf89f032890820785b9fb49a6362b01
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
drivers/gpu/drm/msm/Makefile
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
drivers/gpu/drm/msm/msm_gem.h
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/msm/msm_gpu.c
drivers/gpu/drm/msm/msm_ringbuffer.h
drivers/gpu/drm/msm/msm_trace.h [new file with mode: 0644]
drivers/gpu/drm/msm/msm_trace_points.c [new file with mode: 0644]

index 999d5e4..84125b3 100644 (file)
@@ -149,6 +149,7 @@ msm_drm-$(CONFIG_DRM_MSM) += \
        msm_ringbuffer.o \
        msm_prop.o \
        msm_snapshot.o \
-       msm_submitqueue.o
+       msm_submitqueue.o \
+       msm_trace_points.o
 
 obj-$(CONFIG_DRM_MSM)  += msm_drm.o
index f8dbc84..687ca96 100644 (file)
@@ -13,6 +13,7 @@
 
 #include "msm_gem.h"
 #include "msm_iommu.h"
+#include "msm_trace.h"
 #include "a5xx_gpu.h"
 
 #define SECURE_VA_START 0xc0000000
@@ -100,12 +101,31 @@ static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
        OUT_RING(ring, 1);
 }
 
+/* Emit PM4 that copies the 19.2 MHz always-on counter to memory at iova */
+static void a5xx_get_ticks(struct msm_ringbuffer *ring, uint64_t iova)
+{
+       /*
+        * Bit 30 makes this a 64-bit write operation. Bits 18-29 hold the
+        * number of consecutive registers to copy; that field is set to 2
+        * so that both REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and the
+        * following _HI register are written out.
+        */
+
+       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+       OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
+               (1 << 30) | (2 << 18));
+       OUT_RING(ring, lower_32_bits(iova));
+       OUT_RING(ring, upper_32_bits(iova));
+}
+
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
        struct msm_ringbuffer *ring = gpu->rb[submit->ring];
        unsigned int i, ibs = 0;
+       unsigned long flags;
+       u64 ktime, ticks;
 
        a5xx_set_pagetable(gpu, ring, submit->aspace);
 
@@ -139,24 +159,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
                OUT_RING(ring, 1);
        }
 
-       /* Record the always on counter before command execution */
-       if (submit->profile_buf_iova) {
-               uint64_t gpuaddr = submit->profile_buf_iova +
-                       offsetof(struct drm_msm_gem_submit_profile_buffer,
-                                       ticks_submitted);
+       /* Record the GPU ticks at command start for kernel side profiling */
+       a5xx_get_ticks(ring,
+               RING_TICKS_IOVA(ring, submit->tick_index, started));
 
-               /*
-                * Set bit[30] to make this command a 64 bit write operation.
-                * bits[18-29] is to specify number of consecutive registers
-                * to copy, so set this space with 2, since we want to copy
-                * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI].
-                */
-               OUT_PKT7(ring, CP_REG_TO_MEM, 3);
-               OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
-                               (1 << 30) | (2 << 18));
-               OUT_RING(ring, lower_32_bits(gpuaddr));
-               OUT_RING(ring, upper_32_bits(gpuaddr));
-       }
+       /* And for the user profiling too if it is enabled */
+       if (submit->profile_buf_iova)
+               a5xx_get_ticks(ring, submit->profile_buf_iova +
+                       offsetof(struct drm_msm_gem_submit_profile_buffer,
+                               ticks_submitted));
 
        /* Submit the commands */
        for (i = 0; i < submit->nr_cmds; i++) {
@@ -190,18 +201,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
        OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
        OUT_RING(ring, 0x01);
 
+       /* Record the GPU ticks at command retire for kernel side profiling */
+       a5xx_get_ticks(ring,
+               RING_TICKS_IOVA(ring, submit->tick_index, retired));
+
        /* Record the always on counter after command execution */
-       if (submit->profile_buf_iova) {
-               uint64_t gpuaddr = submit->profile_buf_iova +
+       if (submit->profile_buf_iova)
+               a5xx_get_ticks(ring, submit->profile_buf_iova +
                        offsetof(struct drm_msm_gem_submit_profile_buffer,
-                                       ticks_retired);
-
-               OUT_PKT7(ring, CP_REG_TO_MEM, 3);
-               OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
-                               (1 << 30) | (2 << 18));
-               OUT_RING(ring, lower_32_bits(gpuaddr));
-               OUT_RING(ring, upper_32_bits(gpuaddr));
-       }
+                               ticks_retired));
 
        /* Write the fence to the scratch register */
        OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
@@ -237,33 +245,27 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
        /* Set bit 0 to trigger an interrupt on preempt complete */
        OUT_RING(ring, 0x01);
 
-       if (submit->profile_buf_iova) {
-               unsigned long flags;
-               uint64_t ktime;
-               struct drm_msm_gem_submit_profile_buffer *profile_buf =
-                       submit->profile_buf_vaddr;
-
-               /*
-                * With this profiling, we are trying to create closest
-                * possible mapping between the CPU time domain(monotonic clock)
-                * and the GPU time domain(ticks). In order to make this
-                * happen, we need to briefly turn off interrupts to make sure
-                * interrupts do not run between collecting these two samples.
-                */
-               local_irq_save(flags);
-
-               profile_buf->ticks_queued = gpu_read64(gpu,
-                       REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
-                       REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+       /*
+        * Get the current kernel time and ticks with interrupts off so we don't
+        * get interrupted between the operations and skew the numbers
+        */
 
-               ktime = ktime_get_raw_ns();
+       local_irq_save(flags);
+       ticks = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
+               REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+       ktime = ktime_get_raw_ns();
+       local_irq_restore(flags);
 
-               local_irq_restore(flags);
+       if (submit->profile_buf) {
+               /* Write the data into the user-specified profile buffer */
 
-               profile_buf->queue_time = ktime;
-               profile_buf->submit_time = ktime;
+               submit->profile_buf->queue_time = ktime;
+               submit->profile_buf->submit_time = ktime;
+               submit->profile_buf->ticks_queued = ticks;
        }
 
+       trace_msm_submitted(submit, ticks, ktime);
+
        a5xx_flush(gpu, ring);
 
        /* Check to see if we need to start preemption */
index e852889..df9ddad 100644 (file)
@@ -151,9 +151,10 @@ struct msm_gem_submit {
        u32 flags;
        bool valid;
        uint64_t profile_buf_iova;
-       void *profile_buf_vaddr;
+       struct drm_msm_gem_submit_profile_buffer *profile_buf;
        bool secure;
        struct msm_gpu_submitqueue *queue;
+       int tick_index;
        unsigned int nr_cmds;
        unsigned int nr_bos;
        struct {
index 7ccc146..b73379a 100644 (file)
@@ -18,6 +18,7 @@
 #include "msm_drv.h"
 #include "msm_gpu.h"
 #include "msm_gem.h"
+#include "msm_trace.h"
 
 /*
  * Cmdstream submission:
@@ -55,7 +56,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
                submit->nr_bos = 0;
                submit->nr_cmds = 0;
 
-               submit->profile_buf_vaddr = NULL;
+               submit->profile_buf = NULL;
                submit->profile_buf_iova = 0;
                submit->cmd = (void *)&submit->bos[nr_bos];
 
@@ -510,9 +511,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
                if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) {
                        submit->profile_buf_iova = submit->cmd[i].iova;
-                       submit->profile_buf_vaddr =
-                               msm_gem_vaddr(&msm_obj->base) +
-                               submit_cmd.submit_offset;
+                       submit->profile_buf = msm_gem_vaddr(&msm_obj->base)
+                               + submit_cmd.submit_offset;
                }
 
                if (submit->valid)
index d896e43..6bac1cf 100644 (file)
@@ -18,7 +18,7 @@
 #include "msm_gpu.h"
 #include "msm_gem.h"
 #include "msm_mmu.h"
-
+#include "msm_trace.h"
 
 /*
  * Power Management:
@@ -494,9 +494,18 @@ static void retire_submits(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
        WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
        list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
+               struct msm_memptr_ticks *ticks;
+
                if (submit->fence > fence)
                        break;
 
+               ticks = &(ring->memptrs->ticks[submit->tick_index]);
+
+               /* Add memory barrier to ensure the timer ticks are posted */
+               rmb();
+
+               trace_msm_retired(submit, ticks->started, ticks->retired);
+
                msm_gem_submit_free(submit);
        }
 }
@@ -578,6 +587,12 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 
        ring->submitted_fence = submit->fence;
 
+       submit->tick_index = ring->tick_index;
+       ring->tick_index = (ring->tick_index + 1) %
+               ARRAY_SIZE(ring->memptrs->ticks);
+
+       trace_msm_queued(submit);
+
        update_sw_cntrs(gpu);
 
        for (i = 0; i < submit->nr_bos; i++) {
index 3eb9a86..b19ce75 100644 (file)
 #define rbmemptr(ring, member) \
        ((ring)->memptrs_iova + offsetof(struct msm_memptrs, member))
 
+struct msm_memptr_ticks {
+       uint64_t started;
+       uint64_t retired;
+};
+
 struct msm_memptrs {
        volatile uint32_t rptr;
        volatile uint32_t fence;
        volatile uint64_t ttbr0;
        volatile unsigned int contextidr;
+       struct msm_memptr_ticks ticks[128];
 };
 
+#define RING_TICKS_IOVA(ring, index, field) \
+       ((ring)->memptrs_iova + offsetof(struct msm_memptrs, ticks) + \
+        ((index) * sizeof(struct msm_memptr_ticks)) + \
+        offsetof(struct msm_memptr_ticks, field))
+
 struct msm_ringbuffer {
        struct msm_gpu *gpu;
        int id;
@@ -42,6 +53,7 @@ struct msm_ringbuffer {
 
        struct msm_memptrs *memptrs;
        uint64_t memptrs_iova;
+       int tick_index;
 };
 
 struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
diff --git a/drivers/gpu/drm/msm/msm_trace.h b/drivers/gpu/drm/msm/msm_trace.h
new file mode 100644 (file)
index 0000000..68c7ff7
--- /dev/null
@@ -0,0 +1,98 @@
+/* Copyright (c) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#if !defined(_MSM_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MSM_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM msm_drm
+#define TRACE_INCLUDE_FILE msm_trace
+
+TRACE_EVENT(msm_queued,
+       TP_PROTO(struct msm_gem_submit *submit),
+       TP_ARGS(submit),
+       TP_STRUCT__entry(
+               __field(uint32_t, queue_id)
+               __field(uint32_t, fence_id)
+               __field(int, ring)
+       ),
+       TP_fast_assign(
+               __entry->queue_id = submit->queue->id;
+               __entry->fence_id = submit->fence;
+               __entry->ring = submit->ring;
+       ),
+       TP_printk(
+               "queue=%u fence=%u ring=%d",
+               __entry->queue_id, __entry->fence_id, __entry->ring
+       )
+);
+
+TRACE_EVENT(msm_submitted,
+       TP_PROTO(struct msm_gem_submit *submit, uint64_t ticks, uint64_t nsecs),
+       TP_ARGS(submit, ticks, nsecs),
+       TP_STRUCT__entry(
+               __field(uint32_t, queue_id)
+               __field(uint32_t, fence_id)
+               __field(int, ring)
+               __field(uint64_t, ticks)
+               __field(uint64_t, nsecs)
+       ),
+       TP_fast_assign(
+               __entry->queue_id = submit->queue->id;
+               __entry->fence_id = submit->fence;
+               __entry->ring = submit->ring;
+               __entry->ticks = ticks;
+               __entry->nsecs = nsecs;
+       ),
+       TP_printk(
+               "queue=%u fence=%u ring=%d ticks=%llu nsecs=%llu",
+               __entry->queue_id, __entry->fence_id, __entry->ring,
+               __entry->ticks, __entry->nsecs
+       )
+);
+
+TRACE_EVENT(msm_retired,
+       TP_PROTO(struct msm_gem_submit *submit, uint64_t start_ticks,
+               uint64_t retire_ticks),
+       TP_ARGS(submit, start_ticks, retire_ticks),
+       TP_STRUCT__entry(
+               __field(uint32_t, queue_id)
+               __field(uint32_t, fence_id)
+               __field(int, ring)
+               __field(uint64_t, start_ticks)
+               __field(uint64_t, retire_ticks)
+       ),
+       TP_fast_assign(
+               __entry->queue_id = submit->queue->id;
+               __entry->fence_id = submit->fence;
+               __entry->ring = submit->ring;
+               __entry->start_ticks = start_ticks;
+               __entry->retire_ticks = retire_ticks;
+       ),
+       TP_printk(
+               "queue=%u fence=%u ring=%d started=%llu retired=%llu",
+               __entry->queue_id, __entry->fence_id, __entry->ring,
+               __entry->start_ticks, __entry->retire_ticks
+       )
+);
+
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
diff --git a/drivers/gpu/drm/msm/msm_trace_points.c b/drivers/gpu/drm/msm/msm_trace_points.c
new file mode 100644 (file)
index 0000000..41d9a97
--- /dev/null
@@ -0,0 +1,18 @@
+/* Copyright (c) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "msm_gem.h"
+#include "msm_gpu.h"
+
+#define CREATE_TRACE_POINTS
+#include "msm_trace.h"