OSDN Git Service

msm: kgsl: Execute user profiling commands in an IB
author Jordan Crouse <jcrouse@codeaurora.org>
Mon, 9 Sep 2019 16:41:36 +0000 (10:41 -0600)
committer Gerrit - the friendly Code Review server <code-review@localhost>
Tue, 17 Sep 2019 09:17:38 +0000 (02:17 -0700)
Execute user profiling in an indirect buffer. This ensures that addresses
and values specified directly from the user don't end up in the
ringbuffer.

Change-Id: Ic0dedbadedcaab29ce5738a39c1ff6269261bae4
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Harshitha Sai Neelati <hsaine@codeaurora.org>
drivers/gpu/msm/adreno_ringbuffer.c
drivers/gpu/msm/adreno_ringbuffer.h

index 65e7335..8a3028c 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2017,2019, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -259,6 +259,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
                PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc");
        if (ret)
                return ret;
+
+       /* allocate a chunk of memory to create user profiling IB1s */
+       kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc,
+               PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc");
+
        return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc,
                        KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY,
                        0, "ringbuffer");
@@ -303,7 +308,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev,
 
        kgsl_free_global(device, &rb->pagetable_desc);
        kgsl_free_global(device, &rb->preemption_desc);
-
+       kgsl_free_global(device, &rb->profile_desc);
        kgsl_free_global(device, &rb->buffer_desc);
        kgsl_del_event_group(&rb->events);
        memset(rb, 0, sizeof(struct adreno_ringbuffer));
@@ -737,6 +742,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev,
        return (unsigned int)(p - cmds);
 }
 
+/* This is the maximum possible size for 64 bit targets */
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
+
+static int set_user_profiling(struct adreno_device *adreno_dev,
+               struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr)
+{
+       int dwords, index = 0;
+       u64 ib_gpuaddr;
+       u32 *ib;
+
+       if (!rb->profile_desc.hostptr)
+               return 0;
+
+       ib = ((u32 *) rb->profile_desc.hostptr) +
+               (rb->profile_index * PROFILE_IB_DWORDS);
+       ib_gpuaddr = rb->profile_desc.gpuaddr +
+               (rb->profile_index * (PROFILE_IB_DWORDS << 2));
+
+       dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr);
+
+       /* Make an indirect buffer for the request */
+       cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+       index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr);
+       cmds[index++] = dwords;
+
+       rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+       return index;
+}
+
 /* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */
 int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
                struct kgsl_drawobj_cmd *cmdobj,
@@ -836,14 +872,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
        if (drawobj->flags & KGSL_DRAWOBJ_PROFILING &&
                !adreno_is_a3xx(adreno_dev) && profile_buffer) {
                user_profiling = true;
-               dwords += 6;
 
                /*
-                * REG_TO_MEM packet on A5xx and above needs another ordinal.
-                * Add 2 more dwords since we do profiling before and after.
+                * User side profiling uses two IB1s, one before and one after,
+                * with 4 dwords per INDIRECT_BUFFER_PFE call
                 */
-               if (!ADRENO_LEGACY_PM4(adreno_dev))
-                       dwords += 2;
+               dwords += 8;
 
                /*
                 * we want to use an adreno_submit_time struct to get the
@@ -886,11 +920,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
        }
 
        /*
-        * Add cmds to read the GPU ticks at the start of command obj and
+        * Add IB1 to read the GPU ticks at the start of command obj and
         * write it into the appropriate command obj profiling buffer offset
         */
        if (user_profiling) {
-               cmds += _get_alwayson_counter(adreno_dev, cmds,
+               cmds += set_user_profiling(adreno_dev, rb, cmds,
                        cmdobj->profiling_buffer_gpuaddr +
                        offsetof(struct kgsl_drawobj_profiling_buffer,
                        gpu_ticks_submitted));
@@ -929,11 +963,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
        }
 
        /*
-        * Add cmds to read the GPU ticks at the end of command obj and
+        * Add IB1 to read the GPU ticks at the end of command obj and
         * write it into the appropriate command obj profiling buffer offset
         */
        if (user_profiling) {
-               cmds += _get_alwayson_counter(adreno_dev, cmds,
+               cmds += set_user_profiling(adreno_dev, rb, cmds,
                        cmdobj->profiling_buffer_gpuaddr +
                        offsetof(struct kgsl_drawobj_profiling_buffer,
                        gpu_ticks_retired));
index 63374af..d64ccbd 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2016,2019, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -127,6 +127,18 @@ struct adreno_ringbuffer {
        unsigned long sched_timer;
        enum adreno_dispatcher_starve_timer_states starve_timer_state;
        spinlock_t preempt_lock;
+       /**
+        * @profile_desc: global memory to construct IB1s to do user side
+        * profiling
+        */
+       struct kgsl_memdesc profile_desc;
+       /**
+        * @profile_index: Index of the next "slot" in profile_desc for a user
+        * profiling IB1.  This allows for PAGE_SIZE / 16 = 256 simultaneous
+        * commands per ringbuffer with user profiling enabled (assuming 4K
+        * pages), which should be enough.
+        */
+       u32 profile_index;
 };
 
 /* Returns the current ringbuffer */