OSDN Git Service

msm: kgsl: Use the GPU to write the RPTR
author: Carter Cooper <ccooper@codeaurora.org>
Tue, 3 May 2016 20:11:04 +0000 (14:11 -0600)
committer: Carter Cooper <ccooper@codeaurora.org>
Wed, 20 Jul 2016 21:19:32 +0000 (15:19 -0600)
The memstore shared between the CPU and GPU uses a legacy layout that
cannot be changed. Rather than borrowing values from it where available,
add a new block of shared memory that is exclusive to the driver
and GPU. This new block can be used more freely than the old
memstore block.

Program the GPU to write the RPTR out to an address the CPU can read rather
than having the CPU read a GPU register directly. There are some very
small but very real conditions where different blocks on the GPU have
outdated values for the RPTR. When scheduling preemption, the value read
from the register might not reflect the actual value of the RPTR in the CP.
This can cause the save/restore from preemption to give back incorrect RPTR
values, causing much confusion between the GPU and CPU.

Remove the ringbuffers copy of the read pointer shadow.
Now that the GPU will update a shared memory address with the
value of the read pointer, there is no need to poll the register
to get the value and then keep a local copy of it.

CRs-Fixed: 987082
Change-Id: Ic44759d1a5c6e48b2f0f566ea8c153f01cf68279
Signed-off-by: Carter Cooper <ccooper@codeaurora.org>
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
14 files changed:
drivers/gpu/msm/a5xx_reg.h
drivers/gpu/msm/adreno.c
drivers/gpu/msm/adreno.h
drivers/gpu/msm/adreno_a4xx.c
drivers/gpu/msm/adreno_a5xx.c
drivers/gpu/msm/adreno_dispatch.c
drivers/gpu/msm/adreno_iommu.c
drivers/gpu/msm/adreno_ringbuffer.c
drivers/gpu/msm/adreno_ringbuffer.h
drivers/gpu/msm/adreno_snapshot.c
drivers/gpu/msm/adreno_trace.h
drivers/gpu/msm/kgsl.c
drivers/gpu/msm/kgsl.h
drivers/gpu/msm/kgsl_device.h

index 913cedb..2075888 100644 (file)
@@ -60,6 +60,8 @@
 #define A5XX_CP_RB_BASE                  0x800
 #define A5XX_CP_RB_BASE_HI               0x801
 #define A5XX_CP_RB_CNTL                  0x802
+#define A5XX_CP_RB_RPTR_ADDR_LO          0x804
+#define A5XX_CP_RB_RPTR_ADDR_HI          0x805
 #define A5XX_CP_RB_RPTR                  0x806
 #define A5XX_CP_RB_WPTR                  0x807
 #define A5XX_CP_PFP_STAT_ADDR            0x808
index 26e341a..28980ec 100644 (file)
@@ -171,6 +171,30 @@ void adreno_writereg64(struct adreno_device *adreno_dev,
 }
 
 /**
+ * adreno_get_rptr() - Get the current ringbuffer read pointer
+ * @rb: Pointer the ringbuffer to query
+ *
+ * Get the latest rptr
+ */
+unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb)
+{
+       struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+       unsigned int rptr = 0;
+
+       if (adreno_is_a3xx(adreno_dev))
+               adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
+                               &rptr);
+       else {
+               struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+               kgsl_sharedmem_readl(&device->scratch, &rptr,
+                               SCRATCH_RPTR_OFFSET(rb->id));
+       }
+
+       return rptr;
+}
+
+/**
  * adreno_of_read_property() - Adreno read property
  * @node: Device node
  *
@@ -2149,8 +2173,6 @@ bool adreno_isidle(struct kgsl_device *device)
        if (!kgsl_state_is_awake(device))
                return true;
 
-       adreno_get_rptr(ADRENO_CURRENT_RINGBUFFER(adreno_dev));
-
        /*
         * wptr is updated when we add commands to ringbuffer, add a barrier
         * to make sure updated wptr is compared to rptr
@@ -2161,15 +2183,13 @@ bool adreno_isidle(struct kgsl_device *device)
         * ringbuffer is truly idle when all ringbuffers read and write
         * pointers are equal
         */
+
        FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
-               if (rb->rptr != rb->wptr)
-                       break;
+               if (!adreno_rb_empty(rb))
+                       return false;
        }
 
-       if (i == adreno_dev->num_ringbuffers)
-               return adreno_hw_isidle(adreno_dev);
-
-       return false;
+       return adreno_hw_isidle(adreno_dev);
 }
 
 /**
index 7ac91f2..22827e7 100644 (file)
@@ -458,6 +458,8 @@ enum adreno_regs {
        ADRENO_REG_CP_WFI_PEND_CTR,
        ADRENO_REG_CP_RB_BASE,
        ADRENO_REG_CP_RB_BASE_HI,
+       ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+       ADRENO_REG_CP_RB_RPTR_ADDR_HI,
        ADRENO_REG_CP_RB_RPTR,
        ADRENO_REG_CP_RB_WPTR,
        ADRENO_REG_CP_CNTL,
@@ -1272,24 +1274,6 @@ static inline unsigned int adreno_preempt_state(
                state;
 }
 
-/**
- * adreno_get_rptr() - Get the current ringbuffer read pointer
- * @rb: Pointer the ringbuffer to query
- *
- * Get the current read pointer from the GPU register.
- */
-static inline unsigned int
-adreno_get_rptr(struct adreno_ringbuffer *rb)
-{
-       struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
-       if (adreno_dev->cur_rb == rb &&
-               adreno_preempt_state(adreno_dev,
-                       ADRENO_DISPATCHER_PREEMPT_CLEAR))
-               adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &(rb->rptr));
-
-       return rb->rptr;
-}
-
 static inline bool adreno_is_preemption_enabled(
                                struct adreno_device *adreno_dev)
 {
@@ -1371,6 +1355,13 @@ void adreno_readreg64(struct adreno_device *adreno_dev,
 void adreno_writereg64(struct adreno_device *adreno_dev,
                enum adreno_regs lo, enum adreno_regs hi, uint64_t val);
 
+unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb);
+
+static inline bool adreno_rb_empty(struct adreno_ringbuffer *rb)
+{
+       return (adreno_get_rptr(rb) == rb->wptr);
+}
+
 static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev)
 {
        return adreno_dev->fast_hang_detect &&
index b1196da..29e3316 100644 (file)
@@ -196,9 +196,10 @@ static void a4xx_preemption_start(struct adreno_device *adreno_dev,
        /* scratch REG9 corresponds to CP_RB_CNTL register */
        kgsl_regwrite(device, A4XX_CP_SCRATCH_REG9, val);
        /* scratch REG10 corresponds to rptr address */
-       kgsl_regwrite(device, A4XX_CP_SCRATCH_REG10, 0);
+       kgsl_regwrite(device, A4XX_CP_SCRATCH_REG10,
+                       SCRATCH_RPTR_GPU_ADDR(device, rb->id));
        /* scratch REG11 corresponds to rptr */
-       kgsl_regwrite(device, A4XX_CP_SCRATCH_REG11, rb->rptr);
+       kgsl_regwrite(device, A4XX_CP_SCRATCH_REG11, adreno_get_rptr(rb));
        /* scratch REG12 corresponds to wptr */
        kgsl_regwrite(device, A4XX_CP_SCRATCH_REG12, rb->wptr);
        /*
@@ -222,7 +223,6 @@ static void a4xx_preemption_save(struct adreno_device *adreno_dev,
 {
        struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 
-       kgsl_regread(device, A4XX_CP_SCRATCH_REG18, &rb->rptr);
        kgsl_regread(device, A4XX_CP_SCRATCH_REG23, &rb->gpr11);
 }
 
@@ -255,8 +255,7 @@ static int a4xx_preemption_pre_ibsubmit(
        int exec_ib = 0;
 
        cmds += a4xx_preemption_token(adreno_dev, rb, cmds,
-                               device->memstore.gpuaddr +
-                               KGSL_MEMSTORE_OFFSET(context->id, preempted));
+                       MEMSTORE_ID_GPU_ADDR(device, context->id, preempted));
 
        if (ib)
                exec_ib = 1;
@@ -839,6 +838,7 @@ static unsigned int a4xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
        ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A4XX_CP_WFI_PEND_CTR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A4XX_CP_RB_BASE),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP),
+       ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, A4XX_CP_RB_RPTR_ADDR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A4XX_CP_RB_RPTR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A4XX_CP_RB_WPTR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A4XX_CP_CNTL),
@@ -1634,8 +1634,15 @@ static int a4xx_rb_start(struct adreno_device *adreno_dev,
                         unsigned int start_type)
 {
        struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+       struct kgsl_device *device = &adreno_dev->dev;
+       uint64_t addr;
        int ret;
 
+       addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id);
+
+       adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+                       ADRENO_REG_CP_RB_RPTR_ADDR_HI, addr);
+
        /*
         * The size of the ringbuffer in the hardware is the log2
         * representation of the size in quadwords (sizedwords / 2).
@@ -1644,8 +1651,8 @@ static int a4xx_rb_start(struct adreno_device *adreno_dev,
         */
 
        adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
-               (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) |
-               (1 << 27));
+                       ((ilog2(4) << 8) & 0x1F00) |
+                       (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F));
 
        adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
                          rb->buffer_desc.gpuaddr);
@@ -1897,7 +1904,7 @@ static int a4xx_submit_preempt_token(struct adreno_ringbuffer *rb,
 
        ringcmds += gpudev->preemption_token(adreno_dev, rb, ringcmds,
                                device->memstore.gpuaddr +
-                               KGSL_MEMSTORE_RB_OFFSET(rb, preempted));
+                               MEMSTORE_RB_OFFSET(rb, preempted));
 
        if ((uint)(ringcmds - start) > total_sizedwords) {
                KGSL_DRV_ERR(device, "Insufficient rb size allocated\n");
@@ -1960,7 +1967,9 @@ static void a4xx_preempt_trig_state(
                        KGSL_DRV_INFO(device,
                        "Preemption completed without interrupt\n");
                        trace_adreno_hw_preempt_trig_to_comp(adreno_dev->cur_rb,
-                                       adreno_dev->next_rb);
+                                       adreno_dev->next_rb,
+                                       adreno_get_rptr(adreno_dev->cur_rb),
+                                       adreno_get_rptr(adreno_dev->next_rb));
                        atomic_set(&dispatcher->preemption_state,
                                ADRENO_DISPATCHER_PREEMPT_COMPLETE);
                        adreno_dispatcher_schedule(device);
@@ -1988,9 +1997,7 @@ static void a4xx_preempt_trig_state(
         * commands that got submitted to current RB after triggering preemption
         * then submit them as those commands may have a preempt token in them
         */
-       adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
-                       &adreno_dev->cur_rb->rptr);
-       if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) {
+       if (!adreno_rb_empty(adreno_dev->cur_rb)) {
                /*
                 * Memory barrier before informing the
                 * hardware of new commands
@@ -2011,7 +2018,9 @@ static void a4xx_preempt_trig_state(
        dispatcher->preempt_token_submit = 1;
        adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr;
        trace_adreno_hw_preempt_token_submit(adreno_dev->cur_rb,
-                                               adreno_dev->next_rb);
+                       adreno_dev->next_rb,
+                       adreno_get_rptr(adreno_dev->cur_rb),
+                       adreno_get_rptr(adreno_dev->next_rb));
 }
 
 /**
@@ -2035,10 +2044,6 @@ static void a4xx_preempt_clear_state(
        if (!kgsl_state_is_awake(device))
                return;
 
-       /* keep updating the current rptr when preemption is clear */
-       adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
-                       &(adreno_dev->cur_rb->rptr));
-
        highest_busy_rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev);
        if (!highest_busy_rb)
                return;
@@ -2056,7 +2061,7 @@ static void a4xx_preempt_clear_state(
                 * if switching to lower priority make sure that the rptr and
                 * wptr are equal, when the lower rb is not starved
                 */
-               if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr)
+               if (!adreno_rb_empty(adreno_dev->cur_rb))
                        return;
                /*
                 * switch to default context because when we switch back
@@ -2095,7 +2100,9 @@ static void a4xx_preempt_clear_state(
                msecs_to_jiffies(ADRENO_DISPATCH_PREEMPT_TIMEOUT));
 
        trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb,
-                                               adreno_dev->next_rb);
+                       adreno_dev->next_rb,
+                       adreno_get_rptr(adreno_dev->cur_rb),
+                       adreno_get_rptr(adreno_dev->next_rb));
        /* issue PREEMPT trigger */
        adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
        /*
@@ -2146,6 +2153,7 @@ static void a4xx_preempt_complete_state(
        struct adreno_dispatcher_cmdqueue *dispatch_q;
        unsigned int wptr, rbbase;
        unsigned int val, val1;
+       unsigned int prevrptr;
 
        del_timer_sync(&dispatcher->preempt_timer);
 
@@ -2176,12 +2184,15 @@ static void a4xx_preempt_complete_state(
        dispatch_q = &(adreno_dev->cur_rb->dispatch_q);
        /* new RB is the current RB */
        trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb,
-                                               adreno_dev->cur_rb);
+                       adreno_dev->cur_rb,
+                       adreno_get_rptr(adreno_dev->next_rb),
+                       adreno_get_rptr(adreno_dev->cur_rb));
        adreno_dev->prev_rb = adreno_dev->cur_rb;
        adreno_dev->cur_rb = adreno_dev->next_rb;
        adreno_dev->cur_rb->preempted_midway = 0;
        adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
        adreno_dev->next_rb = NULL;
+
        if (adreno_disp_preempt_fair_sched) {
                /* starved rb is now scheduled so unhalt dispatcher */
                if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED ==
@@ -2194,7 +2205,7 @@ static void a4xx_preempt_complete_state(
                 * If the outgoing RB is has commands then set the
                 * busy time for it
                 */
-               if (adreno_dev->prev_rb->rptr != adreno_dev->prev_rb->wptr) {
+               if (!adreno_rb_empty(adreno_dev->prev_rb)) {
                        adreno_dev->prev_rb->starve_timer_state =
                                ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
                        adreno_dev->prev_rb->sched_timer = jiffies;
@@ -2205,15 +2216,16 @@ static void a4xx_preempt_complete_state(
        }
        atomic_set(&dispatcher->preemption_state,
                ADRENO_DISPATCHER_PREEMPT_CLEAR);
+
+       prevrptr = adreno_get_rptr(adreno_dev->prev_rb);
+
        if (adreno_compare_prio_level(adreno_dev->prev_rb->id,
                                adreno_dev->cur_rb->id) < 0) {
-               if (adreno_dev->prev_rb->wptr_preempt_end !=
-                       adreno_dev->prev_rb->rptr)
+               if (adreno_dev->prev_rb->wptr_preempt_end != prevrptr)
                        adreno_dev->prev_rb->preempted_midway = 1;
-       } else if (adreno_dev->prev_rb->wptr_preempt_end !=
-               adreno_dev->prev_rb->rptr) {
+       } else if (adreno_dev->prev_rb->wptr_preempt_end != prevrptr)
                BUG();
-       }
+
        /* submit wptr if required for new rb */
        adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
        if (adreno_dev->cur_rb->wptr != wptr) {
index 512dcd4..f5ed909 100644 (file)
@@ -194,6 +194,8 @@ static void a5xx_preemption_start(struct adreno_device *adreno_dev,
 
        kgsl_sharedmem_writel(device, &rb->preemption_desc,
                PREEMPT_RECORD(wptr), rb->wptr);
+       kgsl_sharedmem_writel(device, &rb->preemption_desc,
+               PREEMPT_RECORD(rptr), adreno_get_rptr(rb));
        kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
                lower_32_bits(rb->preemption_desc.gpuaddr));
        kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
@@ -225,16 +227,8 @@ static void a5xx_preemption_start(struct adreno_device *adreno_dev,
                offsetof(struct a5xx_cp_smmu_info, context_idr), contextidr);
 }
 
-/*
- * a5xx_preemption_save() - Save the state after preemption is done
- */
-static void a5xx_preemption_save(struct adreno_device *adreno_dev,
-               struct adreno_ringbuffer *rb)
-{
-       /* save the rptr from ctxrecord here */
-       kgsl_sharedmem_readl(&rb->preemption_desc, &rb->rptr,
-               PREEMPT_RECORD(rptr));
-}
+#define _CP_CNTL (((ilog2(4) << 8) & 0x1F00) | \
+                       (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
 
 #ifdef CONFIG_QCOM_KGSL_IOMMU
 static int a5xx_preemption_iommu_init(struct adreno_device *adreno_dev)
@@ -290,7 +284,10 @@ static int a5xx_preemption_init(struct adreno_device *adreno_dev)
                kgsl_sharedmem_writel(device, &rb->preemption_desc,
                        PREEMPT_RECORD(data), 0);
                kgsl_sharedmem_writel(device, &rb->preemption_desc,
-                       PREEMPT_RECORD(cntl), 0x0800000C);
+                       PREEMPT_RECORD(cntl), _CP_CNTL);
+               kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+                               PREEMPT_RECORD(rptr_addr),
+                               SCRATCH_RPTR_GPU_ADDR(device, i));
                kgsl_sharedmem_writel(device, &rb->preemption_desc,
                        PREEMPT_RECORD(rptr), 0);
                kgsl_sharedmem_writel(device, &rb->preemption_desc,
@@ -445,9 +442,7 @@ static int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
        unsigned int ctx_id = context ? context->id : 0;
 
        return a5xx_preemption_token(adreno_dev, rb, cmds,
-                               device->memstore.gpuaddr +
-                               KGSL_MEMSTORE_OFFSET(ctx_id, preempted));
-
+                       MEMSTORE_ID_GPU_ADDR(device, ctx_id, preempted));
 }
 
 static void a5xx_platform_setup(struct adreno_device *adreno_dev)
@@ -2228,8 +2223,8 @@ static int _preemption_init(
        struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
        unsigned int *cmds_orig = cmds;
        uint64_t gpuaddr = rb->preemption_desc.gpuaddr;
-       uint64_t gpuaddr_token = device->memstore.gpuaddr +
-                               KGSL_MEMSTORE_OFFSET(0, preempted);
+       uint64_t gpuaddr_token = MEMSTORE_ID_GPU_ADDR(device,
+                       KGSL_MEMSTORE_GLOBAL, preempted);
 
        /* Turn CP protection OFF */
        *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
@@ -2595,8 +2590,15 @@ static int a5xx_rb_start(struct adreno_device *adreno_dev,
                         unsigned int start_type)
 {
        struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+       struct kgsl_device *device = &adreno_dev->dev;
+       uint64_t addr;
        int ret;
 
+       addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id);
+
+       adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+                       ADRENO_REG_CP_RB_RPTR_ADDR_HI, addr);
+
        /*
         * The size of the ringbuffer in the hardware is the log2
         * representation of the size in quadwords (sizedwords / 2).
@@ -2604,9 +2606,7 @@ static int a5xx_rb_start(struct adreno_device *adreno_dev,
         * in certain circumstances.
         */
 
-       adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
-               (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) |
-               (1 << 27));
+       adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL, _CP_CNTL);
 
        adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
                        rb->buffer_desc.gpuaddr);
@@ -3147,6 +3147,10 @@ static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
        ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A5XX_CP_WFI_PEND_CTR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI),
+       ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+                       A5XX_CP_RB_RPTR_ADDR_LO),
+       ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI,
+                       A5XX_CP_RB_RPTR_ADDR_HI),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR),
        ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A5XX_CP_CNTL),
@@ -3805,7 +3809,9 @@ static void a5xx_preempt_trig_state(
                                "Preemption completed without interrupt\n");
                                trace_adreno_hw_preempt_trig_to_comp(
                                        adreno_dev->cur_rb,
-                                       adreno_dev->next_rb);
+                                       adreno_dev->next_rb,
+                                       adreno_get_rptr(adreno_dev->cur_rb),
+                                       adreno_get_rptr(adreno_dev->next_rb));
                                atomic_set(&dispatcher->preemption_state,
                                        ADRENO_DISPATCHER_PREEMPT_COMPLETE);
                        } else {
@@ -3867,10 +3873,6 @@ static void a5xx_preempt_clear_state(
        if (!kgsl_state_is_awake(device))
                return;
 
-       /* keep updating the current rptr when preemption is clear */
-       adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
-                       &(adreno_dev->cur_rb->rptr));
-
        highest_busy_rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev);
        if (!highest_busy_rb)
                return;
@@ -3893,7 +3895,7 @@ static void a5xx_preempt_clear_state(
                 * if switching to lower priority make sure that the rptr and
                 * wptr are equal, when the lower rb is not starved
                 */
-               if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr)
+               if (!adreno_rb_empty(adreno_dev->cur_rb))
                        return;
                /*
                 * switch to default context because when we switch back
@@ -3912,10 +3914,6 @@ static void a5xx_preempt_clear_state(
                        return;
        }
 
-       /* rptr could be updated in drawctxt switch above, update it here */
-       adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
-                       &(adreno_dev->cur_rb->rptr));
-
        /* turn on IOMMU as the preemption may trigger pt switch */
        kgsl_mmu_enable_clk(&device->mmu);
 
@@ -3933,7 +3931,9 @@ static void a5xx_preempt_clear_state(
                msecs_to_jiffies(ADRENO_DISPATCH_PREEMPT_TIMEOUT));
 
        trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb,
-                                               adreno_dev->next_rb);
+                       adreno_dev->next_rb,
+                       adreno_get_rptr(adreno_dev->cur_rb),
+                       adreno_get_rptr(adreno_dev->next_rb));
        /* issue PREEMPT trigger */
        adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
 
@@ -3993,12 +3993,12 @@ static void a5xx_preempt_complete_state(
                return;
        }
 
-       a5xx_preemption_save(adreno_dev, adreno_dev->cur_rb);
-
        dispatch_q = &(adreno_dev->cur_rb->dispatch_q);
        /* new RB is the current RB */
        trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb,
-                                               adreno_dev->cur_rb);
+                       adreno_dev->cur_rb,
+                       adreno_get_rptr(adreno_dev->next_rb),
+                       adreno_get_rptr(adreno_dev->cur_rb));
        adreno_dev->prev_rb = adreno_dev->cur_rb;
        adreno_dev->cur_rb = adreno_dev->next_rb;
        adreno_dev->cur_rb->preempted_midway = 0;
@@ -4017,7 +4017,8 @@ static void a5xx_preempt_complete_state(
                 * If the outgoing RB is has commands then set the
                 * busy time for it
                 */
-               if (adreno_dev->prev_rb->rptr != adreno_dev->prev_rb->wptr) {
+
+               if (!adreno_rb_empty(adreno_dev->prev_rb)) {
                        adreno_dev->prev_rb->starve_timer_state =
                                ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
                        adreno_dev->prev_rb->sched_timer = jiffies;
@@ -4043,13 +4044,10 @@ static void a5xx_preempt_complete_state(
        adreno_preempt_process_dispatch_queue(adreno_dev, dispatch_q);
 }
 
-static void a5xx_preemption_schedule(
-                               struct adreno_device *adreno_dev)
+static void a5xx_preemption_schedule(struct adreno_device *adreno_dev)
 {
-       struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
        struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
-       struct adreno_ringbuffer *rb;
-       int i = 0;
+       struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
 
        if (!adreno_is_preemption_enabled(adreno_dev))
                return;
@@ -4062,10 +4060,6 @@ static void a5xx_preemption_schedule(
         */
        smp_mb();
 
-       if (KGSL_STATE_ACTIVE == device->state)
-               FOR_EACH_RINGBUFFER(adreno_dev, rb, i)
-                       rb->rptr = adreno_get_rptr(rb);
-
        switch (atomic_read(&dispatcher->preemption_state)) {
        case ADRENO_DISPATCHER_PREEMPT_CLEAR:
                a5xx_preempt_clear_state(adreno_dev);
index 8140b72..a549d0d 100644 (file)
@@ -283,7 +283,8 @@ static void _retire_marker(struct kgsl_cmdbatch *cmdbatch)
        /* Retire pending GPU events for the object */
        kgsl_process_event_group(device, &context->events);
 
-       trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb);
+       trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb,
+                       adreno_get_rptr(drawctxt->rb));
        kgsl_cmdbatch_destroy(cmdbatch);
 }
 
@@ -616,7 +617,8 @@ static int sendcmd(struct adreno_device *adreno_dev,
        nsecs = do_div(secs, 1000000000);
 
        trace_adreno_cmdbatch_submitted(cmdbatch, (int) dispatcher->inflight,
-               time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb);
+               time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb,
+               adreno_get_rptr(drawctxt->rb));
 
        cmdbatch->submit_ticks = time.ticks;
 
@@ -935,16 +937,18 @@ static int get_timestamp(struct adreno_context *drawctxt,
 static void adreno_dispatcher_preempt_timer(unsigned long data)
 {
        struct adreno_device *adreno_dev = (struct adreno_device *) data;
+       struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
        struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+       unsigned int cur_rptr = adreno_get_rptr(adreno_dev->cur_rb);
+       unsigned int next_rptr = adreno_get_rptr(adreno_dev->cur_rb);
 
-       KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
+       KGSL_DRV_ERR(device,
        "Preemption timed out. cur_rb rptr/wptr %x/%x id %d, next_rb rptr/wptr %x/%x id %d, disp_state: %d\n",
-       adreno_dev->cur_rb->rptr, adreno_dev->cur_rb->wptr,
-       adreno_dev->cur_rb->id, adreno_dev->next_rb->rptr,
-       adreno_dev->next_rb->wptr, adreno_dev->next_rb->id,
+       cur_rptr, adreno_dev->cur_rb->wptr, adreno_dev->cur_rb->id,
+       next_rptr, adreno_dev->next_rb->wptr, adreno_dev->next_rb->id,
        atomic_read(&dispatcher->preemption_state));
        adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
-       adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+       adreno_dispatcher_schedule(device);
 }
 
 /**
@@ -957,9 +961,11 @@ struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb(
 {
        struct adreno_ringbuffer *rb, *highest_busy_rb = NULL;
        int i;
+       unsigned int rptr;
 
        FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
-               if (rb->rptr != rb->wptr && !highest_busy_rb) {
+               rptr = adreno_get_rptr(rb);
+               if (rptr != rb->wptr && !highest_busy_rb) {
                        highest_busy_rb = rb;
                        goto done;
                }
@@ -969,7 +975,7 @@ struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb(
 
                switch (rb->starve_timer_state) {
                case ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT:
-                       if (rb->rptr != rb->wptr &&
+                       if (rptr != rb->wptr &&
                                adreno_dev->cur_rb != rb) {
                                rb->starve_timer_state =
                                ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
@@ -991,7 +997,7 @@ struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb(
                         * If the RB has not been running for the minimum
                         * time slice then allow it to run
                         */
-                       if ((rb->rptr != rb->wptr) && time_before(jiffies,
+                       if ((rptr != rb->wptr) && time_before(jiffies,
                                adreno_dev->cur_rb->sched_timer +
                                msecs_to_jiffies(_dispatch_time_slice)))
                                highest_busy_rb = rb;
@@ -1437,7 +1443,7 @@ static void adreno_fault_header(struct kgsl_device *device,
                if (rb != NULL)
                        pr_fault(device, cmdbatch,
                                "gpu fault rb %d rb sw r/w %4.4x/%4.4x\n",
-                               rb->id, rb->rptr, rb->wptr);
+                               rb->id, rptr, rb->wptr);
        } else {
                int id = (rb != NULL) ? rb->id : -1;
 
@@ -1448,7 +1454,7 @@ static void adreno_fault_header(struct kgsl_device *device,
                if (rb != NULL)
                        dev_err(device->dev,
                                "RB[%d] gpu fault rb sw r/w %4.4x/%4.4x\n",
-                               rb->id, rb->rptr, rb->wptr);
+                               rb->id, rptr, rb->wptr);
        }
 }
 
@@ -1823,8 +1829,6 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev)
                if (base == rb->buffer_desc.gpuaddr) {
                        dispatch_q = &(rb->dispatch_q);
                        hung_rb = rb;
-                       adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
-                               &hung_rb->rptr);
                        if (adreno_dev->cur_rb != hung_rb) {
                                adreno_dev->prev_rb = adreno_dev->cur_rb;
                                adreno_dev->cur_rb = hung_rb;
@@ -1879,12 +1883,12 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev)
 
        if (hung_rb != NULL) {
                kgsl_sharedmem_writel(device, &device->memstore,
-                       KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_MAX + hung_rb->id,
-                               soptimestamp), hung_rb->timestamp);
+                               MEMSTORE_RB_OFFSET(hung_rb, soptimestamp),
+                               hung_rb->timestamp);
 
                kgsl_sharedmem_writel(device, &device->memstore,
-                       KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_MAX + hung_rb->id,
-                               eoptimestamp), hung_rb->timestamp);
+                               MEMSTORE_RB_OFFSET(hung_rb, eoptimestamp),
+                               hung_rb->timestamp);
 
                /* Schedule any pending events to be run */
                kgsl_process_event_group(device, &hung_rb->events);
@@ -2016,7 +2020,8 @@ int adreno_dispatch_process_cmdqueue(struct adreno_device *adreno_dev,
 
                        trace_adreno_cmdbatch_retired(cmdbatch,
                                (int) dispatcher->inflight, start_ticks,
-                               retire_ticks, ADRENO_CMDBATCH_RB(cmdbatch));
+                               retire_ticks, ADRENO_CMDBATCH_RB(cmdbatch),
+                               adreno_get_rptr(drawctxt->rb));
 
                        /* Record the delta between submit and retire ticks */
                        drawctxt->submit_retire_ticks[drawctxt->ticks_index] =
@@ -2589,7 +2594,9 @@ void adreno_dispatcher_preempt_callback(struct adreno_device *adreno_dev,
                return;
 
        trace_adreno_hw_preempt_trig_to_comp_int(adreno_dev->cur_rb,
-                             adreno_dev->next_rb);
+                             adreno_dev->next_rb,
+                             adreno_get_rptr(adreno_dev->cur_rb),
+                             adreno_get_rptr(adreno_dev->next_rb));
        atomic_set(&dispatcher->preemption_state,
                        ADRENO_DISPATCHER_PREEMPT_COMPLETE);
        adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
index 2eeda01..22dd8fe 100644 (file)
@@ -275,6 +275,7 @@ static bool _ctx_switch_use_cpu_path(
                                struct adreno_ringbuffer *rb)
 {
        struct kgsl_mmu *mmu = KGSL_MMU(adreno_dev);
+
        /*
         * If rb is current, we can use cpu path when GPU is
         * idle and we are switching to default pt.
@@ -284,7 +285,7 @@ static bool _ctx_switch_use_cpu_path(
        if (adreno_dev->cur_rb == rb)
                return adreno_isidle(KGSL_DEVICE(adreno_dev)) &&
                        (new_pt == mmu->defaultpagetable);
-       else if ((rb->wptr == rb->rptr) &&
+       else if (adreno_rb_empty(rb) &&
                        (new_pt == mmu->defaultpagetable))
                return true;
 
@@ -651,14 +652,14 @@ static unsigned int __add_curr_ctxt_cmds(struct adreno_ringbuffer *rb,
        *cmds++ = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
 
        *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
-       cmds += cp_gpuaddr(adreno_dev, cmds, device->memstore.gpuaddr +
-                          KGSL_MEMSTORE_RB_OFFSET(rb, current_context));
+       cmds += cp_gpuaddr(adreno_dev, cmds,
+                       MEMSTORE_RB_GPU_ADDR(device, rb, current_context));
        *cmds++ = (drawctxt ? drawctxt->base.id : 0);
 
        *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
-       cmds += cp_gpuaddr(adreno_dev, cmds, device->memstore.gpuaddr +
-                       KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-                       current_context));
+       cmds += cp_gpuaddr(adreno_dev, cmds,
+                       MEMSTORE_ID_GPU_ADDR(device,
+                               KGSL_MEMSTORE_GLOBAL, current_context));
        *cmds++ = (drawctxt ? drawctxt->base.id : 0);
 
        /* Invalidate UCHE for new context */
@@ -706,7 +707,7 @@ static void _set_ctxt_cpu(struct adreno_ringbuffer *rb,
        }
        /* Update rb memstore with current context */
        kgsl_sharedmem_writel(device, &device->memstore,
-               KGSL_MEMSTORE_RB_OFFSET(rb, current_context),
+               MEMSTORE_RB_OFFSET(rb, current_context),
                drawctxt ? drawctxt->base.id : 0);
 }
 
index dceb8fb..8f0c313 100644 (file)
@@ -279,8 +279,9 @@ int adreno_ringbuffer_start(struct adreno_device *adreno_dev,
        FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
                kgsl_sharedmem_set(device, &(rb->buffer_desc),
                                0, 0xAA, KGSL_RB_SIZE);
+               kgsl_sharedmem_writel(device, &device->scratch,
+                               SCRATCH_RPTR_OFFSET(rb->id), 0);
                rb->wptr = 0;
-               rb->rptr = 0;
                rb->wptr_preempt_end = 0xFFFFFFFF;
                rb->starve_timer_state =
                        ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
@@ -446,7 +447,6 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
        unsigned int total_sizedwords = sizedwords;
        unsigned int i;
        unsigned int context_id = 0;
-       uint64_t gpuaddr = device->memstore.gpuaddr;
        bool profile_ready;
        struct adreno_context *drawctxt = rb->drawctxt_active;
        struct kgsl_context *context = NULL;
@@ -565,9 +565,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
 
        if (adreno_is_preemption_enabled(adreno_dev) &&
                                gpudev->preemption_pre_ibsubmit) {
-               cond_addr = device->memstore.gpuaddr +
-                                       KGSL_MEMSTORE_OFFSET(context_id,
-                                        preempted);
+               cond_addr = MEMSTORE_ID_GPU_ADDR(device, context_id, preempted);
                ringcmds += gpudev->preemption_pre_ibsubmit(
                                        adreno_dev, rb, ringcmds, context,
                                        cond_addr, NULL);
@@ -605,11 +603,10 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
        *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
        if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
                ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
-                       gpuaddr + KGSL_MEMSTORE_OFFSET(context_id,
-                       soptimestamp));
+                       MEMSTORE_ID_GPU_ADDR(device, context_id, soptimestamp));
        else
                ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
-                       gpuaddr + KGSL_MEMSTORE_RB_OFFSET(rb, soptimestamp));
+                       MEMSTORE_ID_GPU_ADDR(device, context_id, soptimestamp));
        *ringcmds++ = timestamp;
 
        if (secured_ctxt)
@@ -660,9 +657,9 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
         * off system collapse.
         */
        *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
-       ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr +
-                       KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-                               ref_wait_ts));
+       ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+               MEMSTORE_ID_GPU_ADDR(device, KGSL_MEMSTORE_GLOBAL,
+                       ref_wait_ts));
        *ringcmds++ = ++_seq_cnt;
 
        /*
@@ -677,16 +674,16 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
                *ringcmds++ = CACHE_FLUSH_TS;
 
        if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
-               ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr +
-                               KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp));
+               ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+                       MEMSTORE_ID_GPU_ADDR(device, context_id, eoptimestamp));
                *ringcmds++ = timestamp;
                *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
-               ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr +
-                               KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp));
+               ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+                       MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp));
                *ringcmds++ = rb->timestamp;
        } else {
-               ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr +
-                               KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp));
+               ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+                       MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp));
                *ringcmds++ = timestamp;
        }
 
index f1980fd..66cc768 100644 (file)
@@ -78,7 +78,6 @@ struct adreno_ringbuffer_pagetable_info {
  * @flags: Internal control flags for the ringbuffer
  * @buffer_desc: Pointer to the ringbuffer memory descriptor
  * @wptr: Local copy of the wptr offset
- * @rptr: Read pointer offset in dwords from baseaddr
  * @last_wptr: offset of the last H/W committed wptr
  * @rb_ctx: The context that represents a ringbuffer
  * @id: Priority level of the ringbuffer, also used as an ID
@@ -106,7 +105,6 @@ struct adreno_ringbuffer {
        uint32_t flags;
        struct kgsl_memdesc buffer_desc;
        unsigned int wptr;
-       unsigned int rptr;
        unsigned int last_wptr;
        int id;
        unsigned int fault_detect_ts;
@@ -127,9 +125,6 @@ struct adreno_ringbuffer {
 /* Returns the current ringbuffer */
 #define ADRENO_CURRENT_RINGBUFFER(a)   ((a)->cur_rb)
 
-#define KGSL_MEMSTORE_RB_OFFSET(rb, field)     \
-       KGSL_MEMSTORE_OFFSET((rb->id + KGSL_MEMSTORE_MAX), field)
-
 int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds, int set);
 
 int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
index ca61d36..6bdf731 100644 (file)
@@ -467,7 +467,7 @@ static size_t snapshot_rb(struct kgsl_device *device, u8 *buf,
        header->start = 0;
        header->end = KGSL_RB_DWORDS;
        header->wptr = rb->wptr;
-       header->rptr = rb->rptr;
+       header->rptr = adreno_get_rptr(rb);
        header->rbsize = KGSL_RB_DWORDS;
        header->count = KGSL_RB_DWORDS;
        adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_QUEUED,
index 5f1bbb9..0109a4c 100644 (file)
@@ -55,8 +55,8 @@ TRACE_EVENT(adreno_cmdbatch_queued,
 TRACE_EVENT(adreno_cmdbatch_submitted,
        TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight, uint64_t ticks,
                unsigned long secs, unsigned long usecs,
-               struct adreno_ringbuffer *rb),
-       TP_ARGS(cmdbatch, inflight, ticks, secs, usecs, rb),
+               struct adreno_ringbuffer *rb, unsigned int rptr),
+       TP_ARGS(cmdbatch, inflight, ticks, secs, usecs, rb, rptr),
        TP_STRUCT__entry(
                __field(unsigned int, id)
                __field(unsigned int, timestamp)
@@ -81,7 +81,7 @@ TRACE_EVENT(adreno_cmdbatch_submitted,
                __entry->usecs = usecs;
                __entry->prio = cmdbatch->context->priority;
                __entry->rb_id = rb->id;
-               __entry->rptr = rb->rptr;
+               __entry->rptr = rptr;
                __entry->wptr = rb->wptr;
                __entry->q_inflight = rb->dispatch_q.inflight;
        ),
@@ -100,8 +100,8 @@ TRACE_EVENT(adreno_cmdbatch_submitted,
 TRACE_EVENT(adreno_cmdbatch_retired,
        TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight,
                uint64_t start, uint64_t retire,
-               struct adreno_ringbuffer *rb),
-       TP_ARGS(cmdbatch, inflight, start, retire, rb),
+               struct adreno_ringbuffer *rb, unsigned int rptr),
+       TP_ARGS(cmdbatch, inflight, start, retire, rb, rptr),
        TP_STRUCT__entry(
                __field(unsigned int, id)
                __field(unsigned int, timestamp)
@@ -126,7 +126,7 @@ TRACE_EVENT(adreno_cmdbatch_retired,
                __entry->retire = retire;
                __entry->prio = cmdbatch->context->priority;
                __entry->rb_id = rb->id;
-               __entry->rptr = rb->rptr;
+               __entry->rptr = rptr;
                __entry->wptr = rb->wptr;
                __entry->q_inflight = rb->dispatch_q.inflight;
        ),
@@ -427,8 +427,9 @@ TRACE_EVENT(kgsl_a5xx_irq_status,
 
 DECLARE_EVENT_CLASS(adreno_hw_preempt_template,
        TP_PROTO(struct adreno_ringbuffer *cur_rb,
-               struct adreno_ringbuffer *new_rb),
-       TP_ARGS(cur_rb, new_rb),
+               struct adreno_ringbuffer *new_rb,
+               unsigned int cur_rptr, unsigned int new_rptr),
+       TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr),
        TP_STRUCT__entry(__field(int, cur_level)
                        __field(int, new_level)
                        __field(unsigned int, cur_rptr)
@@ -440,8 +441,8 @@ DECLARE_EVENT_CLASS(adreno_hw_preempt_template,
        ),
        TP_fast_assign(__entry->cur_level = cur_rb->id;
                        __entry->new_level = new_rb->id;
-                       __entry->cur_rptr = cur_rb->rptr;
-                       __entry->new_rptr = new_rb->rptr;
+                       __entry->cur_rptr = cur_rptr;
+                       __entry->new_rptr = new_rptr;
                        __entry->cur_wptr = cur_rb->wptr;
                        __entry->new_wptr = new_rb->wptr;
                        __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr;
@@ -458,26 +459,30 @@ DECLARE_EVENT_CLASS(adreno_hw_preempt_template,
 
 DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_clear_to_trig,
        TP_PROTO(struct adreno_ringbuffer *cur_rb,
-               struct adreno_ringbuffer *new_rb),
-       TP_ARGS(cur_rb, new_rb)
+               struct adreno_ringbuffer *new_rb,
+               unsigned int cur_rptr, unsigned int new_rptr),
+       TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr)
 );
 
 DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp,
        TP_PROTO(struct adreno_ringbuffer *cur_rb,
-               struct adreno_ringbuffer *new_rb),
-       TP_ARGS(cur_rb, new_rb)
+               struct adreno_ringbuffer *new_rb,
+               unsigned int cur_rptr, unsigned int new_rptr),
+       TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr)
 );
 
 DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp_int,
        TP_PROTO(struct adreno_ringbuffer *cur_rb,
-               struct adreno_ringbuffer *new_rb),
-       TP_ARGS(cur_rb, new_rb)
+               struct adreno_ringbuffer *new_rb,
+               unsigned int cur_rptr, unsigned int new_rptr),
+       TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr)
 );
 
 TRACE_EVENT(adreno_hw_preempt_comp_to_clear,
        TP_PROTO(struct adreno_ringbuffer *cur_rb,
-               struct adreno_ringbuffer *new_rb),
-       TP_ARGS(cur_rb, new_rb),
+               struct adreno_ringbuffer *new_rb,
+               unsigned int cur_rptr, unsigned int new_rptr),
+       TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr),
        TP_STRUCT__entry(__field(int, cur_level)
                        __field(int, new_level)
                        __field(unsigned int, cur_rptr)
@@ -490,8 +495,8 @@ TRACE_EVENT(adreno_hw_preempt_comp_to_clear,
        ),
        TP_fast_assign(__entry->cur_level = cur_rb->id;
                        __entry->new_level = new_rb->id;
-                       __entry->cur_rptr = cur_rb->rptr;
-                       __entry->new_rptr = new_rb->rptr;
+                       __entry->cur_rptr = cur_rptr;
+                       __entry->new_rptr = new_rptr;
                        __entry->cur_wptr = cur_rb->wptr;
                        __entry->new_wptr_end = new_rb->wptr_preempt_end;
                        __entry->new_wptr = new_rb->wptr;
@@ -509,8 +514,9 @@ TRACE_EVENT(adreno_hw_preempt_comp_to_clear,
 
 TRACE_EVENT(adreno_hw_preempt_token_submit,
        TP_PROTO(struct adreno_ringbuffer *cur_rb,
-               struct adreno_ringbuffer *new_rb),
-       TP_ARGS(cur_rb, new_rb),
+               struct adreno_ringbuffer *new_rb,
+               unsigned int cur_rptr, unsigned int new_rptr),
+       TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr),
        TP_STRUCT__entry(__field(int, cur_level)
                __field(int, new_level)
                __field(unsigned int, cur_rptr)
@@ -523,8 +529,8 @@ TRACE_EVENT(adreno_hw_preempt_token_submit,
        ),
        TP_fast_assign(__entry->cur_level = cur_rb->id;
                        __entry->new_level = new_rb->id;
-                       __entry->cur_rptr = cur_rb->rptr;
-                       __entry->new_rptr = new_rb->rptr;
+                       __entry->cur_rptr = cur_rptr;
+                       __entry->new_rptr = new_rptr;
                        __entry->cur_wptr = cur_rb->wptr;
                        __entry->cur_wptr_end = cur_rb->wptr_preempt_end;
                        __entry->new_wptr = new_rb->wptr;
@@ -541,23 +547,6 @@ TRACE_EVENT(adreno_hw_preempt_token_submit,
        )
 );
 
-TRACE_EVENT(adreno_rb_starve,
-       TP_PROTO(struct adreno_ringbuffer *rb),
-       TP_ARGS(rb),
-       TP_STRUCT__entry(__field(int, id)
-               __field(unsigned int, rptr)
-               __field(unsigned int, wptr)
-       ),
-       TP_fast_assign(__entry->id = rb->id;
-               __entry->rptr = rb->rptr;
-               __entry->wptr = rb->wptr;
-       ),
-       TP_printk(
-               "rb %d r/w %x/%x starved", __entry->id, __entry->rptr,
-               __entry->wptr
-       )
-);
-
 #endif /* _ADRENO_TRACE_H */
 
 /* This part must be outside protection */
index 2563591..7902480 100644 (file)
@@ -1150,6 +1150,8 @@ static int kgsl_open_device(struct kgsl_device *device)
                atomic_inc(&device->active_cnt);
                kgsl_sharedmem_set(device, &device->memstore, 0, 0,
                                device->memstore.size);
+               kgsl_sharedmem_set(device, &device->scratch, 0, 0,
+                               device->scratch.size);
 
                result = device->ftbl->init(device);
                if (result)
@@ -3910,11 +3912,13 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
        status = kgsl_allocate_global(device, &device->memstore,
                KGSL_MEMSTORE_SIZE, 0, 0);
 
-       if (status != 0) {
-               KGSL_DRV_ERR(device, "kgsl_allocate_global failed %d\n",
-                               status);
+       if (status != 0)
                goto error_close_mmu;
-       }
+
+       status = kgsl_allocate_global(device, &device->scratch,
+               PAGE_SIZE, 0, 0);
+       if (status != 0)
+               goto error_free_memstore;
 
        /*
         * The default request type PM_QOS_REQ_ALL_CORES is
@@ -3964,6 +3968,8 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
 
        return 0;
 
+error_free_memstore:
+       kgsl_free_global(device, &device->memstore);
 error_close_mmu:
        kgsl_mmu_close(device);
 error_pwrctrl_close:
@@ -3990,6 +3996,8 @@ void kgsl_device_platform_remove(struct kgsl_device *device)
 
        idr_destroy(&device->context_idr);
 
+       kgsl_free_global(device, &device->scratch);
+
        kgsl_free_global(device, &device->memstore);
 
        kgsl_mmu_close(device);
index dfe83be..ad1a8f1 100644 (file)
 #define KGSL_MEMSTORE_MAX      (KGSL_MEMSTORE_SIZE / \
        sizeof(struct kgsl_devmemstore) - 1 - KGSL_PRIORITY_MAX_RB_LEVELS)
 
+#define MEMSTORE_RB_OFFSET(rb, field)  \
+       KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field)
+
+#define MEMSTORE_ID_GPU_ADDR(dev, iter, field) \
+       ((dev)->memstore.gpuaddr + KGSL_MEMSTORE_OFFSET(iter, field))
+
+#define MEMSTORE_RB_GPU_ADDR(dev, rb, field)   \
+       ((dev)->memstore.gpuaddr + \
+        KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field))
+
+/*
+ * SCRATCH MEMORY: The scratch memory is one page worth of data that
+ * is mapped into the GPU. This allows for some 'shared' data between
+ * the GPU and CPU. For example, it will be used by the GPU to write
+ * each updated RPTR for each RB.
+ *
+ * Used Data:
+ * Offset: Length(bytes): What
 * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: One RPTR slot per RB (RB0..RBn)
+ */
+
+/* Scratch memory helpers: per-RB RPTR shadow written by the GPU */
+#define SCRATCH_RPTR_OFFSET(id) ((id) * sizeof(unsigned int))
+#define SCRATCH_RPTR_GPU_ADDR(dev, id) \
+       ((dev)->scratch.gpuaddr + SCRATCH_RPTR_OFFSET(id))
+
 /* Timestamp window used to detect rollovers (half of integer range) */
 #define KGSL_TIMESTAMP_WINDOW 0x80000000
 
index c3fb2b8..4159a5f 100644 (file)
@@ -227,6 +227,7 @@ struct kgsl_device {
        /* GPU shader memory size */
        unsigned int shader_mem_len;
        struct kgsl_memdesc memstore;
+       struct kgsl_memdesc scratch;
        const char *iomemname;
        const char *shadermemname;