From 940aece471bd6656b86f5d77132b6670a3b88dc8 Mon Sep 17 00:00:00 2001 From: Deepak S Date: Sat, 23 Nov 2013 14:55:43 +0530 Subject: [PATCH] drm/i915/vlv: Valleyview support for forcewake Individual power wells. Split vlv force wake routines to help individually control Media/Render well based on the register access. We've seen power savings in the lower sub-1W range on workloads that only need one of the power wells, e.g. glbenchmark, media playback. Note: The same split isn't there for the forcewake queue, only the forcewake domains are split. Signed-off-by: Deepak S Reviewed-by: Jesse Barnes [danvet: Rebase on top of the removed forcewake hack in the ring irq get/put code and add a note to add Deepak's answer to Chris' question.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 17 ++++ drivers/gpu/drm/i915/intel_pm.c | 7 +- drivers/gpu/drm/i915/intel_uncore.c | 183 ++++++++++++++++++++++++++++++------ 3 files changed, 176 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9b219d3bf233..6b18b4714d7e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -465,6 +465,9 @@ struct intel_uncore { unsigned fifo_count; unsigned forcewake_count; + unsigned fw_rendercount; + unsigned fw_mediacount; + struct delayed_work force_wake_work; }; @@ -2470,6 +2473,20 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val); int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val); +void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine); +void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine); + +#define FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg) \ + (((reg) >= 0x2000 && (reg) < 0x4000) ||\ + ((reg) >= 0x5000 && (reg) < 0x8000) ||\ + ((reg) >= 0xB000 && (reg) < 0x12000) ||\ + ((reg) >= 0x2E000 && (reg) < 0x30000)) + +#define 
FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)\ + (((reg) >= 0x12000 && (reg) < 0x14000) ||\ + ((reg) >= 0x22000 && (reg) < 0x24000) ||\ + ((reg) >= 0x30000 && (reg) < 0x40000)) + #define FORCEWAKE_RENDER (1 << 0) #define FORCEWAKE_MEDIA (1 << 1) #define FORCEWAKE_ALL (FORCEWAKE_RENDER | FORCEWAKE_MEDIA) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fd2537d429f2..1659265a7f7a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -191,7 +191,10 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev) u32 blt_ecoskpd; /* Make sure blitter notifies FBC of writes */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + /* Blitter is part of Media powerwell on VLV. No impact of + * this param in other platforms for now */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA); blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD); blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY << @@ -204,7 +207,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev) I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); POSTING_READ(GEN6_BLITTER_ECOSKPD); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA); } static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d7c3f5effc79..d002e30aaae0 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -175,38 +175,112 @@ static void vlv_force_wake_reset(struct drm_i915_private *dev_priv) __raw_posting_read(dev_priv, FORCEWAKE_ACK_VLV); } -static void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine) -{ - if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0, - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n"); - - __raw_i915_write32(dev_priv, FORCEWAKE_VLV, - 
_MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); - __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, - _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); - - if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n"); +static void __vlv_force_wake_get(struct drm_i915_private *dev_priv, + int fw_engine) +{ + /* Render well requested: wait for the old ack to clear, request the wake, wait for the new ack */ + if (FORCEWAKE_RENDER & fw_engine) { + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_VLV) & + FORCEWAKE_KERNEL) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: Render forcewake old ack to clear.\n"); + + __raw_i915_write32(dev_priv, FORCEWAKE_VLV, + _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_VLV) & + FORCEWAKE_KERNEL), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: waiting for Render to ack.\n"); + } - if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) & - FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Timed out waiting for media to ack forcewake request.\n"); + /* Media well requested: same handshake against the media forcewake and ack registers */ + if (FORCEWAKE_MEDIA & fw_engine) { + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_MEDIA_VLV) & + FORCEWAKE_KERNEL) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: Media forcewake old ack to clear.\n"); + + __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, + _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_MEDIA_VLV) & + FORCEWAKE_KERNEL), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: waiting for media to ack.\n"); + } /* WaRsForcewakeWaitTC0:vlv */ __gen6_gt_wait_for_thread_c0(dev_priv); + } -static void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) +static void __vlv_force_wake_put(struct drm_i915_private *dev_priv, + int fw_engine) { - __raw_i915_write32(dev_priv, FORCEWAKE_VLV, - 
_MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); - __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, - _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + + /* Render well requested: clear the kernel wake request bit */ + if (FORCEWAKE_RENDER & fw_engine) + __raw_i915_write32(dev_priv, FORCEWAKE_VLV, + _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + + + /* Media well requested: clear the kernel wake request bit */ + if (FORCEWAKE_MEDIA & fw_engine) + __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, + _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + /* The below doubles as a POSTING_READ */ gen6_gt_check_fifodbg(dev_priv); + +} + +void vlv_force_wake_get(struct drm_i915_private *dev_priv, + int fw_engine) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + if (FORCEWAKE_RENDER & fw_engine) { + if (dev_priv->uncore.fw_rendercount++ == 0) /* count went 0 -> 1: wake the render well */ + dev_priv->uncore.funcs.force_wake_get(dev_priv, + FORCEWAKE_RENDER); + } + if (FORCEWAKE_MEDIA & fw_engine) { + if (dev_priv->uncore.fw_mediacount++ == 0) /* count went 0 -> 1: wake the media well */ + dev_priv->uncore.funcs.force_wake_get(dev_priv, + FORCEWAKE_MEDIA); + } + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); +} + +void vlv_force_wake_put(struct drm_i915_private *dev_priv, + int fw_engine) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + if (FORCEWAKE_RENDER & fw_engine) { + WARN_ON(dev_priv->uncore.fw_rendercount == 0); + if (--dev_priv->uncore.fw_rendercount == 0) /* count dropped to 0: release the render well */ + dev_priv->uncore.funcs.force_wake_put(dev_priv, + FORCEWAKE_RENDER); + } + + if (FORCEWAKE_MEDIA & fw_engine) { + WARN_ON(dev_priv->uncore.fw_mediacount == 0); + if (--dev_priv->uncore.fw_mediacount == 0) /* count dropped to 0: release the media well */ + dev_priv->uncore.funcs.force_wake_put(dev_priv, + FORCEWAKE_MEDIA); + } + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void gen6_force_wake_work(struct work_struct *work) @@ -290,6 +364,10 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine) if (!dev_priv->uncore.funcs.force_wake_get) return; + /* Redirect to VLV specific routine */ + if 
(IS_VALLEYVIEW(dev_priv->dev)) + return vlv_force_wake_get(dev_priv, fw_engine); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (dev_priv->uncore.forcewake_count++ == 0) dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); @@ -306,6 +384,11 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) if (!dev_priv->uncore.funcs.force_wake_put) return; + /* Redirect to VLV specific routine */ + if (IS_VALLEYVIEW(dev_priv->dev)) + return vlv_force_wake_put(dev_priv, fw_engine); + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (--dev_priv->uncore.forcewake_count == 0) { dev_priv->uncore.forcewake_count++; @@ -393,6 +476,39 @@ gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ REG_READ_FOOTER; \ } +#define __vlv_read(x) /* VLV MMIO read: wakes only the well whose range contains reg */ \ +static u##x \ +vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ + unsigned fwengine = 0; \ + unsigned *fwcount = NULL; \ + REG_READ_HEADER(x); \ + if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) { \ + fwengine = FORCEWAKE_RENDER; \ + fwcount = &dev_priv->uncore.fw_rendercount; \ + } \ + else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) { \ + fwengine = FORCEWAKE_MEDIA; \ + fwcount = &dev_priv->uncore.fw_mediacount; \ + } \ + if (fwengine != 0) { /* reg is in a forcewake range: hold that well's refcount around the read */ \ + if ((*fwcount)++ == 0) \ + (dev_priv)->uncore.funcs.force_wake_get(dev_priv, \ + fwengine); \ + val = __raw_i915_read##x(dev_priv, reg); \ + if (--(*fwcount) == 0) \ + (dev_priv)->uncore.funcs.force_wake_put(dev_priv, \ + fwengine); /* release only the well taken above; FORCEWAKE_ALL would also drop the other well while its count may be held */ \ + } else { \ + val = __raw_i915_read##x(dev_priv, reg); \ + } \ + REG_READ_FOOTER; \ +} + + +__vlv_read(8) +__vlv_read(16) +__vlv_read(32) +__vlv_read(64) __gen6_read(8) __gen6_read(16) __gen6_read(32) @@ -406,6 +522,7 @@ __gen4_read(16) __gen4_read(32) __gen4_read(64) +#undef __vlv_read #undef __gen6_read #undef __gen5_read #undef __gen4_read @@ -540,8 +657,8 @@ void intel_uncore_init(struct drm_device *dev) gen6_force_wake_work); if (IS_VALLEYVIEW(dev)) { - 
dev_priv->uncore.funcs.force_wake_get = vlv_force_wake_get; - dev_priv->uncore.funcs.force_wake_put = vlv_force_wake_put; + dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get; + dev_priv->uncore.funcs.force_wake_put = __vlv_force_wake_put; } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { dev_priv->uncore.funcs.force_wake_get = __gen6_gt_force_wake_mt_get; dev_priv->uncore.funcs.force_wake_put = __gen6_gt_force_wake_mt_put; @@ -607,10 +724,18 @@ void intel_uncore_init(struct drm_device *dev) dev_priv->uncore.funcs.mmio_writel = gen6_write32; dev_priv->uncore.funcs.mmio_writeq = gen6_write64; } - dev_priv->uncore.funcs.mmio_readb = gen6_read8; - dev_priv->uncore.funcs.mmio_readw = gen6_read16; - dev_priv->uncore.funcs.mmio_readl = gen6_read32; - dev_priv->uncore.funcs.mmio_readq = gen6_read64; + + if (IS_VALLEYVIEW(dev)) { + dev_priv->uncore.funcs.mmio_readb = vlv_read8; + dev_priv->uncore.funcs.mmio_readw = vlv_read16; + dev_priv->uncore.funcs.mmio_readl = vlv_read32; + dev_priv->uncore.funcs.mmio_readq = vlv_read64; + } else { + dev_priv->uncore.funcs.mmio_readb = gen6_read8; + dev_priv->uncore.funcs.mmio_readw = gen6_read16; + dev_priv->uncore.funcs.mmio_readl = gen6_read32; + dev_priv->uncore.funcs.mmio_readq = gen6_read64; + } break; case 5: dev_priv->uncore.funcs.mmio_writeb = gen5_write8; -- 2.11.0