/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"

/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}

static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
	u32 mask = 0;

	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
	if (val > rps->min_freq_softlimit)
		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
			 GEN6_PM_RP_DOWN_THRESHOLD |
			 GEN6_PM_RP_DOWN_TIMEOUT);

	if (val < rps->max_freq_softlimit)
		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;

	mask &= READ_ONCE(rps->pm_events);

	return rps_pm_sanitize_mask(rps, ~mask);
}

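/*
 * Sketch of the mask semantics (illustrative, not from the original
 * source): with softlimits [min, max] = [5, 20] and val = 12, both the
 * up and the down event bits are gathered into @mask since we may move
 * in either direction.  GEN6_PMINTRMSK is a mask register (a set bit
 * *disables* the interrupt), hence the final ~mask: everything we did
 * not collect, or that pm_events does not advertise, stays masked.
 */
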
static void rps_reset_ei(struct intel_rps *rps)
{
	memset(&rps->ei, 0, sizeof(rps->ei));
}

static void rps_enable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);
	u32 events;

	rps_reset_ei(rps);

	if (IS_VALLEYVIEW(gt->i915))
		/* WaGsvRC0ResidencyMethod:vlv */
		events = GEN6_PM_RP_UP_EI_EXPIRED;
	else
		events = (GEN6_PM_RP_UP_THRESHOLD |
			  GEN6_PM_RP_DOWN_THRESHOLD |
			  GEN6_PM_RP_DOWN_TIMEOUT);

	WRITE_ONCE(rps->pm_events, events);
	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_enable_irq(gt, rps->pm_events);
	spin_unlock_irq(&gt->irq_lock);

	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
}

static void gen6_rps_reset_interrupts(struct intel_rps *rps)
{
	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
}

static void gen11_rps_reset_interrupts(struct intel_rps *rps)
{
	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
		;
}

static void rps_reset_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	spin_lock_irq(&gt->irq_lock);
	if (INTEL_GEN(gt->i915) >= 11)
		gen11_rps_reset_interrupts(rps);
	else
		gen6_rps_reset_interrupts(rps);

	rps->pm_iir = 0;
	spin_unlock_irq(&gt->irq_lock);
}

static void rps_disable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	WRITE_ONCE(rps->pm_events, 0);
	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));

	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
	spin_unlock_irq(&gt->irq_lock);

	intel_synchronize_irq(gt->i915);

	/*
	 * Now that we will not be generating any more work, flush any
	 * outstanding tasks. As we are called on the RPS idle path,
	 * we will reset the GPU to minimum frequencies, so the current
	 * state of the worker can be discarded.
	 */
	cancel_work_sync(&rps->work);

	rps_reset_interrupts(rps);
}

static const struct cparams {
	u16 i;
	u16 t;
	u16 m;
	u16 c;
} cparams[] = {
	{ 1, 1333, 301, 28664 },
	{ 1, 1066, 294, 24460 },
	{ 1, 800, 294, 25192 },
	{ 0, 1333, 276, 27605 },
	{ 0, 1066, 276, 27605 },
	{ 0, 800, 231, 23784 },
};

static void gen5_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u8 fmax, fmin, fstart;
	u32 rgvmodectl;
	int c_m, i;

	if (i915->fsb_freq <= 3200)
		c_m = 0;
	else if (i915->fsb_freq <= 4800)
		c_m = 1;
	else
		c_m = 2;

	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
			rps->ips.m = cparams[i].m;
			rps->ips.c = cparams[i].c;
			break;
		}
	}

	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);

	/* Set up min, max, and cur for interrupt handling */
	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
		MEMMODE_FSTART_SHIFT;
	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
			 fmax, fmin, fstart);

	rps->min_freq = fmax;
	rps->max_freq = fmin;

	rps->idle_freq = rps->min_freq;
	rps->cur_freq = rps->idle_freq;
}

static unsigned long
__ips_chipset_val(struct intel_ips *ips)
{
	struct intel_uncore *uncore =
		rps_to_uncore(container_of(ips, struct intel_rps, ips));
	unsigned long now = jiffies_to_msecs(jiffies), dt;
	unsigned long result;
	u64 total, delta;

	lockdep_assert_held(&mchdev_lock);

	/*
	 * Prevent division-by-zero if we are asking too fast.
	 * Also, we don't get interesting results if we are polling
	 * faster than once in 10ms, so just return the saved value
	 * in such cases.
	 */
	dt = now - ips->last_time1;
	if (dt <= 10)
		return ips->chipset_power;

	/* FIXME: handle per-counter overflow */
	total = intel_uncore_read(uncore, DMIEC);
	total += intel_uncore_read(uncore, DDREC);
	total += intel_uncore_read(uncore, CSIEC);

	delta = total - ips->last_count1;

	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);

	ips->last_count1 = total;
	ips->last_time1 = now;

	ips->chipset_power = result;

	return result;
}

static unsigned long ips_mch_val(struct intel_uncore *uncore)
{
	unsigned int m, x, b;
	u32 tsfs;

	tsfs = intel_uncore_read(uncore, TSFS);
	x = intel_uncore_read8(uncore, TR1);

	b = tsfs & TSFS_INTR_MASK;
	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;

	return m * x / 127 - b;
}

static int _pxvid_to_vd(u8 pxvid)
{
	if (pxvid == 0)
		return 0;

	if (pxvid >= 8 && pxvid < 31)
		pxvid = 31;

	return (pxvid + 2) * 125;
}

static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
{
	const int vd = _pxvid_to_vd(pxvid);

	if (INTEL_INFO(i915)->is_mobile)
		return max(vd - 1125, 0);

	return vd;
}

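/*
 * Worked example (illustrative): pxvid = 0x11 (17) lands in the
 * [8, 31) dead band and is bumped to 31, so vd = (31 + 2) * 125 = 4125;
 * mobile parts then subtract the 1125 offset, giving 3000, while
 * desktop parts return vd unchanged.
 */
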
static void __gen5_ips_update(struct intel_ips *ips)
{
	struct intel_uncore *uncore =
		rps_to_uncore(container_of(ips, struct intel_rps, ips));
	u64 now, delta, dt;
	u32 count;

	lockdep_assert_held(&mchdev_lock);

	now = ktime_get_raw_ns();
	dt = now - ips->last_time2;
	do_div(dt, NSEC_PER_MSEC);

	/* Don't divide by 0 */
	if (dt <= 10)
		return;

	count = intel_uncore_read(uncore, GFXEC);
	delta = count - ips->last_count2;

	ips->last_count2 = count;
	ips->last_time2 = now;

	/* More magic constants... */
	ips->gfx_power = div_u64(delta * 1181, dt * 10);
}

static void gen5_rps_update(struct intel_rps *rps)
{
	spin_lock_irq(&mchdev_lock);
	__gen5_ips_update(&rps->ips);
	spin_unlock_irq(&mchdev_lock);
}

static bool gen5_rps_set(struct intel_rps *rps, u8 val)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u16 rgvswctl;

	lockdep_assert_held(&mchdev_lock);

	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
	if (rgvswctl & MEMCTL_CMD_STS) {
		DRM_DEBUG("gpu busy, RCS change rejected\n");
		return false; /* still busy with another command */
	}

	/* Invert the frequency bin into an ips delay */
	val = rps->max_freq - val;
	val = rps->min_freq + val;

	rgvswctl =
		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
		(val << MEMCTL_FREQ_SHIFT) |
		MEMCTL_SFCAVM;
	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
	intel_uncore_posting_read16(uncore, MEMSWCTL);

	rgvswctl |= MEMCTL_CMD_STS;
	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);

	return true;
}

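/*
 * Note on the inversion above (illustrative): gen5 programs a delay bin
 * rather than a frequency, which is also why gen5_rps_init() assigns
 * min_freq = fmax and max_freq = fmin.  E.g. with min_freq = 1,
 * max_freq = 10 and val = 7, the register is written with
 * 1 + (10 - 7) = 4, so a faster request maps to a smaller delay.
 */
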
static unsigned long intel_pxfreq(u32 vidfreq)
{
	int div = (vidfreq & 0x3f0000) >> 16;
	int post = (vidfreq & 0x3000) >> 12;
	int pre = (vidfreq & 0x7);

	if (!pre)
		return 0;

	return div * 133333 / (pre << post);
}

static unsigned int init_emon(struct intel_uncore *uncore)
{
	u8 pxw[16];
	int i;

	/* Disable to program */
	intel_uncore_write(uncore, ECR, 0);
	intel_uncore_posting_read(uncore, ECR);

	/* Program energy weights for various events */
	intel_uncore_write(uncore, SDEW, 0x15040d00);
	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
	intel_uncore_write(uncore, CSIEW2, 0x04000004);

	for (i = 0; i < 5; i++)
		intel_uncore_write(uncore, PEW(i), 0);
	for (i = 0; i < 3; i++)
		intel_uncore_write(uncore, DEW(i), 0);

	/* Program P-state weights to account for frequency power adjustment */
	for (i = 0; i < 16; i++) {
		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
		unsigned int freq = intel_pxfreq(pxvidfreq);
		unsigned int vid =
			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
		unsigned int val;

		val = vid * vid * freq / 1000 * 255;
		val /= 127 * 127 * 900;

		pxw[i] = val;
	}
	/* Render standby states get 0 weight */
	pxw[14] = 0;
	pxw[15] = 0;

	for (i = 0; i < 4; i++) {
		intel_uncore_write(uncore, PXW(i),
				   pxw[i * 4 + 0] << 24 |
				   pxw[i * 4 + 1] << 16 |
				   pxw[i * 4 + 2] << 8 |
				   pxw[i * 4 + 3] << 0);
	}

	/* Adjust magic regs to magic values (more experimental results) */
	intel_uncore_write(uncore, OGW0, 0);
	intel_uncore_write(uncore, OGW1, 0);
	intel_uncore_write(uncore, EG0, 0x00007f00);
	intel_uncore_write(uncore, EG1, 0x0000000e);
	intel_uncore_write(uncore, EG2, 0x000e0000);
	intel_uncore_write(uncore, EG3, 0x68000300);
	intel_uncore_write(uncore, EG4, 0x42000000);
	intel_uncore_write(uncore, EG5, 0x00140031);
	intel_uncore_write(uncore, EG6, 0);
	intel_uncore_write(uncore, EG7, 0);

	for (i = 0; i < 8; i++)
		intel_uncore_write(uncore, PXWL(i), 0);

	/* Enable PMON + select events */
	intel_uncore_write(uncore, ECR, 0x80000019);

	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
}

static bool gen5_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u8 fstart, vstart;
	u32 rgvmodectl;

	spin_lock_irq(&mchdev_lock);

	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);

	/* Enable temp reporting */
	intel_uncore_write16(uncore, PMMISC,
			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
	intel_uncore_write16(uncore, TSC1,
			     intel_uncore_read16(uncore, TSC1) | TSE);

	/* 100ms RC evaluation intervals */
	intel_uncore_write(uncore, RCUPEI, 100000);
	intel_uncore_write(uncore, RCDNEI, 100000);

	/* Set max/min thresholds to 90ms and 80ms respectively */
	intel_uncore_write(uncore, RCBMAXAVG, 90000);
	intel_uncore_write(uncore, RCBMINAVG, 80000);

	intel_uncore_write(uncore, MEMIHYST, 1);

	/* Set up min, max, and cur for interrupt handling */
	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
		MEMMODE_FSTART_SHIFT;

	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;

	intel_uncore_write(uncore,
			   MEMINTREN,
			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);

	intel_uncore_write(uncore, VIDSTART, vstart);
	intel_uncore_posting_read(uncore, VIDSTART);

	rgvmodectl |= MEMMODE_SWMODE_EN;
	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);

	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
			     MEMCTL_CMD_STS) == 0, 10))
		DRM_ERROR("stuck trying to change perf mode\n");
	mdelay(1);

	gen5_rps_set(rps, rps->cur_freq);

	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
	rps->ips.last_time1 = jiffies_to_msecs(jiffies);

	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
	rps->ips.last_time2 = ktime_get_raw_ns();

	spin_unlock_irq(&mchdev_lock);

	rps->ips.corr = init_emon(uncore);

	return true;
}

static void gen5_rps_disable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u16 rgvswctl;

	spin_lock_irq(&mchdev_lock);

	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);

	/* Ack interrupts, disable EFC interrupt */
	intel_uncore_write(uncore, MEMINTREN,
			   intel_uncore_read(uncore, MEMINTREN) &
			   ~MEMINT_EVAL_CHG_EN);
	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
	intel_uncore_write(uncore, DEIER,
			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
	intel_uncore_write(uncore, DEIMR,
			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);

	/* Go back to the starting frequency */
	gen5_rps_set(rps, rps->idle_freq);
	mdelay(1);
	rgvswctl |= MEMCTL_CMD_STS;
	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
	mdelay(1);

	spin_unlock_irq(&mchdev_lock);
}

static u32 rps_limits(struct intel_rps *rps, u8 val)
{
	u32 limits;

	/*
	 * Only set the down limit when we've reached the lowest level to avoid
	 * getting more interrupts, otherwise leave this clear. This prevents a
	 * race in the hw when coming out of rc6: There's a tiny window where
	 * the hw runs at the minimal clock before selecting the desired
	 * frequency, if the down threshold expires in that window we will not
	 * receive a down interrupt.
	 */
	if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
		limits = rps->max_freq_softlimit << 23;
		if (val <= rps->min_freq_softlimit)
			limits |= rps->min_freq_softlimit << 14;
	} else {
		limits = rps->max_freq_softlimit << 24;
		if (val <= rps->min_freq_softlimit)
			limits |= rps->min_freq_softlimit << 16;
	}

	return limits;
}

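/*
 * Layout sketch (illustrative): gen9+ packs the max softlimit at bit 23
 * and, only at the lowest level, the min softlimit at bit 14; earlier
 * gens use bits 24 and 16.  E.g. max = 0x16, min = 0x05 with
 * val <= min on gen9 yields (0x16 << 23) | (0x05 << 14).
 */
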
static void rps_set_power(struct intel_rps *rps, int new_power)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 threshold_up = 0, threshold_down = 0; /* in % */
	u32 ei_up = 0, ei_down = 0;

	lockdep_assert_held(&rps->power.mutex);

	if (new_power == rps->power.mode)
		return;

	/* Note the units here are not exactly 1us, but 1280ns. */
	switch (new_power) {
	case LOW_POWER:
		/* Upclock if more than 95% busy over 16ms */
		ei_up = 16000;
		threshold_up = 95;

		/* Downclock if less than 85% busy over 32ms */
		ei_down = 32000;
		threshold_down = 85;
		break;

	case BETWEEN:
		/* Upclock if more than 90% busy over 13ms */
		ei_up = 13000;
		threshold_up = 90;

		/* Downclock if less than 75% busy over 32ms */
		ei_down = 32000;
		threshold_down = 75;
		break;

	case HIGH_POWER:
		/* Upclock if more than 85% busy over 10ms */
		ei_up = 10000;
		threshold_up = 85;

		/* Downclock if less than 60% busy over 32ms */
		ei_down = 32000;
		threshold_down = 60;
		break;
	}

	/* When byt can survive without system hang with dynamic
	 * sw freq adjustments, this restriction can be lifted.
	 */
	if (IS_VALLEYVIEW(i915))
		goto skip_hw_write;

	set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
	set(uncore, GEN6_RP_UP_THRESHOLD,
	    GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));

	set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
	set(uncore, GEN6_RP_DOWN_THRESHOLD,
	    GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));

	set(uncore, GEN6_RP_CONTROL,
	    (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
	    GEN6_RP_MEDIA_IS_GFX |
	    GEN6_RP_ENABLE |
	    GEN6_RP_UP_BUSY_AVG |
	    GEN6_RP_DOWN_IDLE_AVG);

skip_hw_write:
	rps->power.mode = new_power;
	rps->power.up_threshold = threshold_up;
	rps->power.down_threshold = threshold_down;
}

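/*
 * Unit sketch (illustrative; see the 1280ns note above):
 * GT_INTERVAL_FROM_US() turns the microsecond EI into GT clock
 * intervals, and the threshold register takes the busy fraction of that
 * interval, e.g. for LOW_POWER:
 *
 *	up threshold = GT_INTERVAL_FROM_US(i915, 16000 * 95 / 100)
 *
 * i.e. "more than 95% busy over 16ms" becomes "busier than 15.2ms of
 * every 16ms window".
 */
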
static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
{
	int new_power;

	new_power = rps->power.mode;
	switch (rps->power.mode) {
	case LOW_POWER:
		if (val > rps->efficient_freq + 1 &&
		    val > rps->cur_freq)
			new_power = BETWEEN;
		break;

	case BETWEEN:
		if (val <= rps->efficient_freq &&
		    val < rps->cur_freq)
			new_power = LOW_POWER;
		else if (val >= rps->rp0_freq &&
			 val > rps->cur_freq)
			new_power = HIGH_POWER;
		break;

	case HIGH_POWER:
		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
		    val < rps->cur_freq)
			new_power = BETWEEN;
		break;
	}
	/* Max/min bins are special */
	if (val <= rps->min_freq_softlimit)
		new_power = LOW_POWER;
	if (val >= rps->max_freq_softlimit)
		new_power = HIGH_POWER;

	mutex_lock(&rps->power.mutex);
	if (rps->power.interactive)
		new_power = HIGH_POWER;
	rps_set_power(rps, new_power);
	mutex_unlock(&rps->power.mutex);
}

void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
	mutex_lock(&rps->power.mutex);
	if (interactive) {
		if (!rps->power.interactive++ && READ_ONCE(rps->active))
			rps_set_power(rps, HIGH_POWER);
	} else {
		GEM_BUG_ON(!rps->power.interactive);
		rps->power.interactive--;
	}
	mutex_unlock(&rps->power.mutex);
}

static int gen6_rps_set(struct intel_rps *rps, u8 val)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 swreq;

	if (INTEL_GEN(i915) >= 9)
		swreq = GEN9_FREQUENCY(val);
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		swreq = HSW_FREQUENCY(val);
	else
		swreq = (GEN6_FREQUENCY(val) |
			 GEN6_OFFSET(0) |
			 GEN6_AGGRESSIVE_TURBO);
	set(uncore, GEN6_RPNSWREQ, swreq);

	return 0;
}

static int vlv_rps_set(struct intel_rps *rps, u8 val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int err;

	vlv_punit_get(i915);
	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
	vlv_punit_put(i915);

	return err;
}

static int rps_set(struct intel_rps *rps, u8 val, bool update)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int err;

	if (INTEL_GEN(i915) < 6)
		return 0;

	if (val == rps->last_freq)
		return 0;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		err = vlv_rps_set(rps, val);
	else
		err = gen6_rps_set(rps, val);
	if (err)
		return err;

	if (update)
		gen6_rps_set_thresholds(rps, val);
	rps->last_freq = val;

	return 0;
}

void intel_rps_unpark(struct intel_rps *rps)
{
	u8 freq;

	if (!rps->enabled)
		return;

	/*
	 * Use the user's desired frequency as a guide, but for better
	 * performance, jump directly to RPe as our starting frequency.
	 */
	mutex_lock(&rps->lock);

	WRITE_ONCE(rps->active, true);

	freq = max(rps->cur_freq, rps->efficient_freq);
	freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
	intel_rps_set(rps, freq);

	rps->last_adj = 0;

	mutex_unlock(&rps->lock);

	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
		rps_enable_interrupts(rps);

	if (IS_GEN(rps_to_i915(rps), 5))
		gen5_rps_update(rps);
}

void intel_rps_park(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (!rps->enabled)
		return;

	if (INTEL_GEN(i915) >= 6)
		rps_disable_interrupts(rps);

	WRITE_ONCE(rps->active, false);
	if (rps->last_freq <= rps->idle_freq)
		return;

	/*
	 * The punit delays the write of the frequency and voltage until it
	 * determines the GPU is awake. During normal usage we don't want to
	 * waste power changing the frequency if the GPU is sleeping (rc6).
	 * However, the GPU and driver is now idle and we do not want to delay
	 * switching to minimum voltage (reducing power whilst idle) as we do
	 * not expect to be woken in the near future and so must flush the
	 * change by waking the device.
	 *
	 * We choose to take the media powerwell (either would do to trick the
	 * punit into committing the voltage change) as that takes a lot less
	 * power than the render powerwell.
	 */
	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
	rps_set(rps, rps->idle_freq, false);
	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);

	/*
	 * Since we will try and restart from the previously requested
	 * frequency on unparking, treat this idle point as a downclock
	 * interrupt and reduce the frequency for resume. If we park/unpark
	 * more frequently than the rps worker can run, we will not respond
	 * to any EI and never see a change in frequency.
	 *
	 * (Note we accommodate Cherryview's limitation of only using an
	 * even bin by applying it to all.)
	 */
	rps->cur_freq =
		max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq);
}

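/*
 * Worked example (illustrative): parking at cur_freq = 11 gives
 * round_down(10, 2) = 10 and parking at cur_freq = 12 gives
 * round_down(11, 2) = 10, so the next unpark restarts at least one
 * even-aligned bin below the last busy frequency, but never below
 * min_freq.
 */
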
void intel_rps_boost(struct i915_request *rq)
{
	struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
	unsigned long flags;

	if (i915_request_signaled(rq) || !READ_ONCE(rps->active))
		return;

	/* Serializes with i915_request_retire() */
	spin_lock_irqsave(&rq->lock, flags);
	if (!i915_request_has_waitboost(rq) &&
	    !dma_fence_is_signaled_locked(&rq->fence)) {
		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);

		if (!atomic_fetch_inc(&rps->num_waiters) &&
		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
			schedule_work(&rps->work);

		atomic_inc(&rps->boosts);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

int intel_rps_set(struct intel_rps *rps, u8 val)
{
	int err;

	lockdep_assert_held(&rps->lock);
	GEM_BUG_ON(val > rps->max_freq);
	GEM_BUG_ON(val < rps->min_freq);

	if (rps->active) {
		err = rps_set(rps, val, true);
		if (err)
			return err;

		/*
		 * Make sure we continue to get interrupts
		 * until we hit the minimum or maximum frequencies.
		 */
		if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
			struct intel_uncore *uncore = rps_to_uncore(rps);

			set(uncore,
			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));

			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
		}
	}

	rps->cur_freq = val;
	return 0;
}

static void gen6_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* All of these values are in units of 50MHz */

	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
	if (IS_GEN9_LP(i915)) {
		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);

		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
		rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
		rps->min_freq = (rp_state_cap >> 0) & 0xff;
	} else {
		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);

		rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
		rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
		rps->min_freq = (rp_state_cap >> 16) & 0xff;
	}

	/* hw_max = RP0 until we check for overclocking */
	rps->max_freq = rps->rp0_freq;

	rps->efficient_freq = rps->rp1_freq;
	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
	    IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
		u32 ddcc_status = 0;

		if (sandybridge_pcode_read(i915,
					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
					   &ddcc_status, NULL) == 0)
			rps->efficient_freq =
				clamp_t(u8,
					(ddcc_status >> 8) & 0xff,
					rps->min_freq,
					rps->max_freq);
	}

	if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
		/* Store the frequency values in 16.66 MHZ units, which is
		 * the natural hardware unit for SKL
		 */
		rps->rp0_freq *= GEN9_FREQ_SCALER;
		rps->rp1_freq *= GEN9_FREQ_SCALER;
		rps->min_freq *= GEN9_FREQ_SCALER;
		rps->max_freq *= GEN9_FREQ_SCALER;
		rps->efficient_freq *= GEN9_FREQ_SCALER;
	}
}

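/*
 * Unit sketch (illustrative): with GEN9_FREQ_SCALER = 3, a raw RP0 of
 * 24 (units of 50MHz) is stored as 72 (units of 50/3 = 16.66MHz); both
 * encode the same 1200MHz clock, cf. intel_gpu_freq().
 */
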
static bool rps_reset(struct intel_rps *rps)
{
	/* force a reset */
	rps->power.mode = -1;
	rps->last_freq = -1;

	if (rps_set(rps, rps->min_freq, true)) {
		DRM_ERROR("Failed to reset RPS to initial values\n");
		return false;
	}

	rps->cur_freq = rps->min_freq;
	return true;
}

/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* Program defaults and thresholds for RPS */
	if (IS_GEN(i915, 9))
		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
				      GEN9_FREQUENCY(rps->rp1_freq));

	/* 1 second timeout */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
			      GT_INTERVAL_FROM_US(i915, 1000000));

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);

	return rps_reset(rps);
}

static bool gen8_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
			      HSW_FREQUENCY(rps->rp1_freq));

	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
			      100000000 / 128); /* 1 second timeout */

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	return rps_reset(rps);
}

static bool gen6_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* Power down if completely idle for over 50ms */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	return rps_reset(rps);
}

static int chv_rps_max_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);

	switch (RUNTIME_INFO(i915)->sseu.eu_total) {
	case 8:
		/* (2 * 4) config */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
		break;
	case 12:
		/* (2 * 6) config */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
		break;
	case 16:
		/* (2 * 8) config */
	default:
		/* Setting (2 * 8) Min RP0 for any other combination */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
		break;
	}

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static int chv_rps_rpe_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;

	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
}

static int chv_rps_guar_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static u32 chv_rps_min_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static bool chv_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	/* 1: Program defaults and thresholds for RPS*/
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	/* 2: Enable RPS */
	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
			      GEN6_RP_MEDIA_IS_GFX |
			      GEN6_RP_ENABLE |
			      GEN6_RP_UP_BUSY_AVG |
			      GEN6_RP_DOWN_IDLE_AVG);

	/* Setting Fixed Bias */
	vlv_punit_get(i915);

	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);

	vlv_punit_put(i915);

	/* RPS code assumes GPLL is used */
	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
		      "GPLL not enabled\n");

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	return rps_reset(rps);
}

static int vlv_rps_guar_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rp1;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);

	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;

	return rp1;
}

static int vlv_rps_max_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rp0;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);

	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
	/* Clamp to max */
	rp0 = min_t(u32, rp0, 0xea);

	return rp0;
}

static int vlv_rps_rpe_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rpe;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;

	return rpe;
}

static int vlv_rps_min_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
	/*
	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
	 * to make sure it matches what Punit accepts.
	 */
	return max_t(u32, val, 0xc0);
}

static bool vlv_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
			      GEN6_RP_MEDIA_TURBO |
			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
			      GEN6_RP_MEDIA_IS_GFX |
			      GEN6_RP_ENABLE |
			      GEN6_RP_UP_BUSY_AVG |
			      GEN6_RP_DOWN_IDLE_CONT);

	vlv_punit_get(i915);

	/* Setting Fixed Bias */
	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);

	vlv_punit_put(i915);

	/* RPS code assumes GPLL is used */
	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
		      "GPLL not enabled\n");

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	return rps_reset(rps);
}

static unsigned long __ips_gfx_val(struct intel_ips *ips)
{
	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	unsigned long t, corr, state1, corr2, state2;
	u32 pxvid, ext_v;

	lockdep_assert_held(&mchdev_lock);

	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
	pxvid = (pxvid >> 24) & 0x7f;
	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);

	state1 = ext_v;

	/* Revel in the empirically derived constants */

	/* Correction factor in 1/100000 units */
	t = ips_mch_val(uncore);
	if (t > 80)
		corr = t * 2349 + 135940;
	else if (t >= 50)
		corr = t * 964 + 29317;
	else /* < 50 */
		corr = t * 301 + 1004;

	corr = corr * 150142 * state1 / 10000 - 78642;
	corr /= 100000;
	corr2 = corr * ips->corr;

	state2 = corr2 * state1 / 10000;
	state2 /= 100; /* convert to mW */

	__gen5_ips_update(ips);

	return ips->gfx_power + state2;
}

void intel_rps_enable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
	if (IS_CHERRYVIEW(i915))
		rps->enabled = chv_rps_enable(rps);
	else if (IS_VALLEYVIEW(i915))
		rps->enabled = vlv_rps_enable(rps);
	else if (INTEL_GEN(i915) >= 9)
		rps->enabled = gen9_rps_enable(rps);
	else if (INTEL_GEN(i915) >= 8)
		rps->enabled = gen8_rps_enable(rps);
	else if (INTEL_GEN(i915) >= 6)
		rps->enabled = gen6_rps_enable(rps);
	else if (IS_IRONLAKE_M(i915))
		rps->enabled = gen5_rps_enable(rps);
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	if (!rps->enabled)
		return;

	drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
	drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);

	drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
	drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
}

static void gen6_rps_disable(struct intel_rps *rps)
{
	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
}

void intel_rps_disable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	rps->enabled = false;

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_disable(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_disable(rps);
}

static int byt_gpu_freq(struct intel_rps *rps, int val)
{
	/*
	 * N = val - 0xb7
	 * Slow = Fast = GPLL ref * N
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
}

static int byt_freq_opcode(struct intel_rps *rps, int val)
{
	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
}

static int chv_gpu_freq(struct intel_rps *rps, int val)
{
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
}

static int chv_freq_opcode(struct intel_rps *rps, int val)
{
	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
}

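/*
 * Round-trip sketch (illustrative; the 5000 kHz GPLL reference below is
 * a made-up value, gpll_ref_freq is read from CCK at init): with
 * gpll_ref_freq = 5000, byt_gpu_freq(0xc1) = 5000 * (0xc1 - 0xb7) /
 * 1000 = 50 MHz, and byt_freq_opcode(50) = 1000 * 50 / 5000 + 0xb7 =
 * 0xc1 again.  chv_freq_opcode() additionally halves before rounding
 * and doubles afterwards, which is what forces even opcodes.
 */
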
int intel_gpu_freq(struct intel_rps *rps, int val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (INTEL_GEN(i915) >= 9)
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(i915))
		return chv_gpu_freq(rps, val);
	else if (IS_VALLEYVIEW(i915))
		return byt_gpu_freq(rps, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}

int intel_freq_opcode(struct intel_rps *rps, int val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (INTEL_GEN(i915) >= 9)
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(i915))
		return chv_freq_opcode(rps, val);
	else if (IS_VALLEYVIEW(i915))
		return byt_freq_opcode(rps, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}

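/*
 * Worked example (illustrative): on gen9, with GT_FREQUENCY_MULTIPLIER
 * = 50 and GEN9_FREQ_SCALER = 3, intel_gpu_freq(rps, 72) =
 * DIV_ROUND_CLOSEST(72 * 50, 3) = 1200 MHz and intel_freq_opcode(rps,
 * 1200) = DIV_ROUND_CLOSEST(1200 * 3, 50) = 72: the helpers are exact
 * inverses at multiples of the 16.66MHz unit.
 */
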
static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	rps->gpll_ref_freq =
		vlv_get_cck_clock(i915, "GPLL ref",
				  CCK_GPLL_CLOCK_CONTROL,
				  i915->czclk_freq);

	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
}

static void vlv_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	vlv_iosf_sb_get(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));

	vlv_init_gpll_ref_freq(rps);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
	switch ((val >> 6) & 3) {
	case 0:
	case 1:
		i915->mem_freq = 800;
		break;
	case 2:
		i915->mem_freq = 1066;
		break;
	case 3:
		i915->mem_freq = 1333;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);

	rps->max_freq = vlv_rps_max_freq(rps);
	rps->rp0_freq = rps->max_freq;
	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->max_freq),
			 rps->max_freq);

	rps->efficient_freq = vlv_rps_rpe_freq(rps);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->efficient_freq),
			 rps->efficient_freq);

	rps->rp1_freq = vlv_rps_guar_freq(rps);
	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->rp1_freq),
			 rps->rp1_freq);

	rps->min_freq = vlv_rps_min_freq(rps);
	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->min_freq),
			 rps->min_freq);

	vlv_iosf_sb_put(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));
}

static void chv_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	vlv_iosf_sb_get(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));

	vlv_init_gpll_ref_freq(rps);

	val = vlv_cck_read(i915, CCK_FUSE_REG);

	switch ((val >> 2) & 0x7) {
	case 3:
		i915->mem_freq = 2000;
		break;
	default:
		i915->mem_freq = 1600;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);

	rps->max_freq = chv_rps_max_freq(rps);
	rps->rp0_freq = rps->max_freq;
	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->max_freq),
			 rps->max_freq);

	rps->efficient_freq = chv_rps_rpe_freq(rps);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->efficient_freq),
			 rps->efficient_freq);

	rps->rp1_freq = chv_rps_guar_freq(rps);
	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->rp1_freq),
			 rps->rp1_freq);

	rps->min_freq = chv_rps_min_freq(rps);
	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->min_freq),
			 rps->min_freq);

	vlv_iosf_sb_put(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));

	drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
				   rps->rp1_freq | rps->min_freq) & 1,
		      "Odd GPU freq values\n");
}

static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
{
	ei->ktime = ktime_get_raw();
	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
}

static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	const struct intel_rps_ei *prev = &rps->ei;
	struct intel_rps_ei now;
	u32 events = 0;

	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
		return 0;

	vlv_c0_read(uncore, &now);

	if (prev->ktime) {
		u64 time, c0;
		u32 render, media;

		time = ktime_us_delta(now.ktime, prev->ktime);

		time *= rps_to_i915(rps)->czclk_freq;

		/* Workload can be split between render + media,
		 * e.g. SwapBuffers being blitted in X after being rendered in
		 * mesa. To account for this we need to combine both engines
		 * into our activity counter.
		 */
		render = now.render_c0 - prev->render_c0;
		media = now.media_c0 - prev->media_c0;
		c0 = max(render, media);
		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */

		if (c0 > time * rps->power.up_threshold)
			events = GEN6_PM_RP_UP_THRESHOLD;
		else if (c0 < time * rps->power.down_threshold)
			events = GEN6_PM_RP_DOWN_THRESHOLD;
	}

	rps->ei = now;
	return events;
}

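/*
 * Scaling sketch (illustrative, assuming the C0 residency counters tick
 * once per 256 czclk cycles, which is what the '<< 8' suggests): after
 * the multiplies above, the comparison reduces to
 *
 *	busy_cycles * 100 > total_cycles * up_threshold
 *
 * i.e. a plain busy-percentage check against the current power mode's
 * thresholds, evaluated in software because vlv uses the UP_EI_EXPIRED
 * workaround instead of the hardware up/down interrupts.
 */
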
static void rps_work(struct work_struct *work)
{
	struct intel_rps *rps = container_of(work, typeof(*rps), work);
	struct intel_gt *gt = rps_to_gt(rps);
	bool client_boost = false;
	int new_freq, adj, min, max;
	u32 pm_iir;

	spin_lock_irq(&gt->irq_lock);
	pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events);
	client_boost = atomic_read(&rps->num_waiters);
	spin_unlock_irq(&gt->irq_lock);

	/* Make sure we didn't queue anything we're not going to process. */
	if (!pm_iir && !client_boost)
		goto out;

	mutex_lock(&rps->lock);

	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);

	adj = rps->last_adj;
	new_freq = rps->cur_freq;
	min = rps->min_freq_softlimit;
	max = rps->max_freq_softlimit;
	if (client_boost)
		max = rps->max_freq;
	if (client_boost && new_freq < rps->boost_freq) {
		new_freq = rps->boost_freq;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
		if (adj > 0)
			adj *= 2;
		else /* CHV needs even encode values */
			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;

		if (new_freq >= rps->max_freq_softlimit)
			adj = 0;
	} else if (client_boost) {
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
		if (rps->cur_freq > rps->efficient_freq)
			new_freq = rps->efficient_freq;
		else if (rps->cur_freq > rps->min_freq_softlimit)
			new_freq = rps->min_freq_softlimit;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
		if (adj < 0)
			adj *= 2;
		else /* CHV needs even encode values */
			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;

		if (new_freq <= rps->min_freq_softlimit)
			adj = 0;
	} else { /* unknown event */
		adj = 0;
	}

	rps->last_adj = adj;

	/*
	 * Limit deboosting and boosting to keep ourselves at the extremes
	 * when in the respective power modes (i.e. slowly decrease frequencies
	 * while in the HIGH_POWER zone and slowly increase frequencies while
	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
	 * to the next level quickly, and conversely if busy we expect to
	 * hit a waitboost and rapidly switch into max power.
	 */
	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
	    (adj > 0 && rps->power.mode == LOW_POWER))
		rps->last_adj = 0;

	/* sysfs frequency interfaces may have snuck in while servicing the
	 * interrupt
	 */
	new_freq += adj;
	new_freq = clamp_t(int, new_freq, min, max);

	if (intel_rps_set(rps, new_freq)) {
		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
		rps->last_adj = 0;
	}

	mutex_unlock(&rps->lock);

out:
	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
	spin_unlock_irq(&gt->irq_lock);
}

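/*
 * Ramp sketch (illustrative): consecutive UP_THRESHOLD events double
 * the step, so a sustained load climbs +1, +2, +4, ... bins per event
 * (+2, +4, +8 on chv, which only takes even bins), whereas a single
 * DOWN_TIMEOUT drops straight back to the efficient frequency.
 */
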
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_gt *gt = rps_to_gt(rps);
	const u32 events = rps->pm_events & pm_iir;

	lockdep_assert_held(&gt->irq_lock);

	if (unlikely(!events))
		return;

	gen6_gt_pm_mask_irq(gt, events);

	rps->pm_iir |= events;
	schedule_work(&rps->work);
}

void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_gt *gt = rps_to_gt(rps);
	u32 events;

	events = pm_iir & READ_ONCE(rps->pm_events);
	if (events) {
		spin_lock(&gt->irq_lock);

		gen6_gt_pm_mask_irq(gt, events);
		rps->pm_iir |= events;

		schedule_work(&rps->work);
		spin_unlock(&gt->irq_lock);
	}

	if (INTEL_GEN(gt->i915) >= 8)
		return;

	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);

	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
}

void gen5_rps_irq_handler(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 busy_up, busy_down, max_avg, min_avg;
	u8 new_freq;

	spin_lock(&mchdev_lock);

	intel_uncore_write16(uncore,
			     MEMINTRSTS,
			     intel_uncore_read(uncore, MEMINTRSTS));

	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
	min_avg = intel_uncore_read(uncore, RCBMINAVG);

	/* Handle RCS change request from hw */
	new_freq = rps->cur_freq;
	if (busy_up > max_avg)
		new_freq++;
	else if (busy_down < min_avg)
		new_freq--;
	new_freq = clamp(new_freq,
			 rps->min_freq_softlimit,
			 rps->max_freq_softlimit);

	if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
		rps->cur_freq = new_freq;

	spin_unlock(&mchdev_lock);
}

void intel_rps_init_early(struct intel_rps *rps)
{
	mutex_init(&rps->lock);
	mutex_init(&rps->power.mutex);

	INIT_WORK(&rps->work, rps_work);

	atomic_set(&rps->num_waiters, 0);
}

void intel_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_CHERRYVIEW(i915))
		chv_rps_init(rps);
	else if (IS_VALLEYVIEW(i915))
		vlv_rps_init(rps);
	else if (INTEL_GEN(i915) >= 6)
		gen6_rps_init(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_init(rps);

	/* Derive initial user preferences/limits from the hardware limits */
	rps->max_freq_softlimit = rps->max_freq;
	rps->min_freq_softlimit = rps->min_freq;

	/* After setting max-softlimit, find the overclock max freq */
	if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
		u32 params = 0;

		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
				       &params, NULL);
		if (params & BIT(31)) { /* OC supported */
			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
					 (rps->max_freq & 0xff) * 50,
					 (params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}

	/* Finally allow us to boost to max by default */
	rps->boost_freq = rps->max_freq;
	rps->idle_freq = rps->min_freq;
	rps->cur_freq = rps->idle_freq;

	rps->pm_intrmsk_mbz = 0;

	/*
	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
	 * if GEN6_PM_UP_EI_EXPIRED is masked.
	 *
	 * TODO: verify if this can be reproduced on VLV,CHV.
	 */
	if (INTEL_GEN(i915) <= 7)
		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;

	if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}

u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 cagf;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		cagf = (rpstat >> 8) & 0xff;
	else if (INTEL_GEN(i915) >= 9)
		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
	else
		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;

	return cagf;
}

static u32 read_cagf(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 freq;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		vlv_punit_get(i915);
		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
		vlv_punit_put(i915);
	} else {
		freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
	}

	return intel_rps_get_cagf(rps, freq);
}

u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
	struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_gpu_freq(rps, read_cagf(rps));

	return freq;
}

/* External interface for intel_ips.ko */

static struct drm_i915_private __rcu *ips_mchdev;

/**
 * Tells the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 */
static void
ips_ping_for_i915_load(void)
{
	void (*link)(void);

	link = symbol_get(ips_link_to_i915_driver);
	if (link) {
		link();
		symbol_put(ips_link_to_i915_driver);
	}
}

void intel_rps_driver_register(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	/*
	 * We only register the i915 ips part with intel-ips once everything is
	 * set up, to avoid intel-ips sneaking in and reading bogus values.
	 */
	if (IS_GEN(gt->i915, 5)) {
		GEM_BUG_ON(ips_mchdev);
		rcu_assign_pointer(ips_mchdev, gt->i915);
		ips_ping_for_i915_load();
	}
}

void intel_rps_driver_unregister(struct intel_rps *rps)
{
	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
		rcu_assign_pointer(ips_mchdev, NULL);
}

static struct drm_i915_private *mchdev_get(void)
{
	struct drm_i915_private *i915;

	rcu_read_lock();
	i915 = rcu_dereference(ips_mchdev);
	/* Guard against intel_ips calling in before i915 has registered */
	if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
		i915 = NULL;
	rcu_read_unlock();

	return i915;
}

/**
 * i915_read_mch_val - return value for IPS use
 *
 * Calculate and return a value for the IPS driver to use when deciding whether
 * we have thermal and power headroom to increase CPU or GPU power budget.
 */
unsigned long i915_read_mch_val(void)
{
	struct drm_i915_private *i915;
	unsigned long chipset_val = 0;
	unsigned long graphics_val = 0;
	intel_wakeref_t wakeref;

	i915 = mchdev_get();
	if (!i915)
		return 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		struct intel_ips *ips = &i915->gt.rps.ips;

		spin_lock_irq(&mchdev_lock);
		chipset_val = __ips_chipset_val(ips);
		graphics_val = __ips_gfx_val(ips);
		spin_unlock_irq(&mchdev_lock);
	}

	drm_dev_put(&i915->drm);
	return chipset_val + graphics_val;
}
EXPORT_SYMBOL_GPL(i915_read_mch_val);

/**
 * i915_gpu_raise - raise GPU frequency limit
 *
 * Raise the limit; IPS indicates we have thermal headroom.
 */
bool i915_gpu_raise(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &i915->gt.rps;

	spin_lock_irq(&mchdev_lock);
	if (rps->max_freq_softlimit < rps->max_freq)
		rps->max_freq_softlimit++;
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_raise);

/**
 * i915_gpu_lower - lower GPU frequency limit
 *
 * IPS indicates we're close to a thermal limit, so throttle back the GPU
 * frequency maximum.
 */
bool i915_gpu_lower(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &i915->gt.rps;

	spin_lock_irq(&mchdev_lock);
	if (rps->max_freq_softlimit > rps->min_freq)
		rps->max_freq_softlimit--;
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_lower);

/**
 * i915_gpu_busy - indicate GPU business to IPS
 *
 * Tell the IPS driver whether or not the GPU is busy.
 */
bool i915_gpu_busy(void)
{
	struct drm_i915_private *i915;
	bool ret;

	i915 = mchdev_get();
	if (!i915)
		return false;

	ret = i915->gt.awake;

	drm_dev_put(&i915->drm);
	return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_busy);

/**
 * i915_gpu_turbo_disable - disable graphics turbo
 *
 * Disable graphics turbo by resetting the max frequency and setting the
 * current frequency to the default.
 */
bool i915_gpu_turbo_disable(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;
	bool ret;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &i915->gt.rps;

	spin_lock_irq(&mchdev_lock);
	rps->max_freq_softlimit = rps->min_freq;
	ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);