From 771cd8d4c36975cdac6ced2f1270178752a56ae4 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Mon, 7 Feb 2022 18:37:45 +0100 Subject: [PATCH] mt76: mt7915e: Fix degraded performance after temporary overheat mt7915e registers a cooling_device with wrong semantics: 1. cooling_device expects that higher state values should cool more, but mt7915e did the opposite... with the exception of state == 0, which should "disable thermal management", but does not seem to have any effect since the previous state is kept. The result is that when the thermal zone heats up a bit and bumps the cooling_device state from 0 to 1 to cool a bit, the performance is destroyed, and when going back from 1 to 0, the performance stays bad. 2. Reading the cooling_device state does not always return the last written state, but can return the actual hardware throttle state, which is different. This is a problem because the mt7915 firmware actually implements the equivalent of a thermal zone with trip points. Setting the cooling device state actually changes the throttles at each trip point, so the following could occur if only the first issue is fixed: - thermal subsystem sets state to 100% power (state=0) - mt7915e driver sets trip throttles to [100%, 50%, 25%, 12%] - hardware heats up and decides to switch to 50% power - thermal subsystem sees that power is 50% (state=50), decides to increase it to 60% (state=40) because the rest of the system is cool. - mt7915e driver sets trip throttles to [60%, 30%, 15%, 7%] - hardware thus switches to 30% power [race to the bottom continues...] This patch corrects the semantics of the cooling_device to match what the thermal subsystem expects. 
Signed-off-by: Nicolas Cavallari Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 17 +++++++++++------ drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h | 2 ++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 4bf3fe7bd527..dda2b5e8bbb9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -97,7 +97,7 @@ static int mt7915_thermal_get_max_throttle_state(struct thermal_cooling_device *cdev, unsigned long *state) { - *state = MT7915_THERMAL_THROTTLE_MAX; + *state = MT7915_CDEV_THROTTLE_MAX; return 0; } @@ -108,7 +108,7 @@ mt7915_thermal_get_cur_throttle_state(struct thermal_cooling_device *cdev, { struct mt7915_phy *phy = cdev->devdata; - *state = phy->throttle_state; + *state = phy->cdev_state; return 0; } @@ -118,22 +118,27 @@ mt7915_thermal_set_cur_throttle_state(struct thermal_cooling_device *cdev, unsigned long state) { struct mt7915_phy *phy = cdev->devdata; + u8 throttling = MT7915_THERMAL_THROTTLE_MAX - state; int ret; - if (state > MT7915_THERMAL_THROTTLE_MAX) + if (state > MT7915_CDEV_THROTTLE_MAX) return -EINVAL; if (phy->throttle_temp[0] > phy->throttle_temp[1]) return 0; - if (state == phy->throttle_state) + if (state == phy->cdev_state) return 0; - ret = mt7915_mcu_set_thermal_throttling(phy, state); + /* + * cooling_device convention: 0 = no cooling, more = more cooling + * mcu convention: 1 = max cooling, more = less cooling + */ + ret = mt7915_mcu_set_thermal_throttling(phy, throttling); if (ret) return ret; - phy->throttle_state = state; + phy->cdev_state = state; return 0; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h index 6db0db117699..52b848dd4b66 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h +++ 
b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h @@ -60,6 +60,7 @@ #define MT7915_CFEND_RATE_11B 0x03 /* 11B LP, 11M */ #define MT7915_THERMAL_THROTTLE_MAX 100 +#define MT7915_CDEV_THROTTLE_MAX 99 #define MT7915_SKU_RATE_NUM 161 @@ -229,6 +230,7 @@ struct mt7915_phy { struct ieee80211_vif *monitor_vif; struct thermal_cooling_device *cdev; + u8 cdev_state; u8 throttle_state; u32 throttle_temp[2]; /* 0: critical high, 1: maximum */ -- 2.11.0