sched/fair: streamline find_best_target heuristics
author    Patrick Bellasi <patrick.bellasi@arm.com>
          Wed, 29 Mar 2017 08:01:06 +0000 (09:01 +0100)
committer Chris Redpath <chris.redpath@arm.com>
          Tue, 25 Jul 2017 15:31:00 +0000 (16:31 +0100)
The find_best_target() code has evolved over time to integrate different
micro-optimizations, to the point that it is now quite difficult to
follow exactly what it's doing.

This patch refactors the existing code to make it more readable and
easier to maintain. It does that by properly identifying the three main
use-cases and addressing them in priority order:
 A) latency sensitive tasks
 B) non latency sensitive tasks on IDLE CPUs
 C) non latency sensitive tasks on ACTIVE CPUs

The original behaviors are preserved. Tests comparing power/performance
before and after this patch were run using Jankbench and YouTube, and we
did not notice any significant differences.

The only difference with respect to the original code is a small update
to favor lower-capacity idle CPUs in case B. The same preference is not
enforced in case A, since that can lead to the selection of a
non-reserved CPU for TOP_APP tasks, which ultimately can lead to
undesirable co-scheduling side-effects.
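
In rough pseudo-C, the resulting selection policy is sketched below.
This is only an illustration, not the patch itself; the helper
for_each_candidate_cpu() is a made-up placeholder for the sched_group
scan performed in the actual code:

    for_each_candidate_cpu(i) {
            if (prefer_idle) {                 /* case A */
                    if (idle_cpu(i))
                            return i;          /* A.1: first idle CPU wins */
                    /* A.2/A.3: track target/backup ACTIVE CPU */
            } else if (idle_cpu(i)) {          /* case B */
                    /* track lowest-capacity, shallowest-idle CPU */
            } else {                           /* case C */
                    /* track smallest-capacity CPU with max spare capacity */
            }
    }
    if (target_cpu == -1)
            target_cpu = prefer_idle ? best_active_cpu : best_idle_cpu;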

Change-Id: I871e5d95af89176217e4e239b64d44a420baabe8
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
(removed checkpatch whitespace error)
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
kernel/sched/fair.c
kernel/sched/walt.h

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 226d199..a61c47a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6189,46 +6189,54 @@ static int start_cpu(bool boosted)
 
 static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle)
 {
-       int target_cpu = -1;
-       unsigned long target_util = prefer_idle ? ULONG_MAX : 0;
-       unsigned long backup_capacity = ULONG_MAX;
-       int best_idle_cpu = -1;
-       int best_idle_cstate = INT_MAX;
-       int backup_cpu = -1;
+       unsigned long best_idle_min_cap_orig = ULONG_MAX;
        unsigned long min_util = boosted_task_util(p);
+       unsigned long target_capacity = ULONG_MAX;
+       unsigned long min_wake_util = ULONG_MAX;
+       unsigned long target_max_spare_cap = 0;
+       unsigned long target_util = ULONG_MAX;
+       unsigned long best_active_util = ULONG_MAX;
+       int best_idle_cstate = INT_MAX;
        struct sched_domain *sd;
        struct sched_group *sg;
-       int cpu = start_cpu(boosted);
+       int best_active_cpu = -1;
+       int best_idle_cpu = -1;
+       int target_cpu = -1;
+       int cpu, i;
 
        schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts);
        schedstat_inc(this_rq(), eas_stats.fbt_attempts);
 
+       /* Find start CPU based on boost value */
+       cpu = start_cpu(boosted);
        if (cpu < 0) {
                schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu);
                schedstat_inc(this_rq(), eas_stats.fbt_no_cpu);
-               return target_cpu;
+               return -1;
        }
 
+       /* Find SD for the start CPU */
        sd = rcu_dereference(per_cpu(sd_ea, cpu));
-
        if (!sd) {
                schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_sd);
                schedstat_inc(this_rq(), eas_stats.fbt_no_sd);
-               return target_cpu;
+               return -1;
        }
 
+       /* Scan CPUs in all SDs */
        sg = sd->groups;
-
        do {
-               int i;
-
                for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
-                       unsigned long cur_capacity, new_util, wake_util;
-                       unsigned long min_wake_util = ULONG_MAX;
+                       unsigned long capacity_curr = capacity_curr_of(i);
+                       unsigned long capacity_orig = capacity_orig_of(i);
+                       unsigned long wake_util, new_util;
 
                        if (!cpu_online(i))
                                continue;
 
+                       if (walt_cpu_high_irqload(i))
+                               continue;
+
                        /*
                         * p's blocked utilization is still accounted for on prev_cpu
                         * so prev_cpu will receive a negative bias due to the double
@@ -6243,70 +6251,190 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle)
                         * than the one required to boost the task.
                         */
                        new_util = max(min_util, new_util);
-
-                       if (new_util > capacity_orig_of(i))
-                               continue;
-
-#ifdef CONFIG_SCHED_WALT
-                       if (walt_cpu_high_irqload(i))
+                       if (new_util > capacity_orig)
                                continue;
-#endif
 
                        /*
-                        * Unconditionally favoring tasks that prefer idle cpus to
+                        * Case A) Latency sensitive tasks
+                        *
+                        * Unconditionally favoring tasks that prefer an idle CPU to
                         * improve latency.
+                        *
+                        * Looking for:
+                        * - an idle CPU, whatever its idle_state is, since
+                        *   the first CPUs we explore are more likely to be
+                        *   reserved for latency sensitive tasks.
+                        * - a non idle CPU where the task fits in its current
+                        *   capacity and has the maximum spare capacity.
+                        * - a non idle CPU with lower contention from other
+                        *   tasks and running at the lowest possible OPP.
+                        *
+                        * The last two goals try to favor a non idle CPU
+                        * where the task can run as if it is "almost alone".
+                        * A maximum spare capacity CPU is favoured since
+                        * the task already fits into that CPU's capacity
+                        * without waiting for an OPP change.
+                        *
+                        * The following code path is the only one in the CPU
+                        * exploration loop which is always used by
+                        * prefer_idle tasks. It exits the loop with either a
+                        * best_active_cpu or a target_cpu which should
+                        * represent an optimal choice for latency sensitive
+                        * tasks.
                         */
-                       if (idle_cpu(i) && prefer_idle) {
-                               schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle);
-                               schedstat_inc(this_rq(), eas_stats.fbt_pref_idle);
-                               return i;
-                       }
+                       if (prefer_idle) {
 
-                       cur_capacity = capacity_curr_of(i);
-
-                       if (new_util < cur_capacity) {
-                               if (cpu_rq(i)->nr_running) {
-                                       /*
-                                        * Find a target cpu with the lowest/highest
-                                        * utilization if prefer_idle/!prefer_idle.
-                                        */
-                                       if (prefer_idle) {
-                                               /* Favor the CPU that last ran the task */
-                                               if (new_util > target_util ||
-                                                   wake_util > min_wake_util)
-                                                       continue;
-                                               min_wake_util = wake_util;
-                                               target_util = new_util;
-                                               target_cpu = i;
-                                       } else if (target_util < new_util) {
-                                               target_util = new_util;
-                                               target_cpu = i;
-                                       }
-                               } else if (!prefer_idle) {
-                                       int idle_idx = idle_get_state_idx(cpu_rq(i));
+                               /*
+                                * Case A.1: IDLE CPU
+                                * Return the first IDLE CPU we find.
+                                */
+                               if (idle_cpu(i)) {
+                                       schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle);
+                                       schedstat_inc(this_rq(), eas_stats.fbt_pref_idle);
+                                       return i;
+                               }
 
-                                       if (best_idle_cpu < 0 ||
-                                               (sysctl_sched_cstate_aware &&
-                                                       best_idle_cstate > idle_idx)) {
-                                               best_idle_cstate = idle_idx;
-                                               best_idle_cpu = i;
-                                       }
+                               /*
+                                * Case A.2: Target ACTIVE CPU
+                                * Favor CPUs with max spare capacity.
+                                */
+                               if ((capacity_curr > new_util) &&
+                                       (capacity_orig - new_util > target_max_spare_cap)) {
+                                       target_max_spare_cap = capacity_orig - new_util;
+                                       target_cpu = i;
+                                       continue;
                                }
-                       } else if (backup_capacity > cur_capacity) {
-                               /* Find a backup cpu with least capacity. */
-                               backup_capacity = cur_capacity;
-                               backup_cpu = i;
+                               if (target_cpu != -1)
+                                       continue;
+
+
+                               /*
+                                * Case A.3: Backup ACTIVE CPU
+                                * Favor CPUs with:
+                                * - lower utilization due to other tasks
+                                * - lower utilization with the task in
+                                */
+                               if (wake_util > min_wake_util)
+                                       continue;
+                               if (new_util > best_active_util)
+                                       continue;
+                               min_wake_util = wake_util;
+                               best_active_util = new_util;
+                               best_active_cpu = i;
+                               continue;
                        }
+
+                       /*
+                        * Case B) Non latency sensitive tasks on IDLE CPUs.
+                        *
+                        * Find an optimal backup IDLE CPU for non latency
+                        * sensitive tasks.
+                        *
+                        * Looking for:
+                        * - minimizing the capacity_orig,
+                        *   i.e. preferring LITTLE CPUs
+                        * - favoring shallowest idle states
+                        *   i.e. avoid waking up deep-idle CPUs
+                        *
+                        * The following code path is used by non latency
+                        * sensitive tasks if IDLE CPUs are available. If at
+                        * least one such CPU is available, it sets
+                        * best_idle_cpu to the most suitable idle CPU to be
+                        * selected.
+                        *
+                        * If idle CPUs are available, favour these CPUs to
+                        * improve performance by spreading tasks.
+                        * Indeed, the energy_diff() computed by the caller
+                        * will take care of minimizing energy consumption
+                        * without affecting performance.
+                        */
+                       if (idle_cpu(i)) {
+                               int idle_idx = idle_get_state_idx(cpu_rq(i));
+
+                               /* Select idle CPU with lower cap_orig */
+                               if (capacity_orig > best_idle_min_cap_orig)
+                                       continue;
+
+                               /*
+                                * Skip CPUs in deeper idle state, but only
+                                * if they are also less energy efficient.
+                                * IOW, prefer a deep IDLE LITTLE CPU vs a
+                                * shallow idle big CPU.
+                                */
+                               if (sysctl_sched_cstate_aware &&
+                                   best_idle_cstate <= idle_idx)
+                                       continue;
+
+                               /* Keep track of best idle CPU */
+                               best_idle_min_cap_orig = capacity_orig;
+                               best_idle_cstate = idle_idx;
+                               best_idle_cpu = i;
+                               continue;
+                       }
+
+                       /*
+                        * Case C) Non latency sensitive tasks on ACTIVE CPUs.
+                        *
+                        * Pack tasks in the most energy efficient capacities.
+                        *
+                        * This task packing strategy prefers more energy
+                        * efficient CPUs (i.e. pack on smaller maximum
+                        * capacity CPUs) while also trying to spread tasks to
+                        * run them all at a lower OPP.
+                        *
+                        * This assumes for example that it's more energy
+                        * efficient to run two tasks on two CPUs at a lower
+                        * OPP than packing both on a single CPU but running
+                        * that CPU at a higher OPP.
+                        *
+                        * Thus, this case keeps track of the CPU with the
+                        * smallest maximum capacity and the highest spare
+                        * maximum capacity.
+                        */
+
+                       /* Favor CPUs with smaller capacity */
+                       if (capacity_orig > target_capacity)
+                               continue;
+
+                       /* Favor CPUs with maximum spare capacity */
+                       if ((capacity_orig - new_util) < target_max_spare_cap)
+                               continue;
+
+                       target_max_spare_cap = capacity_orig - new_util;
+                       target_capacity = capacity_orig;
+                       target_util = new_util;
+                       target_cpu = i;
                }
+
        } while (sg = sg->next, sg != sd->groups);
 
-       if (target_cpu < 0)
-               target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu;
+       /*
+        * For non latency sensitive tasks, cases B and C in the previous loop,
+        * we pick the best IDLE CPU only if we were not able to find a target
+        * ACTIVE CPU.
+        *
+        * Policy priorities:
+        *
+        * - prefer_idle tasks:
+        *
+        *   a) IDLE CPU available, we return immediately
+        *   b) ACTIVE CPU where the task fits and which has the biggest
+        *      maximum spare capacity (i.e. target_cpu)
+        *   c) ACTIVE CPU with less contention due to other tasks
+        *      (i.e. best_active_cpu)
+        *
+        * - NON prefer_idle tasks:
+        *
+        *   a) ACTIVE CPU: target_cpu
+        *   b) IDLE CPU: best_idle_cpu
+        */
+       if (target_cpu == -1)
+               target_cpu = prefer_idle
+                       ? best_active_cpu
+                       : best_idle_cpu;
 
-       if (target_cpu >= 0) {
-               schedstat_inc(p, se.statistics.nr_wakeups_fbt_count);
-               schedstat_inc(this_rq(), eas_stats.fbt_count);
-       }
+       schedstat_inc(p, se.statistics.nr_wakeups_fbt_count);
+       schedstat_inc(this_rq(), eas_stats.fbt_count);
 
        return target_cpu;
 }
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index e181c87..f56c4da 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -55,6 +55,9 @@ static inline void walt_migrate_sync_cpu(int cpu) { }
 static inline void walt_init_cpu_efficiency(void) { }
 static inline u64 walt_ktime_clock(void) { return 0; }
 
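+/* WALT disabled: no CPU is ever reported as having high IRQ load */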
+#define walt_cpu_high_irqload(cpu) false
+
 #endif /* CONFIG_SCHED_WALT */
 
 extern unsigned int walt_disabled;