
ANDROID: sched: EAS: take cstate into account when selecting idle core
author	Srinath Sridharan <srinathsr@google.com>
	Sat, 4 Nov 2017 15:14:15 +0000 (15:14 +0000)
committer	Chris Redpath <chris.redpath@arm.com>
	Tue, 19 Dec 2017 16:51:08 +0000 (16:51 +0000)
Introduce a new sysctl, 'sched_cstate_aware', to control this behaviour.
When this is enabled, select_idle_sibling in CFS is modified to
choose the idle CPU in the sibling group which has the lowest
idle state index - idle state indexes are assumed to increase
as sleep depth and hence wakeup latency increase. In this way,
we attempt to minimise wakeup latency when an idle CPU is
required.
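
To make the policy concrete, here is a standalone toy in plain C (the
cpu table, idle flags and utilisation figures below are invented
stand-ins for idle_cpu(), idle_get_state_idx(), capacity_orig_of() and
boosted_task_util(); this is a sketch of the policy, not kernel code):

    #include <stdio.h>
    #include <limits.h>

    struct toy_cpu {
        int id;
        int is_idle;            /* stand-in for idle_cpu() */
        int idle_idx;           /* stand-in for idle_get_state_idx();
                                 * deeper sleep == larger index */
        unsigned long cap_orig; /* stand-in for capacity_orig_of() */
    };

    int main(void)
    {
        struct toy_cpu cpus[] = {
            { 0, 1, 2, 1024 },  /* idle, deep state, big core */
            { 1, 0, 0, 1024 },  /* busy */
            { 2, 1, 0,  512 },  /* idle, shallow state, little core */
            { 3, 1, 1,  512 },  /* idle, medium depth */
        };
        unsigned long task_util = 300; /* stand-in for boosted_task_util(p) */
        int best_cpu = -1, best_cstate = INT_MAX;
        unsigned long best_cap = ULONG_MAX;
        int i;

        for (i = 0; i < 4; i++) {
            if (!cpus[i].is_idle || task_util > cpus[i].cap_orig)
                continue; /* busy, or the task does not fit at all */
            /* prefer a shallower idle state; tie-break toward smaller cores */
            if (cpus[i].idle_idx < best_cstate && cpus[i].cap_orig <= best_cap) {
                best_cpu = cpus[i].id;
                best_cstate = cpus[i].idle_idx;
                best_cap = cpus[i].cap_orig;
            }
        }
        printf("picked cpu%d\n", best_cpu); /* cpu2: shallowest state that fits */
        return 0;
    }

The patch below implements the same idea over sched_groups, skipping
whole groups with goto next rather than scanning a flat array.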

Signed-off-by: Srinath Sridharan <srinathsr@google.com>
Includes:
sched: EAS: fix select_idle_sibling
When sysctl_sched_cstate_aware is enabled, the best_idle CPU was never
chosen in the original flow, because it jumped straight to the done
label.
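
A compressed, hypothetical reconstruction of that control-flow bug
(standalone C; buggy_select() and stub_idle_cpu() are invented for
illustration and are not the original kernel source):

    #include <stdio.h>

    static int stub_idle_cpu(int cpu) /* stand-in for idle_cpu() */
    {
        return cpu == 0;
    }

    /* pre-fix shape: the early exit skips the cstate-aware result */
    static int buggy_select(int target, int best_idle)
    {
        if (stub_idle_cpu(target))
            goto done; /* "goto done directly" */
        if (best_idle >= 0)
            target = best_idle; /* unreachable when target is idle */
    done:
        return target;
    }

    int main(void)
    {
        /* cpu0 idles in a deep state yet wins over the shallow cpu2 */
        printf("chosen: cpu%d\n", buggy_select(0, 2));
        return 0;
    }

The refactor below resolves this by giving the cstate-aware scan its
own function, select_idle_sibling_cstate_aware(), dispatched from a
thin select_idle_sibling() wrapper, so no early exit in the old flow
can bypass it.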

Bug: 30107557
Change-Id: Ie09c2e3960cafbb976f8d472747faefab3b4d6ac
Signed-off-by: martin_liu <martin_liu@htc.com>
Signed-off-by: Andres Oportus <andresoportus@google.com>
[refactored and fixed conflicts]
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
include/linux/sched/sysctl.h
kernel/sched/fair.c
kernel/sysctl.c

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index a3a6195..4824a94 100644
@@ -22,6 +22,7 @@ enum { sysctl_hung_task_timeout_secs = 0 };
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_sync_hint_enable;
+extern unsigned int sysctl_sched_cstate_aware;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 69f5bfd..988e852 100644
@@ -58,6 +58,10 @@ unsigned int normalized_sysctl_sched_latency         = 6000000ULL;
  * Enable/disable honoring sync flag in energy-aware wakeups.
  */
 unsigned int sysctl_sched_sync_hint_enable = 1;
+/*
+ * Enable/disable using cstate knowledge in idle sibling selection
+ */
+unsigned int sysctl_sched_cstate_aware = 1;
 
 /*
  * The initial- and re-scaling of tunables is configurable
@@ -5450,6 +5454,18 @@ static void record_wakee(struct task_struct *p)
        }
 }
 
+/*
+ * Returns the current capacity of cpu after applying both
+ * cpu and freq scaling.
+ */
+unsigned long capacity_curr_of(int cpu)
+{
+       unsigned long max_cap = cpu_rq(cpu)->cpu_capacity_orig;
+       unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu);
+
+       return cap_scale(max_cap, scale_freq);
+}
+
 static inline bool energy_aware(void)
 {
        return sched_feat(ENERGY_AWARE);
@@ -6258,7 +6274,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
-static int select_idle_sibling(struct task_struct *p, int prev, int target)
+static inline int __select_idle_sibling(struct task_struct *p, int prev, int target)
 {
        struct sched_domain *sd;
        int i;
@@ -6291,6 +6307,79 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
        return target;
 }
 
+static inline int select_idle_sibling_cstate_aware(struct task_struct *p, int prev, int target)
+{
+       struct sched_domain *sd;
+       struct sched_group *sg;
+       int best_idle_cpu = -1;
+       int best_idle_cstate = INT_MAX;
+       unsigned long best_idle_capacity = ULONG_MAX;
+       int i;
+
+       /*
+        * Iterate the domains and find an eligible idle cpu.
+        */
+       sd = rcu_dereference(per_cpu(sd_llc, target));
+       for_each_lower_domain(sd) {
+               sg = sd->groups;
+               do {
+                       if (!cpumask_intersects(
+                                       sched_group_span(sg), &p->cpus_allowed))
+                               goto next;
+
+                       for_each_cpu_and(i, &p->cpus_allowed, sched_group_span(sg)) {
+                               int idle_idx;
+                               unsigned long new_usage;
+                               unsigned long capacity_orig;
+
+                               if (!idle_cpu(i))
+                                       goto next;
+
+                               /* figure out if the task can fit here at all */
+                               new_usage = boosted_task_util(p);
+                               capacity_orig = capacity_orig_of(i);
+
+                               if (new_usage > capacity_orig)
+                                       goto next;
+
+                               /* if the task fits without changing OPP and we
+                                * intended to use this CPU, just proceed
+                                */
+                               if (i == target && new_usage <= capacity_curr_of(target)) {
+                                       return target;
+                               }
+
+                               /* otherwise select CPU with shallowest idle state
+                                * to reduce wakeup latency.
+                                */
+                               idle_idx = idle_get_state_idx(cpu_rq(i));
+
+                               if (idle_idx < best_idle_cstate &&
+                                       capacity_orig <= best_idle_capacity) {
+                                       best_idle_cpu = i;
+                                       best_idle_cstate = idle_idx;
+                                       best_idle_capacity = capacity_orig;
+                               }
+                       }
+       next:
+                       sg = sg->next;
+               } while (sg != sd->groups);
+       }
+
+       if (best_idle_cpu >= 0)
+               target = best_idle_cpu;
+
+       return target;
+}
+
+static int select_idle_sibling(struct task_struct *p, int prev, int target)
+{
+       if (!sysctl_sched_cstate_aware)
+               return __select_idle_sibling(p, prev, target);
+
+       return select_idle_sibling_cstate_aware(p, prev, target);
+}
+
 static inline int task_util(struct task_struct *p)
 {
        return p->se.avg.util_avg;
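
A quick worked example of the arithmetic behind the capacity_curr_of()
helper added above, assuming cap_scale() has its usual fair.c
definition, ((cap) * (scale)) >> SCHED_CAPACITY_SHIFT; the capacity
figures are invented:

    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10
    #define cap_scale(cap, scale) (((cap) * (scale)) >> SCHED_CAPACITY_SHIFT)

    int main(void)
    {
        unsigned long max_cap = 1024;   /* cpu_capacity_orig: big core at fmax */
        unsigned long scale_freq = 512; /* arch_scale_freq_capacity: half fmax */

        /* 1024 * 512 >> 10 = 512: current capacity tracks frequency */
        printf("capacity_curr = %lu\n", cap_scale(max_cap, scale_freq));
        return 0;
    }

This is the value select_idle_sibling_cstate_aware() compares against
boosted_task_util(p) to decide whether the task fits on the target
without raising the OPP.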
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8100e2c..4d931de 100644
@@ -337,6 +337,13 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
        {
+               .procname       = "sched_cstate_aware",
+               .data           = &sysctl_sched_cstate_aware,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
                .procname       = "sched_wakeup_granularity_ns",
                .data           = &sysctl_sched_wakeup_granularity,
                .maxlen         = sizeof(unsigned int),
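
As a usage sketch (not part of the patch): the kern_table entry above
publishes the knob under /proc/sys/kernel/, so a minimal userspace
reader could look like the following, assuming that conventional
procfs path; writing 0 there as root falls back to the unmodified
__select_idle_sibling() path:

    #include <stdio.h>

    int main(void)
    {
        const char *path = "/proc/sys/kernel/sched_cstate_aware";
        FILE *f = fopen(path, "r");
        int val;

        if (!f || fscanf(f, "%d", &val) != 1) {
            perror(path);
            return 1;
        }
        fclose(f);
        printf("sched_cstate_aware = %d\n", val); /* patch default: 1 */
        return 0;
    }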