sched: Accumulate per-cfs_rq cpu usage and charge against bandwidth

author Paul Turner <pjt@google.com>

Thu, 21 Jul 2011 16:43:30 +0000 (09:43 -0700)

committer Ingo Molnar <mingo@elte.hu>

Sun, 14 Aug 2011 10:03:26 +0000 (12:03 +0200)
author Paul Turner <pjt@google.com>
Thu, 21 Jul 2011 16:43:30 +0000 (09:43 -0700)
committer Ingo Molnar <mingo@elte.hu>
Sun, 14 Aug 2011 10:03:26 +0000 (12:03 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 4ac2c05..bc6f5f2 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2040,6 +2040,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
  static inline void sched_autogroup_exit(struct signal_struct *sig) { }
  #endif
  
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
  #ifdef CONFIG_RT_MUTEXES
  extern int rt_mutex_getprio(struct task_struct *p);
  extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/kernel/sched.c b/kernel/sched.c

index ea6850d..35561c6 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -251,7 +251,7 @@ struct cfs_bandwidth {
  #ifdef CONFIG_CFS_BANDWIDTH
         raw_spinlock_t lock;
         ktime_t period;
-       u64 quota;
+       u64 quota, runtime;
         s64 hierarchal_quota;
  #endif
  };
@@ -407,6 +407,7 @@ static inline u64 default_cfs_period(void);
  static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
  {
         raw_spin_lock_init(&cfs_b->lock);
+       cfs_b->runtime = 0;     /* pool starts empty; assign_cfs_rq_runtime() ignores it while quota is RUNTIME_INF */
         cfs_b->quota = RUNTIME_INF;
         cfs_b->period = ns_to_ktime(default_cfs_period());
  }
@@ -9107,6 +9108,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
         raw_spin_lock_irq(&cfs_b->lock);
         cfs_b->period = ns_to_ktime(period);
         cfs_b->quota = quota;
+       cfs_b->runtime = quota;
         raw_spin_unlock_irq(&cfs_b->lock);
  
         for_each_possible_cpu(i) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index f24f417..9502aa8 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -89,6 +89,20 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
   */
  unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
  
+#ifdef CONFIG_CFS_BANDWIDTH
+/*
+ * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
+ * each time a cfs_rq requests quota.
+ *
+ * Note: in the case that the slice exceeds the runtime remaining (either due
+ * to consumption or the quota being specified to be smaller than the slice)
+ * we will always only issue the remaining available time.
+ *
+ * default: 5 msec, units: microseconds
+ */
+unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
+#endif
+
  static const struct sched_class fair_sched_class;
  
  /**************************************************************
@@ -292,6 +306,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+                                  unsigned long delta_exec);
  
  /**************************************************************
   * Scheduling class tree data structure manipulation methods:
@@ -583,6 +599,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
                 cpuacct_charge(curtask, delta_exec);
                 account_group_exec_runtime(curtask, delta_exec);
         }
+
+       account_cfs_rq_runtime(cfs_rq, delta_exec);
  }
  
  static inline void
@@ -1248,6 +1266,58 @@ static inline u64 default_cfs_period(void)
  {
         return 100000000ULL;
  }
+
+static inline u64 sched_cfs_bandwidth_slice(void)
+{
+       return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC; /* usecs -> nsecs */
+}
+
+static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+       struct task_group *tg = cfs_rq->tg;
+       struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+       u64 amount = 0, min_amount;
+
+       /* want one slice plus our deficit; positive as runtime_remaining <= 0 */
+       min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
+       /* refill from the task group's pool, which is debited under its lock */
+       raw_spin_lock(&cfs_b->lock);
+       if (cfs_b->quota == RUNTIME_INF)
+               amount = min_amount;    /* no limit: pool is left untouched */
+       else if (cfs_b->runtime > 0) {
+               amount = min(cfs_b->runtime, min_amount);
+               cfs_b->runtime -= amount;
+       }
+       raw_spin_unlock(&cfs_b->lock);
+       /* amount is still 0 when a finite pool has already been used up */
+       cfs_rq->runtime_remaining += amount;
+}
+
+/*
+ * Charge delta_exec against the cfs_rq's local runtime; once that is used up,
+ * refill from the task group.  Only call this via account_cfs_rq_runtime(),
+ * which has already tested cfs_rq->runtime_enabled.
+ */
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+                                    unsigned long delta_exec)
+{
+       cfs_rq->runtime_remaining -= delta_exec;
+       if (cfs_rq->runtime_remaining <= 0)
+               assign_cfs_rq_runtime(cfs_rq);
+}
+
+static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+                                                  unsigned long delta_exec)
+{
+       if (!cfs_rq->runtime_enabled)
+               return;         /* inlined fast path: nothing to charge */
+       /* delta_exec may be 0: that only refills a pool that has run dry */
+       __account_cfs_rq_runtime(cfs_rq, delta_exec);
+}
+
+#else
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+                                    unsigned long delta_exec) {} /* #else case: no-op stub */
  #endif
  
  /**************************************************
@@ -4266,8 +4336,13 @@ static void set_curr_task_fair(struct rq *rq)
  {
         struct sched_entity *se = &rq->curr->se;
  
-       for_each_sched_entity(se)
-               set_next_entity(cfs_rq_of(se), se);
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               set_next_entity(cfs_rq, se);
+               /* ensure bandwidth has been allocated on our new cfs_rq */
+               account_cfs_rq_runtime(cfs_rq, 0);
+       }
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 11d65b5..2d2ecdc 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -379,6 +379,16 @@ static struct ctl_table kern_table[] = {
                 .extra2         = &one,
         },
  #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+       {
+               .procname       = "sched_cfs_bandwidth_slice_us",
+               .data           = &sysctl_sched_cfs_bandwidth_slice,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+       },
+#endif
  #ifdef CONFIG_PROVE_LOCKING
         {
                 .procname       = "prove_locking",
author	Paul Turner <pjt@google.com>
	Thu, 21 Jul 2011 16:43:30 +0000 (09:43 -0700)
committer	Ingo Molnar <mingo@elte.hu>
	Sun, 14 Aug 2011 10:03:26 +0000 (12:03 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history