Merge 4.4.166 into android-4.4-p

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 563f316..328e17d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -32,7 +32,7 @@
  #include <linux/init.h>
  #include <linux/uaccess.h>
  #include <linux/highmem.h>
-#include <asm/mmu_context.h>
+#include <linux/mmu_context.h>
  #include <linux/interrupt.h>
  #include <linux/capability.h>
  #include <linux/completion.h>
@@ -74,6 +74,7 @@
  #include <linux/binfmts.h>
  #include <linux/context_tracking.h>
  #include <linux/compiler.h>
+#include <linux/cpufreq_times.h>
  
  #include <asm/switch_to.h>
  #include <asm/tlb.h>
@@ -546,6 +547,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
         if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
                 return;
  
+       head->count++;
+
         get_task_struct(task);
  
         /*
@@ -555,6 +558,10 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
         head->lastp = &node->next;
  }
  
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
+              int sibling_count_hint);
+
  void wake_up_q(struct wake_q_head *head)
  {
         struct wake_q_node *node = head->first;
@@ -569,10 +576,10 @@ void wake_up_q(struct wake_q_head *head)
                 task->wake_q.next = NULL;
  
                 /*
-                * wake_up_process() implies a wmb() to pair with the queueing
+                * try_to_wake_up() implies a wmb() to pair with the queueing
                  * in wake_q_add() so as not to miss wakeups.
                  */
-               wake_up_process(task);
+               try_to_wake_up(task, TASK_NORMAL, 0, head->count);
                 put_task_struct(task);
         }
  }
@@ -613,9 +620,9 @@ void resched_cpu(int cpu)
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
  
-       if (!raw_spin_trylock_irqsave(&rq->lock, flags))
-               return;
-       resched_curr(rq);
+       raw_spin_lock_irqsave(&rq->lock, flags);
+       if (cpu_online(cpu) || cpu == smp_processor_id())
+               resched_curr(rq);
         raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
  
@@ -1642,12 +1649,14 @@ out:
   * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
   */
  static inline
-int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags,
+                  int sibling_count_hint)
  {
         lockdep_assert_held(&p->pi_lock);
  
         if (p->nr_cpus_allowed > 1)
-               cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+               cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags,
+                                                    sibling_count_hint);
  
         /*
          * In order not to call set_task_cpu() on a blocking task we need
@@ -1932,6 +1941,8 @@ static void ttwu_queue(struct task_struct *p, int cpu)
   * @p: the thread to be awakened
   * @state: the mask of task states that can be woken
   * @wake_flags: wake modifier flags (WF_*)
+ * @sibling_count_hint: A hint at the number of threads that are being woken up
+ *                      in this event.
   *
   * Put it on the run-queue if it's not already there. The "current"
   * thread is always on the run-queue (except when the actual
@@ -1943,7 +1954,8 @@ static void ttwu_queue(struct task_struct *p, int cpu)
   * or @state didn't match @p's state.
   */
  static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
+              int sibling_count_hint)
  {
         unsigned long flags;
         int cpu, success = 0;
@@ -2044,8 +2056,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         if (p->sched_class->task_waking)
                 p->sched_class->task_waking(p);
  
-       cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
-
+       cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags,
+                            sibling_count_hint);
         if (task_cpu(p) != cpu) {
                 wake_flags |= WF_MIGRATED;
                 set_task_cpu(p, cpu);
@@ -2127,13 +2139,13 @@ out:
   */
  int wake_up_process(struct task_struct *p)
  {
-       return try_to_wake_up(p, TASK_NORMAL, 0);
+       return try_to_wake_up(p, TASK_NORMAL, 0, 1);
  }
  EXPORT_SYMBOL(wake_up_process);
  
  int wake_up_state(struct task_struct *p, unsigned int state)
  {
-       return try_to_wake_up(p, state, 0);
+       return try_to_wake_up(p, state, 0, 1);
  }
  
  /*
@@ -2148,6 +2160,7 @@ void __dl_clear_params(struct task_struct *p)
         dl_se->dl_period = 0;
         dl_se->flags = 0;
         dl_se->dl_bw = 0;
+       dl_se->dl_density = 0;
  
         dl_se->dl_throttled = 0;
         dl_se->dl_new = 1;
@@ -2189,6 +2202,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         init_dl_task_timer(&p->dl);
         __dl_clear_params(p);
  
+       init_rt_schedtune_timer(&p->rt);
         INIT_LIST_HEAD(&p->rt.run_list);
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2467,7 +2481,7 @@ void wake_up_new_task(struct task_struct *p)
          * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
          * as we're not fully set-up yet.
          */
-       __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
+       __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1));
  #endif
         rq = __task_rq_lock(p);
         update_rq_clock(rq);
@@ -2771,7 +2785,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
                 atomic_inc(&oldmm->mm_count);
                 enter_lazy_tlb(oldmm, next);
         } else
-               switch_mm(oldmm, mm, next);
+               switch_mm_irqs_off(oldmm, mm, next);
  
         if (!prev->mm) {
                 prev->active_mm = NULL;
@@ -2905,7 +2919,7 @@ void sched_exec(void)
         int dest_cpu;
  
         raw_spin_lock_irqsave(&p->pi_lock, flags);
-       dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
+       dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1);
         if (dest_cpu == smp_processor_id())
                 goto unlock;
  
@@ -2971,91 +2985,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         return ns;
  }
  
-#ifdef CONFIG_CPU_FREQ_GOV_SCHED
-
-static inline
-unsigned long add_capacity_margin(unsigned long cpu_capacity)
-{
-       cpu_capacity  = cpu_capacity * capacity_margin;
-       cpu_capacity /= SCHED_CAPACITY_SCALE;
-       return cpu_capacity;
-}
-
-static inline
-unsigned long sum_capacity_reqs(unsigned long cfs_cap,
-                               struct sched_capacity_reqs *scr)
-{
-       unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
-       return total += scr->dl;
-}
-
-unsigned long boosted_cpu_util(int cpu);
-static void sched_freq_tick_pelt(int cpu)
-{
-       unsigned long cpu_utilization = boosted_cpu_util(cpu);
-       unsigned long capacity_curr = capacity_curr_of(cpu);
-       struct sched_capacity_reqs *scr;
-
-       scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
-       if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
-               return;
-
-       /*
-        * To make free room for a task that is building up its "real"
-        * utilization and to harm its performance the least, request
-        * a jump to a higher OPP as soon as the margin of free capacity
-        * is impacted (specified by capacity_margin).
-        * Remember CPU utilization in sched_capacity_reqs should be normalised.
-        */
-       cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
-       set_cfs_cpu_capacity(cpu, true, cpu_utilization);
-}
-
-#ifdef CONFIG_SCHED_WALT
-static void sched_freq_tick_walt(int cpu)
-{
-       unsigned long cpu_utilization = cpu_util_freq(cpu);
-       unsigned long capacity_curr = capacity_curr_of(cpu);
-
-       if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
-               return sched_freq_tick_pelt(cpu);
-
-       /*
-        * Add a margin to the WALT utilization to check if we will need to
-        * increase frequency.
-        * NOTE: WALT tracks a single CPU signal for all the scheduling
-        * classes, thus this margin is going to be added to the DL class as
-        * well, which is something we do not do in sched_freq_tick_pelt case.
-        */
-       if (add_capacity_margin(cpu_utilization) <= capacity_curr)
-               return;
-
-       /*
-        * It is likely that the load is growing so we
-        * keep the added margin in our request as an
-        * extra boost.
-        * Remember CPU utilization in sched_capacity_reqs should be normalised.
-        */
-       cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
-       set_cfs_cpu_capacity(cpu, true, cpu_utilization);
-
-}
-#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
-#else
-#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
-#endif /* CONFIG_SCHED_WALT */
-
-static void sched_freq_tick(int cpu)
-{
-       if (!sched_freq())
-               return;
-
-       _sched_freq_tick(cpu);
-}
-#else
-static inline void sched_freq_tick(int cpu) { }
-#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
-
  /*
   * This function gets called by the timer code, with HZ frequency.
   * We call it with interrupts disabled.
@@ -3076,7 +3005,6 @@ void scheduler_tick(void)
         curr->sched_class->task_tick(rq, curr, 0);
         update_cpu_load_active(rq);
         calc_global_load_tick(rq);
-       sched_freq_tick(cpu);
         raw_spin_unlock(&rq->lock);
  
         perf_event_task_tick();
@@ -3086,6 +3014,9 @@ void scheduler_tick(void)
         trigger_load_balance(rq);
  #endif
         rq_last_tick_reset(rq);
+
+       if (curr->sched_class == &fair_sched_class)
+               check_for_migration(rq, curr);
  }
  
  #ifdef CONFIG_NO_HZ_FULL
@@ -3560,7 +3491,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
  int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
                           void *key)
  {
-       return try_to_wake_up(curr->private, mode, wake_flags);
+       return try_to_wake_up(curr->private, mode, wake_flags, 1);
  }
  EXPORT_SYMBOL(default_wake_function);
  
@@ -3840,6 +3771,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
         dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
         dl_se->flags = attr->sched_flags;
         dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+       dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
  
         /*
          * Changing the parameters of a task is 'tricky' and we're not doing
@@ -4012,8 +3944,8 @@ static int __sched_setscheduler(struct task_struct *p,
         struct rq *rq;
         int reset_on_fork;
  
-       /* may grab non-irq protected spin_locks */
-       BUG_ON(in_interrupt());
+       /* The pi code expects interrupts enabled */
+       BUG_ON(pi && in_interrupt());
  recheck:
         /* double check policy once rq lock held */
         if (policy < 0) {
@@ -6165,6 +6097,19 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
                 call_rcu_sched(&old_rd->rcu, free_rootdomain);
  }
  
+void sched_get_rd(struct root_domain *rd)
+{
+       atomic_inc(&rd->refcount);
+}
+
+void sched_put_rd(struct root_domain *rd)
+{
+       if (!atomic_dec_and_test(&rd->refcount))
+               return;
+
+       call_rcu_sched(&rd->rcu, free_rootdomain);
+}
+
  static int init_rootdomain(struct root_domain *rd)
  {
         memset(rd, 0, sizeof(*rd));
@@ -6178,6 +6123,12 @@ static int init_rootdomain(struct root_domain *rd)
         if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
                 goto free_dlo_mask;
  
+#ifdef HAVE_RT_PUSH_IPI
+       rd->rto_cpu = -1;
+       raw_spin_lock_init(&rd->rto_lock);
+       init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+#endif
+
         init_dl_bw(&rd->dl_bw);
         if (cpudl_init(&rd->cpudl) != 0)
                 goto free_dlo_mask;
@@ -7898,6 +7849,7 @@ void __init sched_init(void)
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;
                 rq->push_cpu = 0;
+               rq->push_task = NULL;
                 rq->cpu = i;
                 rq->online = 0;
                 rq->idle_stamp = 0;
@@ -8192,11 +8144,9 @@ void sched_destroy_group(struct task_group *tg)
  void sched_offline_group(struct task_group *tg)
  {
         unsigned long flags;
-       int i;
  
         /* end participation in shares distribution */
-       for_each_possible_cpu(i)
-               unregister_fair_sched_group(tg, i);
+       unregister_fair_sched_group(tg);
  
         spin_lock_irqsave(&task_group_lock, flags);
         list_del_rcu(&tg->list);