rcu: Do RCU GP kthread self-wakeup from softirq and interrupt

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / kernel / fork.c
diff --git a/kernel/fork.c b/kernel/fork.c

index fce002e..e4b8191 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
  #include <linux/tsacct_kern.h>
  #include <linux/cn_proc.h>
  #include <linux/freezer.h>
+#include <linux/kaiser.h>
  #include <linux/delayacct.h>
  #include <linux/taskstats_kern.h>
  #include <linux/random.h>
@@ -169,6 +170,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
  
  static inline void free_thread_info(struct thread_info *ti)
  {
+       kaiser_unmap_thread_stack(ti);  /* NOTE(review): presumably pairs with kaiser_map_thread_stack() - confirm */
         free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
  }
  # else
@@ -331,13 +333,14 @@ void set_task_stack_end_magic(struct task_struct *tsk)
         *stackend = STACK_END_MAGIC;    /* for overflow detection */
  }
  
-static struct task_struct *dup_task_struct(struct task_struct *orig)
+static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
  {
         struct task_struct *tsk;
         struct thread_info *ti;
-       int node = tsk_fork_get_node(orig);
         int err;
  
+       if (node == NUMA_NO_NODE)
+               node = tsk_fork_get_node(orig);
         tsk = alloc_task_struct_node(node);
         if (!tsk)
                 return NULL;
@@ -351,6 +354,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                 goto free_ti;
  
         tsk->stack = ti;
+
+       err = kaiser_map_thread_stack(tsk->stack);
+       if (err)
+               goto free_ti;
  #ifdef CONFIG_SECCOMP
         /*
          * We must handle setting up seccomp filters once we're under
@@ -367,7 +374,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
         set_task_stack_end_magic(tsk);
  
  #ifdef CONFIG_CC_STACKPROTECTOR
-       tsk->stack_canary = get_random_int();
+       tsk->stack_canary = get_random_long();
  #endif
  
         /*
@@ -380,6 +387,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
  #endif
         tsk->splice_pipe = NULL;
         tsk->task_frag.page = NULL;
+       tsk->wake_q.next = NULL;
  
         account_kernel_stack(ti, 1);
  
@@ -584,7 +592,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  #endif
  }
  
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+       struct user_namespace *user_ns)
  {
         mm->mmap = NULL;
         mm->mm_rb = RB_ROOT;
@@ -624,6 +633,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
         if (init_new_context(p, mm))
                 goto fail_nocontext;
  
+       mm->user_ns = get_user_ns(user_ns);
         return mm;
  
  fail_nocontext:
@@ -669,7 +679,7 @@ struct mm_struct *mm_alloc(void)
                 return NULL;
  
         memset(mm, 0, sizeof(*mm));
-       return mm_init(mm, current);
+       return mm_init(mm, current, current_user_ns());
  }
  
  /*
@@ -684,6 +694,7 @@ void __mmdrop(struct mm_struct *mm)
         destroy_context(mm);
         mmu_notifier_mm_destroy(mm);
         check_mm(mm);
+       put_user_ns(mm->user_ns);
         free_mm(mm);
  }
  EXPORT_SYMBOL_GPL(__mmdrop);
@@ -763,6 +774,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
  EXPORT_SYMBOL(get_mm_exe_file);
  
  /**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if the task has no mm, if its mm has no associated executable
+ * file, or if this is a kernel thread with a borrowed mm (see the comment
+ * above get_task_mm).  The caller must release the file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+       struct file *exe_file = NULL;   /* stays NULL unless a file is found */
+       struct mm_struct *mm;
+
+       task_lock(task);                /* task->mm is read under task_lock */
+       mm = task->mm;
+       if (mm) {
+               if (!(task->flags & PF_KTHREAD))  /* skip kthreads (borrowed mm) */
+                       exe_file = get_mm_exe_file(mm);
+       }
+       task_unlock(task);
+       return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
+/**
   * get_task_mm - acquire a reference to the task's mm
   *
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
@@ -877,14 +911,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
         deactivate_mm(tsk, mm);
  
         /*
-        * If we're exiting normally, clear a user-space tid field if
-        * requested.  We leave this alone when dying by signal, to leave
-        * the value intact in a core dump, and to save the unnecessary
-        * trouble, say, a killed vfork parent shouldn't touch this mm.
-        * Userland only wants this done for a sys_exit.
+        * Signal userspace if we're not exiting with a core dump
+        * because we want to leave the value intact for debugging
+        * purposes.
          */
         if (tsk->clear_child_tid) {
-               if (!(tsk->flags & PF_SIGNALED) &&
+               if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
                     atomic_read(&mm->mm_users) > 1) {
                         /*
                          * We don't check the error code - if userspace has
@@ -920,7 +952,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
  
         memcpy(mm, oldmm, sizeof(*mm));
  
-       if (!mm_init(mm, tsk))
+       if (!mm_init(mm, tsk, mm->user_ns))
                 goto fail_nomem;
  
         err = dup_mmap(mm, oldmm);
@@ -1077,7 +1109,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
                 return -ENOMEM;
  
         atomic_set(&sig->count, 1);
+       spin_lock_irq(&current->sighand->siglock);
         memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+       spin_unlock_irq(&current->sighand->siglock);
         return 0;
  }
  
@@ -1245,7 +1279,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                                         int __user *child_tidptr,
                                         struct pid *pid,
                                         int trace,
-                                       unsigned long tls)
+                                       unsigned long tls,
+                                       int node)
  {
         int retval;
         struct task_struct *p;
@@ -1298,10 +1333,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                 goto fork_out;
  
         retval = -ENOMEM;
-       p = dup_task_struct(current);
+       p = dup_task_struct(current, node);
         if (!p)
                 goto fork_out;
  
+       /*
+        * This _must_ happen before we call free_task(), i.e. before we jump
+        * to any of the bad_fork_* labels. This is to avoid freeing
+        * p->set_child_tid which is (ab)used as a kthread's data pointer for
+        * kernel threads (PF_KTHREAD).
+        */
+       p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+       /*
+        * Clear TID on mm_release()?
+        */
+       p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
         ftrace_graph_init_task(p);
  
         rt_mutex_init_task(p);
@@ -1364,11 +1411,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
  
         posix_cpu_timers_init(p);
  
-       p->start_time = ktime_get_ns();
-       p->real_start_time = ktime_get_boot_ns();
         p->io_context = NULL;
         p->audit_context = NULL;
-       threadgroup_change_begin(current);
         cgroup_fork(p);
  #ifdef CONFIG_NUMA
         p->mempolicy = mpol_dup(p->mempolicy);
@@ -1464,11 +1508,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                 }
         }
  
-       p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
-       /*
-        * Clear TID on mm_release()?
-        */
-       p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
  #ifdef CONFIG_BLOCK
         p->plug = NULL;
  #endif
@@ -1520,6 +1559,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         INIT_LIST_HEAD(&p->thread_group);
         p->task_works = NULL;
  
+       threadgroup_change_begin(current);
         /*
          * Ensure that the cgroup subsystem policies allow the new process to be
          * forked. It should be noted the the new process's css_set can be changed
@@ -1531,6 +1571,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                 goto bad_fork_free_pid;
  
         /*
+        * From this point on we must avoid any synchronous user-space
+        * communication until we take the tasklist-lock. In particular, we do
+        * not want user-space to be able to predict the process start-time by
+        * stalling fork(2) after we recorded the start_time but before it is
+        * visible to the system.
+        */
+
+       p->start_time = ktime_get_ns();
+       p->real_start_time = ktime_get_boot_ns();
+
+       /*
          * Make it visible to the rest of the system, but dont wake it up yet.
          * Need tasklist lock for parent etc handling!
          */
@@ -1563,11 +1614,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         */
         recalc_sigpending();
         if (signal_pending(current)) {
-               spin_unlock(&current->sighand->siglock);
-               write_unlock_irq(&tasklist_lock);
                 retval = -ERESTARTNOINTR;
                 goto bad_fork_cancel_cgroup;
         }
+       if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+               retval = -ENOMEM;
+               goto bad_fork_cancel_cgroup;
+       }
  
         if (likely(p->pid)) {
                 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1618,8 +1671,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         return p;
  
  bad_fork_cancel_cgroup:
+       spin_unlock(&current->sighand->siglock);
+       write_unlock_irq(&tasklist_lock);
         cgroup_cancel_fork(p, cgrp_ss_priv);
  bad_fork_free_pid:
+       threadgroup_change_end(current);
         if (pid != &init_struct_pid)
                 free_pid(pid);
  bad_fork_cleanup_io:
@@ -1650,7 +1706,6 @@ bad_fork_cleanup_policy:
         mpol_put(p->mempolicy);
  bad_fork_cleanup_threadgroup_lock:
  #endif
-       threadgroup_change_end(current);
         delayacct_tsk_free(p);
  bad_fork_cleanup_count:
         atomic_dec(&p->cred->user->processes);
@@ -1674,7 +1729,8 @@ static inline void init_idle_pids(struct pid_link *links)
  struct task_struct *fork_idle(int cpu)
  {
         struct task_struct *task;
-       task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
+       task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+                           cpu_to_node(cpu));
         if (!IS_ERR(task)) {
                 init_idle_pids(task->pids);
                 init_idle(task, cpu);
@@ -1719,7 +1775,7 @@ long _do_fork(unsigned long clone_flags,
         }
  
         p = copy_process(clone_flags, stack_start, stack_size,
-                        child_tidptr, NULL, trace, tls);
+                        child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
         /*
          * Do this prior waking up the new thread - the thread pointer
          * might get invalid after that point, if the thread exits quickly.