OSDN Git Service

fork: add clone3
[android-x86/kernel.git] / kernel / fork.c
index b4cba95..08ff131 100644 (file)
@@ -1760,19 +1760,15 @@ static __always_inline void delayed_free_task(struct task_struct *tsk)
  * flags). The actual kick-off is left to the caller.
  */
 static __latent_entropy struct task_struct *copy_process(
-                                       unsigned long clone_flags,
-                                       unsigned long stack_start,
-                                       unsigned long stack_size,
-                                       int __user *parent_tidptr,
-                                       int __user *child_tidptr,
                                        struct pid *pid,
                                        int trace,
-                                       unsigned long tls,
-                                       int node)
+                                       int node,
+                                       struct kernel_clone_args *args)
 {
        int pidfd = -1, retval;
        struct task_struct *p;
        struct multiprocess_signals delayed;
+       u64 clone_flags = args->flags;
 
        /*
         * Don't allow sharing the root directory with processes in a different
@@ -1821,27 +1817,12 @@ static __latent_entropy struct task_struct *copy_process(
        }
 
        if (clone_flags & CLONE_PIDFD) {
-               int reserved;
-
                /*
-                * - CLONE_PARENT_SETTID is useless for pidfds and also
-                *   parent_tidptr is used to return pidfds.
                 * - CLONE_DETACHED is blocked so that we can potentially
                 *   reuse it later for CLONE_PIDFD.
                 * - CLONE_THREAD is blocked until someone really needs it.
                 */
-               if (clone_flags &
-                   (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
-                       return ERR_PTR(-EINVAL);
-
-               /*
-                * Verify that parent_tidptr is sane so we can potentially
-                * reuse it later.
-                */
-               if (get_user(reserved, parent_tidptr))
-                       return ERR_PTR(-EFAULT);
-
-               if (reserved != 0)
+               if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
                        return ERR_PTR(-EINVAL);
        }
 
@@ -1874,11 +1855,11 @@ static __latent_entropy struct task_struct *copy_process(
         * p->set_child_tid which is (ab)used as a kthread's data pointer for
         * kernel threads (PF_KTHREAD).
         */
-       p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+       p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
        /*
         * Clear TID on mm_release()?
         */
-       p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+       p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
 
        ftrace_graph_init_task(p);
 
@@ -2037,7 +2018,8 @@ static __latent_entropy struct task_struct *copy_process(
        retval = copy_io(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_namespaces;
-       retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
+       retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p,
+                                args->tls);
        if (retval)
                goto bad_fork_cleanup_io;
 
@@ -2062,7 +2044,7 @@ static __latent_entropy struct task_struct *copy_process(
                        goto bad_fork_free_pid;
 
                pidfd = retval;
-               retval = put_user(pidfd, parent_tidptr);
+               retval = put_user(pidfd, args->pidfd);
                if (retval)
                        goto bad_fork_put_pidfd;
        }
@@ -2105,7 +2087,7 @@ static __latent_entropy struct task_struct *copy_process(
                if (clone_flags & CLONE_PARENT)
                        p->exit_signal = current->group_leader->exit_signal;
                else
-                       p->exit_signal = (clone_flags & CSIGNAL);
+                       p->exit_signal = args->exit_signal;
                p->group_leader = p;
                p->tgid = p->pid;
        }
@@ -2313,8 +2295,11 @@ static inline void init_idle_pids(struct task_struct *idle)
 struct task_struct *fork_idle(int cpu)
 {
        struct task_struct *task;
-       task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
-                           cpu_to_node(cpu));
+       struct kernel_clone_args args = {
+               .flags = CLONE_VM,
+       };
+
+       task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
        if (!IS_ERR(task)) {
                init_idle_pids(task);
                init_idle(task, cpu);
@@ -2334,13 +2319,9 @@ struct mm_struct *copy_init_mm(void)
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
-long _do_fork(unsigned long clone_flags,
-             unsigned long stack_start,
-             unsigned long stack_size,
-             int __user *parent_tidptr,
-             int __user *child_tidptr,
-             unsigned long tls)
+long _do_fork(struct kernel_clone_args *args)
 {
+       u64 clone_flags = args->flags;
        struct completion vfork;
        struct pid *pid;
        struct task_struct *p;
@@ -2356,7 +2337,7 @@ long _do_fork(unsigned long clone_flags,
        if (!(clone_flags & CLONE_UNTRACED)) {
                if (clone_flags & CLONE_VFORK)
                        trace = PTRACE_EVENT_VFORK;
-               else if ((clone_flags & CSIGNAL) != SIGCHLD)
+               else if (args->exit_signal != SIGCHLD)
                        trace = PTRACE_EVENT_CLONE;
                else
                        trace = PTRACE_EVENT_FORK;
@@ -2365,8 +2346,7 @@ long _do_fork(unsigned long clone_flags,
                        trace = 0;
        }
 
-       p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
-                        child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
+       p = copy_process(NULL, trace, NUMA_NO_NODE, args);
        add_latent_entropy();
 
        if (IS_ERR(p))
@@ -2382,7 +2362,7 @@ long _do_fork(unsigned long clone_flags,
        nr = pid_vnr(pid);
 
        if (clone_flags & CLONE_PARENT_SETTID)
-               put_user(nr, parent_tidptr);
+               put_user(nr, args->parent_tid);
 
        if (clone_flags & CLONE_VFORK) {
                p->vfork_done = &vfork;
@@ -2414,8 +2394,16 @@ long do_fork(unsigned long clone_flags,
              int __user *parent_tidptr,
              int __user *child_tidptr)
 {
-       return _do_fork(clone_flags, stack_start, stack_size,
-                       parent_tidptr, child_tidptr, 0);
+       struct kernel_clone_args args = {
+               .flags          = (clone_flags & ~CSIGNAL),
+               .child_tid      = child_tidptr,
+               .parent_tid     = parent_tidptr,
+               .exit_signal    = (clone_flags & CSIGNAL),
+               .stack          = stack_start,
+               .stack_size     = stack_size,
+       };
+
+       return _do_fork(&args);
 }
 #endif
 
@@ -2424,15 +2412,25 @@ long do_fork(unsigned long clone_flags,
  */
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
-       return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
-               (unsigned long)arg, NULL, NULL, 0);
+       struct kernel_clone_args args = {
+               .flags          = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
+               .exit_signal    = (flags & CSIGNAL),
+               .stack          = (unsigned long)fn,
+               .stack_size     = (unsigned long)arg,
+       };
+
+       return _do_fork(&args);
 }
 
 #ifdef __ARCH_WANT_SYS_FORK
 SYSCALL_DEFINE0(fork)
 {
 #ifdef CONFIG_MMU
-       return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
+       struct kernel_clone_args args = {
+               .exit_signal = SIGCHLD,
+       };
+
+       return _do_fork(&args);
 #else
        /* can not support in nommu mode */
        return -EINVAL;
@@ -2443,8 +2441,12 @@ SYSCALL_DEFINE0(fork)
 #ifdef __ARCH_WANT_SYS_VFORK
 SYSCALL_DEFINE0(vfork)
 {
-       return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
-                       0, NULL, NULL, 0);
+       struct kernel_clone_args args = {
+               .flags          = CLONE_VFORK | CLONE_VM,
+               .exit_signal    = SIGCHLD,
+       };
+
+       return _do_fork(&args);
 }
 #endif
 
@@ -2472,7 +2474,110 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
                 unsigned long, tls)
 #endif
 {
-       return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
+       struct kernel_clone_args args = {
+               .flags          = (clone_flags & ~CSIGNAL),
+               .pidfd          = parent_tidptr,
+               .child_tid      = child_tidptr,
+               .parent_tid     = parent_tidptr,
+               .exit_signal    = (clone_flags & CSIGNAL),
+               .stack          = newsp,
+               .tls            = tls,
+       };
+
+       /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
+       if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID))
+               return -EINVAL;
+
+       return _do_fork(&args);
+}
+
+noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
+                                             struct clone_args __user *uargs,
+                                             size_t size)
+{
+       struct clone_args args;
+
+       if (unlikely(size > PAGE_SIZE))
+               return -E2BIG;
+
+       if (unlikely(size < sizeof(struct clone_args)))
+               return -EINVAL;
+
+       if (unlikely(!access_ok(uargs, size)))
+               return -EFAULT;
+
+       if (size > sizeof(struct clone_args)) {
+               unsigned char __user *addr;
+               unsigned char __user *end;
+               unsigned char val;
+
+               addr = (void __user *)uargs + sizeof(struct clone_args);
+               end = (void __user *)uargs + size;
+
+               for (; addr < end; addr++) {
+                       if (get_user(val, addr))
+                               return -EFAULT;
+                       if (val)
+                               return -E2BIG;
+               }
+
+               size = sizeof(struct clone_args);
+       }
+
+       if (copy_from_user(&args, uargs, size))
+               return -EFAULT;
+
+       *kargs = (struct kernel_clone_args){
+               .flags          = args.flags,
+               .pidfd          = u64_to_user_ptr(args.pidfd),
+               .child_tid      = u64_to_user_ptr(args.child_tid),
+               .parent_tid     = u64_to_user_ptr(args.parent_tid),
+               .exit_signal    = args.exit_signal,
+               .stack          = args.stack,
+               .stack_size     = args.stack_size,
+               .tls            = args.tls,
+       };
+
+       return 0;
+}
+
+static bool clone3_args_valid(const struct kernel_clone_args *kargs)
+{
+       /*
+        * All lower bits of the flag word are taken.
+        * Verify that no other unknown flags are passed along.
+        */
+       if (kargs->flags & ~CLONE_LEGACY_FLAGS)
+               return false;
+
+       /*
+        * - make the CLONE_DETACHED bit reuseable for clone3
+        * - make the CSIGNAL bits reuseable for clone3
+        */
+       if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
+               return false;
+
+       if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
+           kargs->exit_signal)
+               return false;
+
+       return true;
+}
+
+SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
+{
+       int err;
+
+       struct kernel_clone_args kargs;
+
+       err = copy_clone_args_from_user(&kargs, uargs, size);
+       if (err)
+               return err;
+
+       if (!clone3_args_valid(&kargs))
+               return -EINVAL;
+
+       return _do_fork(&kargs);
 }
 #endif