Merge tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner...

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 3 Jun 2020 20:12:57 +0000 (13:12 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 3 Jun 2020 20:12:57 +0000 (13:12 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jun 2020 20:12:57 +0000 (13:12 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jun 2020 20:12:57 +0000 (13:12 -0700)
diff --git a/fs/namespace.c b/fs/namespace.c

index a6baee3..6d499ab 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1786,6 +1786,11 @@ static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
         return container_of(ns, struct mnt_namespace, ns);
  }
  
+/* Hand out the generic ns_common that is embedded in mount namespace @mnt. */
+struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
+{
+       struct ns_common *common = &mnt->ns;
+
+       return common;
+}
+
  static bool mnt_ns_loop(struct dentry *dentry)
  {
         /* Could bind mounting the mount namespace inode cause a
@@ -4013,16 +4018,18 @@ static void mntns_put(struct ns_common *ns)
         put_mnt_ns(to_mnt_ns(ns));
  }
  
-static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int mntns_install(struct nsset *nsset, struct ns_common *ns)
  {
-       struct fs_struct *fs = current->fs;
+       struct nsproxy *nsproxy = nsset->nsproxy;
+       struct fs_struct *fs = nsset->fs;
         struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
+       struct user_namespace *user_ns = nsset->cred->user_ns;
         struct path root;
         int err;
  
         if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
-           !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
-           !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable(user_ns, CAP_SYS_CHROOT) ||
+           !ns_capable(user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         if (is_anon_ns(mnt_ns))
diff --git a/fs/nsfs.c b/fs/nsfs.c

index 4f12057..800c1d0 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -229,6 +229,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
         return res;
  }
  
+/* Report whether @file is backed by ns_file_operations, judged by its f_op. */
+bool proc_ns_file(const struct file *file)
+{
+       const struct file_operations *fops = file->f_op;
+
+       return fops == &ns_file_operations;
+}
+
  struct file *proc_ns_fget(int fd)
  {
         struct file *file;
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h

index 3594208..8f882f5 100644 (file)
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -6,10 +6,12 @@
  struct mnt_namespace;
  struct fs_struct;
  struct user_namespace;
+struct ns_common;
  
  extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
                 struct user_namespace *, struct fs_struct *);
  extern void put_mnt_ns(struct mnt_namespace *ns);
+extern struct ns_common *from_mnt_ns(struct mnt_namespace *);
  
  extern const struct file_operations proc_mounts_operations;
  extern const struct file_operations proc_mountinfo_operations;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h

index 074f395..cdb171e 100644 (file)
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,6 +42,30 @@ struct nsproxy {
  extern struct nsproxy init_nsproxy;
  
  /*
+ * A structure to encompass all bits needed to install
+ * a partial or complete new set of namespaces.
+ *
+ * If a new user namespace is requested cred will
+ * point to a modifiable set of credentials. If a pointer
+ * to a modifiable set is needed nsset_cred() must be
+ * used and tested.
+ */
+struct nsset {
+       /* CLONE_NEW* bits of the request; nsset_cred() tests CLONE_NEWUSER. */
+       unsigned flags;
+       /* Namespace set being assembled; handed to the task on commit. */
+       struct nsproxy *nsproxy;
+       /*
+        * Only set when CLONE_NEWNS is requested: the task's own fs_struct
+        * if that is the sole flag, otherwise a copy_fs_struct() copy.
+        */
+       struct fs_struct *fs;
+       /* prepare_creds() result for CLONE_NEWUSER, else current_cred(). */
+       const struct cred *cred;
+};
+
+/*
+ * Return the writable credentials of @set, or NULL when the request does
+ * not include CLONE_NEWUSER and the cred must therefore not be modified.
+ */
+static inline struct cred *nsset_cred(struct nsset *set)
+{
+       if (!(set->flags & CLONE_NEWUSER))
+               return NULL;
+
+       return (struct cred *)set->cred;
+}
+
+/*
   * the namespaces access rules are:
   *
   *  1. only current task is allowed to change tsk->nsproxy pointer or
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h

index 45c05fd..0cfc44d 100644 (file)
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -179,4 +179,6 @@ static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
         return inode->i_sb->s_fs_info;
  }
  
+bool proc_ns_file(const struct file *file);
+
  #endif /* _LINUX_PROC_FS_H */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h

index 6abe85c..75807ec 100644 (file)
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -8,7 +8,7 @@
  #include <linux/ns_common.h>
  
  struct pid_namespace;
-struct nsproxy;
+struct nsset;
  struct path;
  struct task_struct;
  struct inode;
@@ -19,7 +19,7 @@ struct proc_ns_operations {
         int type;
         struct ns_common *(*get)(struct task_struct *task);
         void (*put)(struct ns_common *ns);
-       int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+       int (*install)(struct nsset *nsset, struct ns_common *ns);
         struct user_namespace *(*owner)(struct ns_common *ns);
         struct ns_common *(*get_parent)(struct ns_common *ns);
  } __randomize_layout;
diff --git a/ipc/namespace.c b/ipc/namespace.c

index b3ca147..fdc3b5f 100644 (file)
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -177,15 +177,14 @@ static void ipcns_put(struct ns_common *ns)
         return put_ipc_ns(to_ipc_ns(ns));
  }
  
-static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int ipcns_install(struct nsset *nsset, struct ns_common *new)
  {
+       struct nsproxy *nsproxy = nsset->nsproxy;
         struct ipc_namespace *ns = to_ipc_ns(new);
         if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-           !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
-       /* Ditch state from the old ipc namespace */
-       exit_sem(current);
         put_ipc_ns(nsproxy->ipc_ns);
         nsproxy->ipc_ns = get_ipc_ns(ns);
         return 0;
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c

index b05f1dd..812a61a 100644 (file)
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -95,11 +95,12 @@ static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
         return container_of(ns, struct cgroup_namespace, ns);
  }
  
-static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
  {
+       struct nsproxy *nsproxy = nsset->nsproxy;
         struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
  
-       if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
+       if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
             !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c

index ed98821..b03df67 100644 (file)
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -19,6 +19,8 @@
  #include <net/net_namespace.h>
  #include <linux/ipc_namespace.h>
  #include <linux/time_namespace.h>
+#include <linux/fs_struct.h>
+#include <linux/proc_fs.h>
  #include <linux/proc_ns.h>
  #include <linux/file.h>
  #include <linux/syscalls.h>
@@ -257,37 +259,296 @@ void exit_task_namespaces(struct task_struct *p)
         switch_task_namespaces(p, NULL);
  }
  
-SYSCALL_DEFINE2(setns, int, fd, int, nstype)
+/*
+ * Validate the namespace flags given to setns() together with a pidfd.
+ *
+ * Returns -EINVAL when @flags is zero, when it contains any bit other than
+ * the seven CLONE_NEW* flags listed below, or when it names a namespace
+ * type whose CONFIG_* option is not enabled in this build. Returns 0
+ * otherwise.
+ */
+static int check_setns_flags(unsigned long flags)
  {
-       struct task_struct *tsk = current;
-       struct nsproxy *new_nsproxy;
-       struct file *file;
-       struct ns_common *ns;
-       int err;
+       if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+                                CLONE_NEWNET | CLONE_NEWUSER | CLONE_NEWPID |
+                                CLONE_NEWCGROUP)))
+               return -EINVAL;
  
-       file = proc_ns_fget(fd);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
+#ifndef CONFIG_USER_NS
+       if (flags & CLONE_NEWUSER)
+               return -EINVAL;
+#endif
+#ifndef CONFIG_PID_NS
+       if (flags & CLONE_NEWPID)
+               return -EINVAL;
+#endif
+#ifndef CONFIG_UTS_NS
+       if (flags & CLONE_NEWUTS)
+               return -EINVAL;
+#endif
+#ifndef CONFIG_IPC_NS
+       if (flags & CLONE_NEWIPC)
+               return -EINVAL;
+#endif
+#ifndef CONFIG_CGROUPS
+       if (flags & CLONE_NEWCGROUP)
+               return -EINVAL;
+#endif
+#ifndef CONFIG_NET_NS
+       if (flags & CLONE_NEWNET)
+               return -EINVAL;
+#endif
  
-       err = -EINVAL;
-       ns = get_proc_ns(file_inode(file));
-       if (nstype && (ns->ops->type != nstype))
-               goto out;
+       return 0;
+}
+
+/*
+ * Release whatever @nsset still owns: the prepared credentials for a
+ * CLONE_NEWUSER request, the temporary fs_struct copy, and the nsproxy.
+ */
+static void put_nsset(struct nsset *nsset)
+{
+       unsigned flags = nsset->flags;
+       /*
+        * A temporary fs_struct copy only exists if we attached to more
+        * than just the mount namespace.
+        */
+       bool fs_is_copy = (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS);
+
+       if (flags & CLONE_NEWUSER)
+               put_cred(nsset_cred(nsset));
+       if (fs_is_copy && nsset->fs)
+               free_fs_struct(nsset->fs);
+       if (nsset->nsproxy)
+               free_nsproxy(nsset->nsproxy);
+}
+
+/*
+ * Fill @nsset with everything needed to install the namespaces in @flags:
+ * a fresh nsproxy, credentials (a modifiable copy for CLONE_NEWUSER,
+ * otherwise the current ones) and, for CLONE_NEWNS, an fs_struct.
+ *
+ * Returns 0 on success, the create_new_namespaces() error if no nsproxy
+ * could be created, or -ENOMEM after releasing whatever was acquired.
+ */
+static int prepare_nsset(unsigned flags, struct nsset *nsset)
+{
+       struct task_struct *me = current;
+
+       nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
+       if (IS_ERR(nsset->nsproxy))
+               return PTR_ERR(nsset->nsproxy);
  
-       new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
-       if (IS_ERR(new_nsproxy)) {
-               err = PTR_ERR(new_nsproxy);
+       /*
+        * Record the flags before acquiring anything else: put_nsset()
+        * decides from them which resources to release, so with flags
+        * still zero the error path below would leak the prepared creds.
+        */
+       nsset->flags = flags;
+
+       if (flags & CLONE_NEWUSER)
+               nsset->cred = prepare_creds();
+       else
+               nsset->cred = current_cred();
+       if (!nsset->cred)
                 goto out;
+
+       /* Only create a temporary copy of fs_struct if we really need to. */
+       if (flags == CLONE_NEWNS) {
+               nsset->fs = me->fs;
+       } else if (flags & CLONE_NEWNS) {
+               nsset->fs = copy_fs_struct(me->fs);
+               if (!nsset->fs)
+                       goto out;
         }
  
-       err = ns->ops->install(new_nsproxy, ns);
-       if (err) {
-               free_nsproxy(new_nsproxy);
-               goto out;
+       return 0;
+
+out:
+       put_nsset(nsset);
+       return -ENOMEM;
+}
+
+/* Run the ->install() handler of @ns against the prepared @nsset. */
+static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
+{
+       const struct proc_ns_operations *ops = ns->ops;
+
+       return ops->install(nsset, ns);
+}
+
+/*
+ * This is the inverse operation to unshare().
+ * Ordering is equivalent to the standard ordering used everywhere else
+ * during unshare and process creation. The switch to the new set of
+ * namespaces occurs at the point of no return after installation of
+ * all requested namespaces was successful in commit_nsset().
+ *
+ * @nsset: set being prepared; nsset->flags selects the namespaces to take.
+ * @pid:   struct pid of the task whose namespaces are installed.
+ *
+ * Returns 0 on success, -ESRCH if @pid has no task, the task has no
+ * nsproxy, or (for CLONE_NEWPID) no active pid namespace, -EPERM if
+ * ptrace_may_access() denies PTRACE_MODE_READ_REALCREDS, or the first
+ * non-zero value returned by a namespace's ->install() handler.
+ */
+static int validate_nsset(struct nsset *nsset, struct pid *pid)
+{
+       int ret = 0;
+       unsigned flags = nsset->flags;
+       struct user_namespace *user_ns = NULL;
+       struct pid_namespace *pid_ns = NULL;
+       struct nsproxy *nsp;
+       struct task_struct *tsk;
+
+       /* Take a "snapshot" of the target task's namespaces. */
+       rcu_read_lock();
+       tsk = pid_task(pid, PIDTYPE_PID);
+       if (!tsk) {
+               rcu_read_unlock();
+               return -ESRCH;
+       }
+
+       if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
+               rcu_read_unlock();
+               return -EPERM;
+       }
+
+       /* Pin the target's nsproxy under task_lock() so it can be used below. */
+       task_lock(tsk);
+       nsp = tsk->nsproxy;
+       if (nsp)
+               get_nsproxy(nsp);
+       task_unlock(tsk);
+       if (!nsp) {
+               rcu_read_unlock();
+               return -ESRCH;
+       }
+
+#ifdef CONFIG_PID_NS
+       if (flags & CLONE_NEWPID) {
+               pid_ns = task_active_pid_ns(tsk);
+               if (unlikely(!pid_ns)) {
+                       rcu_read_unlock();
+                       ret = -ESRCH;
+                       goto out;
+               }
+               get_pid_ns(pid_ns);
+       }
+#endif
+
+#ifdef CONFIG_USER_NS
+       if (flags & CLONE_NEWUSER)
+               user_ns = get_user_ns(__task_cred(tsk)->user_ns);
+#endif
+       rcu_read_unlock();
+
+       /*
+        * Install requested namespaces. The caller will have
+        * verified earlier that the requested namespaces are
+        * supported on this kernel. We don't report errors here
+        * if a namespace is requested that isn't supported.
+        */
+#ifdef CONFIG_USER_NS
+       if (flags & CLONE_NEWUSER) {
+               ret = validate_ns(nsset, &user_ns->ns);
+               if (ret)
+                       goto out;
+       }
+#endif
+
+       if (flags & CLONE_NEWNS) {
+               ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
+               if (ret)
+                       goto out;
+       }
+
+#ifdef CONFIG_UTS_NS
+       if (flags & CLONE_NEWUTS) {
+               ret = validate_ns(nsset, &nsp->uts_ns->ns);
+               if (ret)
+                       goto out;
+       }
+#endif
+
+#ifdef CONFIG_IPC_NS
+       if (flags & CLONE_NEWIPC) {
+               ret = validate_ns(nsset, &nsp->ipc_ns->ns);
+               if (ret)
+                       goto out;
+       }
+#endif
+
+#ifdef CONFIG_PID_NS
+       if (flags & CLONE_NEWPID) {
+               ret = validate_ns(nsset, &pid_ns->ns);
+               if (ret)
+                       goto out;
+       }
+#endif
+
+#ifdef CONFIG_CGROUPS
+       if (flags & CLONE_NEWCGROUP) {
+               ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
+               if (ret)
+                       goto out;
+       }
+#endif
+
+#ifdef CONFIG_NET_NS
+       if (flags & CLONE_NEWNET) {
+               ret = validate_ns(nsset, &nsp->net_ns->ns);
+               if (ret)
+                       goto out;
+       }
+#endif
+
+out:
+       /* Drop the references taken on the target's namespaces above. */
+       if (pid_ns)
+               put_pid_ns(pid_ns);
+       if (nsp)
+               put_nsproxy(nsp);
+       put_user_ns(user_ns);
+
+       return ret;
+}
+
+/*
+ * This is the point of no return. There are just a few namespaces
+ * that do some actual work here and it's sufficiently minimal that
+ * a separate ns_common operation seems unnecessary for now.
+ * Unshare is doing the same thing. If we end up needing to do more
+ * in a given namespace, or if a helper used here is no longer
+ * exported, a simple commit handler for each namespace should be
+ * added to ns_common.
+ *
+ * Ownership of the prepared cred and nsproxy moves to the current task;
+ * the corresponding nsset fields are cleared afterwards.
+ */
+static void commit_nsset(struct nsset *nsset)
+{
+       unsigned flags = nsset->flags;
+       struct task_struct *me = current;
+
+#ifdef CONFIG_USER_NS
+       if (flags & CLONE_NEWUSER) {
+               /* transfer ownership */
+               commit_creds(nsset_cred(nsset));
+               nsset->cred = NULL;
+       }
+#endif
+
+       /* We only need to commit if we have used a temporary fs_struct. */
+       if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
+               set_fs_root(me->fs, &nsset->fs->root);
+               set_fs_pwd(me->fs, &nsset->fs->pwd);
         }
-       switch_task_namespaces(tsk, new_nsproxy);
  
-       perf_event_namespaces(tsk);
+#ifdef CONFIG_IPC_NS
+       /* Moved here from ipcns_install(), which used to call exit_sem(). */
+       if (flags & CLONE_NEWIPC)
+               exit_sem(me);
+#endif
+
+       /* transfer ownership */
+       switch_task_namespaces(me, nsset->nsproxy);
+       nsset->nsproxy = NULL;
+}
+
+SYSCALL_DEFINE2(setns, int, fd, int, flags)
+{
+       struct file *file;
+       struct ns_common *ns = NULL;
+       struct nsset nsset = {};
+       int err = 0;
+
+       file = fget(fd);
+       if (!file)
+               return -EBADF;
+
+       if (proc_ns_file(file)) {
+               ns = get_proc_ns(file_inode(file));
+               if (flags && (ns->ops->type != flags))
+                       err = -EINVAL;
+               flags = ns->ops->type;
+       } else if (!IS_ERR(pidfd_pid(file))) {
+               err = check_setns_flags(flags);
+       } else {
+               err = -EBADF;
+       }
+       if (err)
+               goto out;
+
+       err = prepare_nsset(flags, &nsset);
+       if (err)
+               goto out;
+
+       if (proc_ns_file(file))
+               err = validate_ns(&nsset, ns);
+       else
+               err = validate_nsset(&nsset, file->private_data);
+       if (!err) {
+               commit_nsset(&nsset);
+               perf_event_namespaces(current);
+       }
+       put_nsset(&nsset);
  out:
         fput(file);
         return err;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c

index 01f8ba3..11db2bd 100644 (file)
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -378,13 +378,14 @@ static void pidns_put(struct ns_common *ns)
         put_pid_ns(to_pid_ns(ns));
  }
  
-static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int pidns_install(struct nsset *nsset, struct ns_common *ns)
  {
+       struct nsproxy *nsproxy = nsset->nsproxy;
         struct pid_namespace *active = task_active_pid_ns(current);
         struct pid_namespace *ancestor, *new = to_pid_ns(ns);
  
         if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
-           !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         /*
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c

index 53bce34..5d9fc22 100644 (file)
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -280,8 +280,9 @@ static void timens_put(struct ns_common *ns)
         put_time_ns(to_time_ns(ns));
  }
  
-static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int timens_install(struct nsset *nsset, struct ns_common *new)
  {
+       struct nsproxy *nsproxy = nsset->nsproxy;
         struct time_namespace *ns = to_time_ns(new);
         int err;
  
@@ -289,7 +290,7 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
                 return -EUSERS;
  
         if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-           !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         timens_set_vvar_page(current, ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c

index 8eadadc..87804e0 100644 (file)
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1253,7 +1253,7 @@ static void userns_put(struct ns_common *ns)
         put_user_ns(to_user_ns(ns));
  }
  
-static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int userns_install(struct nsset *nsset, struct ns_common *ns)
  {
         struct user_namespace *user_ns = to_user_ns(ns);
         struct cred *cred;
@@ -1274,14 +1274,14 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
         if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
-       cred = prepare_creds();
+       cred = nsset_cred(nsset);
         if (!cred)
-               return -ENOMEM;
+               return -EINVAL;
  
         put_user_ns(cred->user_ns);
         set_cred_user_ns(cred, get_user_ns(user_ns));
  
-       return commit_creds(cred);
+       return 0;
  }
  
  struct ns_common *ns_get_owner(struct ns_common *ns)
diff --git a/kernel/utsname.c b/kernel/utsname.c

index f0e4911..e488d0e 100644 (file)
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -140,12 +140,13 @@ static void utsns_put(struct ns_common *ns)
         put_uts_ns(to_uts_ns(ns));
  }
  
-static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int utsns_install(struct nsset *nsset, struct ns_common *new)
  {
+       struct nsproxy *nsproxy = nsset->nsproxy;
         struct uts_namespace *ns = to_uts_ns(new);
  
         if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-           !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         get_uts_ns(ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c

index 190ca66..dcd61ac 100644 (file)
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1353,12 +1353,13 @@ static void netns_put(struct ns_common *ns)
         put_net(to_net_ns(ns));
  }
  
-static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+static int netns_install(struct nsset *nsset, struct ns_common *ns)
  {
+       struct nsproxy *nsproxy = nsset->nsproxy;
         struct net *net = to_net_ns(ns);
  
         if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
-           !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+           !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         put_net(nsproxy->net_ns);
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore

index 2d4db5a..973198a 100644 (file)
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -5,3 +5,4 @@ pidfd_test
  pidfd_wait
  pidfd_fdinfo_test
  pidfd_getfd_test
+pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile

index 75a5458..f4a2f28 100644 (file)
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,8 @@
  # SPDX-License-Identifier: GPL-2.0-only
  CFLAGS += -g -I../../../../usr/include/ -pthread
  
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
+       pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
  
  include ../lib.mk
  
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config

new file mode 100644 (file)

index 0000000..bb11de9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/config
@@ -0,0 +1,6 @@
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c

new file mode 100644 (file)

index 0000000..133ec5b
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../clone3/clone3_selftests.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/*
+ * Indices into ns_info[] and into the per-process namespace fd arrays of
+ * the fixture. PIDFD_NS_MAX is the number of entries, not a namespace.
+ */
+enum {
+       PIDFD_NS_USER,
+       PIDFD_NS_MNT,
+       PIDFD_NS_PID,
+       PIDFD_NS_UTS,
+       PIDFD_NS_IPC,
+       PIDFD_NS_NET,
+       PIDFD_NS_CGROUP,
+       PIDFD_NS_PIDCLD,
+       PIDFD_NS_MAX
+};
+
+/*
+ * One entry per PIDFD_NS_* index.
+ * @name: file name opened below /proc/<pid>/ns/.
+ * @flag: CLONE_NEW* flag passed to setns(); 0 means the tests do not call
+ *        setns() for this entry (pid_for_children).
+ */
+const struct ns_info {
+       const char *name;
+       int flag;
+} ns_info[] = {
+       [PIDFD_NS_USER]   = { "user",             CLONE_NEWUSER,   },
+       [PIDFD_NS_MNT]    = { "mnt",              CLONE_NEWNS,     },
+       [PIDFD_NS_PID]    = { "pid",              CLONE_NEWPID,    },
+       [PIDFD_NS_UTS]    = { "uts",              CLONE_NEWUTS,    },
+       [PIDFD_NS_IPC]    = { "ipc",              CLONE_NEWIPC,    },
+       [PIDFD_NS_NET]    = { "net",              CLONE_NEWNET,    },
+       [PIDFD_NS_CGROUP] = { "cgroup",           CLONE_NEWCGROUP, },
+       [PIDFD_NS_PIDCLD] = { "pid_for_children", 0,               },
+};
+
+/*
+ * Per-test state: the test process itself, one child that has already
+ * exited, and two children that stay alive in their own namespaces.
+ */
+FIXTURE(current_nsset)
+{
+       /* The test process: pid, pidfd and fds for its /proc/self/ns entries. */
+       pid_t pid;
+       int pidfd;
+       int nsfds[PIDFD_NS_MAX];
+
+       /* Child that exits right away; only its pid and pidfd are kept. */
+       pid_t child_pid_exited;
+       int child_pidfd_exited;
+
+       /* First paused child and fds for its /proc/<pid>/ns entries. */
+       pid_t child_pid1;
+       int child_pidfd1;
+       int child_nsfds1[PIDFD_NS_MAX];
+
+       /* Second paused child and fds for its /proc/<pid>/ns entries. */
+       pid_t child_pid2;
+       int child_pidfd2;
+       int child_nsfds2[PIDFD_NS_MAX];
+};
+
+/* Raw waitid() syscall wrapper that passes NULL for both out-pointers. */
+static int sys_waitid(int which, pid_t pid, int options)
+{
+       long ret;
+
+       ret = syscall(__NR_waitid, which, pid, NULL, options, NULL);
+       return ret;
+}
+
+/*
+ * clone3() a child with CLONE_PIDFD plus @flags and SIGCHLD as exit signal.
+ * The child's pidfd is stored through @pidfd; the clone3() result is
+ * returned unchanged.
+ */
+pid_t create_child(int *pidfd, unsigned flags)
+{
+       struct clone_args args = { 0 };
+
+       args.flags = CLONE_PIDFD | flags;
+       args.exit_signal = SIGCHLD;
+       args.pidfd = ptr_to_u64(pidfd);
+
+       return sys_clone3(&args, sizeof(args));
+}
+
+/*
+ * Open the test process's own namespace fds and pidfd, create one child
+ * that exits immediately and two children that pause() in fresh
+ * namespaces, then open the namespace fds of both paused children.
+ *
+ * Every descriptor is opened exactly once so none is overwritten and
+ * leaked. Namespace files that do not exist (ENOENT) are tolerated and
+ * leave the slot at -EBADF.
+ */
+FIXTURE_SETUP(current_nsset)
+{
+       int i, proc_fd, ret;
+
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               self->nsfds[i]          = -EBADF;
+               self->child_nsfds1[i]   = -EBADF;
+               self->child_nsfds2[i]   = -EBADF;
+       }
+
+       proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
+       ASSERT_GE(proc_fd, 0) {
+               TH_LOG("%m - Failed to open /proc/self/ns");
+       }
+
+       self->pid = getpid();
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+               self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+               if (self->nsfds[i] < 0) {
+                       EXPECT_EQ(errno, ENOENT) {
+                               TH_LOG("%m - Failed to open %s namespace for process %d",
+                                      info->name, self->pid);
+                       }
+               }
+       }
+
+       /* Only needed for the openat() calls above. */
+       close(proc_fd);
+
+       self->pidfd = sys_pidfd_open(self->pid, 0);
+       EXPECT_GE(self->pidfd, 0) {
+               TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+       }
+
+       /* Create task that exits right away. */
+       self->child_pid_exited = create_child(&self->child_pidfd_exited,
+                                             CLONE_NEWUSER | CLONE_NEWNET);
+       /* Runs in the child as well, where the returned pid is 0. */
+       EXPECT_GE(self->child_pid_exited, 0);
+
+       if (self->child_pid_exited == 0)
+               _exit(EXIT_SUCCESS);
+
+       /* WNOWAIT leaves the child waitable so teardown can still reap it. */
+       ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+
+       /* Create tasks that will be stopped. */
+       self->child_pid1 = create_child(&self->child_pidfd1,
+                                       CLONE_NEWUSER | CLONE_NEWNS |
+                                       CLONE_NEWCGROUP | CLONE_NEWIPC |
+                                       CLONE_NEWUTS | CLONE_NEWPID |
+                                       CLONE_NEWNET);
+       EXPECT_GE(self->child_pid1, 0);
+
+       if (self->child_pid1 == 0) {
+               pause();
+               _exit(EXIT_SUCCESS);
+       }
+
+       self->child_pid2 = create_child(&self->child_pidfd2,
+                                       CLONE_NEWUSER | CLONE_NEWNS |
+                                       CLONE_NEWCGROUP | CLONE_NEWIPC |
+                                       CLONE_NEWUTS | CLONE_NEWPID |
+                                       CLONE_NEWNET);
+       EXPECT_GE(self->child_pid2, 0);
+
+       if (self->child_pid2 == 0) {
+               pause();
+               _exit(EXIT_SUCCESS);
+       }
+
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               char p[100];
+
+               const struct ns_info *info = &ns_info[i];
+
+               ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+                              self->child_pid1, info->name);
+               EXPECT_GT(ret, 0);
+               EXPECT_LT(ret, sizeof(p));
+
+               self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
+               if (self->child_nsfds1[i] < 0) {
+                       EXPECT_EQ(errno, ENOENT) {
+                               TH_LOG("%m - Failed to open %s namespace for process %d",
+                                      info->name, self->child_pid1);
+                       }
+               }
+
+               ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+                              self->child_pid2, info->name);
+               EXPECT_GT(ret, 0);
+               EXPECT_LT(ret, sizeof(p));
+
+               self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
+               if (self->child_nsfds2[i] < 0) {
+                       EXPECT_EQ(errno, ENOENT) {
+                               TH_LOG("%m - Failed to open %s namespace for process %d",
+                                      info->name, self->child_pid2);
+                       }
+               }
+       }
+}
+
+/*
+ * Kill the two paused children, close every descriptor the fixture holds
+ * (including the test process's own pidfd and the exited child's pidfd),
+ * and reap all three children.
+ */
+FIXTURE_TEARDOWN(current_nsset)
+{
+       int i;
+
+       ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
+                                       SIGKILL, NULL, 0), 0);
+       ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
+                                       SIGKILL, NULL, 0), 0);
+
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               if (self->nsfds[i] >= 0)
+                       close(self->nsfds[i]);
+               if (self->child_nsfds1[i] >= 0)
+                       close(self->child_nsfds1[i]);
+               if (self->child_nsfds2[i] >= 0)
+                       close(self->child_nsfds2[i]);
+       }
+
+       if (self->pidfd >= 0)
+               EXPECT_EQ(0, close(self->pidfd));
+       /* The pidfd is only valid if the child was actually created. */
+       if (self->child_pid_exited > 0 && self->child_pidfd_exited >= 0)
+               EXPECT_EQ(0, close(self->child_pidfd_exited));
+       if (self->child_pidfd1 >= 0)
+               EXPECT_EQ(0, close(self->child_pidfd1));
+       if (self->child_pidfd2 >= 0)
+               EXPECT_EQ(0, close(self->child_pidfd2));
+       ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
+       ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
+       ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
+}
+
+/*
+ * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
+ * Returns the open() result, or -EIO if the path cannot be formatted
+ * into the local buffer.
+ */
+static int preserve_ns(const int pid, const char *ns)
+{
+       char path[50];
+       int len;
+
+       len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
+       if (len >= 0 && (size_t)len < sizeof(path))
+               return open(path, O_RDONLY | O_CLOEXEC);
+
+       return -EIO;
+}
+
+/*
+ * Compare the namespace behind @ns_fd1 with the @ns namespace of @pid2.
+ * Returns 1 if device and inode number match, 0 if they differ, and -1
+ * if either namespace file cannot be opened or stat'ed.
+ */
+static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
+{
+       struct stat st1, st2;
+       int fd2, err;
+
+       if (fstat(ns_fd1, &st1) < 0)
+               return -1;
+
+       fd2 = preserve_ns(pid2, ns);
+       if (fd2 < 0)
+               return -1;
+
+       /* The fd is only needed for this one fstat(); close it either way. */
+       err = fstat(fd2, &st2);
+       close(fd2);
+       if (err < 0)
+               return -1;
+
+       return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
+}
+
+/*
+ * Test that we can't pass garbage to the kernel: with a pidfd, setns()
+ * must fail with EINVAL for no flags, all bits set, a non-namespace clone
+ * flag, and a namespace flag mixed with a non-namespace one.
+ */
+TEST_F(current_nsset, invalid_flags)
+{
+       ASSERT_NE(setns(self->pidfd, 0), 0);
+       EXPECT_EQ(errno, EINVAL);
+
+       ASSERT_NE(setns(self->pidfd, -1), 0);
+       EXPECT_EQ(errno, EINVAL);
+
+       ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
+       EXPECT_EQ(errno, EINVAL);
+
+       ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
+       EXPECT_EQ(errno, EINVAL);
+}
+
+/*
+ * Test that we can't attach to a task that has already exited: setns()
+ * on its pidfd must fail with ESRCH and leave us in our old namespaces.
+ */
+TEST_F(current_nsset, pidfd_exited_child)
+{
+       int i;
+       pid_t pid;
+
+       ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
+                 0);
+       EXPECT_EQ(errno, ESRCH);
+
+       pid = getpid();
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+               /* Verify that we haven't changed any namespaces. */
+               if (self->nsfds[i] >= 0)
+                       ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
+       }
+}
+
+/*
+ * Attach to the namespaces of child_pid1 one at a time through its pidfd,
+ * verifying after every step that we are in the namespace we expect.
+ */
+TEST_F(current_nsset, pidfd_incremental_setns)
+{
+       const pid_t pid = getpid();
+       int i;
+
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+               int nsfd;
+
+               /* No fd was recorded for this namespace of the child. */
+               if (self->child_nsfds1[i] < 0)
+                       continue;
+
+               /* Entries without a clone flag are only verified below. */
+               if (info->flag) {
+                       ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
+                               TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
+                                      info->name, self->child_pid1,
+                                      self->child_pidfd1);
+                       }
+               }
+
+               /*
+                * Verify that we have changed to the correct namespaces. For
+                * CLONE_NEWPID we are expected to still be in our own pid
+                * namespace, so compare against our own fd in that case.
+                */
+               nsfd = (info->flag == CLONE_NEWPID) ? self->nsfds[i]
+                                                   : self->child_nsfds1[i];
+               ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+                       TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
+                              info->name, self->child_pid1,
+                              self->child_pidfd1);
+               }
+               TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
+                      info->name, self->child_pid1, self->child_pidfd1);
+       }
+}
+
+/*
+ * Attach to the namespaces of child_pid1 one at a time, using the
+ * individual namespace fds recorded for it, and verify after every step
+ * that we are in the namespace we expect.
+ */
+TEST_F(current_nsset, nsfd_incremental_setns)
+{
+       const pid_t pid = getpid();
+       int i;
+
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+               int nsfd;
+
+               /* No fd was recorded for this namespace of the child. */
+               if (self->child_nsfds1[i] < 0)
+                       continue;
+
+               /* Entries without a clone flag are only verified below. */
+               if (info->flag) {
+                       ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
+                               TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
+                                      info->name, self->child_pid1,
+                                      self->child_nsfds1[i]);
+                       }
+               }
+
+               /*
+                * Verify that we have changed to the correct namespaces. For
+                * CLONE_NEWPID we are expected to still be in our own pid
+                * namespace, so compare against our own fd in that case.
+                */
+               nsfd = (info->flag == CLONE_NEWPID) ? self->nsfds[i]
+                                                   : self->child_nsfds1[i];
+               ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+                       TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
+                              info->name, self->child_pid1,
+                              self->child_nsfds1[i]);
+               }
+               TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
+                      info->name, self->child_pid1, self->child_nsfds1[i]);
+       }
+}
+
+/*
+ * Attach to all namespaces of child_pid1 with a single setns() call on its
+ * pidfd, then verify for each namespace that we ended up where we expect.
+ */
+TEST_F(current_nsset, pidfd_one_shot_setns)
+{
+       unsigned flags = 0;
+       int i;
+       pid_t pid;
+
+       /* Collect the clone flags of every namespace we hold a child fd for. */
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+
+               if (self->child_nsfds1[i] < 0)
+                       continue;
+
+               flags |= info->flag;
+               /* A zero flag adds nothing to the mask, so don't claim it did. */
+               if (info->flag)
+                       TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+                              info->name, self->child_pid1);
+       }
+
+       ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+               TH_LOG("%m - Failed to setns to namespaces of %d",
+                      self->child_pid1);
+       }
+
+       pid = getpid();
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+               int nsfd;
+
+               if (self->child_nsfds1[i] < 0)
+                       continue;
+
+               /*
+                * Verify that we have changed to the correct namespaces. For
+                * CLONE_NEWPID we are expected to still be in our own pid
+                * namespace, so compare against our own fd in that case.
+                */
+               if (info->flag == CLONE_NEWPID)
+                       nsfd = self->nsfds[i];
+               else
+                       nsfd = self->child_nsfds1[i];
+               ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+                       TH_LOG("setns failed to place us correctly into %s namespace of %d",
+                              info->name, self->child_pid1);
+               }
+               TH_LOG("Managed to correctly setns to %s namespace of %d",
+                      info->name, self->child_pid1);
+       }
+}
+
+/*
+ * After joining the namespaces of child_pid1, every attempt to setns() into
+ * a namespace of child_pid2 must fail, both through the pidfd of child_pid2
+ * and through the individual namespace fds recorded for it.
+ */
+TEST_F(current_nsset, no_foul_play)
+{
+       unsigned flags = 0;
+       int i;
+
+       /* Collect the clone flags of every namespace we hold a child fd for. */
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+
+               if (self->child_nsfds1[i] < 0)
+                       continue;
+
+               flags |= info->flag;
+               if (info->flag) /* No use logging pid_for_children. */
+                       TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+                              info->name, self->child_pid1);
+       }
+
+       ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+               TH_LOG("%m - Failed to setns to namespaces of %d via pidfd %d",
+                      self->child_pid1, self->child_pidfd1);
+       }
+
+       /*
+        * Can't setns to a user namespace outside of our hierarchy since we
+        * don't have caps in there and didn't create it. That means that under
+        * no circumstances should we be able to setns to any of the other
+        * ones since they aren't owned by our user namespace.
+        */
+       for (i = 0; i < PIDFD_NS_MAX; i++) {
+               const struct ns_info *info = &ns_info[i];
+
+               /* Skip namespaces we have no fd for and entries without a flag. */
+               if (self->child_nsfds2[i] < 0 || !info->flag)
+                       continue;
+
+               ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
+                       TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
+                              info->name, self->child_pid2,
+                              self->child_pidfd2);
+               }
+               TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
+                      info->name, self->child_pid2,
+                      self->child_pidfd2);
+
+               ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
+                       TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
+                              info->name, self->child_pid2,
+                              self->child_nsfds2[i]);
+               }
+               TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
+                      info->name, self->child_pid2,
+                      self->child_nsfds2[i]);
+       }
+}
+
+TEST_HARNESS_MAIN /* presumably supplies main() via the test harness; TODO confirm */
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 3 Jun 2020 20:12:57 +0000 (13:12 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 3 Jun 2020 20:12:57 +0000 (13:12 -0700)
fs/namespace.c		patch \| blob \| history
fs/nsfs.c		patch \| blob \| history
include/linux/mnt_namespace.h		patch \| blob \| history
include/linux/nsproxy.h		patch \| blob \| history
include/linux/proc_fs.h		patch \| blob \| history
include/linux/proc_ns.h		patch \| blob \| history
ipc/namespace.c		patch \| blob \| history
kernel/cgroup/namespace.c		patch \| blob \| history
kernel/nsproxy.c		patch \| blob \| history
kernel/pid_namespace.c		patch \| blob \| history
kernel/time/namespace.c		patch \| blob \| history
kernel/user_namespace.c		patch \| blob \| history
kernel/utsname.c		patch \| blob \| history
net/core/net_namespace.c		patch \| blob \| history
tools/testing/selftests/pidfd/.gitignore		patch \| blob \| history
tools/testing/selftests/pidfd/Makefile		patch \| blob \| history
tools/testing/selftests/pidfd/config	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/pidfd/pidfd_setns_test.c	[new file with mode: 0644]	patch \| blob