OSDN Git Service

power: supply: ltc2941-battery-gauge: fix use-after-free
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / fs / namespace.c
index 0570729..88c5d5b 100644 (file)
@@ -27,6 +27,9 @@
 #include "pnode.h"
 #include "internal.h"
 
+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
 static unsigned int m_hash_mask __read_mostly;
 static unsigned int m_hash_shift __read_mostly;
 static unsigned int mp_hash_mask __read_mostly;
@@ -234,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name)
                INIT_LIST_HEAD(&mnt->mnt_slave_list);
                INIT_LIST_HEAD(&mnt->mnt_slave);
                INIT_HLIST_NODE(&mnt->mnt_mp_list);
+               INIT_LIST_HEAD(&mnt->mnt_umounting);
 #ifdef CONFIG_FSNOTIFY
                INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
@@ -599,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
                return 0;
        mnt = real_mount(bastard);
        mnt_add_count(mnt, 1);
+       smp_mb();                       // see mntput_no_expire()
        if (likely(!read_seqretry(&mount_lock, seq)))
                return 0;
        if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
                mnt_add_count(mnt, -1);
                return 1;
        }
+       lock_mount_hash();
+       if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+               mnt_add_count(mnt, -1);
+               unlock_mount_hash();
+               return 1;
+       }
+       unlock_mount_hash();
+       /* caller will mntput() */
        return -1;
 }
 
@@ -638,28 +651,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 }
 
 /*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
-       struct mount *p, *res = NULL;
-       p = __lookup_mnt(mnt, dentry);
-       if (!p)
-               goto out;
-       if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-               res = p;
-       hlist_for_each_entry_continue(p, mnt_hash) {
-               if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
-                       break;
-               if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-                       res = p;
-       }
-out:
-       return res;
-}
-
-/*
  * lookup_mnt - Return the first child mount mounted at path
  *
  * "First" means first mounted chronologically.  If you create the
@@ -743,26 +734,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
        return NULL;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
 {
-       struct hlist_head *chain = mp_hash(dentry);
-       struct mountpoint *mp;
+       struct mountpoint *mp, *new = NULL;
        int ret;
 
-       mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-       if (!mp)
+       if (d_mountpoint(dentry)) {
+mountpoint:
+               read_seqlock_excl(&mount_lock);
+               mp = lookup_mountpoint(dentry);
+               read_sequnlock_excl(&mount_lock);
+               if (mp)
+                       goto done;
+       }
+
+       if (!new)
+               new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+       if (!new)
                return ERR_PTR(-ENOMEM);
 
+
+       /* Exactly one process may set d_mounted */
        ret = d_set_mounted(dentry);
-       if (ret) {
-               kfree(mp);
-               return ERR_PTR(ret);
-       }
 
-       mp->m_dentry = dentry;
-       mp->m_count = 1;
-       hlist_add_head(&mp->m_hash, chain);
-       INIT_HLIST_HEAD(&mp->m_list);
+       /* Someone else set d_mounted? */
+       if (ret == -EBUSY)
+               goto mountpoint;
+
+       /* The dentry is not available as a mountpoint? */
+       mp = ERR_PTR(ret);
+       if (ret)
+               goto done;
+
+       /* Add the new mountpoint to the hash table */
+       read_seqlock_excl(&mount_lock);
+       new->m_dentry = dentry;
+       new->m_count = 1;
+       hlist_add_head(&new->m_hash, mp_hash(dentry));
+       INIT_HLIST_HEAD(&new->m_list);
+       read_sequnlock_excl(&mount_lock);
+
+       mp = new;
+       new = NULL;
+done:
+       kfree(new);
        return mp;
 }
 
@@ -855,6 +870,13 @@ void mnt_set_mountpoint(struct mount *mnt,
        hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
 }
 
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+       hlist_add_head_rcu(&mnt->mnt_hash,
+                          m_hash(&parent->mnt, mnt->mnt_mountpoint));
+       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
 /*
  * vfsmount lock must be held for write
  */
@@ -863,28 +885,45 @@ static void attach_mnt(struct mount *mnt,
                        struct mountpoint *mp)
 {
        mnt_set_mountpoint(parent, mp, mnt);
-       hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
-       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+       __attach_mnt(mnt, parent);
 }
 
-static void attach_shadowed(struct mount *mnt,
-                       struct mount *parent,
-                       struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
 {
-       if (shadows) {
-               hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
-               list_add(&mnt->mnt_child, &shadows->mnt_child);
-       } else {
-               hlist_add_head_rcu(&mnt->mnt_hash,
-                               m_hash(&parent->mnt, mnt->mnt_mountpoint));
-               list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-       }
+       struct mountpoint *old_mp = mnt->mnt_mp;
+       struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+       struct mount *old_parent = mnt->mnt_parent;
+
+       list_del_init(&mnt->mnt_child);
+       hlist_del_init(&mnt->mnt_mp_list);
+       hlist_del_init_rcu(&mnt->mnt_hash);
+
+       attach_mnt(mnt, parent, mp);
+
+       put_mountpoint(old_mp);
+
+       /*
+        * Safely avoid even the suggestion this code might sleep or
+        * lock the mount hash by taking advantage of the knowledge that
+        * mnt_change_mountpoint will not release the final reference
+        * to a mountpoint.
+        *
+        * During mounting, the mount passed in as the parent mount will
+        * continue to use the old mountpoint and during unmounting, the
+        * old mountpoint will continue to exist until namespace_unlock,
+        * which happens well after mnt_change_mountpoint.
+        */
+       spin_lock(&old_mountpoint->d_lock);
+       old_mountpoint->d_lockref.count--;
+       spin_unlock(&old_mountpoint->d_lock);
+
+       mnt_add_count(old_parent, -1);
 }
 
 /*
  * vfsmount lock must be held for write
  */
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
 {
        struct mount *parent = mnt->mnt_parent;
        struct mount *m;
@@ -899,7 +938,10 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
 
        list_splice(&head, n->list.prev);
 
-       attach_shadowed(mnt, parent, shadows);
+       n->mounts += n->pending_mounts;
+       n->pending_mounts = 0;
+
+       __attach_mnt(mnt, parent);
        touch_mnt_namespace(n);
 }
 
@@ -985,7 +1027,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                        goto out_free;
        }
 
-       mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+       mnt->mnt.mnt_flags = old->mnt.mnt_flags;
+       mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
        /* Don't allow unprivileged users to change mount flags */
        if (flag & CL_UNPRIVILEGED) {
                mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
@@ -1090,12 +1133,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 static void mntput_no_expire(struct mount *mnt)
 {
        rcu_read_lock();
-       mnt_add_count(mnt, -1);
-       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+       if (likely(READ_ONCE(mnt->mnt_ns))) {
+               /*
+                * Since we don't do lock_mount_hash() here,
+                * ->mnt_ns can change under us.  However, if it's
+                * non-NULL, then there's a reference that won't
+                * be dropped until after an RCU delay done after
+                * turning ->mnt_ns NULL.  So if we observe it
+                * non-NULL under rcu_read_lock(), the reference
+                * we are dropping is not the final one.
+                */
+               mnt_add_count(mnt, -1);
                rcu_read_unlock();
                return;
        }
        lock_mount_hash();
+       /*
+        * make sure that if __legitimize_mnt() has not seen us grab
+        * mount_lock, we'll see their refcount increment here.
+        */
+       smp_mb();
+       mnt_add_count(mnt, -1);
        if (mnt_get_count(mnt)) {
                rcu_read_unlock();
                unlock_mount_hash();
@@ -1419,11 +1477,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
                propagate_umount(&tmp_list);
 
        while (!list_empty(&tmp_list)) {
+               struct mnt_namespace *ns;
                bool disconnect;
                p = list_first_entry(&tmp_list, struct mount, mnt_list);
                list_del_init(&p->mnt_expire);
                list_del_init(&p->mnt_list);
-               __touch_mnt_namespace(p->mnt_ns);
+               ns = p->mnt_ns;
+               if (ns) {
+                       ns->mounts--;
+                       __touch_mnt_namespace(ns);
+               }
                p->mnt_ns = NULL;
                if (how & UMOUNT_SYNC)
                        p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1521,8 +1584,13 @@ static int do_umount(struct mount *mnt, int flags)
 
        namespace_lock();
        lock_mount_hash();
-       event++;
 
+       /* Recheck MNT_LOCKED with the locks held */
+       retval = -EINVAL;
+       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+               goto out;
+
+       event++;
        if (flags & MNT_DETACH) {
                if (!list_empty(&mnt->mnt_list))
                        umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1536,6 +1604,7 @@ static int do_umount(struct mount *mnt, int flags)
                        retval = 0;
                }
        }
+out:
        unlock_mount_hash();
        namespace_unlock();
        return retval;
@@ -1557,11 +1626,12 @@ void __detach_mounts(struct dentry *dentry)
        struct mount *mnt;
 
        namespace_lock();
+       lock_mount_hash();
        mp = lookup_mountpoint(dentry);
        if (IS_ERR_OR_NULL(mp))
                goto out_unlock;
 
-       lock_mount_hash();
+       event++;
        while (!hlist_empty(&mp->m_list)) {
                mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
                if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
@@ -1570,9 +1640,9 @@ void __detach_mounts(struct dentry *dentry)
                }
                else umount_tree(mnt, UMOUNT_CONNECTED);
        }
-       unlock_mount_hash();
        put_mountpoint(mp);
 out_unlock:
+       unlock_mount_hash();
        namespace_unlock();
 }
 
@@ -1617,7 +1687,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
                goto dput_and_out;
        if (!check_mnt(mnt))
                goto dput_and_out;
-       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+       if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
                goto dput_and_out;
        retval = -EPERM;
        if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1693,11 +1763,16 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                        continue;
 
                for (s = r; s; s = next_mnt(s, r)) {
-                       struct mount *t = NULL;
                        if (!(flag & CL_COPY_UNBINDABLE) &&
                            IS_MNT_UNBINDABLE(s)) {
-                               s = skip_mnt_tree(s);
-                               continue;
+                               if (s->mnt.mnt_flags & MNT_LOCKED) {
+                                       /* Both unbindable and locked. */
+                                       q = ERR_PTR(-EPERM);
+                                       goto out;
+                               } else {
+                                       s = skip_mnt_tree(s);
+                                       continue;
+                               }
                        }
                        if (!(flag & CL_COPY_MNT_NS_FILE) &&
                            is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1715,14 +1790,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                                goto out;
                        lock_mount_hash();
                        list_add_tail(&q->mnt_list, &res->mnt_list);
-                       mnt_set_mountpoint(parent, p->mnt_mp, q);
-                       if (!list_empty(&parent->mnt_mounts)) {
-                               t = list_last_entry(&parent->mnt_mounts,
-                                       struct mount, mnt_child);
-                               if (t->mnt_mp != p->mnt_mp)
-                                       t = NULL;
-                       }
-                       attach_shadowed(q, parent, t);
+                       attach_mnt(q, parent, p->mnt_mp);
                        unlock_mount_hash();
                }
        }
@@ -1757,7 +1825,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
        namespace_lock();
        lock_mount_hash();
-       umount_tree(real_mount(mnt), UMOUNT_SYNC);
+       umount_tree(real_mount(mnt), 0);
        unlock_mount_hash();
        namespace_unlock();
 }
@@ -1831,6 +1899,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
        return 0;
 }
 
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+       unsigned int max = READ_ONCE(sysctl_mount_max);
+       unsigned int mounts = 0, old, pending, sum;
+       struct mount *p;
+
+       for (p = mnt; p; p = next_mnt(p, mnt))
+               mounts++;
+
+       old = ns->mounts;
+       pending = ns->pending_mounts;
+       sum = old + pending;
+       if ((old > sum) ||
+           (pending > sum) ||
+           (max < sum) ||
+           (mounts > (max - sum)))
+               return -ENOSPC;
+
+       ns->pending_mounts = pending + mounts;
+       return 0;
+}
+
 /*
  *  @source_mnt : mount tree to be attached
  *  @nd         : place the mount tree @source_mnt is attached
@@ -1900,10 +1990,26 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                        struct path *parent_path)
 {
        HLIST_HEAD(tree_list);
+       struct mnt_namespace *ns = dest_mnt->mnt_ns;
+       struct mountpoint *smp;
        struct mount *child, *p;
        struct hlist_node *n;
        int err;
 
+       /* Preallocate a mountpoint in case the new mounts need
+        * to be tucked under other mounts.
+        */
+       smp = get_mountpoint(source_mnt->mnt.mnt_root);
+       if (IS_ERR(smp))
+               return PTR_ERR(smp);
+
+       /* Is there space to add these mounts to the mount namespace? */
+       if (!parent_path) {
+               err = count_mounts(ns, source_mnt);
+               if (err)
+                       goto out;
+       }
+
        if (IS_MNT_SHARED(dest_mnt)) {
                err = invent_group_ids(source_mnt, true);
                if (err)
@@ -1923,16 +2029,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                touch_mnt_namespace(source_mnt->mnt_ns);
        } else {
                mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
-               commit_tree(source_mnt, NULL);
+               commit_tree(source_mnt);
        }
 
        hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
                struct mount *q;
                hlist_del_init(&child->mnt_hash);
-               q = __lookup_mnt_last(&child->mnt_parent->mnt,
-                                     child->mnt_mountpoint);
-               commit_tree(child, q);
+               q = __lookup_mnt(&child->mnt_parent->mnt,
+                                child->mnt_mountpoint);
+               if (q)
+                       mnt_change_mountpoint(child, smp, q);
+               commit_tree(child);
        }
+       put_mountpoint(smp);
        unlock_mount_hash();
 
        return 0;
@@ -1940,11 +2049,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
  out_cleanup_ids:
        while (!hlist_empty(&tree_list)) {
                child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+               child->mnt_parent->mnt_ns->pending_mounts = 0;
                umount_tree(child, UMOUNT_SYNC);
        }
        unlock_mount_hash();
        cleanup_group_ids(source_mnt, NULL);
  out:
+       ns->pending_mounts = 0;
+
+       read_seqlock_excl(&mount_lock);
+       put_mountpoint(smp);
+       read_sequnlock_excl(&mount_lock);
+
        return err;
 }
 
@@ -1961,9 +2077,7 @@ retry:
        namespace_lock();
        mnt = lookup_mnt(path);
        if (likely(!mnt)) {
-               struct mountpoint *mp = lookup_mountpoint(dentry);
-               if (!mp)
-                       mp = new_mountpoint(dentry);
+               struct mountpoint *mp = get_mountpoint(dentry);
                if (IS_ERR(mp)) {
                        namespace_unlock();
                        mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1982,7 +2096,11 @@ retry:
 static void unlock_mount(struct mountpoint *where)
 {
        struct dentry *dentry = where->m_dentry;
+
+       read_seqlock_excl(&mount_lock);
        put_mountpoint(where);
+       read_sequnlock_excl(&mount_lock);
+
        namespace_unlock();
        mutex_unlock(&dentry->d_inode->i_mutex);
 }
@@ -2401,8 +2519,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
                        mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
                }
                if (type->fs_flags & FS_USERNS_VISIBLE) {
-                       if (!fs_fully_visible(type, &mnt_flags))
+                       if (!fs_fully_visible(type, &mnt_flags)) {
+                               put_filesystem(type);
                                return -EPERM;
+                       }
                }
        }
 
@@ -2766,6 +2886,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
        init_waitqueue_head(&new_ns->poll);
        new_ns->event = 0;
        new_ns->user_ns = get_user_ns(user_ns);
+       new_ns->mounts = 0;
+       new_ns->pending_mounts = 0;
        return new_ns;
 }
 
@@ -2815,6 +2937,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
        q = new;
        while (p) {
                q->mnt_ns = new_ns;
+               new_ns->mounts++;
                if (new_fs) {
                        if (&p->mnt == new_fs->root.mnt) {
                                new_fs->root.mnt = mntget(&q->mnt);
@@ -2853,6 +2976,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
                struct mount *mnt = real_mount(m);
                mnt->mnt_ns = new_ns;
                new_ns->root = mnt;
+               new_ns->mounts++;
                list_add(&mnt->mnt_list, &new_ns->list);
        } else {
                mntput(m);
@@ -3052,9 +3176,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
        touch_mnt_namespace(current->nsproxy->mnt_ns);
        /* A moved mount should not expire automatically */
        list_del_init(&new_mnt->mnt_expire);
+       put_mountpoint(root_mp);
        unlock_mount_hash();
        chroot_fs_refs(&root, &new);
-       put_mountpoint(root_mp);
        error = 0;
 out4:
        unlock_mount(old_mp);
@@ -3236,6 +3360,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
                        mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
 
+               /* Don't miss readonly hidden in the superblock flags */
+               if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+                       mnt_flags |= MNT_LOCK_READONLY;
+
                /* Verify the mount flags are equal to or more permissive
                 * than the proposed new mount.
                 */
@@ -3262,7 +3390,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
                        struct inode *inode = child->mnt_mountpoint->d_inode;
                        /* Only worry about locked mounts */
-                       if (!(mnt_flags & MNT_LOCKED))
+                       if (!(child->mnt.mnt_flags & MNT_LOCKED))
                                continue;
                        /* Is the directory permanently empty? */
                        if (!is_empty_dir_inode(inode))