power: supply: ltc2941-battery-gauge: fix use-after-free

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index 0570729..88c5d5b 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
  #include "pnode.h"
  #include "internal.h"
  
+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
  static unsigned int m_hash_mask __read_mostly;
  static unsigned int m_hash_shift __read_mostly;
  static unsigned int mp_hash_mask __read_mostly;
@@ -234,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name)
                 INIT_LIST_HEAD(&mnt->mnt_slave_list);
                 INIT_LIST_HEAD(&mnt->mnt_slave);
                 INIT_HLIST_NODE(&mnt->mnt_mp_list);
+               INIT_LIST_HEAD(&mnt->mnt_umounting);
  #ifdef CONFIG_FSNOTIFY
                 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
  #endif
@@ -599,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
                 return 0;
         mnt = real_mount(bastard);
         mnt_add_count(mnt, 1);
+       smp_mb();                       // see mntput_no_expire()
         if (likely(!read_seqretry(&mount_lock, seq)))
                 return 0;
         if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
                 mnt_add_count(mnt, -1);
                 return 1;
         }
+       lock_mount_hash();
+       if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+               mnt_add_count(mnt, -1);
+               unlock_mount_hash();
+               return 1;
+       }
+       unlock_mount_hash();
+       /* caller will mntput() */
         return -1;
  }
  
@@ -638,28 +651,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
  }
  
  /*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
-       struct mount *p, *res = NULL;
-       p = __lookup_mnt(mnt, dentry);
-       if (!p)
-               goto out;
-       if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-               res = p;
-       hlist_for_each_entry_continue(p, mnt_hash) {
-               if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
-                       break;
-               if (!(p->mnt.mnt_flags & MNT_UMOUNT))
-                       res = p;
-       }
-out:
-       return res;
-}
-
-/*
   * lookup_mnt - Return the first child mount mounted at path
   *
   * "First" means first mounted chronologically.  If you create the
@@ -743,26 +734,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
         return NULL;
  }
  
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
  {
-       struct hlist_head *chain = mp_hash(dentry);
-       struct mountpoint *mp;
+       struct mountpoint *mp, *new = NULL;
         int ret;
  
-       mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-       if (!mp)
+       if (d_mountpoint(dentry)) {
+mountpoint:
+               read_seqlock_excl(&mount_lock);
+               mp = lookup_mountpoint(dentry);
+               read_sequnlock_excl(&mount_lock);
+               if (mp)
+                       goto done;
+       }
+
+       if (!new)
+               new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+       if (!new)
                 return ERR_PTR(-ENOMEM);
  
+
+       /* Exactly one process may set d_mounted */
         ret = d_set_mounted(dentry);
-       if (ret) {
-               kfree(mp);
-               return ERR_PTR(ret);
-       }
  
-       mp->m_dentry = dentry;
-       mp->m_count = 1;
-       hlist_add_head(&mp->m_hash, chain);
-       INIT_HLIST_HEAD(&mp->m_list);
+       /* Someone else set d_mounted? */
+       if (ret == -EBUSY)
+               goto mountpoint;
+
+       /* The dentry is not available as a mountpoint? */
+       mp = ERR_PTR(ret);
+       if (ret)
+               goto done;
+
+       /* Add the new mountpoint to the hash table */
+       read_seqlock_excl(&mount_lock);
+       new->m_dentry = dentry;
+       new->m_count = 1;
+       hlist_add_head(&new->m_hash, mp_hash(dentry));
+       INIT_HLIST_HEAD(&new->m_list);
+       read_sequnlock_excl(&mount_lock);
+
+       mp = new;
+       new = NULL;
+done:
+       kfree(new);
         return mp;
  }
  
@@ -855,6 +870,13 @@ void mnt_set_mountpoint(struct mount *mnt,
         hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
  }
  
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+       hlist_add_head_rcu(&mnt->mnt_hash,
+                          m_hash(&parent->mnt, mnt->mnt_mountpoint));
+       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
  /*
   * vfsmount lock must be held for write
   */
@@ -863,28 +885,45 @@ static void attach_mnt(struct mount *mnt,
                         struct mountpoint *mp)
  {
         mnt_set_mountpoint(parent, mp, mnt);
-       hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
-       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+       __attach_mnt(mnt, parent);
  }
  
-static void attach_shadowed(struct mount *mnt,
-                       struct mount *parent,
-                       struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
  {
-       if (shadows) {
-               hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
-               list_add(&mnt->mnt_child, &shadows->mnt_child);
-       } else {
-               hlist_add_head_rcu(&mnt->mnt_hash,
-                               m_hash(&parent->mnt, mnt->mnt_mountpoint));
-               list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
-       }
+       struct mountpoint *old_mp = mnt->mnt_mp;
+       struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+       struct mount *old_parent = mnt->mnt_parent;
+
+       list_del_init(&mnt->mnt_child);
+       hlist_del_init(&mnt->mnt_mp_list);
+       hlist_del_init_rcu(&mnt->mnt_hash);
+
+       attach_mnt(mnt, parent, mp);
+
+       put_mountpoint(old_mp);
+
+       /*
+        * Safely avoid even the suggestion this code might sleep or
+        * lock the mount hash by taking advantage of the knowledge that
+        * mnt_change_mountpoint will not release the final reference
+        * to a mountpoint.
+        *
+        * During mounting, the mount passed in as the parent mount will
+        * continue to use the old mountpoint and during unmounting, the
+        * old mountpoint will continue to exist until namespace_unlock,
+        * which happens well after mnt_change_mountpoint.
+        */
+       spin_lock(&old_mountpoint->d_lock);
+       old_mountpoint->d_lockref.count--;
+       spin_unlock(&old_mountpoint->d_lock);
+
+       mnt_add_count(old_parent, -1);
  }
  
  /*
   * vfsmount lock must be held for write
   */
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
  {
         struct mount *parent = mnt->mnt_parent;
         struct mount *m;
@@ -899,7 +938,10 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
  
         list_splice(&head, n->list.prev);
  
-       attach_shadowed(mnt, parent, shadows);
+       n->mounts += n->pending_mounts;
+       n->pending_mounts = 0;
+
+       __attach_mnt(mnt, parent);
         touch_mnt_namespace(n);
  }
  
@@ -985,7 +1027,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                         goto out_free;
         }
  
-       mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+       mnt->mnt.mnt_flags = old->mnt.mnt_flags;
+       mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
         /* Don't allow unprivileged users to change mount flags */
         if (flag & CL_UNPRIVILEGED) {
                 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
@@ -1090,12 +1133,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
  static void mntput_no_expire(struct mount *mnt)
  {
         rcu_read_lock();
-       mnt_add_count(mnt, -1);
-       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+       if (likely(READ_ONCE(mnt->mnt_ns))) {
+               /*
+                * Since we don't do lock_mount_hash() here,
+                * ->mnt_ns can change under us.  However, if it's
+                * non-NULL, then there's a reference that won't
+                * be dropped until after an RCU delay done after
+                * turning ->mnt_ns NULL.  So if we observe it
+                * non-NULL under rcu_read_lock(), the reference
+                * we are dropping is not the final one.
+                */
+               mnt_add_count(mnt, -1);
                 rcu_read_unlock();
                 return;
         }
         lock_mount_hash();
+       /*
+        * make sure that if __legitimize_mnt() has not seen us grab
+        * mount_lock, we'll see their refcount increment here.
+        */
+       smp_mb();
+       mnt_add_count(mnt, -1);
         if (mnt_get_count(mnt)) {
                 rcu_read_unlock();
                 unlock_mount_hash();
@@ -1419,11 +1477,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
                 propagate_umount(&tmp_list);
  
         while (!list_empty(&tmp_list)) {
+               struct mnt_namespace *ns;
                 bool disconnect;
                 p = list_first_entry(&tmp_list, struct mount, mnt_list);
                 list_del_init(&p->mnt_expire);
                 list_del_init(&p->mnt_list);
-               __touch_mnt_namespace(p->mnt_ns);
+               ns = p->mnt_ns;
+               if (ns) {
+                       ns->mounts--;
+                       __touch_mnt_namespace(ns);
+               }
                 p->mnt_ns = NULL;
                 if (how & UMOUNT_SYNC)
                         p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1521,8 +1584,13 @@ static int do_umount(struct mount *mnt, int flags)
  
         namespace_lock();
         lock_mount_hash();
-       event++;
  
+       /* Recheck MNT_LOCKED with the locks held */
+       retval = -EINVAL;
+       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+               goto out;
+
+       event++;
         if (flags & MNT_DETACH) {
                 if (!list_empty(&mnt->mnt_list))
                         umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1536,6 +1604,7 @@ static int do_umount(struct mount *mnt, int flags)
                         retval = 0;
                 }
         }
+out:
         unlock_mount_hash();
         namespace_unlock();
         return retval;
@@ -1557,11 +1626,12 @@ void __detach_mounts(struct dentry *dentry)
         struct mount *mnt;
  
         namespace_lock();
+       lock_mount_hash();
         mp = lookup_mountpoint(dentry);
         if (IS_ERR_OR_NULL(mp))
                 goto out_unlock;
  
-       lock_mount_hash();
+       event++;
         while (!hlist_empty(&mp->m_list)) {
                 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
                 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
@@ -1570,9 +1640,9 @@ void __detach_mounts(struct dentry *dentry)
                 }
                 else umount_tree(mnt, UMOUNT_CONNECTED);
         }
-       unlock_mount_hash();
         put_mountpoint(mp);
  out_unlock:
+       unlock_mount_hash();
         namespace_unlock();
  }
  
@@ -1617,7 +1687,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
                 goto dput_and_out;
         if (!check_mnt(mnt))
                 goto dput_and_out;
-       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+       if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
                 goto dput_and_out;
         retval = -EPERM;
         if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1693,11 +1763,16 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                         continue;
  
                 for (s = r; s; s = next_mnt(s, r)) {
-                       struct mount *t = NULL;
                         if (!(flag & CL_COPY_UNBINDABLE) &&
                             IS_MNT_UNBINDABLE(s)) {
-                               s = skip_mnt_tree(s);
-                               continue;
+                               if (s->mnt.mnt_flags & MNT_LOCKED) {
+                                       /* Both unbindable and locked. */
+                                       q = ERR_PTR(-EPERM);
+                                       goto out;
+                               } else {
+                                       s = skip_mnt_tree(s);
+                                       continue;
+                               }
                         }
                         if (!(flag & CL_COPY_MNT_NS_FILE) &&
                             is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1715,14 +1790,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                                 goto out;
                         lock_mount_hash();
                         list_add_tail(&q->mnt_list, &res->mnt_list);
-                       mnt_set_mountpoint(parent, p->mnt_mp, q);
-                       if (!list_empty(&parent->mnt_mounts)) {
-                               t = list_last_entry(&parent->mnt_mounts,
-                                       struct mount, mnt_child);
-                               if (t->mnt_mp != p->mnt_mp)
-                                       t = NULL;
-                       }
-                       attach_shadowed(q, parent, t);
+                       attach_mnt(q, parent, p->mnt_mp);
                         unlock_mount_hash();
                 }
         }
@@ -1757,7 +1825,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
  {
         namespace_lock();
         lock_mount_hash();
-       umount_tree(real_mount(mnt), UMOUNT_SYNC);
+       umount_tree(real_mount(mnt), 0);
         unlock_mount_hash();
         namespace_unlock();
  }
@@ -1831,6 +1899,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
         return 0;
  }
  
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+       unsigned int max = READ_ONCE(sysctl_mount_max);
+       unsigned int mounts = 0, old, pending, sum;
+       struct mount *p;
+
+       for (p = mnt; p; p = next_mnt(p, mnt))
+               mounts++;
+
+       old = ns->mounts;
+       pending = ns->pending_mounts;
+       sum = old + pending;
+       if ((old > sum) ||
+           (pending > sum) ||
+           (max < sum) ||
+           (mounts > (max - sum)))
+               return -ENOSPC;
+
+       ns->pending_mounts = pending + mounts;
+       return 0;
+}
+
  /*
   *  @source_mnt : mount tree to be attached
   *  @nd         : place the mount tree @source_mnt is attached
@@ -1900,10 +1990,26 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                         struct path *parent_path)
  {
         HLIST_HEAD(tree_list);
+       struct mnt_namespace *ns = dest_mnt->mnt_ns;
+       struct mountpoint *smp;
         struct mount *child, *p;
         struct hlist_node *n;
         int err;
  
+       /* Preallocate a mountpoint in case the new mounts need
+        * to be tucked under other mounts.
+        */
+       smp = get_mountpoint(source_mnt->mnt.mnt_root);
+       if (IS_ERR(smp))
+               return PTR_ERR(smp);
+
+       /* Is there space to add these mounts to the mount namespace? */
+       if (!parent_path) {
+               err = count_mounts(ns, source_mnt);
+               if (err)
+                       goto out;
+       }
+
         if (IS_MNT_SHARED(dest_mnt)) {
                 err = invent_group_ids(source_mnt, true);
                 if (err)
@@ -1923,16 +2029,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                 touch_mnt_namespace(source_mnt->mnt_ns);
         } else {
                 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
-               commit_tree(source_mnt, NULL);
+               commit_tree(source_mnt);
         }
  
         hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
                 struct mount *q;
                 hlist_del_init(&child->mnt_hash);
-               q = __lookup_mnt_last(&child->mnt_parent->mnt,
-                                     child->mnt_mountpoint);
-               commit_tree(child, q);
+               q = __lookup_mnt(&child->mnt_parent->mnt,
+                                child->mnt_mountpoint);
+               if (q)
+                       mnt_change_mountpoint(child, smp, q);
+               commit_tree(child);
         }
+       put_mountpoint(smp);
         unlock_mount_hash();
  
         return 0;
@@ -1940,11 +2049,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
   out_cleanup_ids:
         while (!hlist_empty(&tree_list)) {
                 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+               child->mnt_parent->mnt_ns->pending_mounts = 0;
                 umount_tree(child, UMOUNT_SYNC);
         }
         unlock_mount_hash();
         cleanup_group_ids(source_mnt, NULL);
   out:
+       ns->pending_mounts = 0;
+
+       read_seqlock_excl(&mount_lock);
+       put_mountpoint(smp);
+       read_sequnlock_excl(&mount_lock);
+
         return err;
  }
  
@@ -1961,9 +2077,7 @@ retry:
         namespace_lock();
         mnt = lookup_mnt(path);
         if (likely(!mnt)) {
-               struct mountpoint *mp = lookup_mountpoint(dentry);
-               if (!mp)
-                       mp = new_mountpoint(dentry);
+               struct mountpoint *mp = get_mountpoint(dentry);
                 if (IS_ERR(mp)) {
                         namespace_unlock();
                         mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1982,7 +2096,11 @@ retry:
  static void unlock_mount(struct mountpoint *where)
  {
         struct dentry *dentry = where->m_dentry;
+
+       read_seqlock_excl(&mount_lock);
         put_mountpoint(where);
+       read_sequnlock_excl(&mount_lock);
+
         namespace_unlock();
         mutex_unlock(&dentry->d_inode->i_mutex);
  }
@@ -2401,8 +2519,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
                         mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
                 }
                 if (type->fs_flags & FS_USERNS_VISIBLE) {
-                       if (!fs_fully_visible(type, &mnt_flags))
+                       if (!fs_fully_visible(type, &mnt_flags)) {
+                               put_filesystem(type);
                                 return -EPERM;
+                       }
                 }
         }
  
@@ -2766,6 +2886,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
         init_waitqueue_head(&new_ns->poll);
         new_ns->event = 0;
         new_ns->user_ns = get_user_ns(user_ns);
+       new_ns->mounts = 0;
+       new_ns->pending_mounts = 0;
         return new_ns;
  }
  
@@ -2815,6 +2937,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
         q = new;
         while (p) {
                 q->mnt_ns = new_ns;
+               new_ns->mounts++;
                 if (new_fs) {
                         if (&p->mnt == new_fs->root.mnt) {
                                 new_fs->root.mnt = mntget(&q->mnt);
@@ -2853,6 +2976,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
                 struct mount *mnt = real_mount(m);
                 mnt->mnt_ns = new_ns;
                 new_ns->root = mnt;
+               new_ns->mounts++;
                 list_add(&mnt->mnt_list, &new_ns->list);
         } else {
                 mntput(m);
@@ -3052,9 +3176,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
         touch_mnt_namespace(current->nsproxy->mnt_ns);
         /* A moved mount should not expire automatically */
         list_del_init(&new_mnt->mnt_expire);
+       put_mountpoint(root_mp);
         unlock_mount_hash();
         chroot_fs_refs(&root, &new);
-       put_mountpoint(root_mp);
         error = 0;
  out4:
         unlock_mount(old_mp);
@@ -3236,6 +3360,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                 if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
                         mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
  
+               /* Don't miss readonly hidden in the superblock flags */
+               if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+                       mnt_flags |= MNT_LOCK_READONLY;
+
                 /* Verify the mount flags are equal to or more permissive
                  * than the proposed new mount.
                  */
@@ -3262,7 +3390,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
                 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
                         struct inode *inode = child->mnt_mountpoint->d_inode;
                         /* Only worry about locked mounts */
-                       if (!(mnt_flags & MNT_LOCKED))
+                       if (!(child->mnt.mnt_flags & MNT_LOCKED))
                                 continue;
                         /* Is the directory permanetly empty? */
                         if (!is_empty_dir_inode(inode))