X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=fs%2Fnamespace.c;h=88c5d5bddf746718959dc5cce07dbe9ce37891bb;hb=fd97ed8c874dc479d868ad33311ad227ff0c98ee;hp=0570729c87fd22dd299c50ee0303f43c384e4932;hpb=a6f83f00ad4b4e8a6c1cd14db495f40cf5ea7586;p=sagit-ice-cold%2Fkernel_xiaomi_msm8998.git diff --git a/fs/namespace.c b/fs/namespace.c index 0570729c87fd..88c5d5bddf74 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,9 @@ #include "pnode.h" #include "internal.h" +/* Maximum number of mounts in a mount namespace */ +unsigned int sysctl_mount_max __read_mostly = 100000; + static unsigned int m_hash_mask __read_mostly; static unsigned int m_hash_shift __read_mostly; static unsigned int mp_hash_mask __read_mostly; @@ -234,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); + INIT_LIST_HEAD(&mnt->mnt_umounting); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif @@ -599,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -638,28 +651,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) } /* - * find the last mount at @dentry on vfsmount @mnt. - * mount_lock must be held. - */ -struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) -{ - struct mount *p, *res = NULL; - p = __lookup_mnt(mnt, dentry); - if (!p) - goto out; - if (!(p->mnt.mnt_flags & MNT_UMOUNT)) - res = p; - hlist_for_each_entry_continue(p, mnt_hash) { - if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) - break; - if (!(p->mnt.mnt_flags & MNT_UMOUNT)) - res = p; - } -out: - return res; -} - -/* * lookup_mnt - Return the first child mount mounted at path * * "First" means first mounted chronologically. If you create the @@ -743,26 +734,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) return NULL; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +static struct mountpoint *get_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); - struct mountpoint *mp; + struct mountpoint *mp, *new = NULL; int ret; - mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); - if (!mp) + if (d_mountpoint(dentry)) { +mountpoint: + read_seqlock_excl(&mount_lock); + mp = lookup_mountpoint(dentry); + read_sequnlock_excl(&mount_lock); + if (mp) + goto done; + } + + if (!new) + new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); + if (!new) return ERR_PTR(-ENOMEM); + + /* Exactly one processes may set d_mounted */ ret = d_set_mounted(dentry); - if (ret) { - kfree(mp); - return ERR_PTR(ret); - } - mp->m_dentry = dentry; - mp->m_count = 1; - hlist_add_head(&mp->m_hash, chain); - INIT_HLIST_HEAD(&mp->m_list); + /* Someone else set d_mounted? */ + if (ret == -EBUSY) + goto mountpoint; + + /* The dentry is not available as a mountpoint? */ + mp = ERR_PTR(ret); + if (ret) + goto done; + + /* Add the new mountpoint to the hash table */ + read_seqlock_excl(&mount_lock); + new->m_dentry = dentry; + new->m_count = 1; + hlist_add_head(&new->m_hash, mp_hash(dentry)); + INIT_HLIST_HEAD(&new->m_list); + read_sequnlock_excl(&mount_lock); + + mp = new; + new = NULL; +done: + kfree(new); return mp; } @@ -855,6 +870,13 @@ void mnt_set_mountpoint(struct mount *mnt, hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); } +static void __attach_mnt(struct mount *mnt, struct mount *parent) +{ + hlist_add_head_rcu(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); +} + /* * vfsmount lock must be held for write */ @@ -863,28 +885,45 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + __attach_mnt(mnt, parent); } -static void attach_shadowed(struct mount *mnt, - struct mount *parent, - struct mount *shadows) +void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) { - if (shadows) { - hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); - list_add(&mnt->mnt_child, &shadows->mnt_child); - } else { - hlist_add_head_rcu(&mnt->mnt_hash, - m_hash(&parent->mnt, mnt->mnt_mountpoint)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); - } + struct mountpoint *old_mp = mnt->mnt_mp; + struct dentry *old_mountpoint = mnt->mnt_mountpoint; + struct mount *old_parent = mnt->mnt_parent; + + list_del_init(&mnt->mnt_child); + hlist_del_init(&mnt->mnt_mp_list); + hlist_del_init_rcu(&mnt->mnt_hash); + + attach_mnt(mnt, parent, mp); + + put_mountpoint(old_mp); + + /* + * Safely avoid even the suggestion this code might sleep or + * lock the mount hash by taking advantage of the knowledge that + * mnt_change_mountpoint will not release the final reference + * to a mountpoint. + * + * During mounting, the mount passed in as the parent mount will + * continue to use the old mountpoint and during unmounting, the + * old mountpoint will continue to exist until namespace_unlock, + * which happens well after mnt_change_mountpoint. + */ + spin_lock(&old_mountpoint->d_lock); + old_mountpoint->d_lockref.count--; + spin_unlock(&old_mountpoint->d_lock); + + mnt_add_count(old_parent, -1); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt, struct mount *shadows) +static void commit_tree(struct mount *mnt) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -899,7 +938,10 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); - attach_shadowed(mnt, parent, shadows); + n->mounts += n->pending_mounts; + n->pending_mounts = 0; + + __attach_mnt(mnt, parent); touch_mnt_namespace(n); } @@ -985,7 +1027,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; @@ -1090,12 +1133,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. + */ + smp_mb(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); @@ -1419,11 +1477,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) propagate_umount(&tmp_list); while (!list_empty(&tmp_list)) { + struct mnt_namespace *ns; bool disconnect; p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); - __touch_mnt_namespace(p->mnt_ns); + ns = p->mnt_ns; + if (ns) { + ns->mounts--; + __touch_mnt_namespace(ns); + } p->mnt_ns = NULL; if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; @@ -1521,8 +1584,13 @@ static int do_umount(struct mount *mnt, int flags) namespace_lock(); lock_mount_hash(); - event++; + /* Recheck MNT_LOCKED with the locks held */ + retval = -EINVAL; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + goto out; + + event++; if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); @@ -1536,6 +1604,7 @@ static int do_umount(struct mount *mnt, int flags) retval = 0; } } +out: unlock_mount_hash(); namespace_unlock(); return retval; @@ -1557,11 +1626,12 @@ void __detach_mounts(struct dentry *dentry) struct mount *mnt; namespace_lock(); + lock_mount_hash(); mp = lookup_mountpoint(dentry); if (IS_ERR_OR_NULL(mp)) goto out_unlock; - lock_mount_hash(); + event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { @@ -1570,9 +1640,9 @@ void __detach_mounts(struct dentry *dentry) } else umount_tree(mnt, UMOUNT_CONNECTED); } - unlock_mount_hash(); put_mountpoint(mp); out_unlock: + unlock_mount_hash(); namespace_unlock(); } @@ -1617,7 +1687,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ goto dput_and_out; retval = -EPERM; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) @@ -1693,11 +1763,16 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, continue; for (s = r; s; s = next_mnt(s, r)) { - struct mount *t = NULL; if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { - s = skip_mnt_tree(s); - continue; + if (s->mnt.mnt_flags & MNT_LOCKED) { + /* Both unbindable and locked. */ + q = ERR_PTR(-EPERM); + goto out; + } else { + s = skip_mnt_tree(s); + continue; + } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { @@ -1715,14 +1790,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, goto out; lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); - mnt_set_mountpoint(parent, p->mnt_mp, q); - if (!list_empty(&parent->mnt_mounts)) { - t = list_last_entry(&parent->mnt_mounts, - struct mount, mnt_child); - if (t->mnt_mp != p->mnt_mp) - t = NULL; - } - attach_shadowed(q, parent, t); + attach_mnt(q, parent, p->mnt_mp); unlock_mount_hash(); } } @@ -1757,7 +1825,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), UMOUNT_SYNC); + umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } @@ -1831,6 +1899,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse) return 0; } +int count_mounts(struct mnt_namespace *ns, struct mount *mnt) +{ + unsigned int max = READ_ONCE(sysctl_mount_max); + unsigned int mounts = 0, old, pending, sum; + struct mount *p; + + for (p = mnt; p; p = next_mnt(p, mnt)) + mounts++; + + old = ns->mounts; + pending = ns->pending_mounts; + sum = old + pending; + if ((old > sum) || + (pending > sum) || + (max < sum) || + (mounts > (max - sum))) + return -ENOSPC; + + ns->pending_mounts = pending + mounts; + return 0; +} + /* * @source_mnt : mount tree to be attached * @nd : place the mount tree @source_mnt is attached @@ -1900,10 +1990,26 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct path *parent_path) { HLIST_HEAD(tree_list); + struct mnt_namespace *ns = dest_mnt->mnt_ns; + struct mountpoint *smp; struct mount *child, *p; struct hlist_node *n; int err; + /* Preallocate a mountpoint in case the new mounts need + * to be tucked under other mounts. + */ + smp = get_mountpoint(source_mnt->mnt.mnt_root); + if (IS_ERR(smp)) + return PTR_ERR(smp); + + /* Is there space to add these mounts to the mount namespace? */ + if (!parent_path) { + err = count_mounts(ns, source_mnt); + if (err) + goto out; + } + if (IS_MNT_SHARED(dest_mnt)) { err = invent_group_ids(source_mnt, true); if (err) @@ -1923,16 +2029,19 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt, NULL); + commit_tree(source_mnt); } hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt_last(&child->mnt_parent->mnt, - child->mnt_mountpoint); - commit_tree(child, q); + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); + commit_tree(child); } + put_mountpoint(smp); unlock_mount_hash(); return 0; @@ -1940,11 +2049,18 @@ static int attach_recursive_mnt(struct mount *source_mnt, out_cleanup_ids: while (!hlist_empty(&tree_list)) { child = hlist_entry(tree_list.first, struct mount, mnt_hash); + child->mnt_parent->mnt_ns->pending_mounts = 0; umount_tree(child, UMOUNT_SYNC); } unlock_mount_hash(); cleanup_group_ids(source_mnt, NULL); out: + ns->pending_mounts = 0; + + read_seqlock_excl(&mount_lock); + put_mountpoint(smp); + read_sequnlock_excl(&mount_lock); + return err; } @@ -1961,9 +2077,7 @@ retry: namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = lookup_mountpoint(dentry); - if (!mp) - mp = new_mountpoint(dentry); + struct mountpoint *mp = get_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); mutex_unlock(&dentry->d_inode->i_mutex); @@ -1982,7 +2096,11 @@ retry: static void unlock_mount(struct mountpoint *where) { struct dentry *dentry = where->m_dentry; + + read_seqlock_excl(&mount_lock); put_mountpoint(where); + read_sequnlock_excl(&mount_lock); + namespace_unlock(); mutex_unlock(&dentry->d_inode->i_mutex); } @@ -2401,8 +2519,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } @@ -2766,6 +2886,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) init_waitqueue_head(&new_ns->poll); new_ns->event = 0; new_ns->user_ns = get_user_ns(user_ns); + new_ns->mounts = 0; + new_ns->pending_mounts = 0; return new_ns; } @@ -2815,6 +2937,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, q = new; while (p) { q->mnt_ns = new_ns; + new_ns->mounts++; if (new_fs) { if (&p->mnt == new_fs->root.mnt) { new_fs->root.mnt = mntget(&q->mnt); @@ -2853,6 +2976,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; new_ns->root = mnt; + new_ns->mounts++; list_add(&mnt->mnt_list, &new_ns->list); } else { mntput(m); @@ -3052,9 +3176,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); /* A moved mount should not expire automatically */ list_del_init(&new_mnt->mnt_expire); + put_mountpoint(root_mp); unlock_mount_hash(); chroot_fs_refs(&root, &new); - put_mountpoint(root_mp); error = 0; out4: unlock_mount(old_mp); @@ -3236,6 +3360,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC) mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC); + /* Don't miss readonly hidden in the superblock flags */ + if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + mnt_flags |= MNT_LOCK_READONLY; + /* Verify the mount flags are equal to or more permissive * than the proposed new mount. */ @@ -3262,7 +3390,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode))