4 * Copyright (C) 1991, 1992 Linus Torvalds
6 * super.c contains code to handle: - mount structures
8 * - filesystem drivers list
10 * - umount system call
13 * GK 2/5/95 - Changed to support mounting the root fs via NFS
15 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
16 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
17 * Added options to /proc/mounts:
18 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
19 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
20 * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
23 #include <linux/config.h>
24 #include <linux/slab.h>
25 #include <linux/locks.h>
26 #include <linux/smp_lock.h>
27 #include <linux/devfs_fs_kernel.h>
28 #include <linux/major.h>
29 #include <linux/acct.h>
30 #include <linux/quotaops.h>
32 #include <asm/uaccess.h>
34 #include <linux/kmod.h>
35 #define __NO_VERSION__
36 #include <linux/module.h>
38 LIST_HEAD(super_blocks);
39 spinlock_t sb_lock = SPIN_LOCK_UNLOCKED;
42 * Handling of filesystem drivers list.
44 * Insertions into, removals from and scans of the list are protected by file_systems_lock (an rwlock).
45 * During unload a module must call unregister_filesystem().
46 * We can access the fields of a list element if:
47 * 1) file_systems_lock is held or
48 * 2) we hold a reference to the module.
49 * The latter can be guaranteed by a call to try_inc_mod_count(); if it
50 * returns 0 we must skip the element, otherwise we have the reference.
51 * Once the reference is obtained we can drop the lock.
54 static struct file_system_type *file_systems;
55 static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
57 /* WARNING: This can be used only if we _already_ own a reference */
/*
 * Bumps the use count of fs->owner with __MOD_INC_USE_COUNT().
 * As the warning above says, the caller must already hold a reference.
 * NOTE(review): any guard around the increment is not visible in this listing.
 */
58 static void get_filesystem(struct file_system_type *fs)
61 __MOD_INC_USE_COUNT(fs->owner);
/*
 * Counterpart of get_filesystem(): lowers the use count of fs->owner with
 * __MOD_DEC_USE_COUNT().
 * NOTE(review): any guard around the decrement is not visible in this listing.
 */
64 static void put_filesystem(struct file_system_type *fs)
67 __MOD_DEC_USE_COUNT(fs->owner);
/*
 * Walks the file_systems list comparing each entry name with @name via
 * strcmp(). The cursor is a pointer to the link itself
 * (struct file_system_type **), and callers in this file dereference or
 * store the returned slot. Callers visible here hold file_systems_lock.
 * NOTE(review): the loop exit and the return statement are not shown in
 * this listing.
 */
70 static struct file_system_type **find_filesystem(const char *name)
72 struct file_system_type **p;
73 for (p=&file_systems; *p; p=&(*p)->next)
74 if (strcmp((*p)->name,name) == 0)
80 * register_filesystem - register a new filesystem
81 * @fs: the file system structure
83 * Adds the file system passed to the list of file systems the kernel
84 * is aware of for mount and other syscalls. Returns 0 on success,
85 * or a negative errno code on an error.
87 * The &struct file_system_type that is passed is linked into the kernel
88 * structures and must not be freed until the file system has been
/*
 * Visible steps: initialise the per-type superblock list (fs->fs_supers),
 * then look up fs->name with find_filesystem() while holding
 * file_systems_lock for writing.
 * NOTE(review): what is done with the slot in p, and the return value, are
 * on lines omitted from this listing.
 */
92 int register_filesystem(struct file_system_type * fs)
95 struct file_system_type ** p;
101 INIT_LIST_HEAD(&fs->fs_supers);
102 write_lock(&file_systems_lock);
103 p = find_filesystem(fs->name);
108 write_unlock(&file_systems_lock);
113 * unregister_filesystem - unregister a file system
114 * @fs: filesystem to unregister
116 * Remove a file system that was previously successfully registered
117 * with the kernel. An error is returned if the file system is not found.
118 * Zero is returned on a success.
120 * Once this function has returned the &struct file_system_type structure
121 * may be freed or reused.
/*
 * Takes file_systems_lock for writing. Two write_unlock() calls are
 * visible, which suggests two separate exit paths.
 * NOTE(review): the list walk that uses tmp and both return statements are
 * omitted from this listing - confirm against the full source.
 */
124 int unregister_filesystem(struct file_system_type * fs)
126 struct file_system_type ** tmp;
128 write_lock(&file_systems_lock);
134 write_unlock(&file_systems_lock);
139 write_unlock(&file_systems_lock);
/*
 * Obtains the name string from __name via getname(), then scans
 * file_systems under the read lock, incrementing index for every entry and
 * comparing each entry name with strcmp().
 * NOTE(review): the handling of a getname() failure, the action on a match
 * and the return value are not visible in this listing.
 */
143 static int fs_index(const char * __name)
145 struct file_system_type * tmp;
149 name = getname(__name);
155 read_lock(&file_systems_lock);
156 for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
157 if (strcmp(tmp->name,name) == 0) {
162 read_unlock(&file_systems_lock);
/*
 * Walks file_systems under the read lock, decrementing @index per entry,
 * and tests each entry for index having reached zero plus a successful
 * try_inc_mod_count() on its owner. After the lock is dropped the entry
 * name, including its terminating NUL (strlen + 1 bytes), is copied to the
 * user buffer @buf; a failed copy_to_user() yields -EFAULT, otherwise 0.
 *
 * @index is unsigned, so the test index <= 0 is the same as index == 0.
 * NOTE(review): if try_inc_mod_count() fails while index is 0, the loop
 * increment (index--) wraps the unsigned value - confirm whether that is
 * intended. The statement run on a match and the final return are not
 * visible in this listing.
 */
167 static int fs_name(unsigned int index, char * buf)
169 struct file_system_type * tmp;
172 read_lock(&file_systems_lock);
173 for (tmp = file_systems; tmp; tmp = tmp->next, index--)
174 if (index <= 0 && try_inc_mod_count(tmp->owner))
176 read_unlock(&file_systems_lock);
180 /* OK, we got the reference, so we can safely block */
181 len = strlen(tmp->name) + 1;
182 res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
/*
 * Counts the entries on the file_systems list while holding
 * file_systems_lock for reading; index is incremented once per entry.
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing.
 */
187 static int fs_maxindex(void)
189 struct file_system_type * tmp;
192 read_lock(&file_systems_lock);
193 for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
195 read_unlock(&file_systems_lock);
200 * Whee.. Weird sysv syscall.
/*
 * Dispatches to one of three helpers. retval starts as -EINVAL.
 *   - fs_index(): arg1 is reinterpreted as a pointer to a name string
 *   - fs_name():  arg1 is an index, arg2 is reinterpreted as a buffer pointer
 *   - fs_maxindex(): takes no arguments
 * NOTE(review): the switch on @option and its case labels are omitted from
 * this listing, so the option-to-helper mapping cannot be confirmed here.
 */
202 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
204 int retval = -EINVAL;
208 retval = fs_index((const char *) arg1);
212 retval = fs_name(arg1, (char *) arg2);
216 retval = fs_maxindex();
/*
 * Formats one text line per filesystem type into @buf while holding
 * file_systems_lock for reading. The first column is the word nodev for
 * types without FS_REQUIRES_DEV and empty otherwise; a tab and a second
 * string column follow (its argument is not visible in this listing).
 * The loop stops adding lines once len reaches PAGE_SIZE - 80.
 * NOTE(review): sprintf() itself is unbounded; the 80-byte margin is the
 * only visible guard - confirm it covers the longest possible line.
 */
222 int get_filesystem_list(char * buf)
225 struct file_system_type * tmp;
227 read_lock(&file_systems_lock);
229 while (tmp && len < PAGE_SIZE - 80) {
230 len += sprintf(buf+len, "%s\t%s\n",
231 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
235 read_unlock(&file_systems_lock);
/*
 * Looks up a filesystem type by @name under the read lock and tries to
 * take a module reference on its owner with try_inc_mod_count().
 * If nothing usable was found and request_module(name) returns 0, the same
 * lookup is repeated once.
 * NOTE(review): the statements run when try_inc_mod_count() fails and the
 * final return are omitted from this listing.
 */
239 struct file_system_type *get_fs_type(const char *name)
241 struct file_system_type *fs;
243 read_lock(&file_systems_lock);
244 fs = *(find_filesystem(name));
245 if (fs && !try_inc_mod_count(fs->owner))
247 read_unlock(&file_systems_lock);
/* Not found: try to load a module named after the fs, then look again. */
248 if (!fs && (request_module(name) == 0)) {
249 read_lock(&file_systems_lock);
250 fs = *(find_filesystem(name));
251 if (fs && !try_inc_mod_count(fs->owner))
253 read_unlock(&file_systems_lock);
259 * alloc_super - create new superblock
261 * Allocates and initializes a new &struct super_block. alloc_super()
262 * returns a pointer to the new superblock or %NULL if allocation had failed.
/*
 * Allocates a struct super_block with kmalloc(GFP_USER), zeroes it and
 * sets up its list heads, semaphores and the s_umount rwsem.
 * Points to note from the visible code:
 *   - s_umount is taken for writing immediately after initialisation and no
 *     matching release is visible here
 *   - s_active starts at 1
 *   - s_op points at a static, empty super_operations
 *   - s_maxbytes defaults to MAX_NON_LFS
 *   - quota hooks default to sb_dquot_ops and sb_quotactl_ops
 * NOTE(review): the NULL check on the allocation, the initial s_count and
 * the return statement are on lines omitted from this listing.
 */
264 static struct super_block *alloc_super(void)
266 static struct super_operations empty_sops = {};
267 struct super_block *s = kmalloc(sizeof(struct super_block), GFP_USER);
269 memset(s, 0, sizeof(struct super_block));
270 INIT_LIST_HEAD(&s->s_dirty);
271 INIT_LIST_HEAD(&s->s_locked_inodes);
272 INIT_LIST_HEAD(&s->s_files);
273 INIT_LIST_HEAD(&s->s_instances);
274 init_rwsem(&s->s_umount);
275 sema_init(&s->s_lock, 1);
276 down_write(&s->s_umount);
278 atomic_set(&s->s_active, 1);
279 sema_init(&s->s_vfs_rename_sem,1);
280 sema_init(&s->s_nfsd_free_path_sem,1);
281 sema_init(&s->s_dquot.dqio_sem, 1);
282 sema_init(&s->s_dquot.dqoff_sem, 1);
283 s->s_maxbytes = MAX_NON_LFS;
284 s->s_op = &empty_sops;
285 s->dq_op = sb_dquot_ops;
286 s->s_qcop = sb_quotactl_ops;
292 * destroy_super - frees a superblock
293 * @s: superblock to free
295 * Frees a superblock.
/*
 * NOTE(review): only the signature is visible in this listing; the body is
 * omitted. The header fragment above states that it frees @s - confirm
 * against the full source.
 */
297 static inline void destroy_super(struct super_block *s)
302 /* Superblock refcounting */
305 * deactivate_super - turn an active reference into a temporary one
306 * @s: superblock to deactivate
308 * Turns an active reference into a temporary one. Returns 0 if there are
309 * other active references, 1 if we had deactivated the last one.
/*
 * Drops one active reference. atomic_dec_and_lock() decrements s_active
 * and acquires sb_lock only when the count reaches zero; on that path
 * s_count is reduced by S_BIAS-1 before sb_lock is released.
 * NOTE(review): both return statements are omitted from this listing.
 */
311 static inline int deactivate_super(struct super_block *s)
313 if (!atomic_dec_and_lock(&s->s_active, &sb_lock))
315 s->s_count -= S_BIAS-1;
316 spin_unlock(&sb_lock);
321 * put_super - drop a temporary reference to superblock
322 * @s: superblock in question
324 * Drops a temporary reference, frees superblock if there's no
/*
 * NOTE(review): only the release of sb_lock is visible in this listing;
 * the locking, the reference drop and any freeing are on omitted lines.
 */
327 static inline void put_super(struct super_block *s)
332 spin_unlock(&sb_lock);
336 * grab_super - acquire an active reference
337 * @s: reference we are trying to make active
339 * Tries to acquire an active reference. grab_super() is used when we
340 * had just found a superblock in super_blocks or fs_type->fs_supers
341 * and want to turn it into a full-blown active reference. grab_super()
342 * is called with sb_lock held and drops it. Returns 1 in case of
343 * success, 0 if we had failed (superblock contents was already dead or
344 * dying when grab_super() had been called).
/*
 * Visible sequence: release sb_lock, then block on s_umount for writing.
 * If s_count is still above S_BIAS, s_active is incremented and sb_lock is
 * released. The remaining visible lines release sb_lock and s_umount.
 * NOTE(review): the s_count adjustments, the re-acquisition of sb_lock
 * after down_write() and the return statements are not visible in this
 * listing.
 */
346 static int grab_super(struct super_block *s)
349 spin_unlock(&sb_lock);
350 down_write(&s->s_umount);
353 if (s->s_count > S_BIAS) {
354 atomic_inc(&s->s_active);
356 spin_unlock(&sb_lock);
359 spin_unlock(&sb_lock);
361 up_write(&s->s_umount);
367 * insert_super - put superblock on the lists
368 * @s: superblock in question
369 * @type: filesystem type it will belong to
371 * Associates superblock with fs type and puts it on per-type and global
372 * superblocks' lists. Should be called with sb_lock held; drops it.
/*
 * Links @s after the current last entry of the global super_blocks list
 * (list_add() relative to super_blocks.prev) and onto the per-type list
 * type->fs_supers, then releases sb_lock and takes a filesystem reference
 * with get_filesystem(type).
 * NOTE(review): the assignment of s->s_type is presumably on the omitted
 * line - confirm.
 */
374 static void insert_super(struct super_block *s, struct file_system_type *type)
377 list_add(&s->s_list, super_blocks.prev);
378 list_add(&s->s_instances, &type->fs_supers);
379 spin_unlock(&sb_lock);
380 get_filesystem(type);
383 static void put_anon_dev(kdev_t dev);
386 * remove_super - makes superblock unreachable
387 * @s: superblock in question
389 * Removes superblock from the lists, unlocks it, drops the reference
390 * and releases the hosting device. @s should have no active
391 * references by that time and after remove_super() it's essentially
392 * in rundown mode - all remaining references are temporary, no new
393 * reference of any sort are going to appear and all holders of
394 * temporary ones will eventually drop them. At that point superblock
395 * itself will be destroyed; all its contents are already gone.
/*
 * Copies s_dev, s_bdev and s_type into locals, then unlinks @s from both
 * the global and the per-type list, releases sb_lock and releases the
 * write side of s_umount. blkdev_put(bdev, BDEV_FS) is among the
 * visible calls.
 * NOTE(review): the acquisition of sb_lock, the conditions guarding
 * blkdev_put() and the uses of dev and fs are on lines omitted from this
 * listing.
 */
397 static void remove_super(struct super_block *s)
399 kdev_t dev = s->s_dev;
400 struct block_device *bdev = s->s_bdev;
401 struct file_system_type *fs = s->s_type;
404 list_del(&s->s_list);
405 list_del(&s->s_instances);
406 spin_unlock(&sb_lock);
407 up_write(&s->s_umount);
411 blkdev_put(bdev, BDEV_FS);
416 struct vfsmount *alloc_vfsmnt(char *name);
417 void free_vfsmnt(struct vfsmount *mnt);
/*
 * Iterates the global super_blocks list and compares each superblock
 * s_dev with @dev.
 * NOTE(review): the action on a match and the return statements are not
 * visible in this listing; no locking is visible inside this helper.
 */
419 static inline struct super_block * find_super(kdev_t dev)
423 list_for_each(p, &super_blocks) {
424 struct super_block * s = sb_entry(p);
425 if (s->s_dev == dev) {
/*
 * Releases the read side of sb->s_umount.
 * NOTE(review): any further step on the omitted line following up_read()
 * is not visible in this listing.
 */
433 void drop_super(struct super_block *sb)
435 up_read(&sb->s_umount);
/*
 * Invokes the filesystem write_super method, but only when the superblock
 * has a root (s_root), is marked dirty (s_dirt) and the method exists.
 * NOTE(review): any locking around the call is on lines omitted from this
 * listing.
 */
439 static inline void write_super(struct super_block *sb)
442 if (sb->s_root && sb->s_dirt)
443 if (sb->s_op && sb->s_op->write_super)
444 sb->s_op->write_super(sb);
449 * Note: check the dirty flag before waiting, so we don't
450 * hold up the sync while mounting a device. (The newly
451 * mounted device won't need syncing.)
/*
 * Two sync_fs call sites are visible. The first only requires @wait and
 * the presence of the sync_fs method. The second sits inside a walk of the
 * super_blocks list and additionally requires sb->s_root.
 * In the list walk sb_lock is released before sleeping on s_umount for
 * reading; the walk runs from super_blocks.next until it wraps back to the
 * list head.
 * NOTE(review): how @dev selects between the two paths, the dirty checks
 * and the restart logic are on lines omitted from this listing.
 */
453 void sync_supers(kdev_t dev, int wait)
455 struct super_block * sb;
462 if (wait && sb->s_op && sb->s_op->sync_fs)
463 sb->s_op->sync_fs(sb);
470 sb = sb_entry(super_blocks.next);
471 while (sb != sb_entry(&super_blocks))
474 spin_unlock(&sb_lock);
475 down_read(&sb->s_umount);
477 if (wait && sb->s_root && sb->s_op && sb->s_op->sync_fs)
478 sb->s_op->sync_fs(sb);
482 sb = sb_entry(sb->s_list.next);
483 spin_unlock(&sb_lock);
487 * get_super - get the superblock of a device
488 * @dev: device to get the superblock for
490 * Scans the superblock list and finds the superblock of the file system
491 * mounted on the device given. %NULL is returned if no match is found.
/*
 * Visible sequence: sb_lock is released and s_umount is taken for reading
 * on one path; a second release of sb_lock is visible on another path.
 * NOTE(review): the lookup itself, the reference counting and the return
 * statements are on lines omitted from this listing.
 */
494 struct super_block * get_super(kdev_t dev)
496 struct super_block * s;
504 spin_unlock(&sb_lock);
505 down_read(&s->s_umount);
511 spin_unlock(&sb_lock);
/*
 * Looks up the superblock for @dev with get_super(), queries it through
 * vfs_statfs() and fills a zeroed struct ustat: f_tfree from f_bfree and
 * f_tinode from f_ffree. The result is copied to the user buffer @ubuf;
 * a failed copy_to_user() yields -EFAULT, otherwise 0.
 * NOTE(review): the handling of a failed lookup or failed vfs_statfs() and
 * the release of the superblock are on lines omitted from this listing.
 */
515 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
517 struct super_block *s;
522 s = get_super(to_kdev_t(dev));
525 err = vfs_statfs(s, &sbuf);
530 memset(&tmp,0,sizeof(struct ustat));
531 tmp.f_tfree = sbuf.f_bfree;
532 tmp.f_tinode = sbuf.f_ffree;
534 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
540 * do_remount_sb - asks filesystem to change mount options.
541 * @sb: superblock in question
542 * @flags: numeric part of options
543 * @data: the rest of options
545 * Alters the mount options of a mounted file system.
/*
 * Visible steps, in order:
 *   1. test for a read-write request on a device that is_read_only()
 *      reports as read-only (the action taken is not visible)
 *   2. for a read-only request, call acct_auto_close() on the device
 *   3. shrink the dcache for this superblock
 *   4. when going from read-write to read-only, consult
 *      fs_may_remount_ro()
 *   5. call the filesystem remount_fs method if it exists; @flags is
 *      passed by address
 *   6. replace the MS_RMT_MASK bits of s_flags with those from @flags,
 *      leaving all other bits untouched
 * NOTE(review): all error returns and any locking are on lines omitted
 * from this listing.
 */
547 int do_remount_sb(struct super_block *sb, int flags, void *data)
551 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
553 /*flags |= MS_RDONLY;*/
554 if (flags & MS_RDONLY)
555 acct_auto_close(sb->s_dev);
556 shrink_dcache_sb(sb);
558 /* If we are remounting RDONLY, make sure there are no rw files open */
559 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
560 if (!fs_may_remount_ro(sb))
562 if (sb->s_op && sb->s_op->remount_fs) {
564 retval = sb->s_op->remount_fs(sb, &flags, data);
569 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
574 * Unnamed block devices are dummy devices used by virtual
575 * filesystems which don't use real block-devices. -- jrs
/*
 * Bitmap of anonymous device numbers: one bit per number, Max_anon (256)
 * bits in total, stored in an array of unsigned long.
 */
578 enum {Max_anon = 256};
579 static unsigned long unnamed_dev_in_use[Max_anon/(8*sizeof(unsigned long))];
580 static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
583 * put_anon_dev - release anonymous device number.
584 * @dev: device in question
/*
 * Marks the anonymous device number as free again: clears bit MINOR(dev)
 * in unnamed_dev_in_use while holding unnamed_dev_lock.
 */
586 static void put_anon_dev(kdev_t dev)
588 spin_lock(&unnamed_dev_lock);
589 clear_bit(MINOR(dev), unnamed_dev_in_use);
590 spin_unlock(&unnamed_dev_lock);
594 * get_anon_super - allocate a superblock for non-device fs
595 * @type: filesystem type
596 * @compare: check if existing superblock is what we want
597 * @data: argument for @compare.
599 * get_anon_super is a helper for non-blockdevice filesystems.
600 * It either finds and returns one of the superblocks of given type
601 * (if it can find one that would satisfy caller) or creates a new
602 * one. In either case we return an active reference to superblock
603 * with ->s_umount locked. If superblock is new it gets a new
604 * anonymous device allocated for it and is inserted into lists -
605 * other initialization is left to caller.
607 * Rather than duplicating all that logic every time
608 * we want something that doesn't fit "nodev" and "single" we pull
609 * the relevant code into common helper and let get_sb_...() call
612 * NB: get_sb_...() is going to become an fs type method, with
613 * current ->read_super() becoming a callback used by common instances.
/*
 * Visible flow:
 *   - a fresh superblock is allocated up front with alloc_super();
 *     ERR_PTR(-ENOMEM) is one of the visible returns
 *   - when @compare is non-NULL, the per-type list type->fs_supers is
 *     scanned, @compare is called on each entry with @data, and
 *     grab_super() is attempted on a candidate
 *   - otherwise the first free bit below Max_anon in unnamed_dev_in_use is
 *     searched under unnamed_dev_lock; if none is free, both
 *     unnamed_dev_lock and sb_lock are released and ERR_PTR(-EMFILE) is
 *     returned
 *   - the chosen bit is set and the superblock is published with
 *     insert_super()
 * NOTE(review): the acquisition of sb_lock, the disposal of the
 * pre-allocated superblock on the reuse and error paths, and the
 * assignment of s_dev are on lines omitted from this listing.
 */
615 struct super_block *get_anon_super(struct file_system_type *type,
616 int (*compare)(struct super_block *,void *), void *data)
618 struct super_block *s = alloc_super();
623 return ERR_PTR(-ENOMEM);
627 if (compare) list_for_each(p, &type->fs_supers) {
628 struct super_block *old;
629 old = list_entry(p, struct super_block, s_instances);
630 if (!compare(old, data))
632 if (!grab_super(old))
638 spin_lock(&unnamed_dev_lock);
639 dev = find_first_zero_bit(unnamed_dev_in_use, Max_anon);
640 if (dev == Max_anon) {
641 spin_unlock(&unnamed_dev_lock);
642 spin_unlock(&sb_lock);
644 return ERR_PTR(-EMFILE);
646 set_bit(dev, unnamed_dev_in_use);
647 spin_unlock(&unnamed_dev_lock);
650 insert_super(s, type);
/*
 * Obtains a superblock for a filesystem that lives on a block device.
 * Visible flow:
 *   - reject a NULL or empty @dev_name with ERR_PTR(-EINVAL)
 *   - resolve the path (following symlinks, positive dentries only) and
 *     check that the inode is a block device and that the mount it was
 *     found on does not carry MNT_NODEV
 *   - take the block_device from the inode; if devfs supplies operations
 *     they replace bdev->bd_op
 *   - open the device with blkdev_get() (mode starts as FMODE_READ),
 *     run check_disk_change(), and test for a read-write request on a
 *     read-only device
 *   - scan super_blocks for an existing superblock on the same device;
 *     a type mismatch or a differing MS_RDONLY setting leaves the scan
 *     with sb_lock released, otherwise grab_super() is attempted and the
 *     extra device reference is dropped with blkdev_put()
 *   - for a new superblock: insert_super(), then the filesystem
 *     read_super method; on success MS_ACTIVE is set
 *   - the error tail drops the device with blkdev_put() and returns
 *     ERR_PTR(error)
 * NOTE(review): error codes, goto labels, the mode adjustment for
 * read-write mounts and the release of the path lookup are on lines
 * omitted from this listing.
 */
654 static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
655 int flags, char *dev_name, void * data)
658 struct block_device *bdev;
659 struct block_device_operations *bdops;
661 struct super_block * s;
666 mode_t mode = FMODE_READ; /* we always need it ;-) */
668 /* Which device is it? */
669 if (!dev_name || !*dev_name)
670 return ERR_PTR(-EINVAL);
671 error = path_lookup(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
673 return ERR_PTR(error);
674 inode = nd.dentry->d_inode;
676 if (!S_ISBLK(inode->i_mode))
679 if (nd.mnt->mnt_flags & MNT_NODEV)
682 bdev = inode->i_bdev;
683 de = devfs_get_handle_from_inode (inode);
684 bdops = devfs_get_ops (de); /* Increments module use count */
685 if (bdops) bdev->bd_op = bdops;
686 /* Done with lookups, semaphore down */
687 dev = to_kdev_t(bdev->bd_dev);
688 if (!(flags & MS_RDONLY))
690 error = blkdev_get(bdev, mode, 0, BDEV_FS);
691 devfs_put_ops (de); /* Decrement module use count now we're safe */
694 check_disk_change(dev);
696 if (!(flags & MS_RDONLY) && is_read_only(dev))
708 list_for_each(p, &super_blocks) {
709 struct super_block *old = sb_entry(p);
710 if (old->s_dev != dev)
712 if (old->s_type != fs_type ||
713 ((flags ^ old->s_flags) & MS_RDONLY)) {
714 spin_unlock(&sb_lock);
718 if (!grab_super(old))
721 blkdev_put(bdev, BDEV_FS);
728 insert_super(s, fs_type);
729 if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0))
731 s->s_flags |= MS_ACTIVE;
741 blkdev_put(bdev, BDEV_FS);
744 return ERR_PTR(error);
/*
 * Obtains a fresh anonymous superblock via get_anon_super() with no
 * compare callback, then runs the filesystem read_super method; the third
 * argument is 1 when MS_VERBOSE is set in @flags and 0 otherwise.
 * A failing read_super leads to ERR_PTR(-EINVAL); on success MS_ACTIVE is
 * set. @dev_name is not used in the visible lines.
 * NOTE(review): the error check on get_anon_super(), the assignment of
 * s_flags and the cleanup on read_super failure are on lines omitted from
 * this listing.
 */
747 static struct super_block *get_sb_nodev(struct file_system_type *fs_type,
748 int flags, char *dev_name, void *data)
750 struct super_block *s = get_anon_super(fs_type, NULL, NULL);
756 if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) {
759 return ERR_PTR(-EINVAL);
761 s->s_flags |= MS_ACTIVE;
/*
 * Compare callback handed to get_anon_super() by get_sb_single().
 * NOTE(review): the body is omitted from this listing, so its return value
 * cannot be confirmed here.
 */
765 static int compare_single(struct super_block *s, void *p)
/*
 * Like get_sb_nodev(), but passes compare_single to get_anon_super() so an
 * existing superblock of this type can be reused. Visible lines run the
 * read_super method (third argument 1 when MS_VERBOSE is set), return
 * ERR_PTR(-EINVAL) if it fails, set MS_ACTIVE, and call do_remount_sb()
 * with the caller flags and data; the result of do_remount_sb() is not
 * checked on the visible line.
 * NOTE(review): the test that separates a new superblock from a reused
 * one is on lines omitted from this listing.
 */
770 static struct super_block *get_sb_single(struct file_system_type *fs_type,
771 int flags, char *dev_name, void *data)
773 struct super_block *s = get_anon_super(fs_type, compare_single, NULL);
779 if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) {
782 return ERR_PTR(-EINVAL);
784 s->s_flags |= MS_ACTIVE;
786 do_remount_sb(s, flags, data);
/*
 * Builds a vfsmount for filesystem type @fstype.
 * Visible flow:
 *   - resolve the type with get_fs_type(); ERR_PTR(-ENODEV) is one of the
 *     visible returns
 *   - allocate the vfsmount with alloc_vfsmnt(name); sb is pre-set to
 *     ERR_PTR(-ENOMEM)
 *   - pick the superblock getter from fs_flags: FS_REQUIRES_DEV selects
 *     get_sb_bdev(), FS_SINGLE selects get_sb_single(), anything else
 *     get_sb_nodev()
 *   - FS_NOMOUNT types get MS_NOUSER set on the superblock
 *   - the mount root takes a dget() reference on sb->s_root, the mount
 *     point is set to sb->s_root and the mount is its own parent
 *   - s_umount is released and the type reference is dropped with
 *     put_filesystem()
 *   - the failure tail also drops the type reference and returns sb cast
 *     to a vfsmount pointer
 * NOTE(review): the return type line, the error checks between the steps
 * and the release of mnt on failure are omitted from this listing.
 */
791 do_kern_mount(const char *fstype, int flags, char *name, void *data)
793 struct file_system_type *type = get_fs_type(fstype);
794 struct super_block *sb = ERR_PTR(-ENOMEM);
795 struct vfsmount *mnt;
798 return ERR_PTR(-ENODEV);
800 mnt = alloc_vfsmnt(name);
803 if (type->fs_flags & FS_REQUIRES_DEV)
804 sb = get_sb_bdev(type, flags, name, data);
805 else if (type->fs_flags & FS_SINGLE)
806 sb = get_sb_single(type, flags, name, data);
808 sb = get_sb_nodev(type, flags, name, data);
811 if (type->fs_flags & FS_NOMOUNT)
812 sb->s_flags |= MS_NOUSER;
814 mnt->mnt_root = dget(sb->s_root);
815 mnt->mnt_mountpoint = sb->s_root;
816 mnt->mnt_parent = mnt;
817 up_write(&sb->s_umount);
818 put_filesystem(type);
823 put_filesystem(type);
824 return (struct vfsmount *)sb;
/*
 * Tears down a superblock once its last active reference is gone.
 * Visible flow:
 *   - root dentry, filesystem type and super_operations are read into
 *     locals before deactivate_super() is called
 *   - s_umount is taken for writing
 *   - FS_LITTER is tested, and shrink_dcache_parent(root) is called
 *   - MS_ACTIVE is cleared and invalidate_inodes() is called
 *   - a dirty superblock is written back through write_super if the
 *     method exists
 *   - invalidate_inodes() is called a second time; a non-zero result is
 *     reported with a KERN_ERR message about busy inodes
 * NOTE(review): the early return, the FS_LITTER cleanup call, put_super
 * method invocation and the final remove_super() are on lines omitted from
 * this listing.
 */
827 void kill_super(struct super_block *sb)
829 struct dentry *root = sb->s_root;
830 struct file_system_type *fs = sb->s_type;
831 struct super_operations *sop = sb->s_op;
833 if (!deactivate_super(sb))
836 down_write(&sb->s_umount);
838 /* Need to clean after the sucker */
839 if (fs->fs_flags & FS_LITTER)
841 shrink_dcache_parent(root);
846 sb->s_flags &= ~MS_ACTIVE;
847 invalidate_inodes(sb); /* bad name - it should be evict_inodes() */
849 if (sop->write_super && sb->s_dirt)
850 sop->write_super(sb);
855 /* Forget any remaining inodes */
856 if (invalidate_inodes(sb)) {
857 printk(KERN_ERR "VFS: Busy inodes after unmount. "
858 "Self-destruct in 5 seconds. Have a nice day...\n");
/*
 * Convenience wrapper around do_kern_mount(): uses type->name both as the
 * filesystem type and as the mount name, with flags 0 and no data.
 */
866 struct vfsmount *kern_mount(struct file_system_type *type)
868 return do_kern_mount(type->name, 0, (char *)type->name, NULL);