linux-2.4.36.git: mm/shmem.c
1 /*
2  * Resizable virtual memory filesystem for Linux.
3  *
4  * Copyright (C) 2000 Linus Torvalds.
5  *               2000 Transmeta Corp.
6  *               2000-2001 Christoph Rohland
7  *               2000-2001 SAP AG
8  *               2002 Red Hat Inc.
9  * Copyright (C) 2002-2003 Hugh Dickins.
10  * Copyright (C) 2002-2003 VERITAS Software Corporation.
11  *
12  * This file is released under the GPL.
13  */
14
15 /*
16  * This virtual memory filesystem is heavily based on the ramfs. It
17  * extends ramfs with the ability to use swap and honor resource limits,
18  * which makes it a completely usable filesystem.
19  */
20
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/devfs_fs_kernel.h>
25 #include <linux/fs.h>
26 #include <linux/mm.h>
27 #include <linux/file.h>
28 #include <linux/swap.h>
29 #include <linux/pagemap.h>
30 #include <linux/string.h>
31 #include <linux/locks.h>
32 #include <linux/smp_lock.h>
33
34 #include <asm/uaccess.h>
35 #include <asm/div64.h>
36
37 /* This magic number is used in glibc for posix shared memory */
38 #define TMPFS_MAGIC     0x01021994
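
/*
 * Illustrative sketch, not part of the original file: user space can
 * recognize a tmpfs mount by this magic, e.g. to decide whether
 * /dev/shm is usable for POSIX shared memory.  A minimal standalone
 * check, assuming only statfs(2) from <sys/vfs.h>:
 */
#if 0
#include <stdio.h>
#include <sys/vfs.h>

/* Returns 1 if path lives on tmpfs, 0 if not, -1 on error. */
static int is_tmpfs(const char *path)
{
	struct statfs st;

	if (statfs(path, &st) != 0)
		return -1;
	return st.f_type == 0x01021994;	/* TMPFS_MAGIC */
}

int main(void)
{
	printf("/dev/shm on tmpfs: %d\n", is_tmpfs("/dev/shm"));
	return 0;
}
#endif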
39
40 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
41 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
42 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
43
44 #define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
45 #define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
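
/*
 * Worked example (illustrative, assuming a 32-bit box with 4K pages,
 * so sizeof(unsigned long) == 4, and SHMEM_NR_DIRECT == 16 from the
 * shmem header):
 *   ENTRIES_PER_PAGE     = 4096/4           = 1024
 *   ENTRIES_PER_PAGEPAGE = 1024*1024        = 1048576
 *   SHMEM_MAX_INDEX      = 16 + 524288*1025 = 537395216 pages
 *   SHMEM_MAX_BYTES      = 537395216 << 12  ~ 2.2e12 bytes, about 2TB
 */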
46
47 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
48
49 /* info->flags needs VM_flags to handle pagein/truncate race efficiently */
50 #define SHMEM_PAGEIN     VM_READ
51 #define SHMEM_TRUNCATE   VM_WRITE
52
53 /* Pretend that each entry is of this size in directory's i_size */
54 #define BOGO_DIRENT_SIZE 20
55
56 #define SHMEM_SB(sb) (&sb->u.shmem_sb)
57
58 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
59 enum sgp_type {
60         SGP_READ,       /* don't exceed i_size, don't allocate page */
61         SGP_CACHE,      /* don't exceed i_size, may allocate page */
62         SGP_WRITE,      /* may exceed i_size, may allocate page */
63 };
64
65 static int shmem_getpage(struct inode *inode, unsigned long idx,
66                          struct page **pagep, enum sgp_type sgp);
67
68 static struct super_operations shmem_ops;
69 static struct address_space_operations shmem_aops;
70 static struct file_operations shmem_file_operations;
71 static struct inode_operations shmem_inode_operations;
72 static struct inode_operations shmem_dir_inode_operations;
73 static struct vm_operations_struct shmem_vm_ops;
74
75 LIST_HEAD(shmem_inodes);
76 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
77
78 static void shmem_free_block(struct inode *inode)
79 {
80         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
81         spin_lock(&sbinfo->stat_lock);
82         sbinfo->free_blocks++;
83         inode->i_blocks -= BLOCKS_PER_PAGE;
84         spin_unlock(&sbinfo->stat_lock);
85 }
86
87 static void shmem_removepage(struct page *page)
88 {
89         if (!PageLaunder(page) && !PageError(page))
90                 shmem_free_block(page->mapping->host);
91 }
92
93 /*
94  * shmem_swp_entry - find the swap vector position in the info structure
95  *
96  * @info:  info structure for the inode
97  * @index: index of the page to find
98  * @page:  optional page to add to the structure. Has to be preset to
99  *         all zeros
100  *
101  * If there is no space allocated yet, it will return NULL when
102  * page is 0; otherwise it will use the page for the needed block,
103  * setting it to 0 on return to indicate that it has been used.
104  *
105  * The swap vector is organized the following way:
106  *
107  * There are SHMEM_NR_DIRECT entries directly stored in the
108  * shmem_inode_info structure. So small files do not need an additional
109  * allocation.
110  *
111  * For pages with index > SHMEM_NR_DIRECT there is the pointer
112  * i_indirect which points to a page which holds in the first half
113  * doubly indirect blocks, in the second half triple indirect blocks:
114  *
115  * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
116  * following layout (for SHMEM_NR_DIRECT == 16):
117  *
118  * i_indirect -> dir --> 16-19
119  *            |      +-> 20-23
120  *            |
121  *            +-->dir2 --> 24-27
122  *            |        +-> 28-31
123  *            |        +-> 32-35
124  *            |        +-> 36-39
125  *            |
126  *            +-->dir3 --> 40-43
127  *                     +-> 44-47
128  *                     +-> 48-51
129  *                     +-> 52-55
130  */
131 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page)
132 {
133         unsigned long offset;
134         void **dir;
135
136         if (index < SHMEM_NR_DIRECT)
137                 return info->i_direct+index;
138         if (!info->i_indirect) {
139                 if (page) {
140                         info->i_indirect = (void **) *page;
141                         *page = 0;
142                 }
143                 return NULL;                    /* need another page */
144         }
145
146         index -= SHMEM_NR_DIRECT;
147         offset = index % ENTRIES_PER_PAGE;
148         index /= ENTRIES_PER_PAGE;
149         dir = info->i_indirect;
150
151         if (index >= ENTRIES_PER_PAGE/2) {
152                 index -= ENTRIES_PER_PAGE/2;
153                 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
154                 index %= ENTRIES_PER_PAGE;
155                 if (!*dir) {
156                         if (page) {
157                                 *dir = (void *) *page;
158                                 *page = 0;
159                         }
160                         return NULL;            /* need another page */
161                 }
162                 dir = (void **) *dir;
163         }
164
165         dir += index;
166         if (!*dir) {
167                 if (!page || !*page)
168                         return NULL;            /* need a page */
169                 *dir = (void *) *page;
170                 *page = 0;
171         }
172         return (swp_entry_t *) *dir + offset;
173 }
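
/*
 * Illustrative sketch, not part of the original file: a user-space
 * rendition of the lookup arithmetic above, using the artificial
 * ENTRIES_PER_PAGE == 4 and SHMEM_NR_DIRECT == 16 from the layout
 * comment.  It only reports where an index would land; it allocates
 * nothing.  Compile standalone to try it.
 */
#if 0
#include <stdio.h>

#define EX_ENTRIES_PER_PAGE	4
#define EX_NR_DIRECT		16

static void locate(unsigned long index)
{
	unsigned long offset, dirslot;

	if (index < EX_NR_DIRECT) {
		printf("%lu: i_direct[%lu]\n", index, index);
		return;
	}
	index -= EX_NR_DIRECT;
	offset = index % EX_ENTRIES_PER_PAGE;
	index /= EX_ENTRIES_PER_PAGE;
	if (index < EX_ENTRIES_PER_PAGE/2) {
		/* first half of i_indirect: doubly indirect */
		printf("doubly indirect: i_indirect[%lu], entry %lu\n",
		       index, offset);
	} else {
		index -= EX_ENTRIES_PER_PAGE/2;
		dirslot = EX_ENTRIES_PER_PAGE/2 + index/EX_ENTRIES_PER_PAGE;
		index %= EX_ENTRIES_PER_PAGE;
		/* second half of i_indirect: triply indirect */
		printf("triply indirect: i_indirect[%lu] -> dir[%lu], entry %lu\n",
		       dirslot, index, offset);
	}
}

int main(void)
{
	locate(5);	/* i_direct[5] */
	locate(18);	/* doubly indirect: i_indirect[0], entry 2 */
	locate(26);	/* triply indirect: i_indirect[2] -> dir[0], entry 2 */
	return 0;
}
#endif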
174
175 /*
176  * shmem_swp_alloc - get the position of the swap entry for the page.
177  *                   If it does not exist allocate the entry.
178  *
179  * @info:       info structure for the inode
180  * @index:      index of the page to find
181  * @sgp:        check and recheck i_size? skip allocation?
182  */
183 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
184 {
185         struct inode *inode = info->inode;
186         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
187         unsigned long page = 0;
188         swp_entry_t *entry;
189         static const swp_entry_t unswapped = {0};
190
191         if (sgp != SGP_WRITE &&
192             ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
193                 return ERR_PTR(-EINVAL);
194
195         while (!(entry = shmem_swp_entry(info, index, &page))) {
196                 if (sgp == SGP_READ)
197                         return (swp_entry_t *) &unswapped;
198                 /*
199                  * Test free_blocks against 1 not 0, since we have 1 data
200                  * page (and perhaps indirect index pages) yet to allocate:
201                  * a waste to allocate index if we cannot allocate data.
202                  */
203                 spin_lock(&sbinfo->stat_lock);
204                 if (sbinfo->free_blocks <= 1) {
205                         spin_unlock(&sbinfo->stat_lock);
206                         return ERR_PTR(-ENOSPC);
207                 }
208                 sbinfo->free_blocks--;
209                 inode->i_blocks += BLOCKS_PER_PAGE;
210                 spin_unlock(&sbinfo->stat_lock);
211
212                 spin_unlock(&info->lock);
213                 page = get_zeroed_page(GFP_USER);
214                 spin_lock(&info->lock);
215
216                 if (!page) {
217                         shmem_free_block(inode);
218                         return ERR_PTR(-ENOMEM);
219                 }
220                 if (sgp != SGP_WRITE &&
221                     ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
222                         entry = ERR_PTR(-EINVAL);
223                         break;
224                 }
225                 if (info->next_index <= index)
226                         info->next_index = index + 1;
227         }
228         if (page) {
229                 /* another task gave its page, or truncated the file */
230                 shmem_free_block(inode);
231                 free_page(page);
232         }
233         if (info->next_index <= index && !IS_ERR(entry))
234                 info->next_index = index + 1;
235         return entry;
236 }
237
238 /*
239  * shmem_free_swp - free some swap entries in a directory
240  *
241  * @dir:   pointer to the directory
242  * @edir:  pointer after last entry of the directory
243  */
244 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
245 {
246         swp_entry_t *ptr;
247         int freed = 0;
248
249         for (ptr = dir; ptr < edir; ptr++) {
250                 if (ptr->val) {
251                         free_swap_and_cache(*ptr);
252                         *ptr = (swp_entry_t){0};
253                         freed++;
254                 }
255         }
256         return freed;
257 }
258
259 /*
260  * shmem_truncate_direct - free the swap entries of a whole doubly
261  *                         indirect block
262  *
263  * @info:       the info structure of the inode
264  * @dir:        pointer to the pointer to the block
265  * @start:      offset to start from (in pages)
266  * @len:        how many pages are stored in this block
267  */
268 static inline unsigned long
269 shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
270 {
271         swp_entry_t **last, **ptr;
272         unsigned long off, freed_swp, freed = 0;
273
274         last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
275         off = start % ENTRIES_PER_PAGE;
276
277         for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
278                 if (!*ptr)
279                         continue;
280
281                 if (info->swapped) {
282                         freed_swp = shmem_free_swp(*ptr + off,
283                                                 *ptr + ENTRIES_PER_PAGE);
284                         info->swapped -= freed_swp;
285                         freed += freed_swp;
286                 }
287
288                 if (!off) {
289                         freed++;
290                         free_page((unsigned long) *ptr);
291                         *ptr = 0;
292                 }
293         }
294
295         if (!start) {
296                 freed++;
297                 free_page((unsigned long) *dir);
298                 *dir = 0;
299         }
300         return freed;
301 }
302
303 /*
304  * shmem_truncate_indirect - truncate an inode
305  *
306  * @info:  the info structure of the inode
307  * @index: the index to truncate
308  *
309  * This function locates the last doubly indirect block and then
310  * calls shmem_truncate_direct to do the real work
311  */
312 static inline unsigned long
313 shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
314 {
315         swp_entry_t ***base;
316         unsigned long baseidx, start;
317         unsigned long len = info->next_index;
318         unsigned long freed;
319
320         if (len <= SHMEM_NR_DIRECT) {
321                 info->next_index = index;
322                 if (!info->swapped)
323                         return 0;
324                 freed = shmem_free_swp(info->i_direct + index,
325                                         info->i_direct + len);
326                 info->swapped -= freed;
327                 return freed;
328         }
329
330         if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) {
331                 len -= SHMEM_NR_DIRECT;
332                 base = (swp_entry_t ***) &info->i_indirect;
333                 baseidx = SHMEM_NR_DIRECT;
334         } else {
335                 len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
336                 BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2);
337                 baseidx = len - 1;
338                 baseidx -= baseidx % ENTRIES_PER_PAGEPAGE;
339                 base = (swp_entry_t ***) info->i_indirect +
340                         ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE;
341                 len -= baseidx;
342                 baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
343         }
344
345         if (index > baseidx) {
346                 info->next_index = index;
347                 start = index - baseidx;
348         } else {
349                 info->next_index = baseidx;
350                 start = 0;
351         }
352         return *base? shmem_truncate_direct(info, base, start, len): 0;
353 }
354
355 static void shmem_truncate(struct inode *inode)
356 {
357         struct shmem_inode_info *info = SHMEM_I(inode);
358         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
359         unsigned long freed = 0;
360         unsigned long index;
361
362         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
363         index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
364         if (index >= info->next_index)
365                 return;
366
367         spin_lock(&info->lock);
368         while (index < info->next_index)
369                 freed += shmem_truncate_indirect(info, index);
370         BUG_ON(info->swapped > info->next_index);
371
372         if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
373                 /*
374                  * Call truncate_inode_pages again: racing shmem_unuse_inode
375                  * may have swizzled a page in from swap since vmtruncate or
376                  * generic_delete_inode did it, before we lowered next_index.
377                  * Also, though shmem_getpage checks i_size before adding to
378                  * cache, no recheck after: so fix the narrow window there too.
379                  */
380                 info->flags |= SHMEM_TRUNCATE;
381                 spin_unlock(&info->lock);
382                 truncate_inode_pages(inode->i_mapping, inode->i_size);
383                 spin_lock(&info->lock);
384                 info->flags &= ~SHMEM_TRUNCATE;
385         }
386
387         spin_unlock(&info->lock);
388         spin_lock(&sbinfo->stat_lock);
389         sbinfo->free_blocks += freed;
390         inode->i_blocks -= freed*BLOCKS_PER_PAGE;
391         spin_unlock(&sbinfo->stat_lock);
392 }
393
394 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
395 {
396         struct inode *inode = dentry->d_inode;
397         struct page *page = NULL;
398         int error;
399
400         if (attr->ia_valid & ATTR_SIZE) {
401                 if (attr->ia_size < inode->i_size) {
402                         /*
403                          * If truncating down to a partial page, then
404                          * if that page is already allocated, hold it
405                          * in memory until the truncation is over, so
406                          * truncate_partial_page cannot miss it were
407                          * it assigned to swap.
408                          */
409                         if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
410                                 (void) shmem_getpage(inode,
411                                         attr->ia_size>>PAGE_CACHE_SHIFT,
412                                                 &page, SGP_READ);
413                         }
414                         /*
415                          * Reset SHMEM_PAGEIN flag so that shmem_truncate can
416                          * detect if any pages might have been added to cache
417                          * after truncate_inode_pages.  But we needn't bother
418                          * if it's being fully truncated to zero-length: the
419                          * nrpages check is efficient enough in that case.
420                          */
421                         if (attr->ia_size) {
422                                 struct shmem_inode_info *info = SHMEM_I(inode);
423                                 spin_lock(&info->lock);
424                                 info->flags &= ~SHMEM_PAGEIN;
425                                 spin_unlock(&info->lock);
426                         }
427                 }
428         }
429
430         error = inode_change_ok(inode, attr);
431         if (!error)
432                 error = inode_setattr(inode, attr);
433         if (page)
434                 page_cache_release(page);
435         return error;
436 }
437
438 static void shmem_delete_inode(struct inode *inode)
439 {
440         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
441         struct shmem_inode_info *info = SHMEM_I(inode);
442
443         if (inode->i_op->truncate == shmem_truncate) {
444                 spin_lock(&shmem_ilock);
445                 list_del(&info->list);
446                 spin_unlock(&shmem_ilock);
447                 inode->i_size = 0;
448                 shmem_truncate(inode);
449         }
450         BUG_ON(inode->i_blocks);
451         spin_lock(&sbinfo->stat_lock);
452         sbinfo->free_inodes++;
453         spin_unlock(&sbinfo->stat_lock);
454         clear_inode(inode);
455 }
456
457 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
458 {
459         swp_entry_t *ptr;
460
461         for (ptr = dir; ptr < edir; ptr++) {
462                 if (ptr->val == entry.val)
463                         return ptr - dir;
464         }
465         return -1;
466 }
467
468 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
469 {
470         struct inode *inode;
471         struct address_space *mapping;
472         swp_entry_t *ptr;
473         unsigned long idx;
474         int offset;
475
476         idx = 0;
477         ptr = info->i_direct;
478         spin_lock(&info->lock);
479         offset = info->next_index;
480         if (offset > SHMEM_NR_DIRECT)
481                 offset = SHMEM_NR_DIRECT;
482         offset = shmem_find_swp(entry, ptr, ptr + offset);
483         if (offset >= 0)
484                 goto found;
485
486         for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
487              idx += ENTRIES_PER_PAGE) {
488                 ptr = shmem_swp_entry(info, idx, NULL);
489                 if (!ptr)
490                         continue;
491                 offset = info->next_index - idx;
492                 if (offset > ENTRIES_PER_PAGE)
493                         offset = ENTRIES_PER_PAGE;
494                 offset = shmem_find_swp(entry, ptr, ptr + offset);
495                 if (offset >= 0)
496                         goto found;
497         }
498         spin_unlock(&info->lock);
499         return 0;
500 found:
501         idx += offset;
502         inode = info->inode;
503         mapping = inode->i_mapping;
504         delete_from_swap_cache(page);
505         if (add_to_page_cache_unique(page,
506                         mapping, idx, page_hash(mapping, idx)) == 0) {
507                 info->flags |= SHMEM_PAGEIN;
508                 ptr[offset].val = 0;
509                 info->swapped--;
510         } else if (add_to_swap_cache(page, entry) != 0)
511                 BUG();
512         spin_unlock(&info->lock);
513         SetPageUptodate(page);
514         /*
515          * Decrement swap count even when the entry is left behind:
516          * try_to_unuse will skip over mms, then reincrement count.
517          */
518         swap_free(entry);
519         return 1;
520 }
521
522 /*
523  * shmem_unuse() searches for a possibly swapped-out shmem page.
524  */
525 int shmem_unuse(swp_entry_t entry, struct page *page)
526 {
527         struct list_head *p;
528         struct shmem_inode_info *info;
529         int found = 0;
530
531         spin_lock(&shmem_ilock);
532         list_for_each(p, &shmem_inodes) {
533                 info = list_entry(p, struct shmem_inode_info, list);
534
535                 if (info->swapped && shmem_unuse_inode(info, entry, page)) {
536                         /* move head to start search for next from here */
537                         list_move_tail(&shmem_inodes, &info->list);
538                         found = 1;
539                         break;
540                 }
541         }
542         spin_unlock(&shmem_ilock);
543         return found;
544 }
545
546 /*
547  * Move the page from the page cache to the swap cache.
548  */
549 static int shmem_writepage(struct page *page)
550 {
551         struct shmem_inode_info *info;
552         swp_entry_t *entry, swap;
553         struct address_space *mapping;
554         unsigned long index;
555         struct inode *inode;
556
557         BUG_ON(!PageLocked(page));
558         if (!PageLaunder(page))
559                 goto fail;
560
561         mapping = page->mapping;
562         index = page->index;
563         inode = mapping->host;
564         info = SHMEM_I(inode);
565         if (info->flags & VM_LOCKED)
566                 goto fail;
567 getswap:
568         swap = get_swap_page();
569         if (!swap.val)
570                 goto fail;
571
572         spin_lock(&info->lock);
573         if (index >= info->next_index) {
574                 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
575                 spin_unlock(&info->lock);
576                 swap_free(swap);
577                 goto fail;
578         }
579         entry = shmem_swp_entry(info, index, NULL);
580         BUG_ON(!entry);
581         BUG_ON(entry->val);
582
583         /* Remove it from the page cache */
584         remove_inode_page(page);
585         page_cache_release(page);
586
587         /* Add it to the swap cache */
588         if (add_to_swap_cache(page, swap) != 0) {
589                 /*
590                  * Raced with "speculative" read_swap_cache_async.
591                  * Add page back to page cache, unref swap, try again.
592                  */
593                 add_to_page_cache_locked(page, mapping, index);
594                 info->flags |= SHMEM_PAGEIN;
595                 spin_unlock(&info->lock);
596                 swap_free(swap);
597                 goto getswap;
598         }
599
600         *entry = swap;
601         info->swapped++;
602         spin_unlock(&info->lock);
603         SetPageUptodate(page);
604         set_page_dirty(page);
605         UnlockPage(page);
606         return 0;
607 fail:
608         return fail_writepage(page);
609 }
610
611 /*
612  * shmem_getpage - either get the page from swap or allocate a new one
613  *
614  * If we allocate a new one we do not mark it dirty. That's up to the
615  * vm. If we swap it in we mark it dirty and free the swap entry,
616  * since a page cannot live in both the swap cache and the page cache
617  */
618 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
619 {
620         struct address_space *mapping = inode->i_mapping;
621         struct shmem_inode_info *info = SHMEM_I(inode);
622         struct shmem_sb_info *sbinfo;
623         struct page *filepage = *pagep;
624         struct page *swappage;
625         swp_entry_t *entry;
626         swp_entry_t swap;
627         int error = 0;
628
629         if (idx >= SHMEM_MAX_INDEX) {
630                 error = -EFBIG;
631                 goto failed;
632         }
633
634         /*
635          * Normally, filepage is NULL on entry, and either found
636          * uptodate immediately, or allocated and zeroed, or read
637          * in under swappage, which is then assigned to filepage.
638          * But shmem_readpage and shmem_prepare_write pass in a locked
639          * filepage, which may be found not uptodate by other callers
640          * too, and may need to be copied from the swappage read in.
641          */
642 repeat:
643         if (!filepage)
644                 filepage = find_lock_page(mapping, idx);
645         if (filepage && Page_Uptodate(filepage))
646                 goto done;
647
648         spin_lock(&info->lock);
649         entry = shmem_swp_alloc(info, idx, sgp);
650         if (IS_ERR(entry)) {
651                 spin_unlock(&info->lock);
652                 error = PTR_ERR(entry);
653                 goto failed;
654         }
655         swap = *entry;
656
657         if (swap.val) {
658                 /* Look it up and read it in.. */
659                 swappage = lookup_swap_cache(swap);
660                 if (!swappage) {
661                         spin_unlock(&info->lock);
662                         swapin_readahead(swap);
663                         swappage = read_swap_cache_async(swap);
664                         if (!swappage) {
665                                 spin_lock(&info->lock);
666                                 entry = shmem_swp_alloc(info, idx, sgp);
667                                 if (IS_ERR(entry))
668                                         error = PTR_ERR(entry);
669                                 else if (entry->val == swap.val)
670                                         error = -ENOMEM;
671                                 spin_unlock(&info->lock);
672                                 if (error)
673                                         goto failed;
674                                 goto repeat;
675                         }
676                         wait_on_page(swappage);
677                         page_cache_release(swappage);
678                         goto repeat;
679                 }
680
681                 /* We have to do this with page locked to prevent races */
682                 if (TryLockPage(swappage)) {
683                         spin_unlock(&info->lock);
684                         wait_on_page(swappage);
685                         page_cache_release(swappage);
686                         goto repeat;
687                 }
688                 if (!Page_Uptodate(swappage)) {
689                         spin_unlock(&info->lock);
690                         UnlockPage(swappage);
691                         page_cache_release(swappage);
692                         error = -EIO;
693                         goto failed;
694                 }
695
696                 delete_from_swap_cache(swappage);
697                 if (filepage) {
698                         entry->val = 0;
699                         info->swapped--;
700                         spin_unlock(&info->lock);
701                         flush_page_to_ram(swappage);
702                         copy_highpage(filepage, swappage);
703                         UnlockPage(swappage);
704                         page_cache_release(swappage);
705                         flush_dcache_page(filepage);
706                         SetPageUptodate(filepage);
707                         SetPageDirty(filepage);
708                         swap_free(swap);
709                 } else if (add_to_page_cache_unique(swappage,
710                         mapping, idx, page_hash(mapping, idx)) == 0) {
711                         info->flags |= SHMEM_PAGEIN;
712                         entry->val = 0;
713                         info->swapped--;
714                         spin_unlock(&info->lock);
715                         filepage = swappage;
716                         SetPageUptodate(filepage);
717                         SetPageDirty(filepage);
718                         swap_free(swap);
719                 } else {
720                         if (add_to_swap_cache(swappage, swap) != 0)
721                                 BUG();
722                         spin_unlock(&info->lock);
723                         SetPageUptodate(swappage);
724                         SetPageDirty(swappage);
725                         UnlockPage(swappage);
726                         page_cache_release(swappage);
727                         goto repeat;
728                 }
729         } else if (sgp == SGP_READ && !filepage) {
730                 filepage = find_get_page(mapping, idx);
731                 if (filepage &&
732                     (!Page_Uptodate(filepage) || TryLockPage(filepage))) {
733                         spin_unlock(&info->lock);
734                         wait_on_page(filepage);
735                         page_cache_release(filepage);
736                         filepage = NULL;
737                         goto repeat;
738                 }
739                 spin_unlock(&info->lock);
740         } else {
741                 sbinfo = SHMEM_SB(inode->i_sb);
742                 spin_lock(&sbinfo->stat_lock);
743                 if (sbinfo->free_blocks == 0) {
744                         spin_unlock(&sbinfo->stat_lock);
745                         spin_unlock(&info->lock);
746                         error = -ENOSPC;
747                         goto failed;
748                 }
749                 sbinfo->free_blocks--;
750                 inode->i_blocks += BLOCKS_PER_PAGE;
751                 spin_unlock(&sbinfo->stat_lock);
752
753                 if (!filepage) {
754                         spin_unlock(&info->lock);
755                         filepage = page_cache_alloc(mapping);
756                         if (!filepage) {
757                                 shmem_free_block(inode);
758                                 error = -ENOMEM;
759                                 goto failed;
760                         }
761
762                         spin_lock(&info->lock);
763                         entry = shmem_swp_alloc(info, idx, sgp);
764                         if (IS_ERR(entry))
765                                 error = PTR_ERR(entry);
766                         if (error || entry->val ||
767                             add_to_page_cache_unique(filepage,
768                             mapping, idx, page_hash(mapping, idx)) != 0) {
769                                 spin_unlock(&info->lock);
770                                 page_cache_release(filepage);
771                                 shmem_free_block(inode);
772                                 filepage = NULL;
773                                 if (error)
774                                         goto failed;
775                                 goto repeat;
776                         }
777                         info->flags |= SHMEM_PAGEIN;
778                 }
779
780                 spin_unlock(&info->lock);
781                 clear_highpage(filepage);
782                 flush_dcache_page(filepage);
783                 SetPageUptodate(filepage);
784         }
785 done:
786         if (!*pagep) {
787                 if (filepage)
788                         UnlockPage(filepage);
789                 else
790                         filepage = ZERO_PAGE(0);
791                 *pagep = filepage;
792         }
793         if (PageError(filepage))
794                 ClearPageError(filepage);
795         return 0;
796
797 failed:
798         if (filepage) {
799                 if (*pagep == filepage)
800                         SetPageError(filepage);
801                 else {
802                         UnlockPage(filepage);
803                         page_cache_release(filepage);
804                 }
805         }
806         return error;
807 }
808
809 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
810 {
811         struct inode *inode = vma->vm_file->f_dentry->d_inode;
812         struct page *page = NULL;
813         unsigned long idx;
814         int error;
815
816         idx = (address - vma->vm_start) >> PAGE_SHIFT;
817         idx += vma->vm_pgoff;
818         idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
819
820         error = shmem_getpage(inode, idx, &page, SGP_CACHE);
821         if (error)
822                 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
823
824         mark_page_accessed(page);
825         flush_page_to_ram(page);
826         return page;
827 }
828
829 void shmem_lock(struct file *file, int lock)
830 {
831         struct inode *inode = file->f_dentry->d_inode;
832         struct shmem_inode_info *info = SHMEM_I(inode);
833
834         spin_lock(&info->lock);
835         if (lock)
836                 info->flags |= VM_LOCKED;
837         else
838                 info->flags &= ~VM_LOCKED;
839         spin_unlock(&info->lock);
840 }
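
/*
 * Usage sketch (illustrative, not part of the original file): in 2.4
 * shmem_lock is reached from sys_shmctl, so from user space a SysV
 * segment is pinned roughly like this:
 */
#if 0
#include <sys/ipc.h>
#include <sys/shm.h>

/* Needs CAP_IPC_LOCK; SHM_UNLOCK clears VM_LOCKED again. */
static int pin_segment(int shmid)
{
	return shmctl(shmid, SHM_LOCK, NULL);
}
#endif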
841
842 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
843 {
844         struct vm_operations_struct *ops;
845         struct inode *inode = file->f_dentry->d_inode;
846
847         ops = &shmem_vm_ops;
848         if (!S_ISREG(inode->i_mode))
849                 return -EACCES;
850         UPDATE_ATIME(inode);
851         vma->vm_ops = ops;
852         return 0;
853 }
854
855 static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
856 {
857         struct inode *inode;
858         struct shmem_inode_info *info;
859         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
860
861         spin_lock(&sbinfo->stat_lock);
862         if (!sbinfo->free_inodes) {
863                 spin_unlock(&sbinfo->stat_lock);
864                 return NULL;
865         }
866         sbinfo->free_inodes--;
867         spin_unlock(&sbinfo->stat_lock);
868
869         inode = new_inode(sb);
870         if (inode) {
871                 inode->i_mode = mode;
872                 inode->i_uid = current->fsuid;
873                 inode->i_gid = current->fsgid;
874                 inode->i_blksize = PAGE_CACHE_SIZE;
875                 inode->i_blocks = 0;
876                 inode->i_rdev = NODEV;
877                 inode->i_mapping->a_ops = &shmem_aops;
878                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
879                 info = SHMEM_I(inode);
880                 info->inode = inode;
881                 spin_lock_init(&info->lock);
882                 switch (mode & S_IFMT) {
883                 default:
884                         init_special_inode(inode, mode, dev);
885                         break;
886                 case S_IFREG:
887                         inode->i_op = &shmem_inode_operations;
888                         inode->i_fop = &shmem_file_operations;
889                         spin_lock(&shmem_ilock);
890                         list_add_tail(&info->list, &shmem_inodes);
891                         spin_unlock(&shmem_ilock);
892                         break;
893                 case S_IFDIR:
894                         inode->i_nlink++;
895                         /* Some things misbehave if size == 0 on a directory */
896                         inode->i_size = 2 * BOGO_DIRENT_SIZE;
897                         inode->i_op = &shmem_dir_inode_operations;
898                         inode->i_fop = &dcache_dir_ops;
899                         break;
900                 case S_IFLNK:
901                         break;
902                 }
903         }
904         return inode;
905 }
906
907 static int shmem_set_size(struct shmem_sb_info *info,
908                           unsigned long max_blocks, unsigned long max_inodes)
909 {
910         int error;
911         unsigned long blocks, inodes;
912
913         spin_lock(&info->stat_lock);
914         blocks = info->max_blocks - info->free_blocks;
915         inodes = info->max_inodes - info->free_inodes;
916         error = -EINVAL;
917         if (max_blocks < blocks)
918                 goto out;
919         if (max_inodes < inodes)
920                 goto out;
921         error = 0;
922         info->max_blocks  = max_blocks;
923         info->free_blocks = max_blocks - blocks;
924         info->max_inodes  = max_inodes;
925         info->free_inodes = max_inodes - inodes;
926 out:
927         spin_unlock(&info->stat_lock);
928         return error;
929 }
930
931 #ifdef CONFIG_TMPFS
932
933 static struct inode_operations shmem_symlink_inode_operations;
934 static struct inode_operations shmem_symlink_inline_operations;
935
936 /*
937  * tmpfs itself makes no use of generic_file_read, generic_file_mmap
938  * or generic_file_write; but shmem_readpage, shmem_prepare_write and
939  * shmem_commit_write let a tmpfs file be used below the loop driver,
940  * and shmem_readpage lets a tmpfs file be used by sendfile.
941  */
942 static int
943 shmem_readpage(struct file *file, struct page *page)
944 {
945         struct inode *inode = page->mapping->host;
946         int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
947         UnlockPage(page);
948         return error;
949 }
950
951 static int
952 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
953 {
954         struct inode *inode = page->mapping->host;
955         return shmem_getpage(inode, page->index, &page, SGP_WRITE);
956 }
957
958 static int
959 shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
960 {
961         struct inode *inode = page->mapping->host;
962         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
963
964         if (pos > inode->i_size)
965                 inode->i_size = pos;
966         SetPageDirty(page);
967         return 0;
968 }
969
970 static ssize_t
971 shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
972 {
973         struct inode    *inode = file->f_dentry->d_inode;
974         loff_t          pos;
975         unsigned long   written;
976         ssize_t         err;
977
978         if ((ssize_t) count < 0)
979                 return -EINVAL;
980
981         if (!access_ok(VERIFY_READ, buf, count))
982                 return -EFAULT;
983
984         down(&inode->i_sem);
985
986         pos = *ppos;
987         written = 0;
988
989         err = precheck_file_write(file, inode, &count, &pos);
990         if (err || !count)
991                 goto out;
992
993         remove_suid(inode);
994         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
995
996         do {
997                 struct page *page = NULL;
998                 unsigned long bytes, index, offset;
999                 char *kaddr;
1000                 int left;
1001
1002                 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1003                 index = pos >> PAGE_CACHE_SHIFT;
1004                 bytes = PAGE_CACHE_SIZE - offset;
1005                 if (bytes > count)
1006                         bytes = count;
1007
1008                 /*
1009                  * We don't hold page lock across copy from user -
1010                  * what would it guard against? - so no deadlock here.
1011                  */
1012
1013                 err = shmem_getpage(inode, index, &page, SGP_WRITE);
1014                 if (err)
1015                         break;
1016
1017                 kaddr = kmap(page);
1018                 left = __copy_from_user(kaddr + offset, buf, bytes);
1019                 kunmap(page);
1020
1021                 written += bytes;
1022                 count -= bytes;
1023                 pos += bytes;
1024                 buf += bytes;
1025                 if (pos > inode->i_size)
1026                         inode->i_size = pos;
1027
1028                 flush_dcache_page(page);
1029                 SetPageDirty(page);
1030                 SetPageReferenced(page);
1031                 page_cache_release(page);
1032
1033                 if (left) {
1034                         pos -= left;
1035                         written -= left;
1036                         err = -EFAULT;
1037                         break;
1038                 }
1039         } while (count);
1040
1041         *ppos = pos;
1042         if (written)
1043                 err = written;
1044 out:
1045         up(&inode->i_sem);
1046         return err;
1047 }
1048
1049 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
1050 {
1051         struct inode *inode = filp->f_dentry->d_inode;
1052         struct address_space *mapping = inode->i_mapping;
1053         unsigned long index, offset;
1054         loff_t pos = *ppos;
1055
1056         if (unlikely(pos < 0))
1057                 return;
1058
1059         index = pos >> PAGE_CACHE_SHIFT;
1060         offset = pos & ~PAGE_CACHE_MASK;
1061
1062         for (;;) {
1063                 struct page *page = NULL;
1064                 unsigned long end_index, nr, ret;
1065
1066                 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1067                 if (index > end_index)
1068                         break;
1069                 if (index == end_index) {
1070                         nr = inode->i_size & ~PAGE_CACHE_MASK;
1071                         if (nr <= offset)
1072                                 break;
1073                 }
1074
1075                 desc->error = shmem_getpage(inode, index, &page, SGP_READ);
1076                 if (desc->error) {
1077                         if (desc->error == -EINVAL)
1078                                 desc->error = 0;
1079                         break;
1080                 }
1081
1082                 /*
1083                  * We must re-evaluate i_size after getting the page, since
1084                  * reads (unlike writes) lack i_sem protection against truncate
1085                  */
1086                 nr = PAGE_CACHE_SIZE;
1087                 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1088                 if (index == end_index) {
1089                         nr = inode->i_size & ~PAGE_CACHE_MASK;
1090                         if (nr <= offset) {
1091                                 page_cache_release(page);
1092                                 break;
1093                         }
1094                 }
1095                 nr -= offset;
1096
1097                 if (page != ZERO_PAGE(0)) {
1098                         /*
1099                          * If users can be writing to this page using arbitrary
1100                          * virtual addresses, take care about potential aliasing
1101                          * before reading the page on the kernel side.
1102                          */
1103                         if (mapping->i_mmap_shared != NULL)
1104                                 flush_dcache_page(page);
1105                         /*
1106                          * Mark the page accessed if we read the
1107                          * beginning or we just did an lseek.
1108                          */
1109                         if (!offset || !filp->f_reada)
1110                                 mark_page_accessed(page);
1111                 }
1112
1113                 /*
1114                  * Ok, we have the page, and it's up-to-date, so
1115                  * now we can copy it to user space...
1116                  *
1117                  * The actor routine returns how many bytes were actually used..
1118                  * NOTE! This may not be the same as how much of a user buffer
1119                  * we filled up (we may be padding etc), so we can only update
1120                  * "pos" here (the actor routine has to update the user buffer
1121                  * pointers and the remaining count).
1122                  */
1123                 ret = file_read_actor(desc, page, offset, nr);
1124                 offset += ret;
1125                 index += offset >> PAGE_CACHE_SHIFT;
1126                 offset &= ~PAGE_CACHE_MASK;
1127
1128                 page_cache_release(page);
1129                 if (ret != nr || !desc->count)
1130                         break;
1131         }
1132
1133         *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1134         filp->f_reada = 1;
1135         UPDATE_ATIME(inode);
1136 }
1137
1138 static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
1139 {
1140         read_descriptor_t desc;
1141
1142         if ((ssize_t) count < 0)
1143                 return -EINVAL;
1144         if (!access_ok(VERIFY_WRITE, buf, count))
1145                 return -EFAULT;
1146         if (!count)
1147                 return 0;
1148
1149         desc.written = 0;
1150         desc.count = count;
1151         desc.buf = buf;
1152         desc.error = 0;
1153
1154         do_shmem_file_read(filp, ppos, &desc);
1155         if (desc.written)
1156                 return desc.written;
1157         return desc.error;
1158 }
1159
1160 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
1161 {
1162         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1163
1164         buf->f_type = TMPFS_MAGIC;
1165         buf->f_bsize = PAGE_CACHE_SIZE;
1166         spin_lock(&sbinfo->stat_lock);
1167         buf->f_blocks = sbinfo->max_blocks;
1168         buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1169         buf->f_files = sbinfo->max_inodes;
1170         buf->f_ffree = sbinfo->free_inodes;
1171         spin_unlock(&sbinfo->stat_lock);
1172         buf->f_namelen = NAME_MAX;
1173         return 0;
1174 }
1175
1176 /*
1177  * Retaining negative dentries for an in-memory filesystem just wastes
1178  * memory and lookup time: arrange for them to be deleted immediately.
1179  */
1180 static int shmem_delete_dentry(struct dentry *dentry)
1181 {
1182         return 1;
1183 }
1184
1185 /*
1186  * Lookup the data. This is trivial - if the dentry didn't already
1187  * exist, we know it is negative.  Set d_op to delete negative dentries.
1188  */
1189 static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry)
1190 {
1191         static struct dentry_operations shmem_dentry_operations = {
1192                 .d_delete = shmem_delete_dentry,
1193         };
1194
1195         if (dentry->d_name.len > NAME_MAX)
1196                 return ERR_PTR(-ENAMETOOLONG);
1197         dentry->d_op = &shmem_dentry_operations;
1198         d_add(dentry, NULL);
1199         return NULL;
1200 }
1201
1202 /*
1203  * File creation. Allocate an inode, and we're done..
1204  */
1205 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
1206 {
1207         struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1208         int error = -ENOSPC;
1209
1210         if (inode) {
1211                 if (dir->i_mode & S_ISGID) {
1212                         inode->i_gid = dir->i_gid;
1213                         if (S_ISDIR(mode))
1214                                 inode->i_mode |= S_ISGID;
1215                 }
1216                 dir->i_size += BOGO_DIRENT_SIZE;
1217                 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1218                 d_instantiate(dentry, inode);
1219                 dget(dentry); /* Extra count - pin the dentry in core */
1220                 error = 0;
1221         }
1222         return error;
1223 }
1224
1225 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1226 {
1227         int error;
1228
1229         if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1230                 return error;
1231         dir->i_nlink++;
1232         return 0;
1233 }
1234
1235 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
1236 {
1237         return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1238 }
1239
1240 /*
1241  * Link a file..
1242  */
1243 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1244 {
1245         struct inode *inode = old_dentry->d_inode;
1246
1247         if (S_ISDIR(inode->i_mode))
1248                 return -EPERM;
1249
1250         dir->i_size += BOGO_DIRENT_SIZE;
1251         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1252         inode->i_nlink++;
1253         atomic_inc(&inode->i_count);    /* New dentry reference */
1254         dget(dentry);           /* Extra pinning count for the created dentry */
1255         d_instantiate(dentry, inode);
1256         return 0;
1257 }
1258
1259 static inline int shmem_positive(struct dentry *dentry)
1260 {
1261         return dentry->d_inode && !d_unhashed(dentry);
1262 }
1263
1264 /*
1265  * Check that a directory is empty (this works
1266  * for regular files too, they'll just always be
1267  * considered empty..).
1268  *
1269  * Note that an empty directory can still have
1270  * children, they just all have to be negative..
1271  */
1272 static int shmem_empty(struct dentry *dentry)
1273 {
1274         struct list_head *list;
1275
1276         spin_lock(&dcache_lock);
1277         list = dentry->d_subdirs.next;
1278
1279         while (list != &dentry->d_subdirs) {
1280                 struct dentry *de = list_entry(list, struct dentry, d_child);
1281
1282                 if (shmem_positive(de)) {
1283                         spin_unlock(&dcache_lock);
1284                         return 0;
1285                 }
1286                 list = list->next;
1287         }
1288         spin_unlock(&dcache_lock);
1289         return 1;
1290 }
1291
1292 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1293 {
1294         struct inode *inode = dentry->d_inode;
1295
1296         dir->i_size -= BOGO_DIRENT_SIZE;
1297         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1298         inode->i_nlink--;
1299         dput(dentry);   /* Undo the count from "create" - this does all the work */
1300         return 0;
1301 }
1302
1303 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1304 {
1305         if (!shmem_empty(dentry))
1306                 return -ENOTEMPTY;
1307
1308         dir->i_nlink--;
1309         return shmem_unlink(dir, dentry);
1310 }
1311
1312 /*
1313  * The VFS layer already does all the dentry stuff for rename,
1314  * we just have to decrement the usage count for the target if
1315  * it exists so that the VFS layer correctly frees it when it
1316  * gets overwritten.
1317  */
1318 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1319 {
1320         struct inode *inode = old_dentry->d_inode;
1321         int they_are_dirs = S_ISDIR(inode->i_mode);
1322
1323         if (!shmem_empty(new_dentry))
1324                 return -ENOTEMPTY;
1325
1326         if (new_dentry->d_inode) {
1327                 (void) shmem_unlink(new_dir, new_dentry);
1328                 if (they_are_dirs)
1329                         old_dir->i_nlink--;
1330         } else if (they_are_dirs) {
1331                 old_dir->i_nlink--;
1332                 new_dir->i_nlink++;
1333         }
1334
1335         old_dir->i_size -= BOGO_DIRENT_SIZE;
1336         new_dir->i_size += BOGO_DIRENT_SIZE;
1337         old_dir->i_ctime = old_dir->i_mtime =
1338         new_dir->i_ctime = new_dir->i_mtime =
1339         inode->i_ctime = CURRENT_TIME;
1340         return 0;
1341 }
1342
1343 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1344 {
1345         int error;
1346         int len;
1347         struct inode *inode;
1348         struct page *page = NULL;
1349         char *kaddr;
1350         struct shmem_inode_info *info;
1351
1352         len = strlen(symname) + 1;
1353         if (len > PAGE_CACHE_SIZE)
1354                 return -ENAMETOOLONG;
1355
1356         inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1357         if (!inode)
1358                 return -ENOSPC;
1359
1360         info = SHMEM_I(inode);
1361         inode->i_size = len-1;
1362         if (len <= sizeof(struct shmem_inode_info)) {
1363                 /* do it inline */
1364                 memcpy(info, symname, len);
1365                 inode->i_op = &shmem_symlink_inline_operations;
1366         } else {
1367                 error = shmem_getpage(inode, 0, &page, SGP_WRITE);
1368                 if (error) {
1369                         iput(inode);
1370                         return error;
1371                 }
1372                 inode->i_op = &shmem_symlink_inode_operations;
1373                 spin_lock(&shmem_ilock);
1374                 list_add_tail(&info->list, &shmem_inodes);
1375                 spin_unlock(&shmem_ilock);
1376                 kaddr = kmap(page);
1377                 memcpy(kaddr, symname, len);
1378                 kunmap(page);
1379                 SetPageDirty(page);
1380                 page_cache_release(page);
1381         }
1382         if (dir->i_mode & S_ISGID)
1383                 inode->i_gid = dir->i_gid;
1384         dir->i_size += BOGO_DIRENT_SIZE;
1385         dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1386         d_instantiate(dentry, inode);
1387         dget(dentry);
1388         return 0;
1389 }
1390
1391 static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
1392 {
1393         return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1394 }
1395
1396 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1397 {
1398         return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1399 }
1400
1401 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
1402 {
1403         struct page *page = NULL;
1404         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1405         if (res)
1406                 return res;
1407         res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1408         kunmap(page);
1409         mark_page_accessed(page);
1410         page_cache_release(page);
1411         return res;
1412 }
1413
1414 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1415 {
1416         struct page *page = NULL;
1417         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1418         if (res)
1419                 return res;
1420         res = vfs_follow_link(nd, kmap(page));
1421         kunmap(page);
1422         mark_page_accessed(page);
1423         page_cache_release(page);
1424         return res;
1425 }
1426
1427 static struct inode_operations shmem_symlink_inline_operations = {
1428         readlink:       shmem_readlink_inline,
1429         follow_link:    shmem_follow_link_inline,
1430 };
1431
1432 static struct inode_operations shmem_symlink_inode_operations = {
1433         truncate:       shmem_truncate,
1434         readlink:       shmem_readlink,
1435         follow_link:    shmem_follow_link,
1436 };
1437
static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
{
	char *this_char, *value, *rest;

	while ((this_char = strsep(&options, ",")) != NULL) {
		if (!*this_char)
			continue;
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;
		} else {
			printk(KERN_ERR
			    "tmpfs: No value for mount option '%s'\n",
			    this_char);
			return 1;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest == '%') {
				struct sysinfo si;
				si_meminfo(&si);
				size <<= PAGE_SHIFT;
				size *= si.totalram;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			*blocks = size >> PAGE_CACHE_SHIFT;
		} else if (!strcmp(this_char,"nr_blocks")) {
			*blocks = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			*inodes = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (!mode)
				continue;
			*mode = simple_strtoul(value,&rest,8);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (!uid)
				continue;
			*uid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (!gid)
				continue;
			*gid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			return 1;
		}
	}
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
	return 1;
}

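/*
 * Example (illustrative): option strings come from mount(8), both at
 * mount time and on remount:
 *
 *	mount -t tmpfs -o size=50%,nr_inodes=1024,mode=1777 tmpfs /dev/shm
 *	mount -o remount,size=64m /dev/shm
 *
 * "size" takes k/m/g suffixes via memparse(), or a trailing '%' for a
 * percentage of physical RAM; shmem_set_size() will refuse to shrink
 * a limit below what the instance is already using.
 */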
static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	unsigned long max_blocks = sbinfo->max_blocks;
	unsigned long max_inodes = sbinfo->max_inodes;

	if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
		return -EINVAL;
	return shmem_set_size(sbinfo, max_blocks, max_inodes);
}

/*
 * tmpfs files live entirely in the page cache and swap; there is no
 * backing store to flush, so fsync can return success immediately.
 */
static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
	return 0;
}
#endif

static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	unsigned long blocks, inodes;
	int mode   = S_IRWXUGO | S_ISVTX;
	uid_t uid = current->fsuid;
	gid_t gid = current->fsgid;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	struct sysinfo si;

	/*
	 * By default each tmpfs instance is allowed only half of the
	 * physical RAM: e.g. with 4 kB pages and 256 MB of memory,
	 * si.totalram is 65536 pages, so blocks and inodes both
	 * default to 32768.
	 */
	si_meminfo(&si);
	blocks = inodes = si.totalram / 2;

#ifdef CONFIG_TMPFS
	if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
		return NULL;
#endif

	spin_lock_init(&sbinfo->stat_lock);
	sbinfo->max_blocks = blocks;
	sbinfo->free_blocks = blocks;
	sbinfo->max_inodes = inodes;
	sbinfo->free_inodes = inodes;
	sb->s_maxbytes = SHMEM_MAX_BYTES;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
	if (!inode)
		return NULL;

	inode->i_uid = uid;
	inode->i_gid = gid;
	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return NULL;
	}
	sb->s_root = root;
	return sb;
}

static struct address_space_operations shmem_aops = {
	removepage:	shmem_removepage,
	writepage:	shmem_writepage,
#ifdef CONFIG_TMPFS
	readpage:	shmem_readpage,
	prepare_write:	shmem_prepare_write,
	commit_write:	shmem_commit_write,
#endif
};

static struct file_operations shmem_file_operations = {
	mmap:		shmem_mmap,
#ifdef CONFIG_TMPFS
	read:		shmem_file_read,
	write:		shmem_file_write,
	fsync:		shmem_sync_file,
#endif
};

static struct inode_operations shmem_inode_operations = {
	truncate:	shmem_truncate,
	setattr:	shmem_notify_change,
};

static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	create:		shmem_create,
	lookup:		shmem_lookup,
	link:		shmem_link,
	unlink:		shmem_unlink,
	symlink:	shmem_symlink,
	mkdir:		shmem_mkdir,
	rmdir:		shmem_rmdir,
	mknod:		shmem_mknod,
	rename:		shmem_rename,
#endif
};

static struct super_operations shmem_ops = {
#ifdef CONFIG_TMPFS
	statfs:		shmem_statfs,
	remount_fs:	shmem_remount_fs,
#endif
	delete_inode:	shmem_delete_inode,
	put_inode:	force_delete,
};

static struct vm_operations_struct shmem_vm_ops = {
	nopage:		shmem_nopage,
};

#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
#else
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
#endif
static struct vfsmount *shm_mnt;

static int __init init_tmpfs(void)
{
	int error;

	error = register_filesystem(&tmpfs_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register tmpfs\n");
		goto out3;
	}
#ifdef CONFIG_TMPFS
	error = register_filesystem(&shmem_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register shm fs\n");
		goto out2;
	}
	devfs_mk_dir(NULL, "shm", NULL);
#endif
	shm_mnt = kern_mount(&tmpfs_fs_type);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		printk(KERN_ERR "Could not kern_mount tmpfs\n");
		goto out1;
	}

	/* The internal instance should not do size checking */
	shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
	return 0;

out1:
#ifdef CONFIG_TMPFS
	unregister_filesystem(&shmem_fs_type);
out2:
#endif
	unregister_filesystem(&tmpfs_fs_type);
out3:
	shm_mnt = ERR_PTR(error);
	return error;
}
module_init(init_tmpfs)

/*
 * shmem_file_setup - get an unlinked file living in tmpfs
 *
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 */
struct file *shmem_file_setup(char *name, loff_t size)
{
	int error;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr this;
	int vm_enough_memory(long pages);

	if (IS_ERR(shm_mnt))
		return (void *)shm_mnt;	/* propagate kern_mount() error */

	if (size > SHMEM_MAX_BYTES)
		return ERR_PTR(-EINVAL);

	if (!vm_enough_memory(VM_ACCT(size)))
		return ERR_PTR(-ENOMEM);

	this.name = name;
	this.len = strlen(name);
	this.hash = 0; /* will go */
	root = shm_mnt->mnt_root;
	dentry = d_alloc(root, &this);
	if (!dentry)
		return ERR_PTR(-ENOMEM);

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto put_dentry;

	error = -ENOSPC;
	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto close_file;

	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;	/* It is unlinked */
	file->f_vfsmnt = mntget(shm_mnt);
	file->f_dentry = dentry;
	file->f_op = &shmem_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return file;

close_file:
	put_filp(file);
put_dentry:
	dput(dentry);
	return ERR_PTR(error);
}
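/*
 * Usage sketch (illustrative, error handling abbreviated): SysV
 * shared memory is the typical in-kernel caller,
 *
 *	struct file *filp = shmem_file_setup(name, size);
 *	if (IS_ERR(filp))
 *		return PTR_ERR(filp);
 *	...
 *	fput(filp);
 *
 * Dropping the final reference frees the unlinked inode together
 * with all of its pages.
 */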

/*
 * shmem_zero_setup - setup a shared anonymous mapping
 *
 * @vma: the vma to be mmapped; prepared by do_mmap_pgoff
 */
int shmem_zero_setup(struct vm_area_struct *vma)
{
	struct file *file;
	loff_t size = vma->vm_end - vma->vm_start;

	file = shmem_file_setup("dev/zero", size);
	if (IS_ERR(file))
		return PTR_ERR(file);

	if (vma->vm_file)
		fput(vma->vm_file);
	vma->vm_file = file;
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}
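/*
 * Call-path sketch (illustrative): a user mapping such as
 *
 *	ptr = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *
 * reaches do_mmap_pgoff(), which calls shmem_zero_setup(vma) so that
 * the shared anonymous region is backed by an unlinked tmpfs file.
 */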

EXPORT_SYMBOL(shmem_file_setup);