OSDN Git Service

f2fs: dax: fix races between page faults and truncating pages
authorQiuyang Sun <sunqiuyang@huawei.com>
Thu, 18 May 2017 03:06:45 +0000 (11:06 +0800)
committerJaegeuk Kim <jaegeuk@kernel.org>
Thu, 13 Jul 2017 23:56:34 +0000 (16:56 -0700)
Currently in F2FS, page faults and operations that truncate the pagecahe
or data blocks, are completely unsynchronized. This can result in page
fault faulting in a page into a range that we are changing after
truncating, and thus we can end up with a page mapped to disk blocks that
will be shortly freed. Filesystem corruption will shortly follow.

This patch fixes the problem by creating new rw semaphore i_mmap_sem in
f2fs_inode_info and grab it for functions removing blocks from extent tree
and for read over page faults. The mechanism is similar to that in ext4.

Signed-off-by: Qiuyang Sun <sunqiuyang@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
 Conflicts:
fs/f2fs/file.c

fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/super.c

index f61ab75..5004898 100644 (file)
@@ -1799,8 +1799,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
        loff_t i_size = i_size_read(inode);
 
        if (to > i_size) {
+               down_write(&F2FS_I(inode)->i_mmap_sem);
                truncate_pagecache(inode, i_size);
                truncate_blocks(inode, i_size, true);
+               up_write(&F2FS_I(inode)->i_mmap_sem);
        }
 }
 
index f354732..da7bb61 100644 (file)
@@ -586,6 +586,7 @@ struct f2fs_inode_info {
        struct mutex inmem_lock;        /* lock for inmemory pages */
        struct extent_tree *extent_tree;        /* cached extent_tree entry */
        struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
+       struct rw_semaphore i_mmap_sem;
 };
 
 static inline void get_extent_info(struct extent_info *ext,
index f87eeb0..447dd12 100644 (file)
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
+static int f2fs_filemap_fault(struct vm_area_struct *vma,
+                                       struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+
+       down_read(&F2FS_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&F2FS_I(inode)->i_mmap_sem);
+
+       return err;
+}
+
 static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
                                                struct vm_fault *vmf)
 {
@@ -61,13 +74,14 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
        f2fs_balance_fs(sbi, dn.node_changed);
 
        file_update_time(vma->vm_file);
+       down_read(&F2FS_I(inode)->i_mmap_sem);
        lock_page(page);
        if (unlikely(page->mapping != inode->i_mapping ||
                        page_offset(page) > i_size_read(inode) ||
                        !PageUptodate(page))) {
                unlock_page(page);
                err = -EFAULT;
-               goto out;
+               goto out_sem;
        }
 
        /*
@@ -96,6 +110,8 @@ mapped:
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
 
+out_sem:
+       up_read(&F2FS_I(inode)->i_mmap_sem);
 out:
        sb_end_pagefault(inode->i_sb);
        f2fs_update_time(sbi, REQ_TIME);
@@ -103,7 +119,7 @@ out:
 }
 
 static const struct vm_operations_struct f2fs_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = f2fs_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = f2fs_vm_page_mkwrite,
 };
@@ -681,8 +697,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                        return -EACCES;
 
                if (attr->ia_size <= i_size_read(inode)) {
+                       down_write(&F2FS_I(inode)->i_mmap_sem);
                        truncate_setsize(inode, attr->ia_size);
                        err = f2fs_truncate(inode);
+                       up_write(&F2FS_I(inode)->i_mmap_sem);
                        if (err)
                                return err;
                } else {
@@ -690,7 +708,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                         * do not trim all blocks after i_size if target size is
                         * larger than i_size.
                         */
+                       down_write(&F2FS_I(inode)->i_mmap_sem);
                        truncate_setsize(inode, attr->ia_size);
+                       up_write(&F2FS_I(inode)->i_mmap_sem);
 
                        /* should convert inline inode here */
                        if (!f2fs_may_inline_data(inode)) {
@@ -836,12 +856,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
                        blk_start = (loff_t)pg_start << PAGE_SHIFT;
                        blk_end = (loff_t)pg_end << PAGE_SHIFT;
+                       down_write(&F2FS_I(inode)->i_mmap_sem);
                        truncate_inode_pages_range(mapping, blk_start,
                                        blk_end - 1);
 
                        f2fs_lock_op(sbi);
                        ret = truncate_hole(inode, pg_start, pg_end);
                        f2fs_unlock_op(sbi);
+                       up_write(&F2FS_I(inode)->i_mmap_sem);
                }
        }
 
@@ -1080,16 +1102,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        pg_start = offset >> PAGE_SHIFT;
        pg_end = (offset + len) >> PAGE_SHIFT;
 
+       down_write(&F2FS_I(inode)->i_mmap_sem);
        /* write out all dirty pages from offset */
        ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
        if (ret)
-               return ret;
+               goto out;
 
        truncate_pagecache(inode, offset);
 
        ret = f2fs_do_collapse(inode, pg_start, pg_end);
        if (ret)
-               return ret;
+               goto out;
 
        /* write out all moved pages, if possible */
        filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1102,6 +1125,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        if (!ret)
                f2fs_i_size_write(inode, new_size);
 
+out:
+       up_write(&F2FS_I(inode)->i_mmap_sem);
        return ret;
 }
 
@@ -1166,9 +1191,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
        if (ret)
                return ret;
 
+       down_write(&F2FS_I(inode)->i_mmap_sem);
        ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
        if (ret)
-               return ret;
+               goto out_sem;
 
        truncate_pagecache_range(inode, offset, offset + len - 1);
 
@@ -1182,7 +1208,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
                ret = fill_zero(inode, pg_start, off_start,
                                                off_end - off_start);
                if (ret)
-                       return ret;
+                       goto out_sem;
 
                new_size = max_t(loff_t, new_size, offset + len);
        } else {
@@ -1190,7 +1216,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
                        ret = fill_zero(inode, pg_start++, off_start,
                                                PAGE_SIZE - off_start);
                        if (ret)
-                               return ret;
+                               goto out_sem;
 
                        new_size = max_t(loff_t, new_size,
                                        (loff_t)pg_start << PAGE_SHIFT);
@@ -1239,6 +1265,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 out:
        if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
                f2fs_i_size_write(inode, new_size);
+out_sem:
+       up_write(&F2FS_I(inode)->i_mmap_sem);
 
        return ret;
 }
@@ -1268,14 +1296,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
        f2fs_balance_fs(sbi, true);
 
+       down_write(&F2FS_I(inode)->i_mmap_sem);
        ret = truncate_blocks(inode, i_size_read(inode), true);
        if (ret)
-               return ret;
+               goto out;
 
        /* write out all dirty pages from offset */
        ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
        if (ret)
-               return ret;
+               goto out;
 
        truncate_pagecache(inode, offset);
 
@@ -1304,6 +1333,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
        if (!ret)
                f2fs_i_size_write(inode, new_size);
+out:
+       up_write(&F2FS_I(inode)->i_mmap_sem);
        return ret;
 }
 
index 9c9a01f..dc69af8 100644 (file)
@@ -624,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
        mutex_init(&fi->inmem_lock);
        init_rwsem(&fi->dio_rwsem[READ]);
        init_rwsem(&fi->dio_rwsem[WRITE]);
+       init_rwsem(&fi->i_mmap_sem);
 
        /* Will be used by directory only */
        fi->i_dir_level = F2FS_SB(sb)->dir_level;