OSDN Git Service

ext4: move inode extension/truncate code out from ->iomap_end() callback
author Matthew Bobrowski <mbobrowski@mbobrowski.org>
Tue, 5 Nov 2019 12:01:51 +0000 (23:01 +1100)
committer Theodore Ts'o <tytso@mit.edu>
Tue, 5 Nov 2019 16:31:40 +0000 (11:31 -0500)
In preparation for implementing the iomap direct I/O modifications,
the inode extension/truncate code needs to be moved out from the
ext4_iomap_end() callback. For direct I/O, if the current code
remained, it would behave incorrectly. Updating the inode size prior
to converting unwritten extents would potentially allow a racing
direct I/O read to find unwritten extents before they have been
converted.

The inode extension/truncate code now resides within a new helper
ext4_handle_inode_extension(). This function has been designed so that
it can accommodate both DAX and direct I/O extension/truncate
operations.

Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/d41ffa26e20b15b12895812c3cad7c91a6a59bc6.1572949325.git.mbobrowski@mbobrowski.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/file.c
fs/ext4/inode.c

index 440f4c6..ec54fec 100644 (file)
@@ -33,6 +33,7 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "truncate.h"
 
 static bool ext4_dio_supported(struct inode *inode)
 {
@@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
        return iov_iter_count(from);
 }
 
+/*
+ * Complete a write that may have extended the inode: @count bytes were
+ * requested at @offset and @written is the byte count actually written, or a
+ * negative errno. Updates the inode size, truncates blocks allocated past the
+ * written range and removes the inode from the orphan list as needed.
+ *
+ * Returns @written, or a negative errno if a journal handle cannot be started.
+ */
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+                                          ssize_t written, size_t count)
+{
+       handle_t *handle;
+       bool truncate = false;
+       u8 blkbits = inode->i_blkbits;
+       /*
+        * These hold block-aligned *byte* offsets, not logical block numbers,
+        * so they must be able to represent the full loff_t range.
+        */
+       loff_t written_end, alloc_end;
+
+       /*
+        * Note that EXT4_I(inode)->i_disksize can get extended up to
+        * inode->i_size while the I/O was running due to writeback of delalloc
+        * blocks. But, the code in ext4_iomap_alloc() is careful to use
+        * zeroed/unwritten extents if this is possible; thus we won't leave
+        * uninitialized blocks in a file even if we didn't succeed in writing
+        * as much as we intended.
+        */
+       WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+       if (offset + count <= EXT4_I(inode)->i_disksize) {
+               /*
+                * We need to ensure that the inode is removed from the orphan
+                * list if it has been added prematurely, due to writeback of
+                * delalloc blocks.
+                */
+               if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+                       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+                       if (IS_ERR(handle)) {
+                               ext4_orphan_del(NULL, inode);
+                               return PTR_ERR(handle);
+                       }
+
+                       ext4_orphan_del(handle, inode);
+                       ext4_journal_stop(handle);
+               }
+
+               return written;
+       }
+
+       if (written < 0)
+               goto truncate;
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+       if (IS_ERR(handle)) {
+               written = PTR_ERR(handle);
+               goto truncate;
+       }
+
+       if (ext4_update_inode_size(inode, offset + written))
+               ext4_mark_inode_dirty(handle, inode);
+
+       /*
+        * We may need to truncate allocated but not written blocks beyond EOF.
+        * Compare the block-aligned end of what was written with the
+        * block-aligned end of what was requested.
+        */
+       written_end = ALIGN(offset + written, 1 << blkbits);
+       alloc_end = ALIGN(offset + count, 1 << blkbits);
+       if (written_end < alloc_end && ext4_can_truncate(inode))
+               truncate = true;
+
+       /*
+        * Remove the inode from the orphan list if it has been extended and
+        * everything went OK.
+        */
+       if (!truncate && inode->i_nlink)
+               ext4_orphan_del(handle, inode);
+       ext4_journal_stop(handle);
+
+       if (truncate) {
+truncate:
+               ext4_truncate_failed_write(inode);
+               /*
+                * If the truncate operation failed early, then the inode may
+                * still be on the orphan list. In that case, we need to try
+                * remove the inode from the in-memory linked list.
+                */
+               if (inode->i_nlink)
+                       ext4_orphan_del(NULL, inode);
+       }
+
+       return written;
+}
+
 #ifdef CONFIG_FS_DAX
 static ssize_t
 ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-       struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;
+       size_t count;
+       loff_t offset;
+       struct inode *inode = file_inode(iocb->ki_filp);
 
        if (!inode_trylock(inode)) {
                if (iocb->ki_flags & IOCB_NOWAIT)
@@ -256,7 +340,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (ret)
                goto out;
 
+       offset = iocb->ki_pos;
+       count = iov_iter_count(from);
        ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+       ret = ext4_handle_inode_extension(inode, offset, ret, count);
 out:
        inode_unlock(inode);
        if (ret > 0)
index 9bd80df..071a1f9 100644 (file)
@@ -3583,53 +3583,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
                          ssize_t written, unsigned flags, struct iomap *iomap)
 {
-       int ret = 0;
-       handle_t *handle;
-       int blkbits = inode->i_blkbits;
-       bool truncate = false;
-
-       if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
-               return 0;
-
-       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               goto orphan_del;
-       }
-       if (ext4_update_inode_size(inode, offset + written))
-               ext4_mark_inode_dirty(handle, inode);
-       /*
-        * We may need to truncate allocated but not written blocks beyond EOF.
-        */
-       if (iomap->offset + iomap->length > 
-           ALIGN(inode->i_size, 1 << blkbits)) {
-               ext4_lblk_t written_blk, end_blk;
-
-               written_blk = (offset + written) >> blkbits;
-               end_blk = (offset + length) >> blkbits;
-               if (written_blk < end_blk && ext4_can_truncate(inode))
-                       truncate = true;
-       }
-       /*
-        * Remove inode from orphan list if we were extending a inode and
-        * everything went fine.
-        */
-       if (!truncate && inode->i_nlink &&
-           !list_empty(&EXT4_I(inode)->i_orphan))
-               ext4_orphan_del(handle, inode);
-       ext4_journal_stop(handle);
-       if (truncate) {
-               ext4_truncate_failed_write(inode);
-orphan_del:
-               /*
-                * If truncate failed early the inode might still be on the
-                * orphan list; we need to make sure the inode is removed from
-                * the orphan list in that case.
-                */
-               if (inode->i_nlink)
-                       ext4_orphan_del(NULL, inode);
-       }
-       return ret;
+       return 0;
 }
 
 const struct iomap_ops ext4_iomap_ops = {