xfs: flush entire last page of old EOF on truncate up

author Dave Chinner <dchinner@redhat.com>

Tue, 23 Sep 2014 12:55:00 +0000 (22:55 +1000)

committer Dave Chinner <david@fromorbit.com>

Tue, 23 Sep 2014 12:55:00 +0000 (22:55 +1000)
author Dave Chinner <dchinner@redhat.com>
Tue, 23 Sep 2014 12:55:00 +0000 (22:55 +1000)
committer Dave Chinner <david@fromorbit.com>
Tue, 23 Sep 2014 12:55:00 +0000 (22:55 +1000)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index 7212949..ec6dcdc 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -849,6 +849,36 @@ xfs_setattr_size(
                 return error;
         truncate_setsize(inode, newsize);
  
+       /*
+        * The "we can't serialise against page faults" pain gets worse.
+        *
+        * If the file is mapped then we have to clean the page at the old EOF
+        * when extending the file. Extending the file can expose changes the
+        * underlying page mapping (e.g. from beyond EOF to a hole or
+        * unwritten), and so on the next attempt to write to that page we need
+        * to remap it for write. i.e. we need .page_mkwrite() to be called.
+        * Hence we need to clean the page to clean the pte and so a new write
+        * fault will be triggered appropriately.
+        *
+        * If we do it before we change the inode size, then we can race with a
+        * page fault that maps the page with exactly the same problem. If we do
+        * it after we change the file size, then a new page fault can come in
+        * and allocate space before we've run the rest of the truncate
+        * transaction. That's kinda grotesque, but it's better than have data
+        * over a hole, and so that's the lesser evil that has been chosen here.
+        *
+        * The real solution, however, is to have some mechanism for locking out
+        * page faults while a truncate is in progress.
+        */
+       if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
+               error = filemap_write_and_wait_range(
+                               VFS_I(ip)->i_mapping,
+                               round_down(oldsize, PAGE_CACHE_SIZE),
+                               round_up(oldsize, PAGE_CACHE_SIZE) - 1);
+               if (error)
+                       return error;
+       }
+
         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
         if (error)
author	Dave Chinner <dchinner@redhat.com>
	Tue, 23 Sep 2014 12:55:00 +0000 (22:55 +1000)
committer	Dave Chinner <david@fromorbit.com>
	Tue, 23 Sep 2014 12:55:00 +0000 (22:55 +1000)