xfs: avoid COW fork extent lookups in writeback if the fork didn't change

author Christoph Hellwig <hch@lst.de>

Tue, 17 Jul 2018 23:51:52 +0000 (16:51 -0700)

committer Darrick J. Wong <darrick.wong@oracle.com>

Tue, 31 Jul 2018 20:18:09 +0000 (13:18 -0700)
author Christoph Hellwig <hch@lst.de>
Tue, 17 Jul 2018 23:51:52 +0000 (16:51 -0700)
committer Darrick J. Wong <darrick.wong@oracle.com>
Tue, 31 Jul 2018 20:18:09 +0000 (13:18 -0700)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index 814100d..235b4dd 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -29,6 +29,7 @@
  struct xfs_writepage_ctx {
         struct xfs_bmbt_irec    imap;
         unsigned int            io_type;
+       unsigned int            cow_seq;
         struct xfs_ioend        *ioend;
  };
  
@@ -310,6 +311,7 @@ xfs_map_blocks(
         struct xfs_mount        *mp = ip->i_mount;
         ssize_t                 count = i_blocksize(inode);
         xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+       xfs_fileoff_t           cow_fsb = NULLFILEOFF;
         struct xfs_bmbt_irec    imap;
         int                     whichfork = XFS_DATA_FORK;
         struct xfs_iext_cursor  icur;
@@ -333,12 +335,23 @@ xfs_map_blocks(
          * COW fork blocks can overlap data fork blocks even if the blocks
          * aren't shared.  COW I/O always takes precedent, so we must always
          * check for overlap on reflink inodes unless the mapping is already a
-        * COW one.
+        * COW one, or the COW fork hasn't changed from the last time we looked
+        * at it.
+        *
+        * It's safe to check the COW fork if_seq here without the ILOCK because
+        * we've indirectly protected against concurrent updates: writeback has
+        * the page locked, which prevents concurrent invalidations by reflink
+        * and directio and prevents concurrent buffered writes to the same
+        * page.  Changes to if_seq always happen under i_lock, which protects
+        * against concurrent updates and provides a memory barrier on the way
+        * out that ensures that we always see the current value.
          */
         imap_valid = offset_fsb >= wpc->imap.br_startoff &&
                      offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
         if (imap_valid &&
-           (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
+           (!xfs_inode_has_cow_data(ip) ||
+            wpc->io_type == XFS_IO_COW ||
+            wpc->cow_seq == ip->i_cowfp->if_seq))
                 return 0;
  
         if (XFS_FORCED_SHUTDOWN(mp))
@@ -364,8 +377,10 @@ xfs_map_blocks(
          * it directly instead of looking up anything in the data fork.
          */
         if (xfs_inode_has_cow_data(ip) &&
-           xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
-           imap.br_startoff <= offset_fsb) {
+           xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
+               cow_fsb = imap.br_startoff;
+       if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+               wpc->cow_seq = ip->i_cowfp->if_seq;
                 xfs_iunlock(ip, XFS_ILOCK_SHARED);
                 /*
                  * Truncate can race with writeback since writeback doesn't
@@ -411,6 +426,16 @@ xfs_map_blocks(
                 imap.br_startblock = HOLESTARTBLOCK;
                 wpc->io_type = XFS_IO_HOLE;
         } else {
+               /*
+                * Truncate to the next COW extent if there is one.  This is the
+                * only opportunity to do this because we can skip COW fork
+                * lookups for the subsequent blocks in the mapping; however,
+                * the requirement to treat the COW range separately remains.
+                */
+               if (cow_fsb != NULLFILEOFF &&
+                   cow_fsb < imap.br_startoff + imap.br_blockcount)
+                       imap.br_blockcount = cow_fsb - imap.br_startoff;
+
                 if (isnullstartblock(imap.br_startblock)) {
                         /* got a delalloc extent */
                         wpc->io_type = XFS_IO_DELALLOC;
@@ -427,9 +452,12 @@ xfs_map_blocks(
         trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
         return 0;
  allocate_blocks:
-       error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+       error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
+                       &wpc->cow_seq);
         if (error)
                 return error;
+       ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+              imap.br_startoff + imap.br_blockcount <= cow_fsb);
         wpc->imap = imap;
         trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
         return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 8e8ca9f..3282575 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -651,7 +651,8 @@ xfs_iomap_write_allocate(
         xfs_inode_t     *ip,
         int             whichfork,
         xfs_off_t       offset,
-       xfs_bmbt_irec_t *imap)
+       xfs_bmbt_irec_t *imap,
+       unsigned int    *cow_seq)
  {
         xfs_mount_t     *mp = ip->i_mount;
         xfs_fileoff_t   offset_fsb, last_block;
@@ -766,6 +767,8 @@ xfs_iomap_write_allocate(
                         if (error)
                                 goto error0;
  
+                       if (whichfork == XFS_COW_FORK)
+                               *cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
                 }
  
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h

index 83474c9..c617054 100644 (file)
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
  int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
                         struct xfs_bmbt_irec *, int);
  int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-                       struct xfs_bmbt_irec *);
+                       struct xfs_bmbt_irec *, unsigned int *);
  int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
  
  void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
author	Christoph Hellwig <hch@lst.de>
	Tue, 17 Jul 2018 23:51:52 +0000 (16:51 -0700)
committer	Darrick J. Wong <darrick.wong@oracle.com>
	Tue, 31 Jul 2018 20:18:09 +0000 (13:18 -0700)
fs/xfs/xfs_aops.c		patch \| blob \| history
fs/xfs/xfs_iomap.c		patch \| blob \| history
fs/xfs/xfs_iomap.h		patch \| blob \| history