btrfs: Handle delalloc error correctly to avoid ordered extent hang

author Qu Wenruo <quwenruo@cn.fujitsu.com>

Wed, 8 Mar 2017 02:25:52 +0000 (10:25 +0800)

committer Filipe Manana <fdmanana@suse.com>

Wed, 26 Apr 2017 15:27:21 +0000 (16:27 +0100)
author Qu Wenruo <quwenruo@cn.fujitsu.com>
Wed, 8 Mar 2017 02:25:52 +0000 (10:25 +0800)
committer Filipe Manana <fdmanana@suse.com>
Wed, 26 Apr 2017 15:27:21 +0000 (16:27 +0100)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 844bb89..b8b2a3c 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
                                        u64 ram_bytes, int compress_type,
                                        int type);
  
+static void __endio_write_update_ordered(struct inode *inode,
+                                        const u64 offset, const u64 bytes,
+                                        const bool uptodate);
+
+/*
+ * Cleanup all submitted ordered extents in specified range to handle errors
+ * from the fill_delalloc() callback.
+ *
+ * NOTE: on error the caller must not call extent_clear_unlock_delalloc() to
+ * clear both EXTENT_DO_ACCOUNTING and EXTENT_DELALLOC simultaneously, because
+ * that releases the reserved metadata, which we want to happen only when
+ * finishing the ordered extent (btrfs_finish_ordered_io()). The caller of the
+ * fill_delalloc() callback already does proper cleanup for the first page of
+ * the range (it invokes the callback writepage_end_io_hook() for it), so that
+ * page is skipped here; @bytes must therefore be at least PAGE_SIZE or the
+ * unsigned subtraction below wraps around.
+ */
+static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
+                                                const u64 offset,
+                                                const u64 bytes)
+{
+       __endio_write_update_ordered(inode, offset + PAGE_SIZE,
+                                    bytes - PAGE_SIZE, false);
+}
+
  static int btrfs_dirty_inode(struct inode *inode);
  
  #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -1536,6 +1561,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
                 ret = cow_file_range_async(inode, locked_page, start, end,
                                            page_started, nr_written);
         }
+       if (ret)
+               btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
         return ret;
  }
  
@@ -8154,17 +8181,26 @@ static void btrfs_endio_direct_read(struct bio *bio)
         bio_put(bio);
  }
  
-static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
-                                                   const u64 offset,
-                                                   const u64 bytes,
-                                                   const int uptodate)
+static void __endio_write_update_ordered(struct inode *inode,
+                                        const u64 offset, const u64 bytes,
+                                        const bool uptodate)
  {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
         struct btrfs_ordered_extent *ordered = NULL;
+       struct btrfs_workqueue *wq;
+       btrfs_work_func_t func;
         u64 ordered_offset = offset;
         u64 ordered_bytes = bytes;
         int ret;
  
+       if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+               wq = fs_info->endio_freespace_worker;
+               func = btrfs_freespace_write_helper;
+       } else {
+               wq = fs_info->endio_write_workers;
+               func = btrfs_endio_write_helper;
+       }
+
  again:
         ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                    &ordered_offset,
@@ -8173,9 +8209,8 @@ again:
         if (!ret)
                 goto out_test;
  
-       btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
-                       finish_ordered_fn, NULL, NULL);
-       btrfs_queue_work(fs_info->endio_write_workers, &ordered->work);
+       btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
+       btrfs_queue_work(wq, &ordered->work);
  out_test:
         /*
          * our bio might span multiple ordered extents.  If we haven't
@@ -8193,10 +8228,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
         struct btrfs_dio_private *dip = bio->bi_private;
         struct bio *dio_bio = dip->dio_bio;
  
-       btrfs_endio_direct_write_update_ordered(dip->inode,
-                                               dip->logical_offset,
-                                               dip->bytes,
-                                               !bio->bi_error);
+       __endio_write_update_ordered(dip->inode, dip->logical_offset,
+                                    dip->bytes, !bio->bi_error);
  
         kfree(dip);
  
@@ -8557,10 +8590,10 @@ free_ordered:
                 io_bio = NULL;
         } else {
                 if (write)
-                       btrfs_endio_direct_write_update_ordered(inode,
+                       __endio_write_update_ordered(inode,
                                                 file_offset,
                                                 dio_bio->bi_iter.bi_size,
-                                               0);
+                                               false);
                 else
                         unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                               file_offset + dio_bio->bi_iter.bi_size - 1);
@@ -8695,11 +8728,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                          */
                         if (dio_data.unsubmitted_oe_range_start <
                             dio_data.unsubmitted_oe_range_end)
-                               btrfs_endio_direct_write_update_ordered(inode,
+                               __endio_write_update_ordered(inode,
                                         dio_data.unsubmitted_oe_range_start,
                                         dio_data.unsubmitted_oe_range_end -
                                         dio_data.unsubmitted_oe_range_start,
-                                       0);
+                                       false);
                 } else if (ret >= 0 && (size_t)ret < count)
                         btrfs_delalloc_release_space(inode, offset,
                                                      count - (size_t)ret);
author	Qu Wenruo <quwenruo@cn.fujitsu.com>
	Wed, 8 Mar 2017 02:25:52 +0000 (10:25 +0800)
committer	Filipe Manana <fdmanana@suse.com>
	Wed, 26 Apr 2017 15:27:21 +0000 (16:27 +0100)