Merge branch 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h

index 81220b2..0ef5cc1 100644 (file)
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,8 +44,6 @@
  #define BTRFS_INODE_IN_DELALLOC_LIST           9
  #define BTRFS_INODE_READDIO_NEED_LOCK          10
  #define BTRFS_INODE_HAS_PROPS                  11
-/* DIO is ready to submit */
-#define BTRFS_INODE_DIO_READY                  12
  /*
   * The following 3 bits are meant only for the btree inode.
   * When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 0d98aee..295795a 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3765,9 +3765,7 @@ void close_ctree(struct btrfs_root *root)
                  * block groups queued for removal, the deletion will be
                  * skipped when we quit the cleaner thread.
                  */
-               mutex_lock(&root->fs_info->cleaner_mutex);
                 btrfs_delete_unused_bgs(root->fs_info);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
  
                 ret = btrfs_commit_super(root);
                 if (ret)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 5411f0a..9f96042 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3742,10 +3742,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
         found->bytes_reserved = 0;
         found->bytes_readonly = 0;
         found->bytes_may_use = 0;
-       if (total_bytes > 0)
-               found->full = 0;
-       else
-               found->full = 1;
+       found->full = 0;
         found->force_alloc = CHUNK_ALLOC_NO_FORCE;
         found->chunk_alloc = 0;
         found->flush = 0;
@@ -8668,7 +8665,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
         }
  
         if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
-               btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
+               btrfs_add_dropped_root(trans, root);
         } else {
                 free_extent_buffer(root->node);
                 free_extent_buffer(root->commit_root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index f1018cf..e2357e3 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                               bio_end_io_t end_io_func,
                               int mirror_num,
                               unsigned long prev_bio_flags,
-                             unsigned long bio_flags)
+                             unsigned long bio_flags,
+                             bool force_bio_submit)
  {
         int ret = 0;
         struct bio *bio;
@@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                         contig = bio_end_sector(bio) == sector;
  
                 if (prev_bio_flags != bio_flags || !contig ||
+                   force_bio_submit ||
                     merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
                     bio_add_page(bio, page, page_size, offset) < page_size) {
                         ret = submit_one_bio(rw, bio, mirror_num,
@@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                          get_extent_t *get_extent,
                          struct extent_map **em_cached,
                          struct bio **bio, int mirror_num,
-                        unsigned long *bio_flags, int rw)
+                        unsigned long *bio_flags, int rw,
+                        u64 *prev_em_start)
  {
         struct inode *inode = page->mapping->host;
         u64 start = page_offset(page);
@@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree,
         }
         while (cur <= end) {
                 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+               bool force_bio_submit = false;
  
                 if (cur >= last_byte) {
                         char *userpage;
@@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree,
                 block_start = em->block_start;
                 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                         block_start = EXTENT_MAP_HOLE;
+
+               /*
+                * If we have a file range that points to a compressed extent
+                * and it's followed by a consecutive file range that points to
+                * the same compressed extent (possibly with a different
+                * offset and/or length, so it either points to the whole extent
+                * or only part of it), we must make sure we do not submit a
+                * single bio to populate the pages for the 2 ranges because
+                * this makes the compressed extent read zero out the pages
+                * belonging to the 2nd range. Imagine the following scenario:
+                *
+                *  File layout
+                *  [0 - 8K]                     [8K - 24K]
+                *    |                               |
+                *    |                               |
+                * points to extent X,         points to extent X,
+                * offset 4K, length of 8K     offset 0, length 16K
+                *
+                * [extent X, compressed length = 4K uncompressed length = 16K]
+                *
+                * If the bio to read the compressed extent covers both ranges,
+                * it will decompress extent X into the pages belonging to the
+                * first range and then it will stop, zeroing out the remaining
+                * pages that belong to the other range that points to extent X.
+                * So here we make sure we submit 2 bios, one for the first
+                * range and another one for the second range. Both will target
+                * the same physical extent from disk, but we can't currently
+                * make the compressed bio endio callback populate the pages
+                * for both ranges because each compressed bio is tightly
+                * coupled with a single extent map, and each range can have
+                * an extent map with a different offset value relative to the
+                * uncompressed data of our extent and different lengths. This
+                * is a corner case so we prioritize correctness over
+                * optimal behavior (we submit 2 bios for the same extent).
+                */
+               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+                   prev_em_start && *prev_em_start != (u64)-1 &&
+                   *prev_em_start != em->orig_start)
+                       force_bio_submit = true;
+
+               if (prev_em_start)
+                       *prev_em_start = em->orig_start;
+
                 free_extent_map(em);
                 em = NULL;
  
@@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                                          bdev, bio, pnr,
                                          end_bio_extent_readpage, mirror_num,
                                          *bio_flags,
-                                        this_bio_flag);
+                                        this_bio_flag,
+                                        force_bio_submit);
                 if (!ret) {
                         nr++;
                         *bio_flags = this_bio_flag;
@@ -3089,6 +3137,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
         struct inode *inode;
         struct btrfs_ordered_extent *ordered;
         int index;
+       u64 prev_em_start = (u64)-1;
  
         inode = pages[0]->mapping->host;
         while (1) {
@@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
  
         for (index = 0; index < nr_pages; index++) {
                 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
-                             mirror_num, bio_flags, rw);
+                             mirror_num, bio_flags, rw, &prev_em_start);
                 page_cache_release(pages[index]);
         }
  }
@@ -3172,7 +3221,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
         }
  
         ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
-                           bio_flags, rw);
+                           bio_flags, rw, NULL);
         return ret;
  }
  
@@ -3198,7 +3247,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
         int ret;
  
         ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-                                     &bio_flags, READ);
+                           &bio_flags, READ, NULL);
         if (bio)
                 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
         return ret;
@@ -3451,7 +3500,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                                                  sector, iosize, pg_offset,
                                                  bdev, &epd->bio, max_nr,
                                                  end_bio_extent_writepage,
-                                                0, 0, 0);
+                                                0, 0, 0, false);
                         if (ret)
                                 SetPageError(page);
                 }
@@ -3754,7 +3803,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
                 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
                                          PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                          -1, end_bio_extent_buffer_writepage,
-                                        0, epd->bio_flags, bio_flags);
+                                        0, epd->bio_flags, bio_flags, false);
                 epd->bio_flags = bio_flags;
                 if (ret) {
                         set_btree_ioerr(p);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index a0fa725..611b66d 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode)
                 goto no_delete;
         }
         /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
-       btrfs_wait_ordered_range(inode, 0, (u64)-1);
+       if (!special_file(inode->i_mode))
+               btrfs_wait_ordered_range(inode, 0, (u64)-1);
  
         btrfs_free_io_failure_record(inode, 0, (u64)-1);
  
@@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
         return em;
  }
  
+struct btrfs_dio_data {
+       u64 outstanding_extents;
+       u64 reserve;
+};
  
  static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
@@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct extent_state *cached_state = NULL;
+       struct btrfs_dio_data *dio_data = NULL;
         u64 start = iblock << inode->i_blkbits;
         u64 lockstart, lockend;
         u64 len = bh_result->b_size;
-       u64 *outstanding_extents = NULL;
         int unlock_bits = EXTENT_LOCKED;
         int ret = 0;
  
@@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                  * that anything that needs to check if there's a transction doesn't get
                  * confused.
                  */
-               outstanding_extents = current->journal_info;
+               dio_data = current->journal_info;
                 current->journal_info = NULL;
         }
  
@@ -7568,17 +7573,18 @@ unlock:
                  * within our reservation, otherwise we need to adjust our inode
                  * counter appropriately.
                  */
-               if (*outstanding_extents) {
-                       (*outstanding_extents)--;
+               if (dio_data->outstanding_extents) {
+                       (dio_data->outstanding_extents)--;
                 } else {
                         spin_lock(&BTRFS_I(inode)->lock);
                         BTRFS_I(inode)->outstanding_extents++;
                         spin_unlock(&BTRFS_I(inode)->lock);
                 }
  
-               current->journal_info = outstanding_extents;
                 btrfs_free_reserved_data_space(inode, len);
-               set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
+               WARN_ON(dio_data->reserve < len);
+               dio_data->reserve -= len;
+               current->journal_info = dio_data;
         }
  
         /*
@@ -7601,8 +7607,8 @@ unlock:
  unlock_err:
         clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                          unlock_bits, 1, 0, &cached_state, GFP_NOFS);
-       if (outstanding_extents)
-               current->journal_info = outstanding_extents;
+       if (dio_data)
+               current->journal_info = dio_data;
         return ret;
  }
  
@@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
  {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
-       u64 outstanding_extents = 0;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_dio_data dio_data = { 0 };
         size_t count = 0;
         int flags = 0;
         bool wakeup = true;
@@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 ret = btrfs_delalloc_reserve_space(inode, count);
                 if (ret)
                         goto out;
-               outstanding_extents = div64_u64(count +
+               dio_data.outstanding_extents = div64_u64(count +
                                                 BTRFS_MAX_EXTENT_SIZE - 1,
                                                 BTRFS_MAX_EXTENT_SIZE);
  
@@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                  * do the accounting properly if we go over the number we
                  * originally calculated.  Abuse current->journal_info for this.
                  */
-               current->journal_info = &outstanding_extents;
+               dio_data.reserve = round_up(count, root->sectorsize);
+               current->journal_info = &dio_data;
         } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                      &BTRFS_I(inode)->runtime_flags)) {
                 inode_dio_end(inode);
@@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         if (iov_iter_rw(iter) == WRITE) {
                 current->journal_info = NULL;
                 if (ret < 0 && ret != -EIOCBQUEUED) {
-                       /*
-                        * If the error comes from submitting stage,
-                        * btrfs_get_blocsk_direct() has free'd data space,
-                        * and metadata space will be handled by
-                        * finish_ordered_fn, don't do that again to make
-                        * sure bytes_may_use is correct.
-                        */
-                       if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
-                                    &BTRFS_I(inode)->runtime_flags))
-                               btrfs_delalloc_release_space(inode, count);
+                       if (dio_data.reserve)
+                               btrfs_delalloc_release_space(inode,
+                                                       dio_data.reserve);
                 } else if (ret >= 0 && (size_t)ret < count)
                         btrfs_delalloc_release_space(inode,
                                                      count - (size_t)ret);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index 2b07b35..11d1eab 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                  * groups on disk until we're mounted read-write again
                  * unless we clean them up here.
                  */
-               mutex_lock(&root->fs_info->cleaner_mutex);
                 btrfs_delete_unused_bgs(fs_info);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
  
                 btrfs_dev_replace_suspend_for_unmount(fs_info);
                 btrfs_scrub_cancel(fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 8f259b3..74bc333 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
                         btrfs_unpin_free_ino(root);
                 clear_btree_io_tree(&root->dirty_log_pages);
         }
+
+       /* We can free old roots now. */
+       spin_lock(&trans->dropped_roots_lock);
+       while (!list_empty(&trans->dropped_roots)) {
+               root = list_first_entry(&trans->dropped_roots,
+                                       struct btrfs_root, root_list);
+               list_del_init(&root->root_list);
+               spin_unlock(&trans->dropped_roots_lock);
+               btrfs_drop_and_free_fs_root(fs_info, root);
+               spin_lock(&trans->dropped_roots_lock);
+       }
+       spin_unlock(&trans->dropped_roots_lock);
         up_write(&fs_info->commit_root_sem);
  }
  
@@ -255,11 +267,13 @@ loop:
         INIT_LIST_HEAD(&cur_trans->pending_ordered);
         INIT_LIST_HEAD(&cur_trans->dirty_bgs);
         INIT_LIST_HEAD(&cur_trans->io_bgs);
+       INIT_LIST_HEAD(&cur_trans->dropped_roots);
         mutex_init(&cur_trans->cache_write_mutex);
         cur_trans->num_dirty_bgs = 0;
         spin_lock_init(&cur_trans->dirty_bgs_lock);
         INIT_LIST_HEAD(&cur_trans->deleted_bgs);
         spin_lock_init(&cur_trans->deleted_bgs_lock);
+       spin_lock_init(&cur_trans->dropped_roots_lock);
         list_add_tail(&cur_trans->list, &fs_info->trans_list);
         extent_io_tree_init(&cur_trans->dirty_pages,
                              fs_info->btree_inode->i_mapping);
@@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
  }
  
  
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root)
+{
+       struct btrfs_transaction *cur_trans = trans->transaction;
+
+       /* Add ourselves to the transaction dropped list */
+       spin_lock(&cur_trans->dropped_roots_lock);
+       list_add_tail(&root->root_list, &cur_trans->dropped_roots);
+       spin_unlock(&cur_trans->dropped_roots_lock);
+
+       /* Make sure we don't try to update the root at commit time */
+       spin_lock(&root->fs_info->fs_roots_radix_lock);
+       radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
+                            (unsigned long)root->root_key.objectid,
+                            BTRFS_ROOT_TRANS_TAG);
+       spin_unlock(&root->fs_info->fs_roots_radix_lock);
+}
+
  int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root)
  {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h

index edc2fbc..87964bf 100644 (file)
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -65,6 +65,7 @@ struct btrfs_transaction {
         struct list_head switch_commits;
         struct list_head dirty_bgs;
         struct list_head io_bgs;
+       struct list_head dropped_roots;
         u64 num_dirty_bgs;
  
         /*
@@ -76,6 +77,7 @@ struct btrfs_transaction {
         spinlock_t dirty_bgs_lock;
         struct list_head deleted_bgs;
         spinlock_t deleted_bgs_lock;
+       spinlock_t dropped_roots_lock;
         struct btrfs_delayed_ref_root delayed_refs;
         int aborted;
         int dirty_bg_run;
@@ -216,5 +218,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info);
  int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
  void btrfs_put_transaction(struct btrfs_transaction *transaction);
  void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
-
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root);
  #endif
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
fs/btrfs/btrfs_inode.h		patch \| blob \| history
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/extent-tree.c		patch \| blob \| history
fs/btrfs/extent_io.c		patch \| blob \| history
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/super.c		patch \| blob \| history
fs/btrfs/transaction.c		patch \| blob \| history
fs/btrfs/transaction.h		patch \| blob \| history