OSDN Git Service

Merge tag 'for-5.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Dec 2019 21:27:02 +0000 (13:27 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Dec 2019 21:27:02 +0000 (13:27 -0800)
Pull btrfs fixes from David Sterba:
 "A mix of regression fixes and regular fixes for stable trees:

   - fix swapped error messages for qgroup enable/rescan

   - fixes for NO_HOLES feature with clone range

   - fix deadlock between iget/srcu lock/synchronize srcu while freeing
     an inode

   - fix double lock on subvolume cross-rename

   - tree log fixes
      * fix missing data checksums after replaying a log tree
      * also teach tree-checker about this problem
      * skip log replay on orphaned roots

   - fix maximum devices constraints for RAID1C -3 and -4

   - send: don't print warning on read-only mount regarding orphan
     cleanup

   - error handling fixes"

* tag 'for-5.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: send: remove WARN_ON for readonly mount
  btrfs: do not leak reloc root if we fail to read the fs root
  btrfs: skip log replay on orphaned roots
  btrfs: handle ENOENT in btrfs_uuid_tree_iterate
  btrfs: abort transaction after failed inode updates in create_subvol
  Btrfs: fix hole extent items with a zero size after range cloning
  Btrfs: fix removal logic of the tree mod log that leads to use-after-free issues
  Btrfs: make tree checker detect checksum items with overlapping ranges
  Btrfs: fix missing data checksums after replaying a log tree
  btrfs: return error pointer from alloc_test_extent_buffer
  btrfs: fix devs_max constraints for raid1c3 and raid1c4
  btrfs: tree-checker: Fix error format string for size_t
  btrfs: don't double lock the subvol_sem for rename exchange
  btrfs: handle error in btrfs_cache_block_group
  btrfs: do not call synchronize_srcu() in inode_tree_del
  Btrfs: fix cloning range with a hole when using the NO_HOLES feature
  btrfs: Fix error messages in qgroup_rescan_init

17 files changed:
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/tests/free-space-tree-tests.c
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/tree-checker.c
fs/btrfs/tree-log.c
fs/btrfs/uuid-tree.c
fs/btrfs/volumes.c

index 5b6e86a..24658b5 100644 (file)
@@ -379,7 +379,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
        for (node = rb_first(tm_root); node; node = next) {
                next = rb_next(node);
                tm = rb_entry(node, struct tree_mod_elem, node);
-               if (tm->seq > min_seq)
+               if (tm->seq >= min_seq)
                        continue;
                rb_erase(node, tm_root);
                kfree(tm);
index b2e8fd8..54efb21 100644 (file)
@@ -2787,7 +2787,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
 /* file-item.c */
 struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
-                   struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
+                   struct btrfs_root *root, u64 bytenr, u64 len);
 blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
                                   u8 *dst);
 blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
index 153f71a..274318e 100644 (file)
@@ -1869,8 +1869,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
                btrfs_pin_extent(fs_info, head->bytenr,
                                 head->num_bytes, 1);
                if (head->is_data) {
-                       ret = btrfs_del_csums(trans, fs_info, head->bytenr,
-                                             head->num_bytes);
+                       ret = btrfs_del_csums(trans, fs_info->csum_root,
+                                             head->bytenr, head->num_bytes);
                }
        }
 
@@ -3175,7 +3175,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                btrfs_release_path(path);
 
                if (is_data) {
-                       ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
+                       ret = btrfs_del_csums(trans, info->csum_root, bytenr,
+                                             num_bytes);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                goto out;
@@ -3799,6 +3800,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
                                u64 flags, int delalloc)
 {
        int ret = 0;
+       int cache_block_group_error = 0;
        struct btrfs_free_cluster *last_ptr = NULL;
        struct btrfs_block_group *block_group = NULL;
        struct find_free_extent_ctl ffe_ctl = {0};
@@ -3958,7 +3960,20 @@ have_block_group:
                if (unlikely(!ffe_ctl.cached)) {
                        ffe_ctl.have_caching_bg = true;
                        ret = btrfs_cache_block_group(block_group, 0);
-                       BUG_ON(ret < 0);
+
+                       /*
+                        * If we get ENOMEM here or something else we want to
+                        * try other block groups, because it may not be fatal.
+                        * However if we can't find anything else we need to
+                        * save our return here so that we return the actual
+                        * error that caused problems, not ENOSPC.
+                        */
+                       if (ret < 0) {
+                               if (!cache_block_group_error)
+                                       cache_block_group_error = ret;
+                               ret = 0;
+                               goto loop;
+                       }
                        ret = 0;
                }
 
@@ -4045,7 +4060,7 @@ loop:
        if (ret > 0)
                goto search;
 
-       if (ret == -ENOSPC) {
+       if (ret == -ENOSPC && !cache_block_group_error) {
                /*
                 * Use ffe_ctl->total_free_space as fallback if we can't find
                 * any contiguous hole.
@@ -4056,6 +4071,8 @@ loop:
                space_info->max_extent_size = ffe_ctl.max_extent_size;
                spin_unlock(&space_info->lock);
                ins->offset = ffe_ctl.max_extent_size;
+       } else if (ret == -ENOSPC) {
+               ret = cache_block_group_error;
        }
        return ret;
 }
index eb8bd02..2f4802f 100644 (file)
@@ -5074,12 +5074,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
                return eb;
        eb = alloc_dummy_extent_buffer(fs_info, start);
        if (!eb)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
        eb->fs_info = fs_info;
 again:
        ret = radix_tree_preload(GFP_NOFS);
-       if (ret)
+       if (ret) {
+               exists = ERR_PTR(ret);
                goto free_eb;
+       }
        spin_lock(&fs_info->buffer_lock);
        ret = radix_tree_insert(&fs_info->buffer_radix,
                                start >> PAGE_SHIFT, eb);
index 3270a40..b1bfdc5 100644 (file)
@@ -590,9 +590,9 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
  * range of bytes.
  */
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
-                   struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+                   struct btrfs_root *root, u64 bytenr, u64 len)
 {
-       struct btrfs_root *root = fs_info->csum_root;
+       struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_path *path;
        struct btrfs_key key;
        u64 end_byte = bytenr + len;
@@ -602,6 +602,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
        u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        int blocksize_bits = fs_info->sb->s_blocksize_bits;
 
+       ASSERT(root == fs_info->csum_root ||
+              root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
index 0cb43b6..8d47c76 100644 (file)
@@ -2599,8 +2599,8 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
                        }
                }
 
-               if (clone_info) {
-                       u64 clone_len = drop_end - cur_offset;
+               if (clone_info && drop_end > clone_info->file_offset) {
+                       u64 clone_len = drop_end - clone_info->file_offset;
 
                        ret = btrfs_insert_clone_extent(trans, inode, path,
                                                        clone_info, clone_len);
index 56032c5..e3c7664 100644 (file)
@@ -5728,7 +5728,6 @@ static void inode_tree_add(struct inode *inode)
 
 static void inode_tree_del(struct inode *inode)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int empty = 0;
 
@@ -5741,7 +5740,6 @@ static void inode_tree_del(struct inode *inode)
        spin_unlock(&root->inode_lock);
 
        if (empty && btrfs_root_refs(&root->root_item) == 0) {
-               synchronize_srcu(&fs_info->subvol_srcu);
                spin_lock(&root->inode_lock);
                empty = RB_EMPTY_ROOT(&root->inode_tree);
                spin_unlock(&root->inode_lock);
@@ -9556,9 +9554,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        btrfs_init_log_ctx(&ctx_dest, new_inode);
 
        /* close the race window with snapshot create/destroy ioctl */
-       if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
-               down_read(&fs_info->subvol_sem);
-       if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+       if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+           new_ino == BTRFS_FIRST_FREE_OBJECTID)
                down_read(&fs_info->subvol_sem);
 
        /*
@@ -9792,9 +9789,8 @@ out_fail:
                ret = ret ? ret : ret2;
        }
 out_notrans:
-       if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
-               up_read(&fs_info->subvol_sem);
-       if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+       if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
+           old_ino == BTRFS_FIRST_FREE_OBJECTID)
                up_read(&fs_info->subvol_sem);
 
        ASSERT(list_empty(&ctx_root.list));
index a1ee0b7..18e328c 100644 (file)
@@ -704,11 +704,17 @@ static noinline int create_subvol(struct inode *dir,
 
        btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
        ret = btrfs_update_inode(trans, root, dir);
-       BUG_ON(ret);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto fail;
+       }
 
        ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
                                 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
-       BUG_ON(ret);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto fail;
+       }
 
        ret = btrfs_uuid_tree_add(trans, root_item->uuid,
                                  BTRFS_UUID_KEY_SUBVOL, objectid);
@@ -3720,24 +3726,18 @@ process_slot:
        ret = 0;
 
        if (last_dest_end < destoff + len) {
-               struct btrfs_clone_extent_info clone_info = { 0 };
                /*
-                * We have an implicit hole (NO_HOLES feature is enabled) that
-                * fully or partially overlaps our cloning range at its end.
+                * We have an implicit hole that fully or partially overlaps our
+                * cloning range at its end. This means that we either have the
+                * NO_HOLES feature enabled or the implicit hole happened due to
+                * mixing buffered and direct IO writes against this file.
                 */
                btrfs_release_path(path);
                path->leave_spinning = 0;
 
-               /*
-                * We are dealing with a hole and our clone_info already has a
-                * disk_offset of 0, we only need to fill the data length and
-                * file offset.
-                */
-               clone_info.data_len = destoff + len - last_dest_end;
-               clone_info.file_offset = last_dest_end;
                ret = btrfs_punch_hole_range(inode, path,
                                             last_dest_end, destoff + len - 1,
-                                            &clone_info, &trans);
+                                            NULL, &trans);
                if (ret)
                        goto out;
 
index 93aeb2e..d4282e1 100644 (file)
@@ -3232,12 +3232,12 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                if (!(fs_info->qgroup_flags &
                      BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
                        btrfs_warn(fs_info,
-                       "qgroup rescan init failed, qgroup is not enabled");
+                       "qgroup rescan init failed, qgroup rescan is not queued");
                        ret = -EINVAL;
                } else if (!(fs_info->qgroup_flags &
                             BTRFS_QGROUP_STATUS_FLAG_ON)) {
                        btrfs_warn(fs_info,
-                       "qgroup rescan init failed, qgroup rescan is not queued");
+                       "qgroup rescan init failed, qgroup is not enabled");
                        ret = -EINVAL;
                }
 
index d897a8e..c582457 100644 (file)
@@ -4552,6 +4552,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
                fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
                if (IS_ERR(fs_root)) {
                        err = PTR_ERR(fs_root);
+                       list_add_tail(&reloc_root->root_list, &reloc_roots);
                        goto out_free;
                }
 
index ae2db5e..091e5bc 100644 (file)
@@ -7084,12 +7084,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
        spin_unlock(&send_root->root_item_lock);
 
        /*
-        * This is done when we lookup the root, it should already be complete
-        * by the time we get here.
-        */
-       WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
-
-       /*
         * Userspace tools do the checks and warn the user if it's
         * not RO.
         */
index 1a846bf..914eea5 100644 (file)
@@ -452,9 +452,9 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
        root->fs_info->tree_root = root;
 
        root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
-       if (!root->node) {
+       if (IS_ERR(root->node)) {
                test_std_err(TEST_ALLOC_EXTENT_BUFFER);
-               ret = -ENOMEM;
+               ret = PTR_ERR(root->node);
                goto out;
        }
        btrfs_set_header_level(root->node, 0);
index 09aaca1..ac035a6 100644 (file)
@@ -484,9 +484,9 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
         * *cough*backref walking code*cough*
         */
        root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
-       if (!root->node) {
+       if (IS_ERR(root->node)) {
                test_err("couldn't allocate dummy buffer");
-               ret = -ENOMEM;
+               ret = PTR_ERR(root->node);
                goto out;
        }
        btrfs_set_header_level(root->node, 0);
index 493d4d9..97f3520 100644 (file)
@@ -227,7 +227,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
         */
        if (item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START) {
                file_extent_err(leaf, slot,
-                               "invalid item size, have %u expect [%lu, %u)",
+                               "invalid item size, have %u expect [%zu, %u)",
                                item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
                                SZ_4K);
                return -EUCLEAN;
@@ -332,7 +332,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
 }
 
 static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
-                          int slot)
+                          int slot, struct btrfs_key *prev_key)
 {
        struct btrfs_fs_info *fs_info = leaf->fs_info;
        u32 sectorsize = fs_info->sectorsize;
@@ -356,6 +356,20 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
                        btrfs_item_size_nr(leaf, slot), csumsize);
                return -EUCLEAN;
        }
+       if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
+               u64 prev_csum_end;
+               u32 prev_item_size;
+
+               prev_item_size = btrfs_item_size_nr(leaf, slot - 1);
+               prev_csum_end = (prev_item_size / csumsize) * sectorsize;
+               prev_csum_end += prev_key->offset;
+               if (prev_csum_end > key->offset) {
+                       generic_err(leaf, slot - 1,
+"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
+                                   prev_csum_end, key->offset);
+                       return -EUCLEAN;
+               }
+       }
        return 0;
 }
 
@@ -1355,7 +1369,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
                ret = check_extent_data_item(leaf, key, slot, prev_key);
                break;
        case BTRFS_EXTENT_CSUM_KEY:
-               ret = check_csum_item(leaf, key, slot);
+               ret = check_csum_item(leaf, key, slot, prev_key);
                break;
        case BTRFS_DIR_ITEM_KEY:
        case BTRFS_DIR_INDEX_KEY:
index 6f75736..d3f1159 100644 (file)
@@ -808,7 +808,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                                                struct btrfs_ordered_sum,
                                                list);
                                if (!ret)
-                                       ret = btrfs_del_csums(trans, fs_info,
+                                       ret = btrfs_del_csums(trans,
+                                                             fs_info->csum_root,
                                                              sums->bytenr,
                                                              sums->len);
                                if (!ret)
@@ -3909,6 +3910,28 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static int log_csums(struct btrfs_trans_handle *trans,
+                    struct btrfs_root *log_root,
+                    struct btrfs_ordered_sum *sums)
+{
+       int ret;
+
+       /*
+        * Due to extent cloning, we might have logged a csum item that covers a
+        * subrange of a cloned extent, and later we can end up logging a csum
+        * item for a larger subrange of the same extent or the entire range.
+        * This would leave csum items in the log tree that cover the same range
+        * and break the searches for checksums in the log tree, resulting in
+        * some checksums missing in the fs/subvolume tree. So just delete (or
+        * trim and adjust) any existing csum items in the log for this range.
+        */
+       ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
+       if (ret)
+               return ret;
+
+       return btrfs_csum_file_blocks(trans, log_root, sums);
+}
+
 static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct btrfs_inode *inode,
                               struct btrfs_path *dst_path,
@@ -4054,7 +4077,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                   struct btrfs_ordered_sum,
                                                   list);
                if (!ret)
-                       ret = btrfs_csum_file_blocks(trans, log, sums);
+                       ret = log_csums(trans, log, sums);
                list_del(&sums->list);
                kfree(sums);
        }
@@ -4274,7 +4297,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
                                                   struct btrfs_ordered_sum,
                                                   list);
                if (!ret)
-                       ret = btrfs_csum_file_blocks(trans, log_root, sums);
+                       ret = log_csums(trans, log_root, sums);
                list_del(&sums->list);
                kfree(sums);
        }
@@ -6294,9 +6317,28 @@ again:
                wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
                if (IS_ERR(wc.replay_dest)) {
                        ret = PTR_ERR(wc.replay_dest);
+
+                       /*
+                        * We didn't find the subvol, likely because it was
+                        * deleted.  This is ok, simply skip this log and go to
+                        * the next one.
+                        *
+                        * We need to exclude the root because we can't have
+                        * other log replays overwriting this log as we'll read
+                        * it back in a few more times.  This will keep our
+                        * block from being modified, and we'll just bail for
+                        * each subsequent pass.
+                        */
+                       if (ret == -ENOENT)
+                               ret = btrfs_pin_extent_for_log_replay(fs_info,
+                                                       log->node->start,
+                                                       log->node->len);
                        free_extent_buffer(log->node);
                        free_extent_buffer(log->commit_root);
                        kfree(log);
+
+                       if (!ret)
+                               goto next;
                        btrfs_handle_fs_error(fs_info, ret,
                                "Couldn't read target root for tree log recovery.");
                        goto error;
@@ -6328,7 +6370,6 @@ again:
                                                  &root->highest_objectid);
                }
 
-               key.offset = found_key.offset - 1;
                wc.replay_dest->log_root = NULL;
                free_extent_buffer(log->node);
                free_extent_buffer(log->commit_root);
@@ -6336,9 +6377,10 @@ again:
 
                if (ret)
                        goto error;
-
+next:
                if (found_key.offset == 0)
                        break;
+               key.offset = found_key.offset - 1;
        }
        btrfs_release_path(path);
 
index 91caab6..76b84f2 100644 (file)
@@ -324,6 +324,8 @@ again_search_slot:
                                }
                                if (ret < 0 && ret != -ENOENT)
                                        goto out;
+                               key.offset++;
+                               goto again_search_slot;
                        }
                        item_size -= sizeof(subid_le);
                        offset += sizeof(subid_le);
index d8e5560..a6d3f08 100644 (file)
@@ -61,7 +61,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID1C3] = {
                .sub_stripes    = 1,
                .dev_stripes    = 1,
-               .devs_max       = 0,
+               .devs_max       = 3,
                .devs_min       = 3,
                .tolerated_failures = 2,
                .devs_increment = 3,
@@ -73,7 +73,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID1C4] = {
                .sub_stripes    = 1,
                .dev_stripes    = 1,
-               .devs_max       = 0,
+               .devs_max       = 4,
                .devs_min       = 4,
                .tolerated_failures = 3,
                .devs_increment = 4,