OSDN Git Service

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[android-x86/kernel.git] / fs / btrfs / extent-tree.c
index a684086..571f402 100644 (file)
@@ -74,8 +74,9 @@ enum {
        RESERVE_ALLOC_NO_ACCOUNT = 2,
 };
 
-static int update_block_group(struct btrfs_root *root,
-                             u64 bytenr, u64 num_bytes, int alloc);
+static int update_block_group(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 bytenr,
+                             u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -1925,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                         */
                        ret = 0;
                }
-               kfree(bbio);
+               btrfs_put_bbio(bbio);
        }
 
        if (actual_bytes)
@@ -2768,7 +2769,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_head *head;
        int ret;
        int run_all = count == (unsigned long)-1;
-       int run_most = 0;
 
        /* We'll clean this up in btrfs_cleanup_transaction */
        if (trans->aborted)
@@ -2778,10 +2778,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                root = root->fs_info->tree_root;
 
        delayed_refs = &trans->transaction->delayed_refs;
-       if (count == 0) {
+       if (count == 0)
                count = atomic_read(&delayed_refs->num_entries) * 2;
-               run_most = 1;
-       }
 
 again:
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -3315,120 +3313,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
        struct btrfs_block_group_cache *cache;
-       int err = 0;
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       int ret = 0;
        struct btrfs_path *path;
-       u64 last = 0;
+
+       if (list_empty(&cur_trans->dirty_bgs))
+               return 0;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-again:
-       while (1) {
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               while (cache) {
-                       if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-                               break;
-                       cache = next_block_group(root, cache);
-               }
-               if (!cache) {
-                       if (last == 0)
-                               break;
-                       last = 0;
-                       continue;
-               }
-               err = cache_save_setup(cache, trans, path);
-               last = cache->key.objectid + cache->key.offset;
-               btrfs_put_block_group(cache);
-       }
-
-       while (1) {
-               if (last == 0) {
-                       err = btrfs_run_delayed_refs(trans, root,
-                                                    (unsigned long)-1);
-                       if (err) /* File system offline */
-                               goto out;
-               }
-
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               while (cache) {
-                       if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
-                               btrfs_put_block_group(cache);
-                               goto again;
-                       }
-
-                       if (cache->dirty)
-                               break;
-                       cache = next_block_group(root, cache);
-               }
-               if (!cache) {
-                       if (last == 0)
-                               break;
-                       last = 0;
-                       continue;
-               }
-
-               if (cache->disk_cache_state == BTRFS_DC_SETUP)
-                       cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
-               cache->dirty = 0;
-               last = cache->key.objectid + cache->key.offset;
-
-               err = write_one_cache_group(trans, root, path, cache);
-               btrfs_put_block_group(cache);
-               if (err) /* File system offline */
-                       goto out;
-       }
-
-       while (1) {
-               /*
-                * I don't think this is needed since we're just marking our
-                * preallocated extent as written, but just in case it can't
-                * hurt.
-                */
-               if (last == 0) {
-                       err = btrfs_run_delayed_refs(trans, root,
-                                                    (unsigned long)-1);
-                       if (err) /* File system offline */
-                               goto out;
-               }
-
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               while (cache) {
-                       /*
-                        * Really this shouldn't happen, but it could if we
-                        * couldn't write the entire preallocated extent and
-                        * splitting the extent resulted in a new block.
-                        */
-                       if (cache->dirty) {
-                               btrfs_put_block_group(cache);
-                               goto again;
-                       }
-                       if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
-                               break;
-                       cache = next_block_group(root, cache);
-               }
-               if (!cache) {
-                       if (last == 0)
-                               break;
-                       last = 0;
-                       continue;
-               }
-
-               err = btrfs_write_out_cache(root, trans, cache, path);
-
-               /*
-                * If we didn't have an error then the cache state is still
-                * NEED_WRITE, so we can set it to WRITTEN.
-                */
-               if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
-                       cache->disk_cache_state = BTRFS_DC_WRITTEN;
-               last = cache->key.objectid + cache->key.offset;
+       /*
+        * We don't need the lock here since we are protected by the transaction
+        * commit.  We want to do the cache_save_setup first and then run the
+        * delayed refs to make sure we have the best chance at doing this all
+        * in one shot.
+        */
+       while (!list_empty(&cur_trans->dirty_bgs)) {
+               cache = list_first_entry(&cur_trans->dirty_bgs,
+                                        struct btrfs_block_group_cache,
+                                        dirty_list);
+               list_del_init(&cache->dirty_list);
+               if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                       cache_save_setup(cache, trans, path);
+               if (!ret)
+                       ret = btrfs_run_delayed_refs(trans, root,
+                                                    (unsigned long) -1);
+               if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
+                       btrfs_write_out_cache(root, trans, cache, path);
+               if (!ret)
+                       ret = write_one_cache_group(trans, root, path, cache);
                btrfs_put_block_group(cache);
        }
-out:
 
        btrfs_free_path(path);
-       return err;
+       return ret;
 }
 
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -5043,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
 /**
  * drop_outstanding_extent - drop an outstanding extent
  * @inode: the inode we're dropping the extent for
+ * @num_bytes: the number of bytes we're relaseing.
  *
  * This is called when we are freeing up an outstanding extent, either called
  * after an error or after an extent is written.  This will return the number of
  * reserved extents that need to be freed.  This must be called with
  * BTRFS_I(inode)->lock held.
  */
-static unsigned drop_outstanding_extent(struct inode *inode)
+static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
 {
        unsigned drop_inode_space = 0;
        unsigned dropped_extents = 0;
+       unsigned num_extents = 0;
 
-       BUG_ON(!BTRFS_I(inode)->outstanding_extents);
-       BTRFS_I(inode)->outstanding_extents--;
+       num_extents = (unsigned)div64_u64(num_bytes +
+                                         BTRFS_MAX_EXTENT_SIZE - 1,
+                                         BTRFS_MAX_EXTENT_SIZE);
+       ASSERT(num_extents);
+       ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
+       BTRFS_I(inode)->outstanding_extents -= num_extents;
 
        if (BTRFS_I(inode)->outstanding_extents == 0 &&
            test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5226,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
 out_fail:
        spin_lock(&BTRFS_I(inode)->lock);
-       dropped = drop_outstanding_extent(inode);
+       dropped = drop_outstanding_extent(inode, num_bytes);
        /*
         * If the inodes csum_bytes is the same as the original
         * csum_bytes then we know we haven't raced with any free()ers
@@ -5305,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
        spin_lock(&BTRFS_I(inode)->lock);
-       dropped = drop_outstanding_extent(inode);
+       dropped = drop_outstanding_extent(inode, num_bytes);
 
        if (num_bytes)
                to_free = calc_csum_metadata_size(inode, num_bytes, 0);
@@ -5375,8 +5301,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
        btrfs_free_reserved_data_space(inode, num_bytes);
 }
 
-static int update_block_group(struct btrfs_root *root,
-                             u64 bytenr, u64 num_bytes, int alloc)
+static int update_block_group(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root, u64 bytenr,
+                             u64 num_bytes, int alloc)
 {
        struct btrfs_block_group_cache *cache = NULL;
        struct btrfs_fs_info *info = root->fs_info;
@@ -5414,6 +5341,14 @@ static int update_block_group(struct btrfs_root *root,
                if (!alloc && cache->cached == BTRFS_CACHE_NO)
                        cache_block_group(cache, 1);
 
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+               if (list_empty(&cache->dirty_list)) {
+                       list_add_tail(&cache->dirty_list,
+                                     &trans->transaction->dirty_bgs);
+                       btrfs_get_block_group(cache);
+               }
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
 
@@ -5424,7 +5359,6 @@ static int update_block_group(struct btrfs_root *root,
                    cache->disk_cache_state < BTRFS_DC_CLEAR)
                        cache->disk_cache_state = BTRFS_DC_CLEAR;
 
-               cache->dirty = 1;
                old_val = btrfs_block_group_used(&cache->item);
                num_bytes = min(total, cache->key.offset - byte_in_group);
                if (alloc) {
@@ -5807,10 +5741,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                unpin = &fs_info->freed_extents[0];
 
        while (1) {
+               mutex_lock(&fs_info->unused_bg_unpin_mutex);
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY, NULL);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        break;
+               }
 
                if (btrfs_test_opt(root, DISCARD))
                        ret = btrfs_discard_extent(root, start,
@@ -5818,6 +5755,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                unpin_extent_range(root, start, end, true);
+               mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                cond_resched();
        }
 
@@ -6103,7 +6041,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                }
 
-               ret = update_block_group(root, bytenr, num_bytes, 0);
+               ret = update_block_group(trans, root, bytenr, num_bytes, 0);
                if (ret) {
                        btrfs_abort_transaction(trans, extent_root, ret);
                        goto out;
@@ -6205,7 +6143,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                           struct extent_buffer *buf,
                           u64 parent, int last_ref)
 {
-       struct btrfs_block_group_cache *cache = NULL;
        int pin = 1;
        int ret;
 
@@ -6221,17 +6158,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
        if (!last_ref)
                return;
 
-       cache = btrfs_lookup_block_group(root->fs_info, buf->start);
-
        if (btrfs_header_generation(buf) == trans->transid) {
+               struct btrfs_block_group_cache *cache;
+
                if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
                        ret = check_ref_cleanup(trans, root, buf->start);
                        if (!ret)
                                goto out;
                }
 
+               cache = btrfs_lookup_block_group(root->fs_info, buf->start);
+
                if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
                        pin_down_extent(root, cache, buf->start, buf->len, 1);
+                       btrfs_put_block_group(cache);
                        goto out;
                }
 
@@ -6239,6 +6179,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
                btrfs_add_free_space(cache, buf->start, buf->len);
                btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+               btrfs_put_block_group(cache);
                trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
                pin = 0;
        }
@@ -6253,7 +6194,6 @@ out:
         * anymore.
         */
        clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
-       btrfs_put_block_group(cache);
 }
 
 /* Can return -ENOMEM */
@@ -7063,7 +7003,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       ret = update_block_group(root, ins->objectid, ins->offset, 1);
+       ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
                        ins->objectid, ins->offset);
@@ -7152,7 +7092,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                        return ret;
        }
 
-       ret = update_block_group(root, ins->objectid, root->nodesize, 1);
+       ret = update_block_group(trans, root, ins->objectid, root->nodesize,
+                                1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
                        ins->objectid, ins->offset);
@@ -7217,11 +7158,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 
 static struct extent_buffer *
 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                     u64 bytenr, u32 blocksize, int level)
+                     u64 bytenr, int level)
 {
        struct extent_buffer *buf;
 
-       buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+       buf = btrfs_find_create_tree_block(root, bytenr);
        if (!buf)
                return ERR_PTR(-ENOMEM);
        btrfs_set_header_generation(buf, trans->transid);
@@ -7340,7 +7281,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
        if (btrfs_test_is_dummy_root(root)) {
                buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
-                                           blocksize, level);
+                                           level);
                if (!IS_ERR(buf))
                        root->alloc_bytenr += blocksize;
                return buf;
@@ -7357,8 +7298,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
                return ERR_PTR(ret);
        }
 
-       buf = btrfs_init_new_buffer(trans, root, ins.objectid,
-                                   blocksize, level);
+       buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
        BUG_ON(IS_ERR(buf)); /* -ENOMEM */
 
        if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
@@ -7487,7 +7427,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
                                continue;
                }
 reada:
-               readahead_tree_block(root, bytenr, blocksize);
+               readahead_tree_block(root, bytenr);
                nread++;
        }
        wc->reada_slot = slot;
@@ -7828,7 +7768,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
        next = btrfs_find_tree_block(root, bytenr);
        if (!next) {
-               next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+               next = btrfs_find_create_tree_block(root, bytenr);
                if (!next)
                        return -ENOMEM;
                btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
@@ -8548,14 +8488,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       alloc_flags = update_block_group_flags(root, cache->flags);
-       if (alloc_flags != cache->flags) {
-               ret = do_chunk_alloc(trans, root, alloc_flags,
-                                    CHUNK_ALLOC_FORCE);
-               if (ret < 0)
-                       goto out;
-       }
-
        ret = set_block_group_ro(cache, 0);
        if (!ret)
                goto out;
@@ -8566,6 +8498,11 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                goto out;
        ret = set_block_group_ro(cache, 0);
 out:
+       if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
+               alloc_flags = update_block_group_flags(root, cache->flags);
+               check_system_chunk(trans, root, alloc_flags);
+       }
+
        btrfs_end_transaction(trans, root);
        return ret;
 }
@@ -9005,6 +8942,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
        INIT_LIST_HEAD(&cache->cluster_list);
        INIT_LIST_HEAD(&cache->bg_list);
        INIT_LIST_HEAD(&cache->ro_list);
+       INIT_LIST_HEAD(&cache->dirty_list);
        btrfs_init_free_space_ctl(cache);
        atomic_set(&cache->trimming, 0);
 
@@ -9068,9 +9006,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                         * b) Setting 'dirty flag' makes sure that we flush
                         *    the new space cache info onto disk.
                         */
-                       cache->disk_cache_state = BTRFS_DC_CLEAR;
                        if (btrfs_test_opt(root, SPACE_CACHE))
-                               cache->dirty = 1;
+                               cache->disk_cache_state = BTRFS_DC_CLEAR;
                }
 
                read_extent_buffer(leaf, &cache->item,
@@ -9460,6 +9397,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                }
        }
 
+       spin_lock(&trans->transaction->dirty_bgs_lock);
+       if (!list_empty(&block_group->dirty_list)) {
+               list_del_init(&block_group->dirty_list);
+               btrfs_put_block_group(block_group);
+       }
+       spin_unlock(&trans->transaction->dirty_bgs_lock);
+
        btrfs_remove_free_space_cache(block_group);
 
        spin_lock(&block_group->space_info->lock);
@@ -9611,7 +9555,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 * Want to do this before we do anything else so we can recover
                 * properly if we fail to join the transaction.
                 */
-               trans = btrfs_join_transaction(root);
+               /* 1 for btrfs_orphan_reserve_metadata() */
+               trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
                        btrfs_set_block_group_rw(root, block_group);
                        ret = PTR_ERR(trans);
@@ -9624,18 +9569,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 */
                start = block_group->key.objectid;
                end = start + block_group->key.offset - 1;
+               /*
+                * Hold the unused_bg_unpin_mutex lock to avoid racing with
+                * btrfs_finish_extent_commit(). If we are at transaction N,
+                * another task might be running finish_extent_commit() for the
+                * previous transaction N - 1, and have seen a range belonging
+                * to the block group in freed_extents[] before we were able to
+                * clear the whole block group range from freed_extents[]. This
+                * means that task can lookup for the block group after we
+                * unpinned it from freed_extents[] and removed it, leading to
+                * a BUG_ON() at btrfs_unpin_extent_range().
+                */
+               mutex_lock(&fs_info->unused_bg_unpin_mutex);
                ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
                                  EXTENT_DIRTY, GFP_NOFS);
                if (ret) {
+                       mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        btrfs_set_block_group_rw(root, block_group);
                        goto end_trans;
                }
                ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
                                  EXTENT_DIRTY, GFP_NOFS);
                if (ret) {
+                       mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        btrfs_set_block_group_rw(root, block_group);
                        goto end_trans;
                }
+               mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 
                /* Reset pinned so btrfs_put_block_group doesn't complain */
                block_group->pinned = 0;