OSDN Git Service

Btrfs: add tree modification log functions
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / fs / btrfs / ctree.c
index e697afd..72b9f97 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/rbtree.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -156,10 +157,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
        struct extent_buffer *eb;
 
-       rcu_read_lock();
-       eb = rcu_dereference(root->node);
-       extent_buffer_get(eb);
-       rcu_read_unlock();
+       /*
+        * return the current root node with an extra reference held.
+        * loop until we manage to take a reference on a node that is
+        * still live at the time we grab it.
+        */
+       while (1) {
+               rcu_read_lock();
+               eb = rcu_dereference(root->node);
+
+               /*
+                * RCU really hurts here, we could free up the root node because
+                * it was cow'ed but we may not get the new root node yet so do
+                * the inc_not_zero dance and if it doesn't work then
+                * synchronize_rcu and try again.
+                */
+               if (atomic_inc_not_zero(&eb->refs)) {
+                       rcu_read_unlock();
+                       break;
+               }
+               rcu_read_unlock();
+               synchronize_rcu();
+       }
        return eb;
 }
 
@@ -207,10 +221,12 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
  */
 static void add_root_to_dirty_list(struct btrfs_root *root)
 {
+       /*
+        * take trans_lock so that concurrent callers cannot race the
+        * list_empty() check and add the root to the dirty list twice
+        */
+       spin_lock(&root->fs_info->trans_lock);
        if (root->track_dirty && list_empty(&root->dirty_list)) {
                list_add(&root->dirty_list,
                         &root->fs_info->dirty_cowonly_roots);
        }
+       spin_unlock(&root->fs_info->trans_lock);
 }
 
 /*
@@ -240,7 +256,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
        cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
                                     new_root_objectid, &disk_key, level,
-                                    buf->start, 0, 1);
+                                    buf->start, 0);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -273,6 +289,412 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+/*
+ * operation types recorded in the tree modification log.  the
+ * REMOVE_WHILE_* variants distinguish keys removed implicitly when a
+ * whole node is freed (see tree_mod_log_free_eb) or when keys are
+ * moved within a node (see tree_mod_log_insert_move) from explicit
+ * key removals.
+ */
+enum mod_log_op {
+       MOD_LOG_KEY_REPLACE,
+       MOD_LOG_KEY_ADD,
+       MOD_LOG_KEY_REMOVE,
+       MOD_LOG_KEY_REMOVE_WHILE_FREEING,
+       MOD_LOG_KEY_REMOVE_WHILE_MOVING,
+       MOD_LOG_MOVE_KEYS,
+       MOD_LOG_ROOT_REPLACE,
+};
+
+/* describes a MOD_LOG_MOVE_KEYS operation: nr_items moved to dst_slot */
+struct tree_mod_move {
+       int dst_slot;
+       int nr_items;
+};
+
+/* old root block recorded by MOD_LOG_ROOT_REPLACE: logical address and level */
+struct tree_mod_root {
+       u64 logical;
+       u8 level;
+};
+
+/*
+ * a single entry of the tree modification log, kept in
+ * fs_info->tree_mod_log and keyed in the rbtree by (index, elem.seq).
+ * only the fields relevant to 'op' carry meaning.
+ */
+struct tree_mod_elem {
+       struct rb_node node;
+       u64 index;              /* shifted logical */
+       struct seq_list elem;
+       enum mod_log_op op;
+
+       /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
+       int slot;
+
+       /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
+       u64 generation;
+
+       /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
+       struct btrfs_disk_key key;
+       u64 blockptr;
+
+       /* this is used for op == MOD_LOG_MOVE_KEYS */
+       struct tree_mod_move move;
+
+       /* this is used for op == MOD_LOG_ROOT_REPLACE */
+       struct tree_mod_root old_root;
+};
+
+/*
+ * hand out the next tree mod sequence number and queue 'elem' at the
+ * tail of the fs-wide tree_mod_seq_list.
+ *
+ * NOTE(review): btrfs_get_tree_mod_seq calls this under
+ * fs_info->tree_mod_seq_lock while tree_mod_alloc does not -- confirm
+ * the list manipulation is otherwise serialized.
+ */
+static inline void
+__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
+{
+       elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
+       list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+}
+
+/*
+ * register 'elem' as a blocker: log entries with a sequence number at
+ * or above elem->seq are preserved until btrfs_put_tree_mod_seq drops
+ * the blocker again.
+ */
+void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+                           struct seq_list *elem)
+{
+       /* bit 0 of flags marks a registered blocker */
+       elem->flags = 1;
+       spin_lock(&fs_info->tree_mod_seq_lock);
+       __get_tree_mod_seq(fs_info, elem);
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+}
+
+/*
+ * drop a blocker element taken with btrfs_get_tree_mod_seq.  if no
+ * blocker with a lower sequence number remains, prune every log entry
+ * whose sequence number is at or below the new minimum -- nobody can
+ * ask for those anymore.
+ */
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+                           struct seq_list *elem)
+{
+       struct rb_root *tm_root;
+       struct rb_node *node;
+       struct rb_node *next;
+       struct seq_list *cur_elem;
+       struct tree_mod_elem *tm;
+       u64 min_seq = (u64)-1;
+       u64 seq_putting = elem->seq;
+
+       if (!seq_putting)
+               return;
+
+       /* bit 0 of flags must have been set by btrfs_get_tree_mod_seq */
+       BUG_ON(!(elem->flags & 1));
+       spin_lock(&fs_info->tree_mod_seq_lock);
+       list_del(&elem->list);
+
+       /* find the lowest sequence number still blocked by someone */
+       list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
+               if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
+                       if (seq_putting > cur_elem->seq) {
+                               /*
+                                * blocker with lower sequence number exists, we
+                                * cannot remove anything from the log
+                                */
+                               goto out;
+                       }
+                       min_seq = cur_elem->seq;
+               }
+       }
+
+       /*
+        * anything that's lower than the lowest existing (read: blocked)
+        * sequence number can be removed from the tree.
+        */
+       write_lock(&fs_info->tree_mod_log_lock);
+       tm_root = &fs_info->tree_mod_log;
+       for (node = rb_first(tm_root); node; node = next) {
+               next = rb_next(node);
+               tm = container_of(node, struct tree_mod_elem, node);
+               if (tm->elem.seq > min_seq)
+                       continue;
+               rb_erase(node, tm_root);
+               list_del(&tm->elem.list);
+               kfree(tm);
+       }
+       write_unlock(&fs_info->tree_mod_log_lock);
+out:
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+}
+
+/*
+ * key order of the log:
+ *       index -> sequence
+ *
+ * the index is the shifted logical of the *new* root node for root replace
+ * operations, or the shifted logical of the affected block for all other
+ * operations.
+ *
+ * inserts tm into fs_info->tree_mod_log.  returns 0 on success or
+ * -EEXIST (in which case tm has been freed) when an entry with the
+ * same index and sequence number is already present.
+ */
+static noinline int
+__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
+{
+       struct rb_root *tm_root;
+       struct rb_node **new;
+       struct rb_node *parent = NULL;
+       struct tree_mod_elem *cur;
+       int ret = 0;
+
+       /* a valid entry always carries a non-zero sequence number */
+       BUG_ON(!tm || !tm->elem.seq);
+
+       write_lock(&fs_info->tree_mod_log_lock);
+       tm_root = &fs_info->tree_mod_log;
+       new = &tm_root->rb_node;
+       while (*new) {
+               cur = container_of(*new, struct tree_mod_elem, node);
+               parent = *new;
+               if (cur->index < tm->index)
+                       new = &((*new)->rb_left);
+               else if (cur->index > tm->index)
+                       new = &((*new)->rb_right);
+               else if (cur->elem.seq < tm->elem.seq)
+                       new = &((*new)->rb_left);
+               else if (cur->elem.seq > tm->elem.seq)
+                       new = &((*new)->rb_right);
+               else {
+                       /* same index and seq: this operation is already logged */
+                       kfree(tm);
+                       ret = -EEXIST;
+                       goto unlock;
+               }
+       }
+
+       rb_link_node(&tm->node, parent, new);
+       rb_insert_color(&tm->node, tm_root);
+unlock:
+       write_unlock(&fs_info->tree_mod_log_lock);
+       return ret;
+}
+
+/*
+ * allocate a tree_mod_elem and assign it the next sequence number.
+ * returns 0 when logging is not in use (no blockers registered on
+ * tree_mod_seq_list), -ENOMEM on allocation failure, or the new
+ * non-zero sequence number on success.
+ *
+ * NOTE(review): the u64 sequence is funneled through an int return;
+ * once the counter exceeds INT_MAX the value truncates and can look
+ * negative (or zero) to callers checking ret <= 0 -- verify against
+ * the callers.
+ * NOTE(review): __get_tree_mod_seq is called here without taking
+ * fs_info->tree_mod_seq_lock, unlike btrfs_get_tree_mod_seq --
+ * confirm this path is otherwise serialized.
+ */
+int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
+                  struct tree_mod_elem **tm_ret)
+{
+       struct tree_mod_elem *tm;
+       u64 seq = 0;
+
+       /* make sure we see a current view of tree_mod_seq_list */
+       smp_mb();
+       if (list_empty(&fs_info->tree_mod_seq_list))
+               return 0;
+
+       tm = *tm_ret = kzalloc(sizeof(*tm), flags);
+       if (!tm)
+               return -ENOMEM;
+
+       __get_tree_mod_seq(fs_info, &tm->elem);
+       seq = tm->elem.seq;
+       /* this element is a log entry, not a blocker */
+       tm->elem.flags = 0;
+
+       return seq;
+}
+
+/*
+ * log a single key operation on 'eb' at 'slot'.  for anything but a
+ * key addition, the current key and block pointer at the slot are
+ * recorded so the operation can be rewound later.  returns 0 when the
+ * log is unused, a negative errno on failure, or the result of the
+ * rbtree insertion.
+ */
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+                            struct extent_buffer *eb, int slot,
+                            enum mod_log_op op, gfp_t flags)
+{
+       struct tree_mod_elem *tm;
+       int ret;
+
+       /* ret == 0 means logging is currently disabled, nothing to do */
+       ret = tree_mod_alloc(fs_info, flags, &tm);
+       if (ret <= 0)
+               return ret;
+
+       tm->index = eb->start >> PAGE_CACHE_SHIFT;
+       if (op != MOD_LOG_KEY_ADD) {
+               btrfs_node_key(eb, &tm->key, slot);
+               tm->blockptr = btrfs_node_blockptr(eb, slot);
+       }
+       tm->op = op;
+       tm->slot = slot;
+       tm->generation = btrfs_node_ptr_generation(eb, slot);
+
+       return __tree_mod_log_insert(fs_info, tm);
+}
+
+/* as tree_mod_log_insert_key_mask, with a GFP_NOFS allocation */
+static noinline int
+tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+                       int slot, enum mod_log_op op)
+{
+       return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
+}
+
+/*
+ * log a MOD_LOG_MOVE_KEYS operation: nr_items key pointers moving from
+ * src_slot to dst_slot within 'eb'.  destination slots that get
+ * overwritten by the move (those below src_slot) have their old keys
+ * logged first as REMOVE_WHILE_MOVING.
+ */
+static noinline int
+tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
+                        struct extent_buffer *eb, int dst_slot, int src_slot,
+                        int nr_items, gfp_t flags)
+{
+       struct tree_mod_elem *tm;
+       int ret;
+       int i;
+
+       /* ret == 0 means logging is currently disabled, nothing to do */
+       ret = tree_mod_alloc(fs_info, flags, &tm);
+       if (ret <= 0)
+               return ret;
+
+       for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+               ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+                                             MOD_LOG_KEY_REMOVE_WHILE_MOVING);
+               BUG_ON(ret < 0);
+       }
+
+       tm->index = eb->start >> PAGE_CACHE_SHIFT;
+       tm->slot = src_slot;
+       tm->move.dst_slot = dst_slot;
+       tm->move.nr_items = nr_items;
+       tm->op = MOD_LOG_MOVE_KEYS;
+
+       return __tree_mod_log_insert(fs_info, tm);
+}
+
+/*
+ * log a MOD_LOG_ROOT_REPLACE operation.  the rbtree index is derived
+ * from the *new* root so that lookups by the new block find the old
+ * root's logical address, level and generation.
+ */
+static noinline int
+tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
+                        struct extent_buffer *old_root,
+                        struct extent_buffer *new_root, gfp_t flags)
+{
+       struct tree_mod_elem *tm;
+       int ret;
+
+       /* ret == 0 means logging is currently disabled, nothing to do */
+       ret = tree_mod_alloc(fs_info, flags, &tm);
+       if (ret <= 0)
+               return ret;
+
+       tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+       tm->old_root.logical = old_root->start;
+       tm->old_root.level = btrfs_header_level(old_root);
+       tm->generation = btrfs_header_generation(old_root);
+       tm->op = MOD_LOG_ROOT_REPLACE;
+
+       return __tree_mod_log_insert(fs_info, tm);
+}
+
+/*
+ * search the log for an entry of the block at 'start' with a sequence
+ * number of at least min_seq.  with 'smallest' set, the entry with the
+ * lowest qualifying seq (the oldest) is returned, otherwise the one
+ * with the highest seq (the most recent).  returns NULL when nothing
+ * matches.
+ */
+static struct tree_mod_elem *
+__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
+                     int smallest)
+{
+       struct rb_root *tm_root;
+       struct rb_node *node;
+       struct tree_mod_elem *cur = NULL;
+       struct tree_mod_elem *found = NULL;
+       u64 index = start >> PAGE_CACHE_SHIFT;
+
+       read_lock(&fs_info->tree_mod_log_lock);
+       tm_root = &fs_info->tree_mod_log;
+       node = tm_root->rb_node;
+       while (node) {
+               cur = container_of(node, struct tree_mod_elem, node);
+               if (cur->index < index) {
+                       node = node->rb_left;
+               } else if (cur->index > index) {
+                       node = node->rb_right;
+               } else if (cur->elem.seq < min_seq) {
+                       node = node->rb_left;
+               } else if (!smallest) {
+                       /* we want the node with the highest seq */
+                       if (found)
+                               BUG_ON(found->elem.seq > cur->elem.seq);
+                       found = cur;
+                       node = node->rb_left;
+               } else if (cur->elem.seq > min_seq) {
+                       /* we want the node with the smallest seq */
+                       if (found)
+                               BUG_ON(found->elem.seq < cur->elem.seq);
+                       found = cur;
+                       node = node->rb_right;
+               } else {
+                       /* exact match on min_seq, can't do better */
+                       found = cur;
+                       break;
+               }
+       }
+       read_unlock(&fs_info->tree_mod_log_lock);
+
+       return found;
+}
+
+/*
+ * this returns the element from the log with the smallest time sequence
+ * value that's in the log (the oldest log item). any element with a time
+ * sequence lower than min_seq will be ignored. returns NULL when the
+ * log holds nothing for the block at 'start'.
+ */
+static struct tree_mod_elem *
+tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
+                          u64 min_seq)
+{
+       return __tree_mod_log_search(fs_info, start, min_seq, 1);
+}
+
+/*
+ * this returns the element from the log with the largest time sequence
+ * value that's in the log (the most recent log item). any element with
+ * a time sequence lower than min_seq will be ignored. returns NULL when
+ * the log holds nothing for the block at 'start'.
+ */
+static struct tree_mod_elem *
+tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
+{
+       return __tree_mod_log_search(fs_info, start, min_seq, 0);
+}
+
+/*
+ * log that nr_items key pointers were copied from 'src' (starting at
+ * src_offset) into 'dst' (starting at dst_offset).  each item is logged
+ * as a REMOVE from src plus an ADD to dst.  nothing is logged when the
+ * log is unused or when both buffers are leaves (level 0).
+ */
+static inline void
+tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+                    struct extent_buffer *src, unsigned long dst_offset,
+                    unsigned long src_offset, int nr_items)
+{
+       int ret;
+       int i;
+
+       /* make sure we see a current view of tree_mod_seq_list */
+       smp_mb();
+       if (list_empty(&fs_info->tree_mod_seq_list))
+               return;
+
+       if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+               return;
+
+       /* speed this up by single seq for all operations? */
+       for (i = 0; i < nr_items; i++) {
+               ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
+                                             MOD_LOG_KEY_REMOVE);
+               BUG_ON(ret < 0);
+               ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
+                                             MOD_LOG_KEY_ADD);
+               BUG_ON(ret < 0);
+       }
+}
+
+/*
+ * log a key-pointer move within a single node; logging failures are
+ * fatal (BUG_ON) since the log would otherwise be inconsistent.
+ */
+static inline void
+tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+                    int dst_offset, int src_offset, int nr_items)
+{
+       int ret;
+       ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
+                                      nr_items, GFP_NOFS);
+       BUG_ON(ret < 0);
+}
+
+/*
+ * log the replacement of the key at 'slot' in 'eb'.  callers in atomic
+ * context pass 'atomic' so the allocation uses GFP_ATOMIC instead of
+ * GFP_NOFS.
+ */
+static inline void
+tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
+                         struct extent_buffer *eb,
+                         struct btrfs_disk_key *disk_key, int slot, int atomic)
+{
+       int ret;
+
+       ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
+                                          MOD_LOG_KEY_REPLACE,
+                                          atomic ? GFP_ATOMIC : GFP_NOFS);
+       BUG_ON(ret < 0);
+}
+
+/*
+ * log the removal of every key pointer in 'eb', highest slot first, as
+ * REMOVE_WHILE_FREEING.  leaves (level 0) are not logged; no-op when
+ * the log is unused.
+ */
+static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+                                struct extent_buffer *eb)
+{
+       int i;
+       int ret;
+       u32 nritems;
+
+       /* make sure we see a current view of tree_mod_seq_list */
+       smp_mb();
+       if (list_empty(&fs_info->tree_mod_seq_list))
+               return;
+
+       if (btrfs_header_level(eb) == 0)
+               return;
+
+       nritems = btrfs_header_nritems(eb);
+       for (i = nritems - 1; i >= 0; i--) {
+               ret = tree_mod_log_insert_key(fs_info, eb, i,
+                                             MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+               BUG_ON(ret < 0);
+       }
+}
+
+/*
+ * log a root replacement: records the removal of all pointers in the
+ * current root (root->node, which becomes the old root) followed by a
+ * ROOT_REPLACE entry keyed on the new root's logical address.
+ */
+static inline void
+tree_mod_log_set_root_pointer(struct btrfs_root *root,
+                             struct extent_buffer *new_root_node)
+{
+       int ret;
+       tree_mod_log_free_eb(root->fs_info, root->node);
+       ret = tree_mod_log_insert_root(root->fs_info, root->node,
+                                      new_root_node, GFP_NOFS);
+       BUG_ON(ret < 0);
+}
+
 /*
  * check if the tree block can be shared by multiple trees
  */
@@ -452,7 +874,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 
        cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
                                     root->root_key.objectid, &disk_key,
-                                    level, search_start, empty_size, 1);
+                                    level, search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -494,7 +916,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                rcu_assign_pointer(root->node, cow);
 
                btrfs_free_tree_block(trans, root, buf, parent_start,
-                                     last_ref, 1);
+                                     last_ref);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
@@ -510,11 +932,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
                btrfs_free_tree_block(trans, root, buf, parent_start,
-                                     last_ref, 1);
+                                     last_ref);
        }
        if (unlock_orig)
                btrfs_tree_unlock(buf);
-       free_extent_buffer(buf);
+       free_extent_buffer_stale(buf);
        btrfs_mark_buffer_dirty(cow);
        *cow_ret = cow;
        return 0;
@@ -710,7 +1132,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 
                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
-                       uptodate = btrfs_buffer_uptodate(cur, gen);
+                       uptodate = btrfs_buffer_uptodate(cur, gen, 0);
                else
                        uptodate = 0;
                if (!cur || !uptodate) {
@@ -972,9 +1394,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                free_extent_buffer(mid);
 
                root_sub_used(root, mid->len);
-               btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
+               btrfs_free_tree_block(trans, root, mid, 0, 1);
                /* once for the root ptr */
-               free_extent_buffer(mid);
+               free_extent_buffer_stale(mid);
                return 0;
        }
        if (btrfs_header_nritems(mid) >
@@ -1027,8 +1449,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        btrfs_tree_unlock(right);
                        del_ptr(trans, root, path, level + 1, pslot + 1);
                        root_sub_used(root, right->len);
-                       btrfs_free_tree_block(trans, root, right, 0, 1, 0);
-                       free_extent_buffer(right);
+                       btrfs_free_tree_block(trans, root, right, 0, 1);
+                       free_extent_buffer_stale(right);
                        right = NULL;
                } else {
                        struct btrfs_disk_key right_key;
@@ -1069,8 +1491,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                btrfs_tree_unlock(mid);
                del_ptr(trans, root, path, level + 1, pslot);
                root_sub_used(root, mid->len);
-               btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
-               free_extent_buffer(mid);
+               btrfs_free_tree_block(trans, root, mid, 0, 1);
+               free_extent_buffer_stale(mid);
                mid = NULL;
        } else {
                /* update the parent key to reflect our changes */
@@ -1345,7 +1767,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
                block1 = btrfs_node_blockptr(parent, slot - 1);
                gen = btrfs_node_ptr_generation(parent, slot - 1);
                eb = btrfs_find_tree_block(root, block1, blocksize);
-               if (eb && btrfs_buffer_uptodate(eb, gen))
+               /*
+                * if we get -eagain from btrfs_buffer_uptodate, we
+                * don't want to return eagain here.  That will loop
+                * forever
+                */
+               if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
                        block1 = 0;
                free_extent_buffer(eb);
        }
@@ -1353,7 +1780,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
                block2 = btrfs_node_blockptr(parent, slot + 1);
                gen = btrfs_node_ptr_generation(parent, slot + 1);
                eb = btrfs_find_tree_block(root, block2, blocksize);
-               if (eb && btrfs_buffer_uptodate(eb, gen))
+               if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
                        block2 = 0;
                free_extent_buffer(eb);
        }
@@ -1396,7 +1823,8 @@ static noinline int reada_for_balance(struct btrfs_root *root,
  * if lowest_unlock is 1, level 0 won't be unlocked
  */
 static noinline void unlock_up(struct btrfs_path *path, int level,
-                              int lowest_unlock)
+                              int lowest_unlock, int min_write_lock_level,
+                              int *write_lock_level)
 {
        int i;
        int skip_level = level;
@@ -1428,6 +1856,11 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
                if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
                        btrfs_tree_unlock_rw(t, path->locks[i]);
                        path->locks[i] = 0;
+                       if (write_lock_level &&
+                           i > min_write_lock_level &&
+                           i <= *write_lock_level) {
+                               *write_lock_level = i - 1;
+                       }
                }
        }
 }
@@ -1485,8 +1918,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 
        tmp = btrfs_find_tree_block(root, blocknr, blocksize);
        if (tmp) {
-               if (btrfs_buffer_uptodate(tmp, 0)) {
-                       if (btrfs_buffer_uptodate(tmp, gen)) {
+               /* first we do an atomic uptodate check */
+               if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
+                       if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
                                /*
                                 * we found an up to date block without
                                 * sleeping, return
@@ -1504,8 +1938,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
                        free_extent_buffer(tmp);
                        btrfs_set_path_blocking(p);
 
+                       /* now we're allowed to do a blocking uptodate check */
                        tmp = read_tree_block(root, blocknr, blocksize, gen);
-                       if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+                       if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
                                *eb_ret = tmp;
                                return 0;
                        }
@@ -1540,7 +1975,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
                 * and give up so that our caller doesn't loop forever
                 * on our EAGAINs.
                 */
-               if (!btrfs_buffer_uptodate(tmp, 0))
+               if (!btrfs_buffer_uptodate(tmp, 0, 0))
                        ret = -EIO;
                free_extent_buffer(tmp);
        }
@@ -1651,6 +2086,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        /* everything at write_lock_level or lower must be write locked */
        int write_lock_level = 0;
        u8 lowest_level = 0;
+       int min_write_lock_level;
 
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
@@ -1678,6 +2114,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        if (cow && (p->keep_locks || p->lowest_level))
                write_lock_level = BTRFS_MAX_LEVEL;
 
+       min_write_lock_level = write_lock_level;
+
 again:
        /*
         * we try very hard to do read locks on the root
@@ -1809,7 +2247,8 @@ cow_done:
                                goto again;
                        }
 
-                       unlock_up(p, level, lowest_unlock);
+                       unlock_up(p, level, lowest_unlock,
+                                 min_write_lock_level, &write_lock_level);
 
                        if (level == lowest_level) {
                                if (dec)
@@ -1871,7 +2310,8 @@ cow_done:
                                }
                        }
                        if (!p->search_for_split)
-                               unlock_up(p, level, lowest_unlock);
+                               unlock_up(p, level, lowest_unlock,
+                                         min_write_lock_level, &write_lock_level);
                        goto done;
                }
        }
@@ -2096,7 +2536,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 
        c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
                                   root->root_key.objectid, &lower_key,
-                                  level, root->node->start, 0, 0);
+                                  level, root->node->start, 0);
        if (IS_ERR(c))
                return PTR_ERR(c);
 
@@ -2219,7 +2659,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 
        split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
                                        root->root_key.objectid,
-                                       &disk_key, level, c->start, 0, 0);
+                                       &disk_key, level, c->start, 0);
        if (IS_ERR(split))
                return PTR_ERR(split);
 
@@ -2238,7 +2678,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
                            (unsigned long)btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
 
-
        copy_extent_buffer(split, c,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(mid),
@@ -2320,6 +2759,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 {
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *upper = path->nodes[1];
+       struct btrfs_map_token token;
        struct btrfs_disk_key disk_key;
        int slot;
        u32 i;
@@ -2331,6 +2771,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        u32 data_end;
        u32 this_item_size;
 
+       btrfs_init_map_token(&token);
+
        if (empty)
                nr = 0;
        else
@@ -2408,8 +2850,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        push_space = BTRFS_LEAF_DATA_SIZE(root);
        for (i = 0; i < right_nritems; i++) {
                item = btrfs_item_nr(right, i);
-               push_space -= btrfs_item_size(right, item);
-               btrfs_set_item_offset(right, item, push_space);
+               push_space -= btrfs_token_item_size(right, item, &token);
+               btrfs_set_token_item_offset(right, item, push_space, &token);
        }
 
        left_nritems -= push_items;
@@ -2539,6 +2981,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        int ret = 0;
        u32 this_item_size;
        u32 old_left_item_size;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        if (empty)
                nr = min(right_nritems, max_slot);
@@ -2599,9 +3044,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
                item = btrfs_item_nr(left, i);
 
-               ioff = btrfs_item_offset(left, item);
-               btrfs_set_item_offset(left, item,
-                     ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+               ioff = btrfs_token_item_offset(left, item, &token);
+               btrfs_set_token_item_offset(left, item,
+                     ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
+                     &token);
        }
        btrfs_set_header_nritems(left, old_left_nritems + push_items);
 
@@ -2631,8 +3077,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        for (i = 0; i < right_nritems; i++) {
                item = btrfs_item_nr(right, i);
 
-               push_space = push_space - btrfs_item_size(right, item);
-               btrfs_set_item_offset(right, item, push_space);
+               push_space = push_space - btrfs_token_item_size(right,
+                                                               item, &token);
+               btrfs_set_token_item_offset(right, item, push_space, &token);
        }
 
        btrfs_mark_buffer_dirty(left);
@@ -2748,6 +3195,9 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
        int rt_data_off;
        int i;
        struct btrfs_disk_key disk_key;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        nritems = nritems - mid;
        btrfs_set_header_nritems(right, nritems);
@@ -2769,8 +3219,9 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
                struct btrfs_item *item = btrfs_item_nr(right, i);
                u32 ioff;
 
-               ioff = btrfs_item_offset(right, item);
-               btrfs_set_item_offset(right, item, ioff + rt_data_off);
+               ioff = btrfs_token_item_offset(right, item, &token);
+               btrfs_set_token_item_offset(right, item,
+                                           ioff + rt_data_off, &token);
        }
 
        btrfs_set_header_nritems(l, mid);
@@ -2959,7 +3410,7 @@ again:
 
        right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
                                        root->root_key.objectid,
-                                       &disk_key, 0, l->start, 0, 0);
+                                       &disk_key, 0, l->start, 0);
        if (IS_ERR(right))
                return PTR_ERR(right);
 
@@ -3246,6 +3697,9 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
        unsigned int old_size;
        unsigned int size_diff;
        int i;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        leaf = path->nodes[0];
        slot = path->slots[0];
@@ -3272,8 +3726,9 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
                u32 ioff;
                item = btrfs_item_nr(leaf, i);
 
-               ioff = btrfs_item_offset(leaf, item);
-               btrfs_set_item_offset(leaf, item, ioff + size_diff);
+               ioff = btrfs_token_item_offset(leaf, item, &token);
+               btrfs_set_token_item_offset(leaf, item,
+                                           ioff + size_diff, &token);
        }
 
        /* shift the data */
@@ -3342,6 +3797,9 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
        unsigned int old_data;
        unsigned int old_size;
        int i;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        leaf = path->nodes[0];
 
@@ -3371,8 +3829,9 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
                u32 ioff;
                item = btrfs_item_nr(leaf, i);
 
-               ioff = btrfs_item_offset(leaf, item);
-               btrfs_set_item_offset(leaf, item, ioff - data_size);
+               ioff = btrfs_token_item_offset(leaf, item, &token);
+               btrfs_set_token_item_offset(leaf, item,
+                                           ioff - data_size, &token);
        }
 
        /* shift the data */
@@ -3414,6 +3873,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
        unsigned int data_end;
        struct btrfs_disk_key disk_key;
        struct btrfs_key found_key;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        for (i = 0; i < nr; i++) {
                if (total_size + data_size[i] + sizeof(struct btrfs_item) >
@@ -3479,8 +3941,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       ioff = btrfs_item_offset(leaf, item);
-                       btrfs_set_item_offset(leaf, item, ioff - total_data);
+                       ioff = btrfs_token_item_offset(leaf, item, &token);
+                       btrfs_set_token_item_offset(leaf, item,
+                                                   ioff - total_data, &token);
                }
                /* shift the items */
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3507,9 +3970,10 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
                btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
                btrfs_set_item_key(leaf, &disk_key, slot + i);
                item = btrfs_item_nr(leaf, slot + i);
-               btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+               btrfs_set_token_item_offset(leaf, item,
+                                           data_end - data_size[i], &token);
                data_end -= data_size[i];
-               btrfs_set_item_size(leaf, item, data_size[i]);
+               btrfs_set_token_item_size(leaf, item, data_size[i], &token);
        }
        btrfs_set_header_nritems(leaf, nritems + nr);
        btrfs_mark_buffer_dirty(leaf);
@@ -3547,6 +4011,9 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
        struct btrfs_disk_key disk_key;
        struct extent_buffer *leaf;
        int slot;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        leaf = path->nodes[0];
        slot = path->slots[0];
@@ -3578,8 +4045,9 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       ioff = btrfs_item_offset(leaf, item);
-                       btrfs_set_item_offset(leaf, item, ioff - total_data);
+                       ioff = btrfs_token_item_offset(leaf, item, &token);
+                       btrfs_set_token_item_offset(leaf, item,
+                                                   ioff - total_data, &token);
                }
                /* shift the items */
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3598,9 +4066,10 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
                btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
                btrfs_set_item_key(leaf, &disk_key, slot + i);
                item = btrfs_item_nr(leaf, slot + i);
-               btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+               btrfs_set_token_item_offset(leaf, item,
+                                           data_end - data_size[i], &token);
                data_end -= data_size[i];
-               btrfs_set_item_size(leaf, item, data_size[i]);
+               btrfs_set_token_item_size(leaf, item, data_size[i], &token);
        }
 
        btrfs_set_header_nritems(leaf, nritems + nr);
@@ -3740,7 +4209,9 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
 
        root_sub_used(root, leaf->len);
 
-       btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
+       extent_buffer_get(leaf);
+       btrfs_free_tree_block(trans, root, leaf, 0, 1);
+       free_extent_buffer_stale(leaf);
 }
 /*
  * delete the item at the leaf level in path.  If that empties
@@ -3757,6 +4228,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        int wret;
        int i;
        u32 nritems;
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
 
        leaf = path->nodes[0];
        last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
@@ -3778,8 +4252,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       ioff = btrfs_item_offset(leaf, item);
-                       btrfs_set_item_offset(leaf, item, ioff + dsize);
+                       ioff = btrfs_token_item_offset(leaf, item, &token);
+                       btrfs_set_token_item_offset(leaf, item,
+                                                   ioff + dsize, &token);
                }
 
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -3983,7 +4458,7 @@ again:
                        tmp = btrfs_find_tree_block(root, blockptr,
                                            btrfs_level_size(root, level - 1));
 
-                       if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+                       if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
                                free_extent_buffer(tmp);
                                break;
                        }
@@ -4013,7 +4488,7 @@ find_next_key:
                path->slots[level] = slot;
                if (level == path->lowest_level) {
                        ret = 0;
-                       unlock_up(path, level, 1);
+                       unlock_up(path, level, 1, 0, NULL);
                        goto out;
                }
                btrfs_set_path_blocking(path);
@@ -4024,7 +4499,7 @@ find_next_key:
 
                path->locks[level - 1] = BTRFS_READ_LOCK;
                path->nodes[level - 1] = cur;
-               unlock_up(path, level, 1);
+               unlock_up(path, level, 1, 0, NULL);
                btrfs_clear_path_blocking(path, NULL, 0);
        }
 out:
@@ -4106,7 +4581,8 @@ next:
                                struct extent_buffer *cur;
                                cur = btrfs_find_tree_block(root, blockptr,
                                            btrfs_level_size(root, level - 1));
-                               if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
+                               if (!cur ||
+                                   btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
                                        slot++;
                                        if (cur)
                                                free_extent_buffer(cur);
@@ -4260,7 +4736,7 @@ again:
        }
        ret = 0;
 done:
-       unlock_up(path, 0, 1);
+       unlock_up(path, 0, 1, 0, NULL);
        path->leave_spinning = old_spinning;
        if (!old_spinning)
                btrfs_set_path_blocking(path);