OSDN Git Service

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 27 May 2011 20:57:12 +0000 (13:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 27 May 2011 20:57:12 +0000 (13:57 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (58 commits)
  Btrfs: use the device_list_mutex during write_dev_supers
  Btrfs: setup free ino caching in a more asynchronous way
  btrfs scrub: don't coalesce pages that are logically discontiguous
  Btrfs: return -ENOMEM in clear_extent_bit
  Btrfs: add mount -o auto_defrag
  Btrfs: using rcu lock in the reader side of devices list
  Btrfs: drop unnecessary device lock
  Btrfs: fix the race between remove dev and alloc chunk
  Btrfs: fix the race between reading and updating devices
  Btrfs: fix bh leak on __btrfs_open_devices path
  Btrfs: fix unsafe usage of merge_state
  Btrfs: allocate extent state and check the result properly
  fs/btrfs: Add missing btrfs_free_path
  Btrfs: check return value of btrfs_inc_extent_ref()
  Btrfs: return error to caller if read_one_inode() fails
  Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item
  Btrfs: return error code to caller when btrfs_del_item fails
  Btrfs: return error code to caller when btrfs_previous_item fails
  btrfs: fix typo 'testeing' -> 'testing'
  btrfs: typo: 'btrfS' -> 'btrfs'
  ...

1  2 
fs/btrfs/extent_io.c
fs/btrfs/relocation.c
fs/btrfs/super.c

diff --combined fs/btrfs/extent_io.c
@@@ -10,8 -10,6 +10,8 @@@
  #include <linux/swap.h>
  #include <linux/writeback.h>
  #include <linux/pagevec.h>
 +#include <linux/prefetch.h>
 +#include <linux/cleancache.h>
  #include "extent_io.h"
  #include "extent_map.h"
  #include "compat.h"
@@@ -103,7 -101,7 +103,7 @@@ void extent_io_exit(void
  }
  
  void extent_io_tree_init(struct extent_io_tree *tree,
-                         struct address_space *mapping, gfp_t mask)
+                        struct address_space *mapping)
  {
        tree->state = RB_ROOT;
        INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
@@@ -441,6 -439,15 +441,15 @@@ static int clear_state_bit(struct exten
        return ret;
  }
  
+ static struct extent_state *
+ alloc_extent_state_atomic(struct extent_state *prealloc)
+ {
+       if (!prealloc)
+               prealloc = alloc_extent_state(GFP_ATOMIC);
+       return prealloc;
+ }
  /*
   * clear some bits on a range in the tree.  This may require splitting
   * or inserting elements in the tree, so the gfp mask is used to
@@@ -531,8 -538,8 +540,8 @@@ hit_next
         */
  
        if (state->start < start) {
-               if (!prealloc)
-                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, start);
                BUG_ON(err == -EEXIST);
                prealloc = NULL;
         * on the first half
         */
        if (state->start <= end && state->end > end) {
-               if (!prealloc)
-                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
                if (wake)
@@@ -727,8 -734,7 +736,7 @@@ int set_extent_bit(struct extent_io_tre
  again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
-               if (!prealloc)
-                       return -ENOMEM;
+               BUG_ON(!prealloc);
        }
  
        spin_lock(&tree->lock);
         */
        node = tree_search(tree, start);
        if (!node) {
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = insert_state(tree, prealloc, start, end, &bits);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
@@@ -773,20 -781,18 +783,18 @@@ hit_next
                if (err)
                        goto out;
  
+               next_node = rb_next(node);
                cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
  
                start = last_end + 1;
-               if (start < end && prealloc && !need_resched()) {
-                       next_node = rb_next(node);
-                       if (next_node) {
-                               state = rb_entry(next_node, struct extent_state,
-                                                rb_node);
-                               if (state->start == start)
-                                       goto hit_next;
-                       }
+               if (next_node && start < end && prealloc && !need_resched()) {
+                       state = rb_entry(next_node, struct extent_state,
+                                        rb_node);
+                       if (state->start == start)
+                               goto hit_next;
                }
                goto search_again;
        }
                        err = -EEXIST;
                        goto out;
                }
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, start);
                BUG_ON(err == -EEXIST);
                prealloc = NULL;
                        this_end = end;
                else
                        this_end = last_start - 1;
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
+               /*
+                * Avoid to free 'prealloc' if it can be merged with
+                * the later extent.
+                */
+               atomic_inc(&prealloc->refs);
                err = insert_state(tree, prealloc, start, this_end,
                                   &bits);
                BUG_ON(err == -EEXIST);
                if (err) {
+                       free_extent_state(prealloc);
                        prealloc = NULL;
                        goto out;
                }
                cache_state(prealloc, cached_state);
+               free_extent_state(prealloc);
                prealloc = NULL;
                start = this_end + 1;
                goto search_again;
                        err = -EEXIST;
                        goto out;
                }
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
  
@@@ -943,13 -966,6 +968,6 @@@ int set_extent_new(struct extent_io_tre
                              NULL, mask);
  }
  
- static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
- {
-       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
-                               NULL, mask);
- }
  int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        struct extent_state **cached_state, gfp_t mask)
  {
@@@ -965,11 -981,6 +983,6 @@@ static int clear_extent_uptodate(struc
                                cached_state, mask);
  }
  
- int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
- {
-       return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
- }
  /*
   * either insert or lock state struct between start and end use mask to tell
   * us if waiting is desired.
@@@ -1030,25 -1041,6 +1043,6 @@@ int unlock_extent(struct extent_io_tre
  }
  
  /*
-  * helper function to set pages and extents in the tree dirty
-  */
- int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
- {
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-       struct page *page;
-       while (index <= end_index) {
-               page = find_get_page(tree->mapping, index);
-               BUG_ON(!page);
-               __set_page_dirty_nobuffers(page);
-               page_cache_release(page);
-               index++;
-       }
-       return 0;
- }
- /*
   * helper function to set both pages and extents in the tree writeback
   */
  static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@@ -1821,46 -1813,6 +1815,6 @@@ static void end_bio_extent_readpage(str
        bio_put(bio);
  }
  
- /*
-  * IO done from prepare_write is pretty simple, we just unlock
-  * the structs in the extent tree when done, and set the uptodate bits
-  * as appropriate.
-  */
- static void end_bio_extent_preparewrite(struct bio *bio, int err)
- {
-       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct extent_io_tree *tree;
-       u64 start;
-       u64 end;
-       do {
-               struct page *page = bvec->bv_page;
-               struct extent_state *cached = NULL;
-               tree = &BTRFS_I(page->mapping->host)->io_tree;
-               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-                       bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-               if (uptodate) {
-                       set_extent_uptodate(tree, start, end, &cached,
-                                           GFP_ATOMIC);
-               } else {
-                       ClearPageUptodate(page);
-                       SetPageError(page);
-               }
-               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-       } while (bvec >= bio->bi_io_vec);
-       bio_put(bio);
- }
  struct bio *
  btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags)
@@@ -2009,7 -1961,7 +1963,7 @@@ static int __extent_read_full_page(stru
        struct btrfs_ordered_extent *ordered;
        int ret;
        int nr = 0;
-       size_t page_offset = 0;
+       size_t pg_offset = 0;
        size_t iosize;
        size_t disk_io_size;
        size_t blocksize = inode->i_sb->s_blocksize;
  
        set_page_extent_mapped(page);
  
 +      if (!PageUptodate(page)) {
 +              if (cleancache_get_page(page) == 0) {
 +                      BUG_ON(blocksize != PAGE_SIZE);
 +                      goto out;
 +              }
 +      }
 +
        end = page_end;
        while (1) {
                lock_extent(tree, start, end, GFP_NOFS);
                        char *userpage;
                        struct extent_state *cached = NULL;
  
-                       iosize = PAGE_CACHE_SIZE - page_offset;
+                       iosize = PAGE_CACHE_SIZE - pg_offset;
                        userpage = kmap_atomic(page, KM_USER0);
-                       memset(userpage + page_offset, 0, iosize);
+                       memset(userpage + pg_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                             &cached, GFP_NOFS);
                        break;
                }
-               em = get_extent(inode, page, page_offset, cur,
+               em = get_extent(inode, page, pg_offset, cur,
                                end - cur + 1, 0);
-               if (IS_ERR(em) || !em) {
+               if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
                        unlock_extent(tree, cur, end, GFP_NOFS);
                        break;
                        struct extent_state *cached = NULL;
  
                        userpage = kmap_atomic(page, KM_USER0);
-                       memset(userpage + page_offset, 0, iosize);
+                       memset(userpage + pg_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
  
                        unlock_extent_cached(tree, cur, cur + iosize - 1,
                                             &cached, GFP_NOFS);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
                /* the get_extent function already copied into the page */
                        check_page_uptodate(tree, page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
                /* we have an inline extent but it didn't get marked up
                        SetPageError(page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
  
                        unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
                        pnr -= page->index;
                        ret = submit_extent_page(READ, tree, page,
-                                        sector, disk_io_size, page_offset,
+                                        sector, disk_io_size, pg_offset,
                                         bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num,
                                         *bio_flags,
                if (ret)
                        SetPageError(page);
                cur = cur + iosize;
-               page_offset += iosize;
+               pg_offset += iosize;
        }
 +out:
        if (!nr) {
                if (!PageError(page))
                        SetPageUptodate(page);
@@@ -2351,7 -2295,7 +2305,7 @@@ static int __extent_writepage(struct pa
                }
                em = epd->get_extent(inode, page, pg_offset, cur,
                                     end - cur + 1, 1);
-               if (IS_ERR(em) || !em) {
+               if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
                        break;
                }
@@@ -2730,128 -2674,6 +2684,6 @@@ int extent_invalidatepage(struct extent
  }
  
  /*
-  * simple commit_write call, set_range_dirty is used to mark both
-  * the pages and the extent records as dirty
-  */
- int extent_commit_write(struct extent_io_tree *tree,
-                       struct inode *inode, struct page *page,
-                       unsigned from, unsigned to)
- {
-       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-       set_page_extent_mapped(page);
-       set_page_dirty(page);
-       if (pos > inode->i_size) {
-               i_size_write(inode, pos);
-               mark_inode_dirty(inode);
-       }
-       return 0;
- }
- int extent_prepare_write(struct extent_io_tree *tree,
-                        struct inode *inode, struct page *page,
-                        unsigned from, unsigned to, get_extent_t *get_extent)
- {
-       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
-       u64 block_start;
-       u64 orig_block_start;
-       u64 block_end;
-       u64 cur_end;
-       struct extent_map *em;
-       unsigned blocksize = 1 << inode->i_blkbits;
-       size_t page_offset = 0;
-       size_t block_off_start;
-       size_t block_off_end;
-       int err = 0;
-       int iocount = 0;
-       int ret = 0;
-       int isnew;
-       set_page_extent_mapped(page);
-       block_start = (page_start + from) & ~((u64)blocksize - 1);
-       block_end = (page_start + to - 1) | (blocksize - 1);
-       orig_block_start = block_start;
-       lock_extent(tree, page_start, page_end, GFP_NOFS);
-       while (block_start <= block_end) {
-               em = get_extent(inode, page, page_offset, block_start,
-                               block_end - block_start + 1, 1);
-               if (IS_ERR(em) || !em)
-                       goto err;
-               cur_end = min(block_end, extent_map_end(em) - 1);
-               block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
-               block_off_end = block_off_start + blocksize;
-               isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-               if (!PageUptodate(page) && isnew &&
-                   (block_off_end > to || block_off_start < from)) {
-                       void *kaddr;
-                       kaddr = kmap_atomic(page, KM_USER0);
-                       if (block_off_end > to)
-                               memset(kaddr + to, 0, block_off_end - to);
-                       if (block_off_start < from)
-                               memset(kaddr + block_off_start, 0,
-                                      from - block_off_start);
-                       flush_dcache_page(page);
-                       kunmap_atomic(kaddr, KM_USER0);
-               }
-               if ((em->block_start != EXTENT_MAP_HOLE &&
-                    em->block_start != EXTENT_MAP_INLINE) &&
-                   !isnew && !PageUptodate(page) &&
-                   (block_off_end > to || block_off_start < from) &&
-                   !test_range_bit(tree, block_start, cur_end,
-                                   EXTENT_UPTODATE, 1, NULL)) {
-                       u64 sector;
-                       u64 extent_offset = block_start - em->start;
-                       size_t iosize;
-                       sector = (em->block_start + extent_offset) >> 9;
-                       iosize = (cur_end - block_start + blocksize) &
-                               ~((u64)blocksize - 1);
-                       /*
-                        * we've already got the extent locked, but we
-                        * need to split the state such that our end_bio
-                        * handler can clear the lock.
-                        */
-                       set_extent_bit(tree, block_start,
-                                      block_start + iosize - 1,
-                                      EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
-                       ret = submit_extent_page(READ, tree, page,
-                                        sector, iosize, page_offset, em->bdev,
-                                        NULL, 1,
-                                        end_bio_extent_preparewrite, 0,
-                                        0, 0);
-                       if (ret && !err)
-                               err = ret;
-                       iocount++;
-                       block_start = block_start + iosize;
-               } else {
-                       struct extent_state *cached = NULL;
-                       set_extent_uptodate(tree, block_start, cur_end, &cached,
-                                           GFP_NOFS);
-                       unlock_extent_cached(tree, block_start, cur_end,
-                                            &cached, GFP_NOFS);
-                       block_start = cur_end + 1;
-               }
-               page_offset = block_start & (PAGE_CACHE_SIZE - 1);
-               free_extent_map(em);
-       }
-       if (iocount) {
-               wait_extent_bit(tree, orig_block_start,
-                               block_end, EXTENT_LOCKED);
-       }
-       check_page_uptodate(tree, page);
- err:
-       /* FIXME, zero out newly allocated blocks on error */
-       return err;
- }
- /*
   * a helper for releasepage, this tests for areas of the page that
   * are locked or under IO and drops the related state bits if it is safe
   * to drop the page.
@@@ -2909,7 -2731,7 +2741,7 @@@ int try_release_extent_mapping(struct e
                        len = end - start + 1;
                        write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
-                       if (!em || IS_ERR(em)) {
+                       if (IS_ERR_OR_NULL(em)) {
                                write_unlock(&map->lock);
                                break;
                        }
        return try_release_extent_state(map, tree, page, mask);
  }
  
- sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
-               get_extent_t *get_extent)
- {
-       struct inode *inode = mapping->host;
-       struct extent_state *cached_state = NULL;
-       u64 start = iblock << inode->i_blkbits;
-       sector_t sector = 0;
-       size_t blksize = (1 << inode->i_blkbits);
-       struct extent_map *em;
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
-                        0, &cached_state, GFP_NOFS);
-       em = get_extent(inode, NULL, 0, start, blksize, 0);
-       unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
-                            start + blksize - 1, &cached_state, GFP_NOFS);
-       if (!em || IS_ERR(em))
-               return 0;
-       if (em->block_start > EXTENT_MAP_LAST_BYTE)
-               goto out;
-       sector = (em->block_start + start - em->start) >> inode->i_blkbits;
- out:
-       free_extent_map(em);
-       return sector;
- }
  /*
   * helper function for fiemap, which doesn't want to see any holes.
   * This maps until we find something past 'last'
@@@ -2986,7 -2781,7 +2791,7 @@@ static struct extent_map *get_extent_sk
                        break;
                len = (len + sectorsize - 1) & ~(sectorsize - 1);
                em = get_extent(inode, NULL, 0, offset, len, 0);
-               if (!em || IS_ERR(em))
+               if (IS_ERR_OR_NULL(em))
                        return em;
  
                /* if this isn't a hole return it */
@@@ -3040,7 -2835,7 +2845,7 @@@ int extent_fiemap(struct inode *inode, 
         * because there might be preallocation past i_size
         */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
-                                      path, inode->i_ino, -1, 0);
+                                      path, btrfs_ino(inode), -1, 0);
        if (ret < 0) {
                btrfs_free_path(path);
                return ret;
        found_type = btrfs_key_type(&found_key);
  
        /* No extents, but there might be delalloc bits */
-       if (found_key.objectid != inode->i_ino ||
+       if (found_key.objectid != btrfs_ino(inode) ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
                /* have to trust i_size as the end */
                last = (u64)-1;
@@@ -3276,8 -3071,7 +3081,7 @@@ static inline void btrfs_release_extent
  
  struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                          u64 start, unsigned long len,
-                                         struct page *page0,
-                                         gfp_t mask)
+                                         struct page *page0)
  {
        unsigned long num_pages = num_extent_pages(start, len);
        unsigned long i;
        }
        rcu_read_unlock();
  
-       eb = __alloc_extent_buffer(tree, start, len, mask);
+       eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
        if (!eb)
                return NULL;
  
                i = 0;
        }
        for (; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+               p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
                if (!p) {
                        WARN_ON(1);
                        goto free_eb;
@@@ -3387,8 -3181,7 +3191,7 @@@ free_eb
  }
  
  struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-                                        u64 start, unsigned long len,
-                                         gfp_t mask)
+                                        u64 start, unsigned long len)
  {
        struct extent_buffer *eb;
  
@@@ -3449,13 -3242,6 +3252,6 @@@ int clear_extent_buffer_dirty(struct ex
        return 0;
  }
  
- int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
-                                   struct extent_buffer *eb)
- {
-       return wait_on_extent_writeback(tree, eb->start,
-                                       eb->start + eb->len - 1);
- }
  int set_extent_buffer_dirty(struct extent_io_tree *tree,
                             struct extent_buffer *eb)
  {
diff --combined fs/btrfs/relocation.c
@@@ -30,6 -30,7 +30,7 @@@
  #include "btrfs_inode.h"
  #include "async-thread.h"
  #include "free-space-cache.h"
+ #include "inode-map.h"
  
  /*
   * backref_node, mapping_node and tree_block start with this
@@@ -507,6 -508,7 +508,7 @@@ static int update_backref_cache(struct 
        return 1;
  }
  
  static int should_ignore_root(struct btrfs_root *root)
  {
        struct btrfs_root *reloc_root;
         */
        return 1;
  }
  /*
   * find reloc tree by address of tree root
   */
@@@ -709,7 -710,7 +710,7 @@@ again
        WARN_ON(cur->checked);
        if (!list_empty(&cur->upper)) {
                /*
 -               * the backref was added previously when processsing
 +               * the backref was added previously when processing
                 * backref of type BTRFS_TREE_BLOCK_REF_KEY
                 */
                BUG_ON(!list_is_singular(&cur->upper));
                        lower = upper;
                        upper = NULL;
                }
-               btrfs_release_path(root, path2);
+               btrfs_release_path(path2);
  next:
                if (ptr < end) {
                        ptr += btrfs_extent_inline_ref_size(key.type);
                if (ptr >= end)
                        path1->slots[0]++;
        }
-       btrfs_release_path(rc->extent_root, path1);
+       btrfs_release_path(path1);
  
        cur->checked = 1;
        WARN_ON(exist);
@@@ -1409,9 -1410,9 +1410,9 @@@ again
                prev = node;
                entry = rb_entry(node, struct btrfs_inode, rb_node);
  
-               if (objectid < entry->vfs_inode.i_ino)
+               if (objectid < btrfs_ino(&entry->vfs_inode))
                        node = node->rb_left;
-               else if (objectid > entry->vfs_inode.i_ino)
+               else if (objectid > btrfs_ino(&entry->vfs_inode))
                        node = node->rb_right;
                else
                        break;
        if (!node) {
                while (prev) {
                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
-                       if (objectid <= entry->vfs_inode.i_ino) {
+                       if (objectid <= btrfs_ino(&entry->vfs_inode)) {
                                node = prev;
                                break;
                        }
                        return inode;
                }
  
-               objectid = entry->vfs_inode.i_ino + 1;
+               objectid = btrfs_ino(&entry->vfs_inode) + 1;
                if (cond_resched_lock(&root->inode_lock))
                        goto again;
  
@@@ -1470,7 -1471,7 +1471,7 @@@ static int get_new_location(struct inod
                return -ENOMEM;
  
        bytenr -= BTRFS_I(reloc_inode)->index_cnt;
-       ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
+       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
                                       bytenr, 0);
        if (ret < 0)
                goto out;
@@@ -1558,11 -1559,11 +1559,11 @@@ int replace_file_extents(struct btrfs_t
                        if (first) {
                                inode = find_next_inode(root, key.objectid);
                                first = 0;
-                       } else if (inode && inode->i_ino < key.objectid) {
+                       } else if (inode && btrfs_ino(inode) < key.objectid) {
                                btrfs_add_delayed_iput(inode);
                                inode = find_next_inode(root, key.objectid);
                        }
-                       if (inode && inode->i_ino == key.objectid) {
+                       if (inode && btrfs_ino(inode) == key.objectid) {
                                end = key.offset +
                                      btrfs_file_extent_num_bytes(leaf, fi);
                                WARN_ON(!IS_ALIGNED(key.offset,
@@@ -1749,7 -1750,7 +1750,7 @@@ again
  
                btrfs_node_key_to_cpu(path->nodes[level], &key,
                                      path->slots[level]);
-               btrfs_release_path(src, path);
+               btrfs_release_path(path);
  
                path->lowest_level = level;
                ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
@@@ -1893,6 -1894,7 +1894,7 @@@ static int invalidate_extent_cache(stru
        struct inode *inode = NULL;
        u64 objectid;
        u64 start, end;
+       u64 ino;
  
        objectid = min_key->objectid;
        while (1) {
                inode = find_next_inode(root, objectid);
                if (!inode)
                        break;
+               ino = btrfs_ino(inode);
  
-               if (inode->i_ino > max_key->objectid) {
+               if (ino > max_key->objectid) {
                        iput(inode);
                        break;
                }
  
-               objectid = inode->i_ino + 1;
+               objectid = ino + 1;
                if (!S_ISREG(inode->i_mode))
                        continue;
  
-               if (unlikely(min_key->objectid == inode->i_ino)) {
+               if (unlikely(min_key->objectid == ino)) {
                        if (min_key->type > BTRFS_EXTENT_DATA_KEY)
                                continue;
                        if (min_key->type < BTRFS_EXTENT_DATA_KEY)
                        start = 0;
                }
  
-               if (unlikely(max_key->objectid == inode->i_ino)) {
+               if (unlikely(max_key->objectid == ino)) {
                        if (max_key->type < BTRFS_EXTENT_DATA_KEY)
                                continue;
                        if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
@@@ -2496,7 -2499,7 +2499,7 @@@ static int do_relocation(struct btrfs_t
                        path->locks[upper->level] = 0;
  
                        slot = path->slots[upper->level];
-                       btrfs_release_path(NULL, path);
+                       btrfs_release_path(path);
                } else {
                        ret = btrfs_bin_search(upper->eb, key, upper->level,
                                               &slot);
@@@ -2737,7 -2740,7 +2740,7 @@@ static int relocate_tree_block(struct b
                } else {
                        path->lowest_level = node->level;
                        ret = btrfs_search_slot(trans, root, key, path, 0, 1);
-                       btrfs_release_path(root, path);
+                       btrfs_release_path(path);
                        if (ret > 0)
                                ret = 0;
                }
@@@ -2870,7 -2873,7 +2873,7 @@@ int setup_extent_mapping(struct inode *
        struct extent_map *em;
        int ret = 0;
  
-       em = alloc_extent_map(GFP_NOFS);
+       em = alloc_extent_map();
        if (!em)
                return -ENOMEM;
  
@@@ -3119,7 -3122,7 +3122,7 @@@ static int add_tree_block(struct reloc_
  #endif
        }
  
-       btrfs_release_path(rc->extent_root, path);
+       btrfs_release_path(path);
  
        BUG_ON(level == -1);
  
@@@ -3220,7 -3223,7 +3223,7 @@@ static int delete_block_group_cache(str
        key.offset = 0;
  
        inode = btrfs_iget(fs_info->sb, &key, root, NULL);
-       if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+       if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
                if (inode && !IS_ERR(inode))
                        iput(inode);
                return -ENOENT;
@@@ -3505,7 -3508,7 +3508,7 @@@ int add_data_references(struct reloc_co
                }
                path->slots[0]++;
        }
-       btrfs_release_path(rc->extent_root, path);
+       btrfs_release_path(path);
        if (err)
                free_block_list(blocks);
        return err;
@@@ -3568,7 -3571,7 +3571,7 @@@ next
                                            EXTENT_DIRTY);
  
                if (ret == 0 && start <= key.objectid) {
-                       btrfs_release_path(rc->extent_root, path);
+                       btrfs_release_path(path);
                        rc->search_start = end + 1;
                } else {
                        rc->search_start = key.objectid + key.offset;
                        return 0;
                }
        }
-       btrfs_release_path(rc->extent_root, path);
+       btrfs_release_path(path);
        return ret;
  }
  
@@@ -3713,7 -3716,7 +3716,7 @@@ restart
                                flags = BTRFS_EXTENT_FLAG_DATA;
  
                        if (path_change) {
-                               btrfs_release_path(rc->extent_root, path);
+                               btrfs_release_path(path);
  
                                path->search_commit_root = 1;
                                path->skip_locking = 1;
                           (flags & BTRFS_EXTENT_FLAG_DATA)) {
                        ret = add_data_references(rc, &key, path, &blocks);
                } else {
-                       btrfs_release_path(rc->extent_root, path);
+                       btrfs_release_path(path);
                        ret = 0;
                }
                if (ret < 0) {
                }
        }
  
-       btrfs_release_path(rc->extent_root, path);
+       btrfs_release_path(path);
        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
                          GFP_NOFS);
  
@@@ -3867,7 -3870,7 +3870,7 @@@ static int __insert_orphan_inode(struc
        btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
                                          BTRFS_INODE_PREALLOC);
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
  out:
        btrfs_free_path(path);
        return ret;
@@@ -3897,7 -3900,7 +3900,7 @@@ struct inode *create_reloc_inode(struc
        if (IS_ERR(trans))
                return ERR_CAST(trans);
  
-       err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
+       err = btrfs_find_free_objectid(root, &objectid);
        if (err)
                goto out;
  
@@@ -3935,7 -3938,7 +3938,7 @@@ static struct reloc_control *alloc_relo
        INIT_LIST_HEAD(&rc->reloc_roots);
        backref_cache_init(&rc->backref_cache);
        mapping_tree_init(&rc->reloc_root_tree);
-       extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
+       extent_io_tree_init(&rc->processed_blocks, NULL);
        return rc;
  }
  
@@@ -4109,7 -4112,7 +4112,7 @@@ int btrfs_recover_relocation(struct btr
                }
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               btrfs_release_path(root->fs_info->tree_root, path);
+               btrfs_release_path(path);
  
                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
                    key.type != BTRFS_ROOT_ITEM_KEY)
  
                key.offset--;
        }
-       btrfs_release_path(root->fs_info->tree_root, path);
+       btrfs_release_path(path);
  
        if (list_empty(&reloc_roots))
                goto out;
@@@ -4242,7 -4245,7 +4245,7 @@@ int btrfs_reloc_clone_csums(struct inod
  
        disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
        ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
-                                      disk_bytenr + len - 1, &list);
+                                      disk_bytenr + len - 1, &list, 0);
  
        while (!list_empty(&list)) {
                sums = list_entry(list.next, struct btrfs_ordered_sum, list);
diff --combined fs/btrfs/super.c
@@@ -39,8 -39,8 +39,9 @@@
  #include <linux/miscdevice.h>
  #include <linux/magic.h>
  #include <linux/slab.h>
 +#include <linux/cleancache.h>
  #include "compat.h"
+ #include "delayed-inode.h"
  #include "ctree.h"
  #include "disk-io.h"
  #include "transaction.h"
@@@ -160,7 -160,7 +161,7 @@@ enum 
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_subvolrootid, Opt_err,
+       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err,
  };
  
  static match_table_t tokens = {
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
        {Opt_subvolrootid, "subvolrootid=%d"},
+       {Opt_defrag, "autodefrag"},
        {Opt_err, NULL},
  };
  
@@@ -369,6 -370,10 +371,10 @@@ int btrfs_parse_options(struct btrfs_ro
                case Opt_enospc_debug:
                        btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
                        break;
+               case Opt_defrag:
+                       printk(KERN_INFO "btrfs: enabling auto defrag");
+                       btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@@ -507,8 -512,10 +513,10 @@@ static struct dentry *get_default_root(
         */
        dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
        di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
-       if (IS_ERR(di))
+       if (IS_ERR(di)) {
+               btrfs_free_path(path);
                return ERR_CAST(di);
+       }
        if (!di) {
                /*
                 * Ok the default dir item isn't there.  This is weird since
@@@ -625,7 -632,6 +633,7 @@@ static int btrfs_fill_super(struct supe
        sb->s_root = root_dentry;
  
        save_mount_options(sb, data);
 +      cleancache_init_fs(sb);
        return 0;
  
  fail_close:
@@@ -741,7 -747,7 +749,7 @@@ static int btrfs_set_super(struct super
   *      for multiple device setup.  Make sure to keep it in sync.
   */
  static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
-               const char *dev_name, void *data)
+               const char *device_name, void *data)
  {
        struct block_device *bdev = NULL;
        struct super_block *s;
        if (error)
                return ERR_PTR(error);
  
-       error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
+       error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
        if (error)
                goto error_free_subvol_name;
  
@@@ -915,6 -921,32 +923,32 @@@ static int btrfs_remount(struct super_b
        return 0;
  }
  
+ /*
+  * Comparison callback handed to sort(): orders btrfs_device_info entries
+  * by max_avail, largest first (descending sort).
+  *
+  * Returns -1 when dev_info1 has the larger max_avail, 1 when dev_info2
+  * has the larger max_avail, and 0 when both are equal.
+  */
+ static int btrfs_cmp_device_free_bytes(const void *dev_info1,
+                                      const void *dev_info2)
+ {
+       /* The entries are only read, so keep the const qualifier intact. */
+       const struct btrfs_device_info *first = dev_info1;
+       const struct btrfs_device_info *second = dev_info2;
+
+       if (first->max_avail > second->max_avail)
+               return -1;
+       if (first->max_avail < second->max_avail)
+               return 1;
+       return 0;
+ }
+ /*
+  * Sort the devices array by max_avail (the max free extent size recorded
+  * for each device), largest value first.
+  */
+ static inline void btrfs_descending_sort_devices(
+                                       struct btrfs_device_info *devices,
+                                       size_t nr_devices)
+ {
+       const size_t entry_size = sizeof(*devices);
+
+       sort(devices, nr_devices, entry_size,
+            btrfs_cmp_device_free_bytes, NULL);
+ }
  /*
   * The helper to calc the free space on the devices that can be used to store
   * file data.
@@@ -1208,10 -1240,14 +1242,14 @@@ static int __init init_btrfs_fs(void
        if (err)
                goto free_extent_io;
  
-       err = btrfs_interface_init();
+       err = btrfs_delayed_inode_init();
        if (err)
                goto free_extent_map;
  
+       err = btrfs_interface_init();
+       if (err)
+               goto free_delayed_inode;
        err = register_filesystem(&btrfs_fs_type);
        if (err)
                goto unregister_ioctl;
  
  unregister_ioctl:
        btrfs_interface_exit();
+ free_delayed_inode:
+       btrfs_delayed_inode_exit();
  free_extent_map:
        extent_map_exit();
  free_extent_io:
@@@ -1237,6 -1275,7 +1277,7 @@@ free_sysfs
  static void __exit exit_btrfs_fs(void)
  {
        btrfs_destroy_cachep();
+       btrfs_delayed_inode_exit();
        extent_map_exit();
        extent_io_exit();
        btrfs_interface_exit();