
Btrfs: fix use-after-free on root->orphan_block_rsv
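
The hunks below, alongside unrelated cleanups, reorder the error and teardown paths in btrfs_orphan_add() and btrfs_orphan_del(): the atomic_dec() of root->orphan_inodes now happens only after btrfs_orphan_release_metadata() (and, for deletion, btrfs_del_orphan_item()) has finished. Per the comments added in the patch, btrfs_orphan_commit_root() releases root->orphan_block_rsv only once ->orphan_inodes is zero, so decrementing first could let another task free the reservation while this path is still using it. Below is a minimal userspace sketch of that ordering rule only; the names in it (struct shared, release_holder, teardown, rsv) are illustrative and are not btrfs code.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct shared {
        atomic_int holders;     /* plays the role of root->orphan_inodes    */
        int *rsv;               /* plays the role of root->orphan_block_rsv */
};

/*
 * Frees the reservation; gated on the holder count, roughly like
 * btrfs_orphan_commit_root() freeing root->orphan_block_rsv.
 */
static void teardown(struct shared *s)
{
        free(s->rsv);
        s->rsv = NULL;
}

static void release_holder(struct shared *s)
{
        /*
         * Use the reservation first; decrementing before this point
         * would allow a concurrent teardown() to free s->rsv under us,
         * i.e. the use-after-free this kind of reordering avoids.
         */
        *s->rsv -= 1;

        /* ... and only then drop the count that gates teardown(). */
        if (atomic_fetch_sub(&s->holders, 1) == 1)
                teardown(s);
}

int main(void)
{
        struct shared s = { .holders = 1, .rsv = malloc(sizeof(int)) };

        if (!s.rsv)
                return 1;
        *s.rsv = 1;
        release_holder(&s);
        printf("reservation freed: %s\n", s.rsv ? "no" : "yes");
        return 0;
}

Ordering the decrement after the last use is also what lets the patch drop the root->orphan_lock spin_lock/spin_unlock pair from btrfs_orphan_del() instead of adding locking around the counter.
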
[uclinux-h8/linux.git] fs/btrfs/inode.c
index 1c704eb..29b4913 100644
@@ -536,9 +536,14 @@ again:
                 *
                 * If the compression fails for any reason, we set the pages
                 * dirty again later on.
+                *
+                * Note that the remaining part is redirtied: the start
+                * pointer has moved, while the end is the original one.
                 */
-               extent_range_clear_dirty_for_io(inode, start, end);
-               redirty = 1;
+               if (!redirty) {
+                       extent_range_clear_dirty_for_io(inode, start, end);
+                       redirty = 1;
+               }
 
                /* Compression level is applied here and only here */
                ret = btrfs_compress_pages(
@@ -765,8 +770,8 @@ retry:
                         * all those pages down to the drive.
                         */
                        if (!page_started && !ret)
-                               extent_write_locked_range(io_tree,
-                                                 inode, async_extent->start,
+                               extent_write_locked_range(inode,
+                                                 async_extent->start,
                                                  async_extent->start +
                                                  async_extent->ram_size - 1,
                                                  WB_SYNC_ALL);
@@ -1329,8 +1334,11 @@ next_slot:
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               if (cow_start != (u64)-1)
+                                       cur_offset = cow_start;
                                goto error;
+                       }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
@@ -2095,7 +2103,7 @@ again:
                                        PAGE_SIZE);
        if (ordered) {
                unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
-                                    page_end, &cached_state, GFP_NOFS);
+                                    page_end, &cached_state);
                unlock_page(page);
                btrfs_start_ordered_extent(inode, ordered, 1);
                btrfs_put_ordered_extent(ordered);
@@ -2111,14 +2119,21 @@ again:
                goto out;
         }
 
-       btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state,
-                                 0);
+       ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
+                                       &cached_state, 0);
+       if (ret) {
+               mapping_set_error(page->mapping, ret);
+               end_extent_writepage(page, ret, page_start, page_end);
+               ClearPageChecked(page);
+               goto out;
+       }
+
        ClearPageChecked(page);
        set_page_dirty(page);
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                            &cached_state, GFP_NOFS);
+                            &cached_state);
 out_page:
        unlock_page(page);
        put_page(page);
@@ -2710,7 +2725,7 @@ out_free_path:
        btrfs_end_transaction(trans);
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
-                            &cached, GFP_NOFS);
+                            &cached);
        iput(inode);
        return ret;
 }
@@ -3372,6 +3387,11 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
                ret = btrfs_orphan_reserve_metadata(trans, inode);
                ASSERT(!ret);
                if (ret) {
+                       /*
+                        * The dec doesn't need the spin_lock, because
+                        * ->orphan_block_rsv is only released when
+                        * ->orphan_inodes is zero.
+                        */
                        atomic_dec(&root->orphan_inodes);
                        clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                                  &inode->runtime_flags);
@@ -3386,12 +3406,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
        if (insert >= 1) {
                ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
                if (ret) {
-                       atomic_dec(&root->orphan_inodes);
                        if (reserve) {
                                clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                                          &inode->runtime_flags);
                                btrfs_orphan_release_metadata(inode);
                        }
+                       /*
+                        * btrfs_orphan_commit_root may race with us and set
+                        * ->orphan_block_rsv to zero; to avoid that, only
+                        * decrease ->orphan_inodes after everything is done.
+                        */
+                       atomic_dec(&root->orphan_inodes);
                        if (ret != -EEXIST) {
                                clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                                          &inode->runtime_flags);
@@ -3423,28 +3448,26 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
 {
        struct btrfs_root *root = inode->root;
        int delete_item = 0;
-       int release_rsv = 0;
        int ret = 0;
 
-       spin_lock(&root->orphan_lock);
        if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                               &inode->runtime_flags))
                delete_item = 1;
 
+       if (delete_item && trans)
+               ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+
        if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
                               &inode->runtime_flags))
-               release_rsv = 1;
-       spin_unlock(&root->orphan_lock);
+               btrfs_orphan_release_metadata(inode);
 
-       if (delete_item) {
+       /*
+        * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
+        * to zero; to avoid that, only decrease ->orphan_inodes after
+        * everything is done.
+        */
+       if (delete_item)
                atomic_dec(&root->orphan_inodes);
-               if (trans)
-                       ret = btrfs_del_orphan_item(trans, root,
-                                                   btrfs_ino(inode));
-       }
-
-       if (release_rsv)
-               btrfs_orphan_release_metadata(inode);
 
        return ret;
 }
@@ -4757,8 +4780,8 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
        u64 block_start;
        u64 block_end;
 
-       if ((offset & (blocksize - 1)) == 0 &&
-           (!len || ((len & (blocksize - 1)) == 0)))
+       if (IS_ALIGNED(offset, blocksize) &&
+           (!len || IS_ALIGNED(len, blocksize)))
                goto out;
 
        block_start = round_down(from, blocksize);
@@ -4800,7 +4823,7 @@ again:
        ordered = btrfs_lookup_ordered_extent(inode, block_start);
        if (ordered) {
                unlock_extent_cached(io_tree, block_start, block_end,
-                                    &cached_state, GFP_NOFS);
+                                    &cached_state);
                unlock_page(page);
                put_page(page);
                btrfs_start_ordered_extent(inode, ordered, 1);
@@ -4817,7 +4840,7 @@ again:
                                        &cached_state, 0);
        if (ret) {
                unlock_extent_cached(io_tree, block_start, block_end,
-                                    &cached_state, GFP_NOFS);
+                                    &cached_state);
                goto out_unlock;
        }
 
@@ -4836,8 +4859,7 @@ again:
        }
        ClearPageChecked(page);
        set_page_dirty(page);
-       unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
-                            GFP_NOFS);
+       unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
 
 out_unlock:
        if (ret)
@@ -4938,7 +4960,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                if (!ordered)
                        break;
                unlock_extent_cached(io_tree, hole_start, block_end - 1,
-                                    &cached_state, GFP_NOFS);
+                                    &cached_state);
                btrfs_start_ordered_extent(inode, ordered, 1);
                btrfs_put_ordered_extent(ordered);
        }
@@ -5003,8 +5025,7 @@ next:
                        break;
        }
        free_extent_map(em);
-       unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
-                            GFP_NOFS);
+       unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
        return err;
 }
 
@@ -5268,7 +5289,7 @@ void btrfs_evict_inode(struct inode *inode)
        trace_btrfs_inode_evict(inode);
 
        if (!root) {
-               kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+               clear_inode(inode);
                return;
        }
 
@@ -6305,7 +6326,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        }
        /*
         * index_cnt is ignored for everything but a dir,
-        * btrfs_get_inode_index_count has an explanation for the magic
+        * btrfs_set_inode_index_count has an explanation for the magic
         * number
         */
        BTRFS_I(inode)->index_cnt = 2;
@@ -6807,68 +6828,6 @@ out_fail_inode:
        goto out_fail;
 }
 
-/* Find next extent map of a given extent map, caller needs to ensure locks */
-static struct extent_map *next_extent_map(struct extent_map *em)
-{
-       struct rb_node *next;
-
-       next = rb_next(&em->rb_node);
-       if (!next)
-               return NULL;
-       return container_of(next, struct extent_map, rb_node);
-}
-
-static struct extent_map *prev_extent_map(struct extent_map *em)
-{
-       struct rb_node *prev;
-
-       prev = rb_prev(&em->rb_node);
-       if (!prev)
-               return NULL;
-       return container_of(prev, struct extent_map, rb_node);
-}
-
-/* helper for btfs_get_extent.  Given an existing extent in the tree,
- * the existing extent is the nearest extent to map_start,
- * and an extent that you want to insert, deal with overlap and insert
- * the best fitted new extent into the tree.
- */
-static int merge_extent_mapping(struct extent_map_tree *em_tree,
-                               struct extent_map *existing,
-                               struct extent_map *em,
-                               u64 map_start)
-{
-       struct extent_map *prev;
-       struct extent_map *next;
-       u64 start;
-       u64 end;
-       u64 start_diff;
-
-       BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
-
-       if (existing->start > map_start) {
-               next = existing;
-               prev = prev_extent_map(next);
-       } else {
-               prev = existing;
-               next = next_extent_map(prev);
-       }
-
-       start = prev ? extent_map_end(prev) : em->start;
-       start = max_t(u64, start, em->start);
-       end = next ? next->start : extent_map_end(em);
-       end = min_t(u64, end, extent_map_end(em));
-       start_diff = start - em->start;
-       em->start = start;
-       em->len = end - start;
-       if (em->block_start < EXTENT_MAP_LAST_BYTE &&
-           !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-               em->block_start += start_diff;
-               em->block_len -= start_diff;
-       }
-       return add_extent_mapping(em_tree, em, 0);
-}
-
 static noinline int uncompress_inline(struct btrfs_path *path,
                                      struct page *page,
                                      size_t pg_offset, u64 extent_offset,
@@ -6943,7 +6902,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
        struct extent_map *em = NULL;
        struct extent_map_tree *em_tree = &inode->extent_tree;
        struct extent_io_tree *io_tree = &inode->io_tree;
-       struct btrfs_trans_handle *trans = NULL;
        const bool new_inline = !page || create;
 
        read_lock(&em_tree->lock);
@@ -6984,8 +6942,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
                path->reada = READA_FORWARD;
        }
 
-       ret = btrfs_lookup_file_extent(trans, root, path,
-                                      objectid, start, trans != NULL);
+       ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
        if (ret < 0) {
                err = ret;
                goto out;
@@ -7118,7 +7075,6 @@ not_found:
        em->len = len;
 not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
-       set_bit(EXTENT_FLAG_VACANCY, &em->flags);
 insert:
        btrfs_release_path(path);
        if (em->start > start || extent_map_end(em) <= start) {
@@ -7131,62 +7087,13 @@ insert:
 
        err = 0;
        write_lock(&em_tree->lock);
-       ret = add_extent_mapping(em_tree, em, 0);
-       /* it is possible that someone inserted the extent into the tree
-        * while we had the lock dropped.  It is also possible that
-        * an overlapping map exists in the tree
-        */
-       if (ret == -EEXIST) {
-               struct extent_map *existing;
-
-               ret = 0;
-
-               existing = search_extent_mapping(em_tree, start, len);
-               /*
-                * existing will always be non-NULL, since there must be
-                * extent causing the -EEXIST.
-                */
-               if (existing->start == em->start &&
-                   extent_map_end(existing) >= extent_map_end(em) &&
-                   em->block_start == existing->block_start) {
-                       /*
-                        * The existing extent map already encompasses the
-                        * entire extent map we tried to add.
-                        */
-                       free_extent_map(em);
-                       em = existing;
-                       err = 0;
-
-               } else if (start >= extent_map_end(existing) ||
-                   start <= existing->start) {
-                       /*
-                        * The existing extent map is the one nearest to
-                        * the [start, start + len) range which overlaps
-                        */
-                       err = merge_extent_mapping(em_tree, existing,
-                                                  em, start);
-                       free_extent_map(existing);
-                       if (err) {
-                               free_extent_map(em);
-                               em = NULL;
-                       }
-               } else {
-                       free_extent_map(em);
-                       em = existing;
-                       err = 0;
-               }
-       }
+       err = btrfs_add_extent_mapping(em_tree, &em, start, len);
        write_unlock(&em_tree->lock);
 out:
 
        trace_btrfs_get_extent(root, inode, em);
 
        btrfs_free_path(path);
-       if (trans) {
-               ret = btrfs_end_transaction(trans);
-               if (!err)
-                       err = ret;
-       }
        if (err) {
                free_extent_map(em);
                return ERR_PTR(err);
@@ -7308,7 +7215,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
                        em->block_start = EXTENT_MAP_DELALLOC;
                        em->block_len = found;
                }
-       } else if (hole_em) {
+       } else {
                return hole_em;
        }
 out:
@@ -7625,7 +7532,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
                        break;
 
                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                    cached_state, GFP_NOFS);
+                                    cached_state);
 
                if (ordered) {
                        /*
@@ -7937,15 +7844,12 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
 
        BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
-       bio_get(bio);
-
        ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR);
        if (ret)
-               goto err;
+               return ret;
 
        ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
-err:
-       bio_put(bio);
+
        return ret;
 }
 
@@ -8448,8 +8352,6 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
        if (async_submit)
                async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
-       bio_get(bio);
-
        if (!write) {
                ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
                if (ret)
@@ -8482,7 +8384,6 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
 map:
        ret = btrfs_map_bio(fs_info, bio, 0, 0);
 err:
-       bio_put(bio);
        return ret;
 }
 
@@ -8851,7 +8752,6 @@ int btrfs_readpage(struct file *file, struct page *page)
 
 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-       struct extent_io_tree *tree;
        struct inode *inode = page->mapping->host;
        int ret;
 
@@ -8870,8 +8770,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
                redirty_page_for_writepage(wbc, page);
                return AOP_WRITEPAGE_ACTIVATE;
        }
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
-       ret = extent_write_full_page(tree, page, wbc);
+       ret = extent_write_full_page(page, wbc);
        btrfs_add_delayed_iput(inode);
        return ret;
 }
@@ -9120,7 +9019,7 @@ again:
                        PAGE_SIZE);
        if (ordered) {
                unlock_extent_cached(io_tree, page_start, page_end,
-                                    &cached_state, GFP_NOFS);
+                                    &cached_state);
                unlock_page(page);
                btrfs_start_ordered_extent(inode, ordered, 1);
                btrfs_put_ordered_extent(ordered);
@@ -9153,7 +9052,7 @@ again:
                                        &cached_state, 0);
        if (ret) {
                unlock_extent_cached(io_tree, page_start, page_end,
-                                    &cached_state, GFP_NOFS);
+                                    &cached_state);
                ret = VM_FAULT_SIGBUS;
                goto out_unlock;
        }
@@ -9179,7 +9078,7 @@ again:
        BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
        BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
 
-       unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
+       unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
 
 out_unlock:
        if (!ret) {