Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...

[uclinux-h8/linux.git] / fs / btrfs / tree-log.c
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 1ce80c1..9c45431 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
         return 0;
  }
  
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If an xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if an xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *inode,
+                               struct btrfs_path *path,
+                               struct btrfs_path *dst_path)
+{
+       int ret;
+       struct btrfs_key key;
+       const u64 ino = btrfs_ino(inode);
+       int ins_nr = 0; /* length of the pending run of xattr items */
+       int start_slot = 0; /* slot at which the pending run begins */
+
+       key.objectid = ino;
+       key.type = BTRFS_XATTR_ITEM_KEY;
+       key.offset = 0; /* start at the lowest xattr key of this inode */
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       while (true) {
+               int slot = path->slots[0];
+               struct extent_buffer *leaf = path->nodes[0];
+               int nritems = btrfs_header_nritems(leaf);
+
+               if (slot >= nritems) { /* walked past the last item of this leaf */
+                       if (ins_nr > 0) { /* flush the run before leaving the leaf */
+                               u64 last_extent = 0;
+
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, start_slot,
+                                                ins_nr, 1, 0);
+                               /* can't be 1, extent items aren't processed */
+                               ASSERT(ret <= 0);
+                               if (ret < 0)
+                                       return ret;
+                               ins_nr = 0; /* run copied, start a new one */
+                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       else if (ret > 0) /* presumably no further leaf - stop scanning */
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+                       break; /* left the xattr key range of this inode */
+
+               if (ins_nr == 0) /* first item of a new run */
+                       start_slot = slot;
+               ins_nr++;
+               path->slots[0]++;
+               cond_resched();
+       }
+       if (ins_nr > 0) { /* copy the run still pending when the loop ended */
+               u64 last_extent = 0;
+
+               ret = copy_items(trans, inode, dst_path, path,
+                                &last_extent, start_slot,
+                                ins_nr, 1, 0);
+               /* can't be 1, extent items aren't processed */
+               ASSERT(ret <= 0);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ *      1) create file with 128Kb of data
+ *      2) truncate file to 64Kb
+ *      3) truncate file to 256Kb
+ *      4) fsync file
+ *      5) <crash/power failure>
+ *      6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, that is already done
+ * by copy_items(). We also only need to do this in the full sync path, where we
+ * look up extents in the fs/subvol tree only. In the fast path case, we look
+ * up the list of modified extent maps and if any represents a hole, we insert
+ * a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct inode *inode,
+                                  struct btrfs_path *path)
+{
+       int ret;
+       struct btrfs_key key;
+       u64 hole_start;
+       u64 hole_size;
+       struct extent_buffer *leaf;
+       struct btrfs_root *log = root->log_root;
+       const u64 ino = btrfs_ino(inode);
+       const u64 i_size = i_size_read(inode);
+
+       if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) /* only needed with NO_HOLES */
+               return 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = (u64)-1; /* highest offset: search lands after the last extent item */
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ASSERT(ret != 0); /* an exact match at offset (u64)-1 is not expected */
+       if (ret < 0)
+               return ret;
+
+       ASSERT(path->slots[0] > 0);
+       path->slots[0]--; /* step back to the item preceding the search position */
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+       if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+               /* inode does not have any extents */
+               hole_start = 0;
+               hole_size = i_size;
+       } else {
+               struct btrfs_file_extent_item *extent;
+               u64 len;
+
+               /*
+                * If there's an extent beyond i_size, an explicit hole was
+                * already inserted by copy_items().
+                */
+               if (key.offset >= i_size)
+                       return 0;
+
+               extent = btrfs_item_ptr(leaf, path->slots[0],
+                                       struct btrfs_file_extent_item);
+
+               if (btrfs_file_extent_type(leaf, extent) ==
+                   BTRFS_FILE_EXTENT_INLINE) {
+                       len = btrfs_file_extent_inline_len(leaf,
+                                                          path->slots[0],
+                                                          extent);
+                       ASSERT(len == i_size); /* NOTE(review): may be too strict for compressed inline extents - confirm */
+                       return 0;
+               }
+
+               len = btrfs_file_extent_num_bytes(leaf, extent);
+               /* Last extent goes beyond i_size, no need to log a hole. */
+               if (key.offset + len > i_size)
+                       return 0;
+               hole_start = key.offset + len; /* hole begins where the last extent ends */
+               hole_size = i_size - hole_start;
+       }
+       btrfs_release_path(path); /* the leaf is not read again below */
+
+       /* Last extent ends at i_size. */
+       if (hole_size == 0)
+               return 0;
+
+       hole_size = ALIGN(hole_size, root->sectorsize); /* align hole length to the sector size */
+       ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+                                      hole_size, 0, hole_size, 0, 0, 0); /* inserted into root->log_root */
+       return ret;
+}
+
  /* log a single inode in the tree log.
   * At least one parent directory for this inode must exist in the tree
   * or be logged already.
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
         u64 ino = btrfs_ino(inode);
         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
         u64 logged_isize = 0;
+       bool need_log_inode_item = true;
  
         path = btrfs_alloc_path();
         if (!path)
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                 } else {
                         if (inode_only == LOG_INODE_ALL)
                                 fast_search = true;
-                       ret = log_inode_item(trans, log, dst_path, inode);
-                       if (ret) {
-                               err = ret;
-                               goto out_unlock;
-                       }
                         goto log_extents;
                 }
  
@@ -4290,6 +4467,28 @@ again:
                 if (min_key.type > max_key.type)
                         break;
  
+               if (min_key.type == BTRFS_INODE_ITEM_KEY)
+                       need_log_inode_item = false;
+
+               /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+               if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (ins_nr == 0)
+                               goto next_slot;
+                       ret = copy_items(trans, inode, dst_path, path,
+                                        &last_extent, ins_start_slot,
+                                        ins_nr, inode_only, logged_isize);
+                       if (ret < 0) {
+                               err = ret;
+                               goto out_unlock;
+                       }
+                       ins_nr = 0;
+                       if (ret) {
+                               btrfs_release_path(path);
+                               continue;
+                       }
+                       goto next_slot;
+               }
+
                 src = path->nodes[0];
                 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
                         ins_nr++;
@@ -4357,9 +4556,26 @@ next_slot:
                 ins_nr = 0;
         }
  
+       btrfs_release_path(path);
+       btrfs_release_path(dst_path);
+       err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+       if (err)
+               goto out_unlock;
+       if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+               btrfs_release_path(path);
+               btrfs_release_path(dst_path);
+               err = btrfs_log_trailing_hole(trans, root, inode, path);
+               if (err)
+                       goto out_unlock;
+       }
  log_extents:
         btrfs_release_path(path);
         btrfs_release_path(dst_path);
+       if (need_log_inode_item) {
+               err = log_inode_item(trans, log, dst_path, inode);
+               if (err)
+                       goto out_unlock;
+       }
         if (fast_search) {
                 /*
                  * Some ordered extents started by fsync might have completed