OSDN Git Service

e2fsck: Add support to check journal checksums
author: Theodore Ts'o <tytso@mit.edu>
Fri, 23 May 2008 05:00:19 +0000 (01:00 -0400)
committer: Theodore Ts'o <tytso@mit.edu>
Sat, 7 Jun 2008 03:38:38 +0000 (23:38 -0400)
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
e2fsck/jfs_user.h
e2fsck/journal.c
e2fsck/problem.c
e2fsck/problem.h
e2fsck/recovery.c
lib/ext2fs/jfs_compat.h
lib/ext2fs/kernel-jbd.h

index 9da5a16..09d348c 100644 (file)
@@ -70,6 +70,13 @@ typedef unsigned int __be32;
 extern kmem_cache_t * do_cache_create(int len);
 extern void do_cache_destroy(kmem_cache_t *cache);
        
+#define __init
+
+/*
+ * Now pull in the real linux/jfs.h definitions.
+ */
+#include <ext2fs/kernel-jbd.h>
+
 #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS))
 #ifdef E2FSCK_INCLUDE_INLINE_FUNCS
 #define _INLINE_ extern
@@ -94,15 +101,20 @@ _INLINE_ void do_cache_destroy(kmem_cache_t *cache)
 {
        free(cache);
 }
-#undef _INLINE_
-#endif
-
-#define __init
 
 /*
- * Now pull in the real linux/jfs.h definitions.
+ * helper functions to deal with 32 or 64bit block numbers.
  */
-#include <ext2fs/kernel-jbd.h>
+_INLINE_ size_t journal_tag_bytes(journal_t *journal)
+{
+       if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT))
+               return JBD_TAG_SIZE64;
+       else
+               return JBD_TAG_SIZE32;
+}
+
+#undef _INLINE_
+#endif
 
 /*
  * Kernel compatibility functions are defined in journal.c
index ec0af4b..b701d19 100644 (file)
@@ -73,8 +73,12 @@ struct buffer_head *getblk(kdev_t kdev, blk_t blocknr, int blocksize)
        if (!bh)
                return NULL;
 
+#ifdef CONFIG_JBD_DEBUG
+       if (journal_enable_debug >= 3)
+               bh_count++;
+#endif
        jfs_debug(4, "getblk for block %lu (%d bytes)(total %d)\n",
-                 (unsigned long) blocknr, blocksize, ++bh_count);
+                 (unsigned long) blocknr, blocksize, bh_count);
 
        bh->b_ctx = kdev->k_ctx;
        if (kdev->k_dev == K_DEV_FS)
@@ -798,9 +802,12 @@ no_has_journal:
 
 static errcode_t recover_ext3_journal(e2fsck_t ctx)
 {
+       struct problem_context  pctx;
        journal_t *journal;
        int retval;
 
+       clear_problem_context(&pctx);
+
        journal_init_revoke_caches();
        retval = e2fsck_get_journal(ctx, &journal);
        if (retval)
@@ -818,6 +825,14 @@ static errcode_t recover_ext3_journal(e2fsck_t ctx)
        if (retval)
                goto errout;
        
+       if (journal->j_failed_commit) {
+               pctx.ino = journal->j_failed_commit;
+               fix_problem(ctx, PR_0_JNL_TXN_CORRUPT, &pctx);
+               ctx->fs->super->s_state |= EXT2_ERROR_FS;
+               ext2fs_mark_super_dirty(ctx->fs);
+       }
+
+
        if (journal->j_superblock->s_errno) {
                ctx->fs->super->s_state |= EXT2_ERROR_FS;
                ext2fs_mark_super_dirty(ctx->fs);
index 81ad9b0..0fbcd89 100644 (file)
@@ -376,6 +376,11 @@ static struct e2fsck_problem problem_table[] = {
          N_("Last @g @b @B uninitialized.  "),
             PROMPT_FIX, PR_PREEN_OK },
 
+       /* Journal transaction found corrupt */
+       { PR_0_JNL_TXN_CORRUPT,
+         N_("Journal transaction %i was corrupt, replay was aborted.\n"),
+         PROMPT_NONE, 0 },
+
        /* Pass 1 errors */
 
        /* Pass 1: Checking inodes, blocks, and sizes */
index 5f90a18..24e7ed7 100644 (file)
@@ -211,6 +211,9 @@ struct problem_context {
 /* Last group block bitmap is uninitialized. */
 #define PR_0_BB_UNINIT_LAST                    0x000039
 
+/* Journal transaction found corrupt */
+#define PR_0_JNL_TXN_CORRUPT                   0x00003A
+
 /*
  * Pass 1 errors
  */
index 43bc5e5..2edb576 100644 (file)
@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
  * Count the number of in-use tags in a journal descriptor block.
  */
 
-static int count_tags(struct buffer_head *bh, int size)
+static int count_tags(journal_t *journal, struct buffer_head *bh)
 {
        char *                  tagp;
        journal_block_tag_t *   tag;
-       int                     nr = 0;
+       int                     nr = 0, size = journal->j_blocksize;
+       int                     tag_bytes = journal_tag_bytes(journal);
 
        tagp = &bh->b_data[sizeof(journal_header_t)];
 
-       while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
+       while ((tagp - bh->b_data + tag_bytes) <= size) {
                tag = (journal_block_tag_t *) tagp;
 
                nr++;
-               tagp += sizeof(journal_block_tag_t);
+               tagp += tag_bytes;
                if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
                        tagp += 16;
 
@@ -307,6 +308,46 @@ int journal_skip_recovery(journal_t *journal)
        return err;
 }
 
+static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
+{
+       unsigned long long block = be32_to_cpu(tag->t_blocknr);
+       if (tag_bytes > JBD_TAG_SIZE32)
+               block |= (__u64)be32_to_cpu(tag->t_blocknr_high) << 32;
+       return block;
+}
+
+/*
+ * calc_chksums calculates the checksums for the blocks described in the
+ * descriptor block.
+ */
+static int calc_chksums(journal_t *journal, struct buffer_head *bh,
+                       unsigned long *next_log_block, __u32 *crc32_sum)
+{
+       int i, num_blks, err;
+       unsigned long io_block;
+       struct buffer_head *obh;
+
+       num_blks = count_tags(journal, bh);
+       /* Calculate checksum of the descriptor block. */
+       *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
+
+       for (i = 0; i < num_blks; i++) {
+               io_block = (*next_log_block)++;
+               wrap(journal, *next_log_block);
+               err = jread(&obh, journal, io_block);
+               if (err) {
+                       printk(KERN_ERR "JBD: IO error %d recovering block "
+                               "%lu in log\n", err, io_block);
+                       return 1;
+               } else {
+                       *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
+                                    obh->b_size);
+               }
+               brelse(obh);
+       }
+       return 0;
+}
+
 static int do_one_pass(journal_t *journal,
                        struct recovery_info *info, enum passtype pass)
 {
@@ -318,11 +359,13 @@ static int do_one_pass(journal_t *journal,
        struct buffer_head *    bh;
        unsigned int            sequence;
        int                     blocktype;
+       int                     tag_bytes = journal_tag_bytes(journal);
+       __u32                   crc32_sum = ~0; /* Transactional Checksums */
 
        /* Precompute the maximum metadata descriptors in a descriptor block */
        int                     MAX_BLOCKS_PER_DESC;
        MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
-                              / sizeof(journal_block_tag_t));
+                              / tag_bytes);
 
        /*
         * First thing is to establish what we expect to find in the log
@@ -409,11 +452,24 @@ static int do_one_pass(journal_t *journal,
                switch(blocktype) {
                case JFS_DESCRIPTOR_BLOCK:
                        /* If it is a valid descriptor block, replay it
-                        * in pass REPLAY; otherwise, just skip over the
-                        * blocks it describes. */
+                        * in pass REPLAY; if journal_checksums enabled, then
+                        * calculate checksums in PASS_SCAN, otherwise,
+                        * just skip over the blocks it describes. */
                        if (pass != PASS_REPLAY) {
-                               next_log_block +=
-                                       count_tags(bh, journal->j_blocksize);
+                               if (pass == PASS_SCAN &&
+                                   JFS_HAS_COMPAT_FEATURE(journal,
+                                           JFS_FEATURE_COMPAT_CHECKSUM) &&
+                                   !info->end_transaction) {
+                                       if (calc_chksums(journal, bh,
+                                                       &next_log_block,
+                                                       &crc32_sum)) {
+                                               brelse(bh);
+                                               break;
+                                       }
+                                       brelse(bh);
+                                       continue;
+                               }
+                               next_log_block += count_tags(journal, bh);
                                wrap(journal, next_log_block);
                                brelse(bh);
                                continue;
@@ -424,7 +480,7 @@ static int do_one_pass(journal_t *journal,
                         * getting done here! */
 
                        tagp = &bh->b_data[sizeof(journal_header_t)];
-                       while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
+                       while ((tagp - bh->b_data + tag_bytes)
                               <= journal->j_blocksize) {
                                unsigned long io_block;
 
@@ -494,7 +550,7 @@ static int do_one_pass(journal_t *journal,
                                }
 
                        skip_write:
-                               tagp += sizeof(journal_block_tag_t);
+                               tagp += tag_bytes;
                                if (!(flags & JFS_FLAG_SAME_UUID))
                                        tagp += 16;
 
@@ -506,9 +562,98 @@ static int do_one_pass(journal_t *journal,
                        continue;
 
                case JFS_COMMIT_BLOCK:
-                       /* Found an expected commit block: not much to
-                        * do other than move on to the next sequence
+                       jbd_debug(3, "Commit block for #%u found\n",
+                                 next_commit_ID);
+                       /*     How to differentiate between interrupted commit
+                        *               and journal corruption ?
+                        *
+                        * {nth transaction}
+                        *        Checksum Verification Failed
+                        *                       |
+                        *               ____________________
+                        *              |                    |
+                        *      async_commit             sync_commit
+                        *              |                    |
+                        *              | GO TO NEXT    "Journal Corruption"
+                        *              | TRANSACTION
+                        *              |
+                        * {(n+1)th transaction}
+                        *              |
+                        *       _______|______________
+                        *      |                     |
+                        * Commit block found   Commit block not found
+                        *      |                     |
+                        * "Journal Corruption"       |
+                        *               _____________|_________
+                        *              |                       |
+                        *      nth trans corrupt       OR   nth trans
+                        *      and (n+1)th interrupted     interrupted
+                        *      before commit block
+                        *      could reach the disk.
+                        *      (Cannot find the difference in above
+                        *       mentioned conditions. Hence assume
+                        *       "Interrupted Commit".)
+                        */
+
+                       /* Found an expected commit block: if checksums
+                        * are present verify them in PASS_SCAN; else not
+                        * much to do other than move on to the next sequence
                         * number. */
+                       if (pass == PASS_SCAN &&
+                           JFS_HAS_COMPAT_FEATURE(journal,
+                                   JFS_FEATURE_COMPAT_CHECKSUM)) {
+                               int chksum_err, chksum_seen;
+                               struct commit_header *cbh =
+                                       (struct commit_header *)bh->b_data;
+                               unsigned found_chksum =
+                                       be32_to_cpu(cbh->h_chksum[0]);
+
+                               chksum_err = chksum_seen = 0;
+
+                               jbd_debug(3, "Checksums %x %x\n",
+                                         crc32_sum, found_chksum);
+                               if (info->end_transaction) {
+                                       journal->j_failed_commit =
+                                               info->end_transaction;
+                                       brelse(bh);
+                                       break;
+                               }
+
+                               if (crc32_sum == found_chksum &&
+                                   cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
+                                   cbh->h_chksum_size ==
+                                               JBD2_CRC32_CHKSUM_SIZE)
+                                      chksum_seen = 1;
+                               else if (!(cbh->h_chksum_type == 0 &&
+                                            cbh->h_chksum_size == 0 &&
+                                            found_chksum == 0 &&
+                                            !chksum_seen))
+                               /*
+                                * If fs is mounted using an old kernel and then
+                                * kernel with journal_chksum is used then we
+                                * get a situation where the journal flag has
+                                * checksum flag set but checksums are not
+                                * present i.e chksum = 0, in the individual
+                                * commit blocks.
+                                * Hence to avoid checksum failures, in this
+                                * situation, this extra check is added.
+                                */
+                                               chksum_err = 1;
+
+                               if (chksum_err) {
+                                       info->end_transaction = next_commit_ID;
+                                       jbd_debug(1, "Checksum_err\n");
+
+                                       if (!JFS_HAS_INCOMPAT_FEATURE(journal,
+                                          JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)){
+                                               journal->j_failed_commit =
+                                                       next_commit_ID;
+                                               brelse(bh);
+                                               break;
+                                       }
+                               }
+                               crc32_sum = ~0;
+                       }
                        brelse(bh);
                        next_commit_ID++;
                        continue;
@@ -544,9 +689,10 @@ static int do_one_pass(journal_t *journal,
         * transaction marks the end of the valid log.
         */
 
-       if (pass == PASS_SCAN)
-               info->end_transaction = next_commit_ID;
-       else {
+       if (pass == PASS_SCAN) {
+               if (!info->end_transaction)
+                       info->end_transaction = next_commit_ID;
+       } else {
                /* It's really bad news if different passes end up at
                 * different places (but possible due to IO errors). */
                if (info->end_transaction != next_commit_ID) {
index 30ad1ef..7b8aafd 100644 (file)
@@ -45,6 +45,7 @@ struct journal_s
        tid_t                   j_transaction_sequence;
        __u8                    j_uuid[16];
        struct jbd_revoke_table_s *j_revoke;
+       tid_t                   j_failed_commit;
 };
 
 #define J_ASSERT(assert)                                               \
index 2a099d8..158d764 100644 (file)
@@ -108,6 +108,30 @@ typedef struct journal_header_s
        __u32           h_sequence;
 } journal_header_t;
 
+/*
+ * Checksum types.
+ */
+#define JBD2_CRC32_CHKSUM   1
+#define JBD2_MD5_CHKSUM     2
+#define JBD2_SHA1_CHKSUM    3
+
+#define JBD2_CRC32_CHKSUM_SIZE 4
+
+#define JBD2_CHECKSUM_BYTES (32 / sizeof(__u32))
+/*
+ * Commit block header for storing transactional checksums:
+ */
+struct commit_header {
+       __u32           h_magic;
+       __u32           h_blocktype;
+       __u32           h_sequence;
+       unsigned char   h_chksum_type;
+       unsigned char   h_chksum_size;
+       unsigned char   h_padding[2];
+       __u32           h_chksum[JBD2_CHECKSUM_BYTES];
+       __u64           h_commit_sec;
+       __u32           h_commit_nsec;
+};
 
 /* 
  * The block tag: used to describe a single buffer in the journal 
@@ -116,8 +140,12 @@ typedef struct journal_block_tag_s
 {
        __u32           t_blocknr;      /* The on-disk block number */
        __u32           t_flags;        /* See below */
+       __u32           t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
+#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t))
+#define JBD_TAG_SIZE32 (8)
+
 /* 
  * The revoke descriptor: used on disk to describe a series of blocks to
  * be revoked from the log 
@@ -194,12 +222,19 @@ typedef struct journal_superblock_s
        ((j)->j_format_version >= 2 &&                                  \
         ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
 
+#define JFS_FEATURE_COMPAT_CHECKSUM    0x00000001
+
 #define JFS_FEATURE_INCOMPAT_REVOKE    0x00000001
 
+#define JFS_FEATURE_INCOMPAT_REVOKE            0x00000001
+#define JFS_FEATURE_INCOMPAT_64BIT             0x00000002
+#define JFS_FEATURE_INCOMPAT_ASYNC_COMMIT      0x00000004
+
 /* Features known to this kernel version: */
 #define JFS_KNOWN_COMPAT_FEATURES      0
 #define JFS_KNOWN_ROCOMPAT_FEATURES    0
-#define JFS_KNOWN_INCOMPAT_FEATURES    JFS_FEATURE_INCOMPAT_REVOKE
+#define JFS_KNOWN_INCOMPAT_FEATURES    (JFS_FEATURE_INCOMPAT_REVOKE|\
+                                        JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)
 
 #ifdef __KERNEL__
 
@@ -548,6 +583,9 @@ struct journal_s
        /* The revoke table: maintains the list of revoked blocks in the
            current transaction. */
        struct jbd_revoke_table_s *j_revoke;
+
+       /* Failed journal commit ID */
+       unsigned int            j_failed_commit;
 };
 
 /*