2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "ext4fixup.h"
19 #ifndef _LARGEFILE64_SOURCE
20 #define _LARGEFILE64_SOURCE 1
25 #include <sys/types.h>
28 #include <sparse/sparse.h>
31 #include "ext4_utils/ext4_extents.h"
32 #include "ext4_utils/ext4_utils.h"
33 #include "ext4_utils/make_ext4fs.h"
39 #if defined(__APPLE__) && defined(__MACH__)
44 /* The inode block count for a file/directory is in units of 512 byte blocks,
45 * _NOT_ the filesystem block size!
47 #define INODE_BLOCK_SIZE 512
49 #define MAX_EXT4_BLOCK_SIZE 4096
51 /* The three modes that recurse_dir() can be in */
52 #define SANITY_CHECK_PASS 1
53 #define MARK_INODE_NUMS 2
54 #define UPDATE_INODE_NUMS 3
56 /* Magic numbers to indicate what state the update process is in */
57 #define MAGIC_STATE_MARKING_INUMS 0x7000151515565512ll
58 #define MAGIC_STATE_UPDATING_INUMS 0x6121131211735123ll
59 #define MAGIC_STATE_UPDATING_SB 0x15e1715151558477ll
61 /* Internal state variables corresponding to the magic numbers */
63 #define STATE_MARKING_INUMS 1
64 #define STATE_UPDATING_INUMS 2
65 #define STATE_UPDATING_SB 3
67 /* Used for automated testing of the ability of this program to stop and be restarted without error */
68 static int bail_phase = 0;
69 static int bail_loc = 0;
70 static int bail_count = 0;
/* verbose is handed to read_ext() by ext4fixup_internal().
 * NOTE(review): no_write is presumably the dry-run switch mentioned in the
 * sanity-check comment of ext4fixup_internal() - confirm against its users.
 */
74 static int verbose = 0;
75 static int no_write = 0;
/* Inodes per block group after the fixup; ext4fixup_internal() computes it by
 * aligning info.inodes_per_group to the number of inodes per filesystem block.
 */
77 static int new_inodes_per_group = 0;
/* In-memory copy of the fixup state: recorded by set_fs_fixup_state() and
 * returned by the early-return path of get_fs_fixup_state().
 */
79 static int no_write_fixup_state = 0;
/* Translate an inode number from the old layout to the new one.
 * Inode numbers are 1-based: the block group and the index inside the group
 * are derived from the current info.inodes_per_group, and the same position
 * is then renumbered assuming new_inodes_per_group inodes in every group.
 */
81 static int compute_new_inum(unsigned int old_inum)
83 unsigned int group, offset;
85 group = (old_inum - 1) / info.inodes_per_group;
86 offset = (old_inum -1) % info.inodes_per_group;
88 return (group * new_inodes_per_group) + offset + 1;
/* Report which phase of the fixup the filesystem image is in.
 * Reads sizeof(magic) bytes from offset 0 of fd and maps the MAGIC_STATE_*
 * value found there to the matching STATE_* constant.  A short read is
 * reported through critical_error().
 * NOTE(review): the condition guarding the early return of
 * no_write_fixup_state is not visible here; presumably it is taken only in
 * no-write mode - confirm.
 */
91 static int get_fs_fixup_state(int fd)
93 unsigned long long magic;
97 return no_write_fixup_state;
100 lseek64(fd, 0, SEEK_SET);
101 len = read(fd, &magic, sizeof(magic));
102 if (len != sizeof(magic)) {
103 critical_error("cannot read fixup_state\n");
107 case MAGIC_STATE_MARKING_INUMS:
108 ret = STATE_MARKING_INUMS;
110 case MAGIC_STATE_UPDATING_INUMS:
111 ret = STATE_UPDATING_INUMS;
113 case MAGIC_STATE_UPDATING_SB:
114 ret = STATE_UPDATING_SB;
/* Record a new fixup phase.
 * The state is remembered in no_write_fixup_state, translated to its
 * MAGIC_STATE_* value and written to offset 0 of fd; a short write is
 * reported through critical_error().  The low bit of sb.s_desc_size is used
 * as the "fixup in progress" marker (see the comments below) and the
 * superblock is written back with write_sb() at offset 1024.
 * NOTE(review): how sb is loaded and which states set rather than clear the
 * marker bit is not visible here - confirm before relying on it.
 */
122 static int set_fs_fixup_state(int fd, int state)
124 unsigned long long magic;
125 struct ext4_super_block sb;
129 no_write_fixup_state = state;
134 case STATE_MARKING_INUMS:
135 magic = MAGIC_STATE_MARKING_INUMS;
137 case STATE_UPDATING_INUMS:
138 magic = MAGIC_STATE_UPDATING_INUMS;
140 case STATE_UPDATING_SB:
141 magic = MAGIC_STATE_UPDATING_SB;
149 lseek64(fd, 0, SEEK_SET);
150 len = write(fd, &magic, sizeof(magic));
151 if (len != sizeof(magic)) {
152 critical_error("cannot write fixup_state\n");
157 /* If we are in the process of updating the filesystem, make it unmountable */
160 /* we are done, so make the filesystem mountable again */
161 sb.s_desc_size &= ~1;
165 write_sb(fd, 1024, &sb);
/* Read the on-disk inode numbered inum into *inode.
 * The inode's block group and its index inside the group are derived from
 * the 1-based inode number and info.inodes_per_group; the byte offset is the
 * start of that group's inode table plus index * info.inode_size.
 * Seek and read failures are reported through critical_error_errno().
 */
171 static int read_inode(int fd, unsigned int inum, struct ext4_inode *inode)
173 unsigned int bg_num, bg_offset;
174 off64_t inode_offset;
177 bg_num = (inum-1) / info.inodes_per_group;
178 bg_offset = (inum-1) % info.inodes_per_group;
/* The cast widens the inode table block number before it is multiplied by the block size */
180 inode_offset = ((unsigned long long)aux_info.bg_desc[bg_num].bg_inode_table * info.block_size) +
181 (bg_offset * info.inode_size);
183 if (lseek64(fd, inode_offset, SEEK_SET) < 0) {
184 critical_error_errno("failed to seek to inode %d\n", inum);
/* Only sizeof(*inode) bytes are read, independent of info.inode_size */
187 len=read(fd, inode, sizeof(*inode));
188 if (len != sizeof(*inode)) {
189 critical_error_errno("failed to read inode %d\n", inum);
/* Read filesystem block block_num (info.block_size bytes) from fd into block.
 * The caller must supply a buffer of at least info.block_size bytes.
 * Seek failures and short reads are reported through critical_error_errno().
 */
195 static int read_block(int fd, unsigned long long block_num, void *block)
200 off = block_num * info.block_size;
/* lseek64() returns a negative value on failure; report it instead of
 * going on to read from whatever offset the descriptor is left at.
 */
202 if (lseek64(fd, off, SEEK_SET) < 0) {
203 critical_error_errno("failed to seek to block %lld\n", block_num);
206 len=read(fd, block, info.block_size);
207 if (len != info.block_size) {
208 critical_error_errno("failed to read block %lld\n", block_num);
/* Write info.block_size bytes from block to filesystem block block_num of fd.
 * Seek failures and short writes are reported through critical_error_errno().
 * NOTE(review): several lines between the signature and the offset
 * computation are not visible here; presumably they skip the write in
 * no-write mode - confirm.
 */
214 static int write_block(int fd, unsigned long long block_num, void *block)
223 off = block_num * info.block_size;
225 if (lseek64(fd, off, SEEK_SET) < 0) {
226 critical_error_errno("failed to seek to block %lld\n", block_num);
229 len=write(fd, block, info.block_size);
230 if (len != info.block_size) {
231 critical_error_errno("failed to write block %lld\n", block_num);
/* Make sure the inode bitmap of block group bg_num has no bits set for the
 * inodes that the fixup adds (indices info.inodes_per_group up to, but not
 * including, new_inodes_per_group).  Any set bit found is cleared and the
 * bitmap block is written back when bitmap_updated is non-zero.
 * NOTE(review): the bitmap is read into a MAX_EXT4_BLOCK_SIZE byte stack
 * buffer although read_block() transfers info.block_size bytes; presumably
 * the block size never exceeds that limit - confirm.
 */
237 static void check_inode_bitmap(int fd, unsigned int bg_num)
239 unsigned int inode_bitmap_block_num;
240 unsigned char block[MAX_EXT4_BLOCK_SIZE];
241 int i, bitmap_updated = 0;
243 /* Using the bg_num, aux_info.bg_desc[], info.inodes_per_group and
244 * new_inodes_per_group, retrieve the inode bitmap, and make sure
245 * the bits between the old and new size are clear
247 inode_bitmap_block_num = aux_info.bg_desc[bg_num].bg_inode_bitmap;
249 read_block(fd, inode_bitmap_block_num, block);
251 for (i = info.inodes_per_group; i < new_inodes_per_group; i++) {
252 if (bitmap_get_bit(block, i)) {
253 bitmap_clear_bit(block, i);
258 if (bitmap_updated) {
260 printf("Warning: updated inode bitmap for block group %d\n", bg_num);
262 write_block(fd, inode_bitmap_block_num, block);
268 /* Update the superblock copies and the block group descriptors for all block groups */
/* Steps visible in this function:
 *  - compute how many inodes the new inodes-per-group value adds in total;
 *  - when state is STATE_UPDATING_SB, add the per-group gain to every
 *    descriptor's bg_free_inodes_count; check_inode_bitmap() is called for
 *    the groups as well;
 *  - sanity check the new totals and the alignment of new_inodes_per_group;
 *  - update the inode counts and s_inodes_per_group in sb;
 *  - for every group for which ext4_bg_has_super_block() is true, write the
 *    block group descriptors behind that group's superblock copy.
 * bail_phase == 4 together with bail_count selects the group at which the
 * automated-test bail out is triggered.
 * NOTE(review): how sb is loaded and how each superblock copy is written is
 * not visible here.
 */
269 static int update_superblocks_and_bg_desc(int fd, int state)
272 struct ext4_super_block sb;
273 unsigned int num_block_groups, total_new_inodes;
279 /* Compute how many more inodes are now available */
280 num_block_groups = DIV_ROUND_UP(aux_info.len_blocks, info.blocks_per_group);
281 total_new_inodes = num_block_groups * (new_inodes_per_group - sb.s_inodes_per_group);
284 printf("created %d additional inodes\n", total_new_inodes);
287 /* Update the free inodes count in each block group descriptor */
288 for (i = 0; i < num_block_groups; i++) {
289 if (state == STATE_UPDATING_SB) {
290 aux_info.bg_desc[i].bg_free_inodes_count += (new_inodes_per_group - sb.s_inodes_per_group);
292 check_inode_bitmap(fd, i);
295 /* First some sanity checks */
296 if ((sb.s_inodes_count + total_new_inodes) != (new_inodes_per_group * num_block_groups)) {
297 critical_error("Failed sanity check on new inode count\n");
299 if (new_inodes_per_group % (info.block_size/info.inode_size)) {
300 critical_error("Failed sanity check on new inode per group alignment\n");
303 /* Update the free inodes count in the superblock */
304 sb.s_inodes_count += total_new_inodes;
305 sb.s_free_inodes_count += total_new_inodes;
306 sb.s_inodes_per_group = new_inodes_per_group;
308 for (i = 0; i < aux_info.groups; i++) {
309 if (ext4_bg_has_super_block(i)) {
310 unsigned int sb_offset;
313 /* The first superblock is offset by 1K to leave room for boot sectors */
319 sb.s_block_group_nr = i;
320 /* Don't write out the backup superblocks with the bit set in the s_desc_size
321 * which prevents the filesystem from mounting. The bit for the primary
322 * superblock will be cleared on the final call to set_fs_fixup_state() */
324 sb.s_desc_size &= ~1;
329 (unsigned long long)i
330 * info.blocks_per_group * info.block_size
/* The descriptors start (aux_info.first_data_block + 1) blocks into the group */
335 ret = lseek64(fd, ((unsigned long long)i * info.blocks_per_group * info.block_size) +
336 (info.block_size * (aux_info.first_data_block + 1)), SEEK_SET);
338 critical_error_errno("failed to seek to block group descriptors");
341 ret = write(fd, aux_info.bg_desc, info.block_size * aux_info.bg_desc_blocks);
343 critical_error_errno("failed to write block group descriptors");
344 if (ret != (int)info.block_size * (int)aux_info.bg_desc_blocks)
345 critical_error("failed to write all of block group descriptors");
348 if ((bail_phase == 4) && ((unsigned int)bail_count == i)) {
349 critical_error("bailing at phase 4\n");
/* Copy the direct block numbers of inode into block_list.
 * inode->i_blocks_lo counts INODE_BLOCK_SIZE (512 byte) units, so it is
 * divided by sectors_per_block to get filesystem blocks; at most
 * EXT4_NDIR_BLOCKS entries are copied from inode->i_block[].
 * The final test detects an inode with more blocks than the direct slots can
 * hold; get_block_list_indirect() calls get_indirect_blocks() when this
 * function returns non-zero.
 * NOTE(review): the rest of the parameter list and the statements that set
 * the return value are not visible here.
 */
357 static int get_direct_blocks(struct ext4_inode *inode, unsigned long long *block_list,
361 unsigned int ret = 0;
362 unsigned int sectors_per_block;
364 sectors_per_block = info.block_size / INODE_BLOCK_SIZE;
365 while ((i < (inode->i_blocks_lo / sectors_per_block)) && (i < EXT4_NDIR_BLOCKS)) {
366 block_list[i] = inode->i_block[i];
372 if ((inode->i_blocks_lo / sectors_per_block) > EXT4_NDIR_BLOCKS) {
/* Append the block numbers held in the singly indirect block of inode to
 * block_list, starting at index EXT4_NDIR_BLOCKS.
 * The indirect block (inode->i_block[EXT4_NDIR_BLOCKS]) is read into a
 * temporary heap buffer of info.block_size bytes which is freed before
 * returning; an allocation failure is reported through critical_error().
 * The number of entries copied is the inode's block count (i_blocks_lo
 * converted from 512 byte units) minus the EXT4_NDIR_BLOCKS direct blocks.
 */
379 static int get_indirect_blocks(int fd, struct ext4_inode *inode,
380 unsigned long long *block_list, unsigned int *count)
383 unsigned int *indirect_block;
384 unsigned int sectors_per_block;
386 sectors_per_block = info.block_size / INODE_BLOCK_SIZE;
388 indirect_block = (unsigned int *)malloc(info.block_size);
389 if (indirect_block == 0) {
390 critical_error("failed to allocate memory for indirect_block\n");
393 read_block(fd, inode->i_block[EXT4_NDIR_BLOCKS], indirect_block);
395 for(i = 0; i < (inode->i_blocks_lo / sectors_per_block - EXT4_NDIR_BLOCKS); i++) {
396 block_list[EXT4_NDIR_BLOCKS+i] = indirect_block[i];
401 free(indirect_block);
/* Build block_list for an inode that uses the direct/indirect block map.
 * The direct blocks are collected first; the singly indirect block is only
 * consulted when get_direct_blocks() returns non-zero.
 */
406 static int get_block_list_indirect(int fd, struct ext4_inode *inode, unsigned long long *block_list)
408 unsigned int count=0;
410 if (get_direct_blocks(inode, block_list, &count)) {
411 get_indirect_blocks(fd, inode, block_list, &count);
/* Fill in block_list from a leaf node of an extent tree.
 * ext_hdr must have eh_depth == 0, anything else is a critical error.
 * For every block of an extent, the entry of block_list indexed by the
 * logical block number (ee_block + j) receives the physical block number
 * (start block + j), where the start block is assembled from ee_start_hi
 * and ee_start_lo.
 * NOTE(review): no bound on ee_block + j against the size of block_list is
 * visible here - confirm the allocation made by the caller covers it.
 */
417 static int get_extent_ents(struct ext4_extent_header *ext_hdr, unsigned long long *block_list)
420 struct ext4_extent *extent;
421 off64_t fs_block_num;
423 if (ext_hdr->eh_depth != 0) {
424 critical_error("get_extent_ents called with eh_depth != 0\n");
427 /* The extent entries immediately follow the header, so add 1 to the pointer
428 * and cast it to an extent pointer.
430 extent = (struct ext4_extent *)(ext_hdr + 1);
432 for (i = 0; i < ext_hdr->eh_entries; i++) {
433 fs_block_num = ((off64_t)extent->ee_start_hi << 32) | extent->ee_start_lo;
434 for (j = 0; j < extent->ee_len; j++) {
435 block_list[extent->ee_block+j] = fs_block_num+j;
// Walk an interior (index) node of an extent tree and fill in block_list.
// ext_hdr must have eh_depth != 0; a leaf header is a critical error.
// The loop runs eh_entries times: the child block addressed by extent_idx
// (ei_leaf_hi / ei_leaf_lo) is read with read_block(), then handed to
// get_extent_ents() when it is a leaf, or to a recursive call otherwise.
// NOTE(review): no statement advancing extent_idx is visible in this listing;
// confirm that every iteration really looks at a different index entry.
// NOTE(review): the child block is read into a MAX_EXT4_BLOCK_SIZE byte stack
// buffer; presumably info.block_size never exceeds that - confirm.
443 static int get_extent_idx(int fd, struct ext4_extent_header *ext_hdr, unsigned long long *block_list)
446 struct ext4_extent_idx *extent_idx;
447 struct ext4_extent_header *tmp_ext_hdr;
448 off64_t fs_block_num;
449 unsigned char block[MAX_EXT4_BLOCK_SIZE];
452 if (ext_hdr->eh_depth == 0) {
453 critical_error("get_extent_idx called with eh_depth == 0\n");
456 /* The extent entries immediately follow the header, so add 1 to the pointer
457 * and cast it to an extent pointer.
459 extent_idx = (struct ext4_extent_idx *)(ext_hdr + 1);
461 for (i = 0; i < ext_hdr->eh_entries; i++) {
462 fs_block_num = ((off64_t)extent_idx->ei_leaf_hi << 32) | extent_idx->ei_leaf_lo;
463 read_block(fd, fs_block_num, block);
464 tmp_ext_hdr = (struct ext4_extent_header *)block;
466 if (tmp_ext_hdr->eh_depth == 0) {
467 get_extent_ents(tmp_ext_hdr, block_list); /* leaf node, fill in block_list */
469 get_extent_idx(fd, tmp_ext_hdr, block_list); /* recurse down the tree */
/* Build block_list for an inode that uses extents.
 * The extent header stored in inode->i_block is validated against
 * EXT4_EXT_MAGIC (a mismatch is a critical error).  A header with
 * eh_depth == 0 holds the extents directly and goes to get_extent_ents();
 * otherwise get_extent_idx() is called on it.
 */
476 static int get_block_list_extents(int fd, struct ext4_inode *inode, unsigned long long *block_list)
478 struct ext4_extent_header *extent_hdr;
480 extent_hdr = (struct ext4_extent_header *)inode->i_block;
482 if (extent_hdr->eh_magic != EXT4_EXT_MAGIC) {
483 critical_error("extent header has unexpected magic value 0x%4.4x\n",
484 extent_hdr->eh_magic);
487 if (extent_hdr->eh_depth == 0) {
488 get_extent_ents((struct ext4_extent_header *)inode->i_block, block_list);
492 get_extent_idx(fd, (struct ext4_extent_header *)inode->i_block, block_list);
/* Decide whether the directory entry dirp refers to a directory.
 * The file_type stored in the entry is used when it is EXT4_FT_DIR.  When it
 * is EXT4_FT_UNKNOWN the inode itself is read and its mode is tested with
 * S_ISDIR(); the high bit of dirp->inode is masked off for that lookup.
 * pass is one of the recurse_dir() modes and is only examined for the
 * UPDATE_INODE_NUMS special case described in the comment below.
 */
497 static int is_entry_dir(int fd, struct ext4_dir_entry_2 *dirp, int pass)
499 struct ext4_inode inode;
502 if (dirp->file_type == EXT4_FT_DIR) {
504 } else if (dirp->file_type == EXT4_FT_UNKNOWN) {
505 /* Somebody was too lazy to fill in the dir entry,
506 * so we have to go fetch it from the inode. Grrr.
508 /* if UPDATE_INODE_NUMS pass and the inode high bit is not
509 * set return false so we don't recurse down the tree that is
510 * already updated. Otherwise, fetch inode, and return answer.
512 if ((pass == UPDATE_INODE_NUMS) && !(dirp->inode & 0x80000000)) {
515 read_inode(fd, (dirp->inode & 0x7fffffff), &inode);
516 if (S_ISDIR(inode.i_mode)) {
// Process one directory and, depth first, every sub-directory below it.
//   fd      - descriptor of the filesystem image
//   inode   - inode of the directory to process
//   dirbuf  - caller supplied buffer of dirsize bytes that receives the
//             directory blocks
//   dirsize - size of the directory in bytes; must be a multiple of
//             info.block_size
//   mode    - SANITY_CHECK_PASS, MARK_INODE_NUMS or UPDATE_INODE_NUMS; any
//             other value is a critical error
// The directory blocks are located through the extent tree or the
// direct/indirect block map, read into dirbuf, walked entry by entry via
// rec_len, and finally written back block by block with write_block().
// MARK_INODE_NUMS sets the high bit of every entry's inode number,
// UPDATE_INODE_NUMS replaces each marked number with compute_new_inum().
525 static int recurse_dir(int fd, struct ext4_inode *inode, char *dirbuf, int dirsize, int mode)
527 unsigned long long *block_list;
528 unsigned int num_blocks;
529 struct ext4_dir_entry_2 *dirp, *prev_dirp = 0;
531 unsigned int i, leftover_space, is_dir;
532 struct ext4_inode tmp_inode;
537 case SANITY_CHECK_PASS:
538 case MARK_INODE_NUMS:
539 case UPDATE_INODE_NUMS:
542 critical_error("recurse_dir() called witn unknown mode!\n");
545 if (dirsize % info.block_size) {
546 critical_error("dirsize %d not a multiple of block_size %d. This is unexpected!\n",
547 dirsize, info.block_size);
550 num_blocks = dirsize / info.block_size;
// One entry more than num_blocks is allocated for the block list
552 block_list = malloc((num_blocks + 1) * sizeof(*block_list));
553 if (block_list == 0) {
554 critical_error("failed to allocate memory for block_list\n");
557 if (inode->i_flags & EXT4_EXTENTS_FL) {
558 get_block_list_extents(fd, inode, block_list);
560 /* A directory that requires doubly or triply indirect blocks is huge indeed,
561 * and will almost certainly not exist, especially since make_ext4fs only creates
562 * directories with extents, and the kernel will too, but check to make sure the
563 * directory is not that big and give an error if so. Our limit is 12 direct blocks,
564 * plus block_size/4 singly indirect blocks, which for a filesystem with 4K blocks
565 * is a directory 1036 blocks long, or 4,243,456 bytes long! Assuming an average
566 * filename length of 20 (which I think is generous) thats 20 + 8 bytes overhead
567 * per entry, or 151,552 entries in the directory!
569 if (num_blocks > (info.block_size / 4 + EXT4_NDIR_BLOCKS)) {
570 critical_error("Non-extent based directory is too big!\n");
572 get_block_list_indirect(fd, inode, block_list);
575 /* Read in all the blocks for this directory */
576 for (i = 0; i < num_blocks; i++) {
577 read_block(fd, block_list[i], dirbuf + (i * info.block_size));
580 dirp = (struct ext4_dir_entry_2 *)dirbuf;
581 while (dirp < (struct ext4_dir_entry_2 *)(dirbuf + dirsize)) {
// Bytes remaining between the current entry and the end of the buffer
583 leftover_space = (char *)(dirbuf + dirsize) - (char *)dirp;
584 if (((mode == SANITY_CHECK_PASS) || (mode == UPDATE_INODE_NUMS)) &&
585 (leftover_space <= 8) && prev_dirp) {
586 /* This is a bug in an older version of make_ext4fs, where it
587 * didn't properly include the rest of the block in rec_len.
588 * Update rec_len on the previous entry to include the rest of
589 * the block and exit the loop.
592 printf("fixing up short rec_len for diretory entry for %s\n", name);
594 prev_dirp->rec_len += leftover_space;
598 if (dirp->inode == 0) {
599 /* This is the last entry in the directory */
// The on-disk name is not NUL terminated, so copy name_len bytes and terminate
603 strncpy(name, dirp->name, dirp->name_len);
604 name[dirp->name_len]='\0';
606 /* Only recurse on pass UPDATE_INODE_NUMS if the high bit is set.
607 * Otherwise, this inode entry has already been updated
608 * and we'll do the wrong thing. Also don't recurse on . or ..,
609 * and certainly not on non-directories!
611 /* Hrm, looks like filesystems made by fastboot on stingray set the file_type
612 * flag, but the lost+found directory has the type set to Unknown, which
613 * seems to imply I need to read the inode and get it.
615 is_dir = is_entry_dir(fd, dirp, mode);
616 if ( is_dir && (strcmp(name, ".") && strcmp(name, "..")) &&
617 ((mode == SANITY_CHECK_PASS) || (mode == MARK_INODE_NUMS) ||
618 ((mode == UPDATE_INODE_NUMS) && (dirp->inode & 0x80000000))) ) {
619 /* A directory! Recurse! */
// Mask off the marker bit to get the real inode number
620 read_inode(fd, dirp->inode & 0x7fffffff, &tmp_inode);
622 if (!S_ISDIR(tmp_inode.i_mode)) {
623 critical_error("inode %d for name %s does not point to a directory\n",
624 dirp->inode & 0x7fffffff, name);
627 printf("inode %d %s use extents\n", dirp->inode & 0x7fffffff,
628 (tmp_inode.i_flags & EXT4_EXTENTS_FL) ? "does" : "does not");
// i_blocks_lo counts INODE_BLOCK_SIZE (512 byte) units, so this is a size in bytes
631 tmp_dirsize = tmp_inode.i_blocks_lo * INODE_BLOCK_SIZE;
633 printf("dir size = %d bytes\n", tmp_dirsize);
636 tmp_dirbuf = malloc(tmp_dirsize);
637 if (tmp_dirbuf == 0) {
638 critical_error("failed to allocate memory for tmp_dirbuf\n");
641 recurse_dir(fd, &tmp_inode, tmp_dirbuf, tmp_dirsize, mode);
648 printf("Directory %s\n", name);
650 printf("Non-directory %s\n", name);
654 /* Process entry based on current mode. Either set high bit or change inode number */
655 if (mode == MARK_INODE_NUMS) {
656 dirp->inode |= 0x80000000;
657 } else if (mode == UPDATE_INODE_NUMS) {
658 if (dirp->inode & 0x80000000) {
659 dirp->inode = compute_new_inum(dirp->inode & 0x7fffffff);
// Automated-test hook: bail out at entry number bail_count of the selected phase
663 if ((bail_phase == mode) && (bail_loc == 1) && (bail_count == count)) {
664 critical_error("Bailing at phase %d, loc 1 and count %d\n", mode, count);
667 /* Point dirp at the next entry */
669 dirp = (struct ext4_dir_entry_2*)((char *)dirp + dirp->rec_len);
672 /* Write out all the blocks for this directory */
673 for (i = 0; i < num_blocks; i++) {
674 write_block(fd, block_list[i], dirbuf + (i * info.block_size));
// Automated-test hook: unlike loc 1 this triggers once count has reached bail_count
675 if ((bail_phase == mode) && (bail_loc == 2) && (bail_count <= count)) {
676 critical_error("Bailing at phase %d, loc 2 and count %d\n", mode, count);
/* Public entry point: run the fixup on the filesystem at fsdev by calling
 * ext4fixup_internal() with all flag and stop arguments set to 0, and
 * return its result.
 */
685 int ext4fixup(char *fsdev)
687 return ext4fixup_internal(fsdev, 0, 0, 0, 0, 0);
/* Run the inode renumbering fixup on the filesystem at fsdev.
 *   fsdev      - path of the filesystem image or device, opened O_RDWR
 *   v_flag     - NOTE(review): presumably copied to verbose - confirm
 *   n_flag     - NOTE(review): presumably copied to no_write - confirm
 *   stop_phase - copied to bail_phase (automated-test bail out)
 *   stop_loc   - NOTE(review): presumably copied to bail_loc - confirm
 *   stop_count - copied to bail_count (automated-test bail out)
 * Returns EXIT_FAILURE when setjmp() reports a longjmp(); presumably that is
 * how critical_error() and critical_error_errno() abort - confirm.
 * The work proceeds in phases driven by get_fs_fixup_state(): a sanity check
 * pass (only when the state is STATE_UNSET), marking of inode numbers,
 * updating of inode numbers, and finally updating the superblocks and block
 * group descriptors.  A phase only advances the state when the preceding
 * step returned 0, and each phase is entered according to the state read
 * back from the image, which is what allows an interrupted run to resume.
 */
690 int ext4fixup_internal(char *fsdev, int v_flag, int n_flag,
691 int stop_phase, int stop_loc, int stop_count)
694 struct ext4_inode root_inode;
695 unsigned int dirsize;
698 if (setjmp(setjmp_env))
699 return EXIT_FAILURE; /* Handle a call to longjmp() */
704 bail_phase = stop_phase;
706 bail_count = stop_count;
708 fd = open(fsdev, O_RDWR);
711 critical_error_errno("failed to open filesystem image");
713 read_ext(fd, verbose);
715 if (info.feat_incompat & EXT4_FEATURE_INCOMPAT_RECOVER) {
716 critical_error("Filesystem needs recovery first, mount and unmount to do that\n");
719 /* Clear the low bit which is set while this tool is in progress.
720 * If the tool crashes, it will still be set when we restart.
721 * The low bit is set to make the filesystem unmountable while
722 * it is being fixed up. Also allow 0, which means the old ext2
725 if (((aux_info.sb->s_desc_size & ~1) != sizeof(struct ext2_group_desc)) &&
726 ((aux_info.sb->s_desc_size & ~1) != 0))
727 critical_error("error: bg_desc_size != sizeof(struct ext2_group_desc)\n");
729 if ((info.feat_incompat & EXT4_FEATURE_INCOMPAT_FILETYPE) == 0) {
730 critical_error("Expected filesystem to have filetype flag set\n");
733 #if 0 // If we have to fix the directory rec_len issue, we can't use this check
734 /* Check to see if the inodes/group is copacetic */
735 if (info.inodes_per_blockgroup % (info.block_size/info.inode_size) == 0) {
736 /* This filesystem has either already been updated, or was
740 printf("%s: filesystem correct, no work to do\n", me);
746 /* Compute what the new value of inodes_per_blockgroup will be when we're done */
747 new_inodes_per_group=EXT4_ALIGN(info.inodes_per_group,(info.block_size/info.inode_size));
749 read_inode(fd, EXT4_ROOT_INO, &root_inode);
751 if (!S_ISDIR(root_inode.i_mode)) {
752 critical_error("root inode %d does not point to a directory\n", EXT4_ROOT_INO);
755 printf("inode %d %s use extents\n", EXT4_ROOT_INO,
756 (root_inode.i_flags & EXT4_EXTENTS_FL) ? "does" : "does not");
/* i_blocks_lo counts INODE_BLOCK_SIZE (512 byte) units, so this is a size in bytes */
759 dirsize = root_inode.i_blocks_lo * INODE_BLOCK_SIZE;
761 printf("root dir size = %d bytes\n", dirsize);
764 dirbuf = malloc(dirsize);
766 critical_error("failed to allocate memory for dirbuf\n");
769 /* Perform a sanity check pass first, try to catch any errors that will occur
770 * before we actually change anything, so we don't leave a filesystem in a
771 * corrupted, unrecoverable state. Set no_write, make it quiet, and do a recurse
772 * pass and a update_superblock pass. Set flags back to requested state when done.
773 * Only perform sanity check if the state is unset. If the state is _NOT_ unset,
774 * then the tool has already been run and interrupted, and it presumably ran and
775 * passed the sanity check before it got interrupted. It is _NOT_ safe to run the sanity
776 * check if the state is set because it assumes inodes are to be computed using the
777 * old inodes/group, but some inode numbers may be updated to the new number.
779 if (get_fs_fixup_state(fd) == STATE_UNSET) {
782 recurse_dir(fd, &root_inode, dirbuf, dirsize, SANITY_CHECK_PASS);
783 update_superblocks_and_bg_desc(fd, STATE_UNSET);
787 set_fs_fixup_state(fd, STATE_MARKING_INUMS);
790 if (get_fs_fixup_state(fd) == STATE_MARKING_INUMS) {
791 count = 0; /* Reset debugging counter */
792 if (!recurse_dir(fd, &root_inode, dirbuf, dirsize, MARK_INODE_NUMS)) {
793 set_fs_fixup_state(fd, STATE_UPDATING_INUMS);
797 if (get_fs_fixup_state(fd) == STATE_UPDATING_INUMS) {
798 count = 0; /* Reset debugging counter */
799 if (!recurse_dir(fd, &root_inode, dirbuf, dirsize, UPDATE_INODE_NUMS)) {
800 set_fs_fixup_state(fd, STATE_UPDATING_SB);
804 if (get_fs_fixup_state(fd) == STATE_UPDATING_SB) {
805 /* set the new inodes/blockgroup number,
806 * and sets the state back to 0.
808 if (!update_superblocks_and_bg_desc(fd, STATE_UPDATING_SB)) {
809 set_fs_fixup_state(fd, STATE_UNSET);