atomic_t pending_io;
wait_queue_head_t io_wait;
+ wait_queue_head_t repair_wait;
/*
* Indicate the states of the stripe. Bits are defined in
* group.
*/
u8 *csums;
+
+ struct work_struct work;
};
struct scrub_recover {
stripe->state = 0;
init_waitqueue_head(&stripe->io_wait);
+ init_waitqueue_head(&stripe->repair_wait);
atomic_set(&stripe->pending_io, 0);
ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages);
return -ENOMEM;
}
-void wait_scrub_stripe_io(struct scrub_stripe *stripe)
+static void wait_scrub_stripe_io(struct scrub_stripe *stripe)
{
wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0);
}
}
/* Verify specified sectors of a stripe. */
-void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
+static void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
}
}
+static int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first_bvec)
+{
+ int i;
+
+ for (i = 0; i < stripe->nr_sectors; i++) {
+ if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page &&
+ scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset)
+ break;
+ }
+ ASSERT(i < stripe->nr_sectors);
+ return i;
+}
+
+/*
+ * Repair read is different to the regular read:
+ *
+ * - Only reads the failed sectors
+ * - May have extra blocksize limits
+ */
+static void scrub_repair_read_endio(struct btrfs_bio *bbio)
+{
+ struct scrub_stripe *stripe = bbio->private;
+ struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ struct bio_vec *bvec;
+ int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
+ u32 bio_size = 0;
+ int i;
+
+ ASSERT(sector_nr < stripe->nr_sectors);
+
+ bio_for_each_bvec_all(bvec, &bbio->bio, i)
+ bio_size += bvec->bv_len;
+
+ if (bbio->bio.bi_status) {
+ bitmap_set(&stripe->io_error_bitmap, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
+ bitmap_set(&stripe->error_bitmap, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
+ } else {
+ bitmap_clear(&stripe->io_error_bitmap, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
+ }
+ bio_put(&bbio->bio);
+ if (atomic_dec_and_test(&stripe->pending_io))
+ wake_up(&stripe->io_wait);
+}
+
+static int calc_next_mirror(int mirror, int num_copies)
+{
+ ASSERT(mirror <= num_copies);
+ return (mirror + 1 > num_copies) ? 1 : mirror + 1;
+}
+
+static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
+ int mirror, int blocksize, bool wait)
+{
+ struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ struct btrfs_bio *bbio = NULL;
+ const unsigned long old_error_bitmap = stripe->error_bitmap;
+ int i;
+
+ ASSERT(stripe->mirror_num >= 1);
+ ASSERT(atomic_read(&stripe->pending_io) == 0);
+
+ for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) {
+ struct page *page;
+ int pgoff;
+ int ret;
+
+ page = scrub_stripe_get_page(stripe, i);
+ pgoff = scrub_stripe_get_page_offset(stripe, i);
+
+ /* The current sector cannot be merged, submit the bio. */
+ if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) ||
+ bbio->bio.bi_iter.bi_size >= blocksize)) {
+ ASSERT(bbio->bio.bi_iter.bi_size);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_bio(bbio, mirror);
+ if (wait)
+ wait_scrub_stripe_io(stripe);
+ bbio = NULL;
+ }
+
+ if (!bbio) {
+ bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
+ fs_info, scrub_repair_read_endio, stripe);
+ bbio->bio.bi_iter.bi_sector = (stripe->logical +
+ (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
+ }
+
+ ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
+ ASSERT(ret == fs_info->sectorsize);
+ }
+ if (bbio) {
+ ASSERT(bbio->bio.bi_iter.bi_size);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_bio(bbio, mirror);
+ if (wait)
+ wait_scrub_stripe_io(stripe);
+ }
+}
+
+/*
+ * The main entrance for all read related scrub work, including:
+ *
+ * - Wait for the initial read to finish
+ * - Verify and locate any bad sectors
+ * - Go through the remaining mirrors and try to read as large blocksize as
+ * possible
+ * - Go through all mirrors (including the failed mirror) sector-by-sector
+ *
+ * Writeback does not happen here, it needs extra synchronization.
+ */
+static void scrub_stripe_read_repair_worker(struct work_struct *work)
+{
+ struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
+ struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
+ stripe->bg->length);
+ int mirror;
+ int i;
+
+ ASSERT(stripe->mirror_num > 0);
+
+ wait_scrub_stripe_io(stripe);
+ scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
+ /* Save the initial failed bitmap for later repair and report usage. */
+ stripe->init_error_bitmap = stripe->error_bitmap;
+
+ if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
+ goto out;
+
+ /*
+ * Try all remaining mirrors.
+ *
+ * Here we still try to read as large block as possible, as this is
+ * faster and we have extra safety nets to rely on.
+ */
+ for (mirror = calc_next_mirror(stripe->mirror_num, num_copies);
+ mirror != stripe->mirror_num;
+ mirror = calc_next_mirror(mirror, num_copies)) {
+ const unsigned long old_error_bitmap = stripe->error_bitmap;
+
+ scrub_stripe_submit_repair_read(stripe, mirror,
+ BTRFS_STRIPE_LEN, false);
+ wait_scrub_stripe_io(stripe);
+ scrub_verify_one_stripe(stripe, old_error_bitmap);
+ if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ goto out;
+ }
+
+ /*
+ * Last safety net, try re-checking all mirrors, including the failed
+ * one, sector-by-sector.
+ *
+ * As if one sector failed the drive's internal csum, the whole read
+ * containing the offending sector would be marked as error.
+ * Thus here we do sector-by-sector read.
+ *
+ * This can be slow, thus we only try it as the last resort.
+ */
+
+ for (i = 0, mirror = stripe->mirror_num;
+ i < num_copies;
+ i++, mirror = calc_next_mirror(mirror, num_copies)) {
+ const unsigned long old_error_bitmap = stripe->error_bitmap;
+
+ scrub_stripe_submit_repair_read(stripe, mirror,
+ fs_info->sectorsize, true);
+ wait_scrub_stripe_io(stripe);
+ scrub_verify_one_stripe(stripe, old_error_bitmap);
+ if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ goto out;
+ }
+out:
+ set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
+ wake_up(&stripe->repair_wait);
+}
+
+void scrub_read_endio(struct btrfs_bio *bbio)
+{
+ struct scrub_stripe *stripe = bbio->private;
+
+ if (bbio->bio.bi_status) {
+ bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
+ bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors);
+ } else {
+ bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
+ }
+ bio_put(&bbio->bio);
+ if (atomic_dec_and_test(&stripe->pending_io)) {
+ wake_up(&stripe->io_wait);
+ INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker);
+ queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work);
+ }
+}
+
static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
struct scrub_ctx *sctx = sblock->sctx;