OSDN Git Service

Merge tag 'for-5.7/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
author: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 30 Apr 2020 23:45:08 +0000 (16:45 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 30 Apr 2020 23:45:08 +0000 (16:45 -0700)
Pull device mapper fixes from Mike Snitzer:

 - Document the DM integrity allow_discards option that was added during
   the 5.7 merge window.

 - Fix potential for DM writecache data corruption during DM table
   reloads.

 - Fix DM verity's FEC support's hash block number calculation in
   verity_fec_decode().

 - Fix bio-based DM multipath crash due to use of stale copy of
   MPATHF_QUEUE_IO flag state in __map_bio().

* tag 'for-5.7/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm multipath: use updated MPATHF_QUEUE_IO on mapping for bio-based mpath
  dm verity fec: fix hash block number in verity_fec_decode
  dm writecache: fix data corruption when reloading the target
  dm integrity: document allow_discard option

Documentation/admin-guide/device-mapper/dm-integrity.rst
drivers/md/dm-mpath.c
drivers/md/dm-verity-fec.c
drivers/md/dm-writecache.c

index c00f9f1..8439d2a 100644 (file)
@@ -182,12 +182,15 @@ fix_padding
        space-efficient. If this option is not present, large padding is
        used - that is for compatibility with older kernels.
 
-
-The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
-be changed when reloading the target (load an inactive table and swap the
-tables with suspend and resume). The other arguments should not be changed
-when reloading the target because the layout of disk data depend on them
-and the reloaded target would be non-functional.
+allow_discards
+       Allow block discard requests (a.k.a. TRIM) for the integrity device.
+       Discards are only allowed to devices using internal hash.
+
+The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and
+allow_discards can be changed when reloading the target (load an inactive
+table and swap the tables with suspend and resume). The other arguments
+should not be changed when reloading the target because the layout of disk
+data depend on them and the reloaded target would be non-functional.
 
 
 The layout of the formatted block device:
index 58fd137..3e50009 100644 (file)
@@ -585,10 +585,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
 
        /* Do we need to select a new pgpath? */
        pgpath = READ_ONCE(m->current_pgpath);
-       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
-       if (!pgpath || !queue_io)
+       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
                pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
 
+       /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */
+       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
+
        if ((pgpath && queue_io) ||
            (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
                /* Queue for the daemon to resubmit */
index 49147e6..fb41b4f 100644 (file)
@@ -435,7 +435,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
        fio->level++;
 
        if (type == DM_VERITY_BLOCK_TYPE_METADATA)
-               block += v->data_blocks;
+               block = block - v->hash_start + v->data_blocks;
 
        /*
         * For RS(M, N), the continuous FEC data is divided into blocks of N
index 114927d..613c171 100644 (file)
@@ -931,6 +931,24 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
        return 0;
 }
 
+static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors)
+{
+       struct dm_io_region region;
+       struct dm_io_request req;
+
+       region.bdev = wc->ssd_dev->bdev;
+       region.sector = wc->start_sector;
+       region.count = n_sectors;
+       req.bi_op = REQ_OP_READ;
+       req.bi_op_flags = REQ_SYNC;
+       req.mem.type = DM_IO_VMA;
+       req.mem.ptr.vma = (char *)wc->memory_map;
+       req.client = wc->dm_io;
+       req.notify.fn = NULL;
+
+       return dm_io(&req, 1, &region, NULL);
+}
+
 static void writecache_resume(struct dm_target *ti)
 {
        struct dm_writecache *wc = ti->private;
@@ -941,8 +959,18 @@ static void writecache_resume(struct dm_target *ti)
 
        wc_lock(wc);
 
-       if (WC_MODE_PMEM(wc))
+       if (WC_MODE_PMEM(wc)) {
                persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+       } else {
+               r = writecache_read_metadata(wc, wc->metadata_sectors);
+               if (r) {
+                       size_t sb_entries_offset;
+                       writecache_error(wc, r, "unable to read metadata: %d", r);
+                       sb_entries_offset = offsetof(struct wc_memory_superblock, entries);
+                       memset((char *)wc->memory_map + sb_entries_offset, -1,
+                              (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset);
+               }
+       }
 
        wc->tree = RB_ROOT;
        INIT_LIST_HEAD(&wc->lru);
@@ -2102,6 +2130,12 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
                ti->error = "Invalid block size";
                goto bad;
        }
+       if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) ||
+           wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) {
+               r = -EINVAL;
+               ti->error = "Block size is smaller than device logical block size";
+               goto bad;
+       }
        wc->block_size_bits = __ffs(wc->block_size);
 
        wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
@@ -2200,8 +2234,6 @@ invalid_optional:
                        goto bad;
                }
        } else {
-               struct dm_io_region region;
-               struct dm_io_request req;
                size_t n_blocks, n_metadata_blocks;
                uint64_t n_bitmap_bits;
 
@@ -2258,19 +2290,9 @@ invalid_optional:
                        goto bad;
                }
 
-               region.bdev = wc->ssd_dev->bdev;
-               region.sector = wc->start_sector;
-               region.count = wc->metadata_sectors;
-               req.bi_op = REQ_OP_READ;
-               req.bi_op_flags = REQ_SYNC;
-               req.mem.type = DM_IO_VMA;
-               req.mem.ptr.vma = (char *)wc->memory_map;
-               req.client = wc->dm_io;
-               req.notify.fn = NULL;
-
-               r = dm_io(&req, 1, &region, NULL);
+               r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT);
                if (r) {
-                       ti->error = "Unable to read metadata";
+                       ti->error = "Unable to read first block of metadata";
                        goto bad;
                }
        }