*/
chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
+ return dm_exception_store_set_chunk_size(store, chunk_size_ulong,
+ error);
+}
+
+int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
+ unsigned long chunk_size_ulong,
+ char **error)
+{
/* Check chunk_size is a power of 2 */
if (!is_power_of_2(chunk_size_ulong)) {
*error = "Chunk size is not a power of 2";
return -EINVAL;
}
+ if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) {
+ *error = "Chunk size is too high";
+ return -EINVAL;
+ }
+
store->chunk_size = chunk_size_ulong;
store->chunk_mask = chunk_size_ulong - 1;
store->chunk_shift = ffs(chunk_size_ulong) - 1;
int dm_exception_store_type_register(struct dm_exception_store_type *type);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
+int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
+ unsigned long chunk_size_ulong,
+ char **error);
+
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store);
struct dm_target *ti;
uint32_t region_size;
region_t region_count;
+ uint64_t luid;
char uuid[DM_UUID_LEN];
char *usr_argv_str;
* restored.
*/
retry:
- r = dm_consult_userspace(uuid, request_type, data,
+ r = dm_consult_userspace(uuid, lc->luid, request_type, data,
data_size, rdata, rdata_size);
if (r != -ESRCH)
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(2*HZ);
DMWARN("Attempting to contact userspace log server...");
- r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
+ r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
+ lc->usr_argv_str,
strlen(lc->usr_argv_str) + 1,
NULL, NULL);
if (!r)
break;
}
DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
- r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
+ r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
0, NULL, NULL);
if (!r)
goto retry;
return -ENOMEM;
}
- for (i = 0, str_size = 0; i < argc; i++)
- str_size += sprintf(str + str_size, "%s ", argv[i]);
- str_size += sprintf(str + str_size, "%llu",
- (unsigned long long)ti->len);
+ str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
+ for (i = 0; i < argc; i++)
+ str_size += sprintf(str + str_size, " %s", argv[i]);
*ctr_str = str;
return str_size;
return -ENOMEM;
}
+ /* The ptr value is sufficient for local unique id */
+ lc->luid = (uint64_t)lc;
+
lc->ti = ti;
if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
}
/* Send table string */
- r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
+ r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
ctr_str, str_size, NULL, NULL);
if (r == -ESRCH) {
/* Since the region size does not change, get it now */
rdata_size = sizeof(rdata);
- r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
+ r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
NULL, 0, (char *)&rdata, &rdata_size);
if (r) {
int r;
struct log_c *lc = log->context;
- r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
+ r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
NULL, 0,
NULL, NULL);
int r;
struct log_c *lc = log->context;
- r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
+ r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
NULL, 0,
NULL, NULL);
int r;
struct log_c *lc = log->context;
- r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
+ r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
NULL, 0,
NULL, NULL);
struct log_c *lc = log->context;
lc->in_sync_hint = 0;
- r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
+ r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
NULL, 0,
NULL, NULL);
char *result, unsigned maxlen)
{
int r = 0;
+ char *table_args;
size_t sz = (size_t)maxlen;
struct log_c *lc = log->context;
break;
case STATUSTYPE_TABLE:
sz = 0;
- DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
- lc->uuid, lc->usr_argv_str);
+ table_args = strstr(lc->usr_argv_str, " ");
+ BUG_ON(!table_args); /* There will always be a ' ' */
+ table_args++;
+
+ DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
+ lc->uuid, table_args);
break;
}
return (r) ? 0 : (int)sz;
/**
* dm_consult_userspace
- * @uuid: log's uuid (must be DM_UUID_LEN in size)
+ * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)
+ * @luid: log's local unique identifier
* @request_type: found in include/linux/dm-log-userspace.h
* @data: data to tx to the server
* @data_size: size of data in bytes
*
* Returns: 0 on success, -EXXX on failure
**/
-int dm_consult_userspace(const char *uuid, int request_type,
+int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
char *data, size_t data_size,
char *rdata, size_t *rdata_size)
{
memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
memcpy(tfr->uuid, uuid, DM_UUID_LEN);
+ tfr->luid = luid;
tfr->seq = dm_ulog_seq++;
/*
int dm_ulog_tfr_init(void);
void dm_ulog_tfr_exit(void);
-int dm_consult_userspace(const char *uuid, int request_type,
+int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
char *data, size_t data_size,
char *rdata, size_t *rdata_size);
*/
dm_rh_inc_pending(ms->rh, &sync);
dm_rh_inc_pending(ms->rh, &nosync);
- ms->log_failure = dm_rh_flush(ms->rh) ? 1 : 0;
+
+ /*
+ * If the flush fails on a previous call and succeeds here,
+ * we must not reset the log_failure variable. We need
+ * userspace interaction to do that.
+ */
+ ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;
/*
* Dispatch io.
void *zero_area;
/*
+ * An area used for header. The header can be written
+ * concurrently with metadata (when invalidating the snapshot),
+ * so it needs a separate buffer.
+ */
+ void *header_area;
+
+ /*
* Used to keep track of which metadata area the data in
* 'chunk' refers to.
*/
*/
ps->area = vmalloc(len);
if (!ps->area)
- return r;
+ goto err_area;
ps->zero_area = vmalloc(len);
- if (!ps->zero_area) {
- vfree(ps->area);
- return r;
- }
+ if (!ps->zero_area)
+ goto err_zero_area;
memset(ps->zero_area, 0, len);
+ ps->header_area = vmalloc(len);
+ if (!ps->header_area)
+ goto err_header_area;
+
return 0;
+
+err_header_area:
+ vfree(ps->zero_area);
+
+err_zero_area:
+ vfree(ps->area);
+
+err_area:
+ return r;
}
static void free_area(struct pstore *ps)
if (ps->zero_area)
vfree(ps->zero_area);
ps->zero_area = NULL;
+
+ if (ps->header_area)
+ vfree(ps->header_area);
+ ps->header_area = NULL;
}
struct mdata_req {
/*
* Read or write a chunk aligned and sized block of data from a device.
*/
-static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
+static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
+ int metadata)
{
struct dm_io_region where = {
.bdev = ps->store->cow->bdev,
struct dm_io_request io_req = {
.bi_rw = rw,
.mem.type = DM_IO_VMA,
- .mem.ptr.vma = ps->area,
+ .mem.ptr.vma = area,
.client = ps->io_client,
.notify.fn = NULL,
};
chunk = area_location(ps, ps->current_area);
- r = chunk_io(ps, chunk, rw, 0);
+ r = chunk_io(ps, ps->area, chunk, rw, 0);
if (r)
return r;
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
- struct dm_io_region where = {
- .bdev = ps->store->cow->bdev,
- .sector = ps->store->chunk_size * area_location(ps, area),
- .count = ps->store->chunk_size,
- };
- struct dm_io_request io_req = {
- .bi_rw = WRITE,
- .mem.type = DM_IO_VMA,
- .mem.ptr.vma = ps->zero_area,
- .client = ps->io_client,
- .notify.fn = NULL,
- };
-
- return dm_io(&io_req, 1, &where, NULL);
+ return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
}
static int read_header(struct pstore *ps, int *new_snapshot)
struct disk_header *dh;
chunk_t chunk_size;
int chunk_size_supplied = 1;
+ char *chunk_err;
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
if (r)
return r;
- r = chunk_io(ps, 0, READ, 1);
+ r = chunk_io(ps, ps->header_area, 0, READ, 1);
if (r)
goto bad;
- dh = (struct disk_header *) ps->area;
+ dh = ps->header_area;
if (le32_to_cpu(dh->magic) == 0) {
*new_snapshot = 1;
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
- if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
+ if (ps->store->chunk_size == chunk_size)
return 0;
- DMWARN("chunk size %llu in device metadata overrides "
- "table chunk size of %llu.",
- (unsigned long long)chunk_size,
- (unsigned long long)ps->store->chunk_size);
+ if (chunk_size_supplied)
+ DMWARN("chunk size %llu in device metadata overrides "
+ "table chunk size of %llu.",
+ (unsigned long long)chunk_size,
+ (unsigned long long)ps->store->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
- ps->store->chunk_size = chunk_size;
- ps->store->chunk_mask = chunk_size - 1;
- ps->store->chunk_shift = ffs(chunk_size) - 1;
+ r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
+ &chunk_err);
+ if (r) {
+ DMERR("invalid on-disk chunk size %llu: %s.",
+ (unsigned long long)chunk_size, chunk_err);
+ return r;
+ }
r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
ps->io_client);
{
struct disk_header *dh;
- memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
+ memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
- dh = (struct disk_header *) ps->area;
+ dh = ps->header_area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
- return chunk_io(ps, 0, WRITE, 1);
+ return chunk_io(ps, ps->header_area, 0, WRITE, 1);
}
/*
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
+ ps->zero_area = NULL;
+ ps->header_area = NULL;
ps->next_free = 2; /* skipping the header and first area */
ps->current_committed = 0;
return 0;
}
+static int snapshot_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct dm_snapshot *snap = ti->private;
+
+ return fn(ti, snap->origin, 0, ti->len, data);
+}
+
+
/*-----------------------------------------------------------------
* Origin methods
*---------------------------------------------------------------*/
return 0;
}
+static int origin_iterate_devices(struct dm_target *ti,
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct dm_dev *dev = ti->private;
+
+ return fn(ti, dev, 0, ti->len, data);
+}
+
static struct target_type origin_target = {
.name = "snapshot-origin",
- .version = {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = origin_ctr,
.dtr = origin_dtr,
.map = origin_map,
.resume = origin_resume,
.status = origin_status,
+ .iterate_devices = origin_iterate_devices,
};
static struct target_type snapshot_target = {
.name = "snapshot",
- .version = {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
.end_io = snapshot_end_io,
.resume = snapshot_resume,
.status = snapshot_status,
+ .iterate_devices = snapshot_iterate_devices,
};
static int __init dm_snapshot_init(void)
return ret;
}
+static void stripe_io_hints(struct dm_target *ti,
+ struct queue_limits *limits)
+{
+ struct stripe_c *sc = ti->private;
+ unsigned chunk_size = (sc->chunk_mask + 1) << 9;
+
+ blk_limits_io_min(limits, chunk_size);
+ limits->io_opt = chunk_size * sc->stripes;
+}
+
static struct target_type stripe_target = {
.name = "striped",
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
.end_io = stripe_end_io,
.status = stripe_status,
.iterate_devices = stripe_iterate_devices,
+ .io_hints = stripe_io_hints,
};
int __init dm_stripe_init(void)
}
/*
- * If possible, this checks an area of a destination device is valid.
+ * If possible, this checks an area of a destination device is invalid.
*/
-static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
{
struct queue_limits *limits = data;
struct block_device *bdev = dev->bdev;
char b[BDEVNAME_SIZE];
if (!dev_size)
- return 1;
+ return 0;
if ((start >= dev_size) || (start + len > dev_size)) {
- DMWARN("%s: %s too small for target",
- dm_device_name(ti->table->md), bdevname(bdev, b));
- return 0;
+ DMWARN("%s: %s too small for target: "
+ "start=%llu, len=%llu, dev_size=%llu",
+ dm_device_name(ti->table->md), bdevname(bdev, b),
+ (unsigned long long)start,
+ (unsigned long long)len,
+ (unsigned long long)dev_size);
+ return 1;
}
if (logical_block_size_sectors <= 1)
- return 1;
+ return 0;
if (start & (logical_block_size_sectors - 1)) {
DMWARN("%s: start=%llu not aligned to h/w "
- "logical block size %hu of %s",
+ "logical block size %u of %s",
dm_device_name(ti->table->md),
(unsigned long long)start,
limits->logical_block_size, bdevname(bdev, b));
- return 0;
+ return 1;
}
if (len & (logical_block_size_sectors - 1)) {
DMWARN("%s: len=%llu not aligned to h/w "
- "logical block size %hu of %s",
+ "logical block size %u of %s",
dm_device_name(ti->table->md),
(unsigned long long)len,
limits->logical_block_size, bdevname(bdev, b));
- return 0;
+ return 1;
}
- return 1;
+ return 0;
}
/*
}
if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
- DMWARN("%s: target device %s is misaligned",
- dm_device_name(ti->table->md), bdevname(bdev, b));
+ DMWARN("%s: target device %s is misaligned: "
+ "physical_block_size=%u, logical_block_size=%u, "
+ "alignment_offset=%u, start=%llu",
+ dm_device_name(ti->table->md), bdevname(bdev, b),
+ q->limits.physical_block_size,
+ q->limits.logical_block_size,
+ q->limits.alignment_offset,
+ (unsigned long long) start << 9);
+
/*
* Check if merge fn is supported.
if (remaining) {
DMWARN("%s: table line %u (start sect %llu len %llu) "
- "not aligned to h/w logical block size %hu",
+ "not aligned to h/w logical block size %u",
dm_device_name(table->md), i,
(unsigned long long) ti->begin,
(unsigned long long) ti->len,
ti->type->iterate_devices(ti, dm_set_device_limits,
&ti_limits);
+ /* Set I/O hints portion of queue limits */
+ if (ti->type->io_hints)
+ ti->type->io_hints(ti, &ti_limits);
+
/*
* Check each device area is consistent with the target's
* overall queue limits.
*/
- if (!ti->type->iterate_devices(ti, device_area_is_valid,
- &ti_limits))
+ if (ti->type->iterate_devices(ti, device_area_is_invalid,
+ &ti_limits))
return -EINVAL;
combine_limits:
dm_put(md);
}
+static void free_rq_clone(struct request *clone)
+{
+ struct dm_rq_target_io *tio = clone->end_io_data;
+
+ blk_rq_unprep_clone(clone);
+ free_rq_tio(tio);
+}
+
static void dm_unprep_request(struct request *rq)
{
struct request *clone = rq->special;
- struct dm_rq_target_io *tio = clone->end_io_data;
rq->special = NULL;
rq->cmd_flags &= ~REQ_DONTPREP;
- blk_rq_unprep_clone(clone);
- free_rq_tio(tio);
+ free_rq_clone(clone);
}
/*
rq->sense_len = clone->sense_len;
}
- BUG_ON(clone->bio);
- free_rq_tio(tio);
+ free_rq_clone(clone);
blk_end_request_all(rq, error);
iterate_devices_callout_fn fn,
void *data);
+typedef void (*dm_io_hints_fn) (struct dm_target *ti,
+ struct queue_limits *limits);
+
/*
* Returns:
* 0: The target can handle the next I/O immediately.
dm_merge_fn merge;
dm_busy_fn busy;
dm_iterate_devices_fn iterate_devices;
+ dm_io_hints_fn io_hints;
/* For internal device-mapper use. */
struct list_head list;
(DM_ULOG_REQUEST_MASK & (request_type))
struct dm_ulog_request {
- char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */
+ /*
+ * The local unique identifier (luid) and the universally unique
+ * identifier (uuid) are used to tie a request to a specific
+ * mirror log. A single machine log could probably make due with
+ * just the 'luid', but a cluster-aware log must use the 'uuid' and
+ * the 'luid'. The uuid is what is required for node to node
+ * communication concerning a particular log, but the 'luid' helps
+ * differentiate between logs that are being swapped and have the
+ * same 'uuid'. (Think "live" and "inactive" device-mapper tables.)
+ */
+ uint64_t luid;
+ char uuid[DM_UUID_LEN];
char padding[7]; /* Padding because DM_UUID_LEN = 129 */
int32_t error; /* Used to report back processing errors */