#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
-#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
-#define RBD_MAX_POOL_NAME_LEN 64
#define RBD_MAX_SNAP_NAME_LEN 32
#define RBD_MAX_OPT_LEN 1024
*/
struct rbd_image_header {
u64 image_size;
- char block_name[32];
+ char *object_prefix;
__u8 obj_order;
__u8 crypt_type;
__u8 comp_type;
struct rbd_snap {
struct device dev;
const char *name;
- size_t size;
+ u64 size;
struct list_head node;
u64 id;
};
spinlock_t lock; /* queue lock */
struct rbd_image_header header;
- char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
- int obj_len;
- char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
- char pool_name[RBD_MAX_POOL_NAME_LEN];
- int poolid;
+ char *image_name;
+ size_t image_name_len;
+ char *header_name;
+ char *pool_name;
+ int pool_id;
struct ceph_osd_event *watch_event;
struct ceph_osd_request *watch_request;
/* protects updating the header */
struct rw_semaphore header_rwsem;
- char snap_name[RBD_MAX_SNAP_NAME_LEN];
- u32 cur_snap; /* index+1 of current snapshot within snap context
- 0 - for the head */
- int read_only;
+ /* name of the snapshot this device reads from */
+ char *snap_name;
+ /* id of the snapshot this device reads from */
+ u64 snap_id; /* current snapshot id */
+ /* whether the snap_id this device reads from still exists */
+ bool snap_exists;
+ int read_only;
struct list_head node;
put_device(&rbd_dev->dev);
}
-static int __rbd_update_snaps(struct rbd_device *rbd_dev);
+static int __rbd_refresh_header(struct rbd_device *rbd_dev);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
/*
* Initialize an rbd client instance.
- * We own *opt.
+ * We own *ceph_opts.
*/
-static struct rbd_client *rbd_client_create(struct ceph_options *opt,
+static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts,
struct rbd_options *rbd_opts)
{
struct rbd_client *rbdc;
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
+ rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
if (IS_ERR(rbdc->client))
goto out_mutex;
- opt = NULL; /* Now rbdc->client is responsible for opt */
+ ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
ret = ceph_open_session(rbdc->client);
if (ret < 0)
mutex_unlock(&ctl_mutex);
kfree(rbdc);
out_opt:
- if (opt)
- ceph_destroy_options(opt);
+ if (ceph_opts)
+ ceph_destroy_options(ceph_opts);
return ERR_PTR(ret);
}
/*
* Find a ceph client with specific addr and configuration.
*/
-static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
+static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts)
{
struct rbd_client *client_node;
- if (opt->flags & CEPH_OPT_NOSHARE)
+ if (ceph_opts->flags & CEPH_OPT_NOSHARE)
return NULL;
list_for_each_entry(client_node, &rbd_client_list, node)
- if (ceph_compare_options(opt, client_node->client) == 0)
+ if (!ceph_compare_options(ceph_opts, client_node->client))
return client_node;
return NULL;
}
/* string args above */
};
-static match_table_t rbdopt_tokens = {
+static match_table_t rbd_opts_tokens = {
{Opt_notify_timeout, "notify_timeout=%d"},
/* int args above */
/* string args above */
static int parse_rbd_opts_token(char *c, void *private)
{
- struct rbd_options *rbdopt = private;
+ struct rbd_options *rbd_opts = private;
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
- token = match_token(c, rbdopt_tokens, argstr);
+ token = match_token(c, rbd_opts_tokens, argstr);
if (token < 0)
return -EINVAL;
switch (token) {
case Opt_notify_timeout:
- rbdopt->notify_timeout = intval;
+ rbd_opts->notify_timeout = intval;
break;
default:
BUG_ON(token);
char *options)
{
struct rbd_client *rbdc;
- struct ceph_options *opt;
+ struct ceph_options *ceph_opts;
struct rbd_options *rbd_opts;
rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
- opt = ceph_parse_options(options, mon_addr,
- mon_addr + mon_addr_len,
- parse_rbd_opts_token, rbd_opts);
- if (IS_ERR(opt)) {
+ ceph_opts = ceph_parse_options(options, mon_addr,
+ mon_addr + mon_addr_len,
+ parse_rbd_opts_token, rbd_opts);
+ if (IS_ERR(ceph_opts)) {
kfree(rbd_opts);
- return ERR_CAST(opt);
+ return ERR_CAST(ceph_opts);
}
spin_lock(&rbd_client_list_lock);
- rbdc = __rbd_client_find(opt);
+ rbdc = __rbd_client_find(ceph_opts);
if (rbdc) {
/* using an existing client */
kref_get(&rbdc->kref);
spin_unlock(&rbd_client_list_lock);
- ceph_destroy_options(opt);
+ ceph_destroy_options(ceph_opts);
kfree(rbd_opts);
return rbdc;
}
spin_unlock(&rbd_client_list_lock);
- rbdc = rbd_client_create(opt, rbd_opts);
+ rbdc = rbd_client_create(ceph_opts, rbd_opts);
if (IS_ERR(rbdc))
kfree(rbd_opts);
struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
dout("rbd_release_client %p\n", rbdc);
+ spin_lock(&rbd_client_list_lock);
list_del(&rbdc->node);
+ spin_unlock(&rbd_client_list_lock);
ceph_destroy_client(rbdc->client);
kfree(rbdc->rbd_opts);
*/
static void rbd_put_client(struct rbd_device *rbd_dev)
{
- spin_lock(&rbd_client_list_lock);
kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
- spin_unlock(&rbd_client_list_lock);
rbd_dev->rbd_client = NULL;
}
*/
static int rbd_header_from_disk(struct rbd_image_header *header,
struct rbd_image_header_ondisk *ondisk,
- int allocated_snaps,
+ u32 allocated_snaps,
gfp_t gfp_flags)
{
- int i;
- u32 snap_count;
+ u32 i, snap_count;
if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
return -ENXIO;
snap_count = le32_to_cpu(ondisk->snap_count);
+ if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context))
+ / sizeof (*ondisk))
+ return -EINVAL;
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
- snap_count * sizeof (*ondisk),
+ snap_count * sizeof(u64),
gfp_flags);
if (!header->snapc)
return -ENOMEM;
header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
if (snap_count) {
header->snap_names = kmalloc(header->snap_names_len,
- GFP_KERNEL);
+ gfp_flags);
if (!header->snap_names)
goto err_snapc;
header->snap_sizes = kmalloc(snap_count * sizeof(u64),
- GFP_KERNEL);
+ gfp_flags);
if (!header->snap_sizes)
goto err_names;
} else {
header->snap_names = NULL;
header->snap_sizes = NULL;
}
- memcpy(header->block_name, ondisk->block_name,
+
+ header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1,
+ gfp_flags);
+ if (!header->object_prefix)
+ goto err_sizes;
+
+ memcpy(header->object_prefix, ondisk->block_name,
sizeof(ondisk->block_name));
+ header->object_prefix[sizeof (ondisk->block_name)] = '\0';
header->image_size = le64_to_cpu(ondisk->image_size);
header->obj_order = ondisk->options.order;
return 0;
+err_sizes:
+ kfree(header->snap_sizes);
err_names:
kfree(header->snap_names);
err_snapc:
return -ENOMEM;
}
-static int snap_index(struct rbd_image_header *header, int snap_num)
-{
- return header->total_snaps - snap_num;
-}
-
-static u64 cur_snap_id(struct rbd_device *rbd_dev)
-{
- struct rbd_image_header *header = &rbd_dev->header;
-
- if (!rbd_dev->cur_snap)
- return 0;
-
- return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
-}
-
static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
u64 *seq, u64 *size)
{
return -ENOENT;
}
-static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
+static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size)
{
- struct rbd_image_header *header = &dev->header;
- struct ceph_snap_context *snapc = header->snapc;
- int ret = -ENOENT;
-
- BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
+ int ret;
- down_write(&dev->header_rwsem);
+ down_write(&rbd_dev->header_rwsem);
- if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
+ if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
sizeof (RBD_SNAP_HEAD_NAME))) {
- if (header->total_snaps)
- snapc->seq = header->snap_seq;
- else
- snapc->seq = 0;
- dev->cur_snap = 0;
- dev->read_only = 0;
+ rbd_dev->snap_id = CEPH_NOSNAP;
+ rbd_dev->snap_exists = false;
+ rbd_dev->read_only = 0;
if (size)
- *size = header->image_size;
+ *size = rbd_dev->header.image_size;
} else {
- ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
+ u64 snap_id = 0;
+
+ ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name,
+ &snap_id, size);
if (ret < 0)
goto done;
-
- dev->cur_snap = header->total_snaps - ret;
- dev->read_only = 1;
+ rbd_dev->snap_id = snap_id;
+ rbd_dev->snap_exists = true;
+ rbd_dev->read_only = 1;
}
ret = 0;
done:
- up_write(&dev->header_rwsem);
+ up_write(&rbd_dev->header_rwsem);
return ret;
}
static void rbd_header_free(struct rbd_image_header *header)
{
- kfree(header->snapc);
- kfree(header->snap_names);
+ kfree(header->object_prefix);
kfree(header->snap_sizes);
+ kfree(header->snap_names);
+ ceph_put_snap_context(header->snapc);
}
/*
* get the actual striped segment name, offset and length
*/
static u64 rbd_get_segment(struct rbd_image_header *header,
- const char *block_name,
+ const char *object_prefix,
u64 ofs, u64 len,
char *seg_name, u64 *segofs)
{
if (seg_name)
snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
- "%s.%012llx", block_name, seg);
+ "%s.%012llx", object_prefix, seg);
ofs = ofs & ((1 << header->obj_order) - 1);
len = min_t(u64, len, (1 << header->obj_order) - ofs);
* Send ceph osd request
*/
static int rbd_do_request(struct request *rq,
- struct rbd_device *dev,
+ struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- const char *obj, u64 ofs, u64 len,
+ const char *object_name, u64 ofs, u64 len,
struct bio *bio,
struct page **pages,
int num_pages,
int flags,
struct ceph_osd_req_op *ops,
- int num_reply,
struct rbd_req_coll *coll,
int coll_index,
void (*rbd_cb)(struct ceph_osd_request *req,
req_data->coll_index = coll_index;
}
- dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
-
- down_read(&dev->header_rwsem);
+ dout("rbd_do_request object_name=%s ofs=%lld len=%lld\n",
+ object_name, len, ofs);
- osdc = &dev->rbd_client->client->osdc;
+ osdc = &rbd_dev->rbd_client->client->osdc;
req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
false, GFP_NOIO, pages, bio);
if (!req) {
- up_read(&dev->header_rwsem);
ret = -ENOMEM;
goto done_pages;
}
reqhead = req->r_request->front.iov_base;
reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
- strncpy(req->r_oid, obj, sizeof(req->r_oid));
+ strncpy(req->r_oid, object_name, sizeof(req->r_oid));
req->r_oid_len = strlen(req->r_oid);
layout = &req->r_file_layout;
layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
layout->fl_stripe_count = cpu_to_le32(1);
layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
- layout->fl_pg_preferred = cpu_to_le32(-1);
- layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+ layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id);
ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
req, ops);
snapc,
&mtime,
req->r_oid, req->r_oid_len);
- up_read(&dev->header_rwsem);
if (linger_req) {
ceph_osdc_set_request_linger(osdc, req);
op = (void *)(replyhead + 1);
rc = le32_to_cpu(replyhead->result);
bytes = le64_to_cpu(op->extent.length);
- read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+ read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
/*
* Do a synchronous ceph osd operation
*/
-static int rbd_req_sync_op(struct rbd_device *dev,
+static int rbd_req_sync_op(struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
int opcode,
int flags,
struct ceph_osd_req_op *orig_ops,
- int num_reply,
- const char *obj,
+ const char *object_name,
u64 ofs, u64 len,
char *buf,
struct ceph_osd_request **linger_req,
}
}
- ret = rbd_do_request(NULL, dev, snapc, snapid,
- obj, ofs, len, NULL,
+ ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
+ object_name, ofs, len, NULL,
pages, num_pages,
flags,
ops,
- 2,
NULL, 0,
NULL,
linger_req, ver);
* Do an asynchronous ceph osd operation
*/
static int rbd_do_op(struct request *rq,
- struct rbd_device *rbd_dev ,
+ struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- int opcode, int flags, int num_reply,
+ int opcode, int flags,
u64 ofs, u64 len,
struct bio *bio,
struct rbd_req_coll *coll,
return -ENOMEM;
seg_len = rbd_get_segment(&rbd_dev->header,
- rbd_dev->header.block_name,
+ rbd_dev->header.object_prefix,
ofs, len,
seg_name, &seg_ofs);
NULL, 0,
flags,
ops,
- num_reply,
coll, coll_index,
rbd_req_cb, 0, NULL);
return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
- 2,
ofs, len, bio, coll, coll_index);
}
int coll_index)
{
return rbd_do_op(rq, rbd_dev, NULL,
- (snapid ? snapid : CEPH_NOSNAP),
+ snapid,
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
- 2,
ofs, len, bio, coll, coll_index);
}
/*
* Request sync osd read
*/
-static int rbd_req_sync_read(struct rbd_device *dev,
+static int rbd_req_sync_read(struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- const char *obj,
+ const char *object_name,
u64 ofs, u64 len,
char *buf,
u64 *ver)
{
- return rbd_req_sync_op(dev, NULL,
- (snapid ? snapid : CEPH_NOSNAP),
+ return rbd_req_sync_op(rbd_dev, NULL,
+ snapid,
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
NULL,
- 1, obj, ofs, len, buf, NULL, ver);
+ object_name, ofs, len, buf, NULL, ver);
}
/*
* Request sync osd watch
*/
-static int rbd_req_sync_notify_ack(struct rbd_device *dev,
+static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev,
u64 ver,
u64 notify_id,
- const char *obj)
+ const char *object_name)
{
struct ceph_osd_req_op *ops;
- struct page **pages = NULL;
int ret;
ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
if (ret < 0)
return ret;
- ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
+ ops[0].watch.ver = cpu_to_le64(ver);
ops[0].watch.cookie = notify_id;
ops[0].watch.flag = 0;
- ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
- obj, 0, 0, NULL,
- pages, 0,
+ ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP,
+ object_name, 0, 0, NULL,
+ NULL, 0,
CEPH_OSD_FLAG_READ,
ops,
- 1,
NULL, 0,
rbd_simple_req_cb, 0, NULL);
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
- struct rbd_device *dev = (struct rbd_device *)data;
+ struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ u64 hver;
int rc;
- if (!dev)
+ if (!rbd_dev)
return;
- dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
- notify_id, (int)opcode);
+ dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n",
+ rbd_dev->header_name, notify_id, (int) opcode);
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_update_snaps(dev);
+ rc = __rbd_refresh_header(rbd_dev);
+ hver = rbd_dev->header.obj_version;
mutex_unlock(&ctl_mutex);
if (rc)
pr_warning(RBD_DRV_NAME "%d got notification but failed to "
- " update snaps: %d\n", dev->major, rc);
+ " update snaps: %d\n", rbd_dev->major, rc);
- rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
+ rbd_req_sync_notify_ack(rbd_dev, hver, notify_id, rbd_dev->header_name);
}
/*
* Request sync osd watch
*/
-static int rbd_req_sync_watch(struct rbd_device *dev,
- const char *obj,
+static int rbd_req_sync_watch(struct rbd_device *rbd_dev,
+ const char *object_name,
u64 ver)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
if (ret < 0)
return ret;
ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
- (void *)dev, &dev->watch_event);
+ (void *)rbd_dev, &rbd_dev->watch_event);
if (ret < 0)
goto fail;
ops[0].watch.ver = cpu_to_le64(ver);
- ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
ops[0].watch.flag = 1;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL,
- &dev->watch_request, NULL);
+ object_name, 0, 0, NULL,
+ &rbd_dev->watch_request, NULL);
if (ret < 0)
goto fail_event;
return 0;
fail_event:
- ceph_osdc_cancel_event(dev->watch_event);
- dev->watch_event = NULL;
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
fail:
rbd_destroy_ops(ops);
return ret;
/*
* Request sync osd unwatch
*/
-static int rbd_req_sync_unwatch(struct rbd_device *dev,
- const char *obj)
+static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev,
+ const char *object_name)
{
struct ceph_osd_req_op *ops;
return ret;
ops[0].watch.ver = 0;
- ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
ops[0].watch.flag = 0;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, NULL);
+ object_name, 0, 0, NULL, NULL, NULL);
rbd_destroy_ops(ops);
- ceph_osdc_cancel_event(dev->watch_event);
- dev->watch_event = NULL;
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
return ret;
}
struct rbd_notify_info {
- struct rbd_device *dev;
+ struct rbd_device *rbd_dev;
};
static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
- struct rbd_device *dev = (struct rbd_device *)data;
- if (!dev)
+ struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ if (!rbd_dev)
return;
- dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
+ dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n",
+ rbd_dev->header_name,
notify_id, (int)opcode);
}
/*
* Request sync osd notify
*/
-static int rbd_req_sync_notify(struct rbd_device *dev,
- const char *obj)
+static int rbd_req_sync_notify(struct rbd_device *rbd_dev,
+ const char *object_name)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct ceph_osd_event *event;
struct rbd_notify_info info;
int payload_len = sizeof(u32) + sizeof(u32);
if (ret < 0)
return ret;
- info.dev = dev;
+ info.rbd_dev = rbd_dev;
ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
(void *)&info, &event);
ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
ops[0].watch.timeout = 12;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, NULL);
+ object_name, 0, 0, NULL, NULL, NULL);
if (ret < 0)
goto fail_event;
/*
* Request sync osd read
*/
-static int rbd_req_sync_exec(struct rbd_device *dev,
- const char *obj,
- const char *cls,
- const char *method,
+static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
+ const char *object_name,
+ const char *class_name,
+ const char *method_name,
const char *data,
int len,
u64 *ver)
{
struct ceph_osd_req_op *ops;
- int cls_len = strlen(cls);
- int method_len = strlen(method);
+ int class_name_len = strlen(class_name);
+ int method_name_len = strlen(method_name);
int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
- cls_len + method_len + len);
+ class_name_len + method_name_len + len);
if (ret < 0)
return ret;
- ops[0].cls.class_name = cls;
- ops[0].cls.class_len = (__u8)cls_len;
- ops[0].cls.method_name = method;
- ops[0].cls.method_len = (__u8)method_len;
+ ops[0].cls.class_name = class_name;
+ ops[0].cls.class_len = (__u8) class_name_len;
+ ops[0].cls.method_name = method_name;
+ ops[0].cls.method_len = (__u8) method_name_len;
ops[0].cls.argc = 0;
ops[0].cls.indata = data;
ops[0].cls.indata_len = len;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, ver);
+ object_name, 0, 0, NULL, NULL, ver);
rbd_destroy_ops(ops);
u64 ofs;
int num_segs, cur_seg = 0;
struct rbd_req_coll *coll;
+ struct ceph_snap_context *snapc;
/* peek at request from block layer */
if (!rq)
spin_unlock_irq(q->queue_lock);
+ down_read(&rbd_dev->header_rwsem);
+
+ if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) {
+ up_read(&rbd_dev->header_rwsem);
+ dout("request for non-existent snapshot");
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENXIO);
+ continue;
+ }
+
+ snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+
+ up_read(&rbd_dev->header_rwsem);
+
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
size, blk_rq_pos(rq) * SECTOR_SIZE);
if (!coll) {
spin_lock_irq(q->queue_lock);
__blk_end_request_all(rq, -ENOMEM);
+ ceph_put_snap_context(snapc);
continue;
}
/* a bio clone to be passed down to OSD req */
dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
op_size = rbd_get_segment(&rbd_dev->header,
- rbd_dev->header.block_name,
+ rbd_dev->header.object_prefix,
ofs, size,
NULL, NULL);
kref_get(&coll->kref);
/* init OSD command: write or read */
if (do_write)
rbd_req_write(rq, rbd_dev,
- rbd_dev->header.snapc,
+ snapc,
ofs,
op_size, bio,
coll, cur_seg);
else
rbd_req_read(rq, rbd_dev,
- cur_snap_id(rbd_dev),
+ rbd_dev->snap_id,
ofs,
op_size, bio,
coll, cur_seg);
if (bp)
bio_pair_release(bp);
spin_lock_irq(q->queue_lock);
+
+ ceph_put_snap_context(snapc);
}
}
{
ssize_t rc;
struct rbd_image_header_ondisk *dh;
- int snap_count = 0;
+ u32 snap_count = 0;
u64 ver;
size_t len;
rc = rbd_req_sync_read(rbd_dev,
NULL, CEPH_NOSNAP,
- rbd_dev->obj_md_name,
+ rbd_dev->header_name,
0, len,
(char *)dh, &ver);
if (rc < 0)
if (rc < 0) {
if (rc == -ENXIO)
pr_warning("unrecognized header format"
- " for image %s", rbd_dev->obj);
+ " for image %s\n",
+ rbd_dev->image_name);
goto out_dh;
}
/*
* create a snapshot
*/
-static int rbd_header_add_snap(struct rbd_device *dev,
+static int rbd_header_add_snap(struct rbd_device *rbd_dev,
const char *snap_name,
gfp_t gfp_flags)
{
struct ceph_mon_client *monc;
/* we should create a snapshot only if we're pointing at the head */
- if (dev->cur_snap)
+ if (rbd_dev->snap_id != CEPH_NOSNAP)
return -EINVAL;
- monc = &dev->rbd_client->client->monc;
- ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
+ monc = &rbd_dev->rbd_client->client->monc;
+ ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid);
dout("created snapid=%lld\n", new_snapid);
if (ret < 0)
return ret;
ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
ceph_encode_64_safe(&p, e, new_snapid, bad);
- ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
+ ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+ "rbd", "snap_add",
data, p - data, &ver);
kfree(data);
if (ret < 0)
return ret;
- dev->header.snapc->seq = new_snapid;
+ down_write(&rbd_dev->header_rwsem);
+ rbd_dev->header.snapc->seq = new_snapid;
+ up_write(&rbd_dev->header_rwsem);
return 0;
bad:
/*
* only read the first part of the ondisk header, without the snaps info
*/
-static int __rbd_update_snaps(struct rbd_device *rbd_dev)
+static int __rbd_refresh_header(struct rbd_device *rbd_dev)
{
int ret;
struct rbd_image_header h;
- u64 snap_seq;
- int follow_seq = 0;
ret = rbd_read_header(rbd_dev, &h);
if (ret < 0)
return ret;
- /* resized? */
- set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
-
down_write(&rbd_dev->header_rwsem);
- snap_seq = rbd_dev->header.snapc->seq;
- if (rbd_dev->header.total_snaps &&
- rbd_dev->header.snapc->snaps[0] == snap_seq)
- /* pointing at the head, will need to follow that
- if head moves */
- follow_seq = 1;
+ /* resized? */
+ if (rbd_dev->snap_id == CEPH_NOSNAP) {
+ sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
- kfree(rbd_dev->header.snapc);
- kfree(rbd_dev->header.snap_names);
+ dout("setting size to %llu sectors", (unsigned long long) size);
+ set_capacity(rbd_dev->disk, size);
+ }
+
+ /* rbd_dev->header.object_prefix shouldn't change */
kfree(rbd_dev->header.snap_sizes);
+ kfree(rbd_dev->header.snap_names);
+ /* osd requests may still refer to snapc */
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.obj_version = h.obj_version;
+ rbd_dev->header.image_size = h.image_size;
rbd_dev->header.total_snaps = h.total_snaps;
rbd_dev->header.snapc = h.snapc;
rbd_dev->header.snap_names = h.snap_names;
rbd_dev->header.snap_names_len = h.snap_names_len;
rbd_dev->header.snap_sizes = h.snap_sizes;
- if (follow_seq)
- rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
- else
- rbd_dev->header.snapc->seq = snap_seq;
+ /* Free the extra copy of the object prefix */
+ WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
+ kfree(h.object_prefix);
ret = __rbd_init_snaps_header(rbd_dev);
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ sector_t size;
+
+ down_read(&rbd_dev->header_rwsem);
+ size = get_capacity(rbd_dev->disk);
+ up_read(&rbd_dev->header_rwsem);
- return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
+ return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
}
static ssize_t rbd_major_show(struct device *dev,
return sprintf(buf, "%s\n", rbd_dev->pool_name);
}
+static ssize_t rbd_pool_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ return sprintf(buf, "%d\n", rbd_dev->pool_id);
+}
+
static ssize_t rbd_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- return sprintf(buf, "%s\n", rbd_dev->obj);
+ return sprintf(buf, "%s\n", rbd_dev->image_name);
}
static ssize_t rbd_snap_show(struct device *dev,
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_update_snaps(rbd_dev);
+ rc = __rbd_refresh_header(rbd_dev);
if (rc < 0)
ret = rc;
static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
&dev_attr_major.attr,
&dev_attr_client_id.attr,
&dev_attr_pool.attr,
+ &dev_attr_pool_id.attr,
&dev_attr_name.attr,
&dev_attr_current_snap.attr,
&dev_attr_refresh.attr,
{
struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
- return sprintf(buf, "%zd\n", snap->size);
+ return sprintf(buf, "%llu\n", (unsigned long long)snap->size);
}
static ssize_t rbd_snap_id_show(struct device *dev,
{
struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
- return sprintf(buf, "%llu\n", (unsigned long long) snap->id);
+ return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
}
static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
cur_id = rbd_dev->header.snapc->snaps[i - 1];
if (!i || old_snap->id < cur_id) {
- /* old_snap->id was skipped, thus was removed */
+ /*
+ * old_snap->id was skipped, thus was
+ * removed. If this rbd_dev is mapped to
+ * the removed snapshot, record that it no
+ * longer exists, to prevent further I/O.
+ */
+ if (rbd_dev->snap_id == old_snap->id)
+ rbd_dev->snap_exists = false;
__rbd_remove_snap_dev(rbd_dev, old_snap);
continue;
}
int ret, rc;
do {
- ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
+ ret = rbd_req_sync_watch(rbd_dev, rbd_dev->header_name,
rbd_dev->header.obj_version);
if (ret == -ERANGE) {
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_update_snaps(rbd_dev);
+ rc = __rbd_refresh_header(rbd_dev);
mutex_unlock(&ctl_mutex);
if (rc < 0)
return rc;
}
/*
- * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
+ * Finds the next token in *buf, dynamically allocates a buffer big
+ * enough to hold a copy of it, and copies the token into the new
+ * buffer. The copy is guaranteed to be terminated with '\0'. Note
+ * that a duplicate buffer is created even for a zero-length token.
+ *
+ * Returns a pointer to the newly-allocated duplicate, or a null
+ * pointer if memory for the duplicate was not available. If
+ * the lenp argument is a non-null pointer, the length of the token
+ * (not including the '\0') is returned in *lenp.
+ *
+ * If successful, the *buf pointer will be updated to point beyond
+ * the end of the found token.
+ *
+ * Note: uses GFP_KERNEL for allocation.
+ */
+static inline char *dup_token(const char **buf, size_t *lenp)
+{
+ char *dup;
+ size_t len;
+
+ len = next_token(buf);
+ dup = kmalloc(len + 1, GFP_KERNEL);
+ if (!dup)
+ return NULL;
+
+ memcpy(dup, *buf, len);
+ *(dup + len) = '\0';
+ *buf += len;
+
+ if (lenp)
+ *lenp = len;
+
+ return dup;
+}
+
+/*
+ * This fills in the pool_name, image_name, image_name_len, snap_name,
* rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
* on the list of monitor addresses and other options provided via
* /sys/bus/rbd/add.
+ *
+ * Note: rbd_dev is assumed to have been initially zero-filled.
*/
static int rbd_add_parse_args(struct rbd_device *rbd_dev,
const char *buf,
const char **mon_addrs,
size_t *mon_addrs_size,
char *options,
- size_t options_size)
+ size_t options_size)
{
- size_t len;
+ size_t len;
+ int ret;
/* The first four tokens are required */
if (!len || len >= options_size)
return -EINVAL;
- len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
- if (!len || len >= sizeof (rbd_dev->pool_name))
- return -EINVAL;
-
- len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
- if (!len || len >= sizeof (rbd_dev->obj))
- return -EINVAL;
+ ret = -ENOMEM;
+ rbd_dev->pool_name = dup_token(&buf, NULL);
+ if (!rbd_dev->pool_name)
+ goto out_err;
- /* We have the object length in hand, save it. */
+ rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len);
+ if (!rbd_dev->image_name)
+ goto out_err;
- rbd_dev->obj_len = len;
+ /* Create the name of the header object */
- BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
- < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
- sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);
+ rbd_dev->header_name = kmalloc(rbd_dev->image_name_len
+ + sizeof (RBD_SUFFIX),
+ GFP_KERNEL);
+ if (!rbd_dev->header_name)
+ goto out_err;
+ sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX);
/*
- * The snapshot name is optional, but it's an error if it's
- * too long. If no snapshot is supplied, fill in the default.
+ * The snapshot name is optional. If none is is supplied,
+ * we use the default value.
*/
- len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
- if (!len)
+ rbd_dev->snap_name = dup_token(&buf, &len);
+ if (!rbd_dev->snap_name)
+ goto out_err;
+ if (!len) {
+ /* Replace the empty name with the default */
+ kfree(rbd_dev->snap_name);
+ rbd_dev->snap_name
+ = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL);
+ if (!rbd_dev->snap_name)
+ goto out_err;
+
memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
sizeof (RBD_SNAP_HEAD_NAME));
- else if (len >= sizeof (rbd_dev->snap_name))
- return -EINVAL;
+ }
return 0;
+
+out_err:
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->image_name);
+ kfree(rbd_dev->pool_name);
+ rbd_dev->pool_name = NULL;
+
+ return ret;
}
static ssize_t rbd_add(struct bus_type *bus,
const char *buf,
size_t count)
{
- struct rbd_device *rbd_dev;
+ char *options;
+ struct rbd_device *rbd_dev = NULL;
const char *mon_addrs = NULL;
size_t mon_addrs_size = 0;
- char *options = NULL;
struct ceph_osd_client *osdc;
int rc = -ENOMEM;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
- if (!rbd_dev)
- goto err_nomem;
options = kmalloc(count, GFP_KERNEL);
if (!options)
goto err_nomem;
+ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ goto err_nomem;
/* static rbd_device initialization */
spin_lock_init(&rbd_dev->lock);
rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
if (rc < 0)
goto err_out_client;
- rbd_dev->poolid = rc;
+ rbd_dev->pool_id = rc;
/* register our block device */
rc = register_blkdev(0, rbd_dev->name);
err_out_client:
rbd_put_client(rbd_dev);
err_put_id:
+ if (rbd_dev->pool_name) {
+ kfree(rbd_dev->snap_name);
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->image_name);
+ kfree(rbd_dev->pool_name);
+ }
rbd_id_put(rbd_dev);
err_nomem:
- kfree(options);
kfree(rbd_dev);
+ kfree(options);
dout("Error adding device %s\n", buf);
module_put(THIS_MODULE);
rbd_dev->watch_request);
}
if (rbd_dev->watch_event)
- rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
+ rbd_req_sync_unwatch(rbd_dev, rbd_dev->header_name);
rbd_put_client(rbd_dev);
unregister_blkdev(rbd_dev->major, rbd_dev->name);
/* done with the id, and with the rbd_dev */
+ kfree(rbd_dev->snap_name);
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->pool_name);
+ kfree(rbd_dev->image_name);
rbd_id_put(rbd_dev);
kfree(rbd_dev);
if (ret < 0)
goto err_unlock;
- ret = __rbd_update_snaps(rbd_dev);
+ ret = __rbd_refresh_header(rbd_dev);
if (ret < 0)
goto err_unlock;
mutex_unlock(&ctl_mutex);
/* make a best effort, don't error if failed */
- rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);
+ rbd_req_sync_notify(rbd_dev, rbd_dev->header_name);
ret = count;
kfree(name);