static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
-/* NBDExportMetaContexts represents a list of contexts to be exported,
+/*
+ * NBDMetaContexts represents a list of meta contexts in use,
* as selected by NBD_OPT_SET_META_CONTEXT. Also used for
- * NBD_OPT_LIST_META_CONTEXT. */
-typedef struct NBDExportMetaContexts {
- NBDExport *exp;
+ * NBD_OPT_LIST_META_CONTEXT.
+ */
+struct NBDMetaContexts {
+ const NBDExport *exp; /* associated export */
size_t count; /* number of negotiated contexts */
bool base_allocation; /* export base:allocation context (block status) */
bool allocation_depth; /* export qemu:allocation-depth */
* export qemu:dirty-bitmap:<export bitmap name>,
* sized by exp->nr_export_bitmaps
*/
-} NBDExportMetaContexts;
+};
struct NBDClient {
int refcount;
uint32_t check_align; /* If non-zero, check for aligned client requests */
- bool structured_reply;
- NBDExportMetaContexts export_meta;
+ NBDMode mode;
+ NBDMetaContexts contexts; /* Negotiated meta contexts */
uint32_t opt; /* Current option being negotiated */
uint32_t optlen; /* remaining length of data in ioc for the option being
return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
}
-static void nbd_check_meta_export(NBDClient *client)
+static void nbd_check_meta_export(NBDClient *client, NBDExport *exp)
{
- if (client->exp != client->export_meta.exp) {
- client->export_meta.count = 0;
+ if (exp != client->contexts.exp) {
+ client->contexts.count = 0;
}
}
[10 .. 133] reserved (0) [unless no_zeroes]
*/
trace_nbd_negotiate_handle_export_name();
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ error_setg(errp, "Extended headers already negotiated");
+ return -EINVAL;
+ }
if (client->optlen > NBD_MAX_STRING_SIZE) {
error_setg(errp, "Bad length received");
return -EINVAL;
error_setg(errp, "export not found");
return -EINVAL;
}
+ nbd_check_meta_export(client, client->exp);
myflags = client->exp->nbdflags;
- if (client->structured_reply) {
+ if (client->mode >= NBD_MODE_STRUCTURED) {
myflags |= NBD_FLAG_SEND_DF;
}
+ if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
+ myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+ }
trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
stq_be_p(buf, client->exp->size);
stw_be_p(buf + 8, myflags);
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
blk_exp_ref(&client->exp->common);
- nbd_check_meta_export(client);
return 0;
}
errp, "export '%s' not present",
sane_name);
}
+ if (client->opt == NBD_OPT_GO) {
+ nbd_check_meta_export(client, exp);
+ }
/* Don't bother sending NBD_INFO_NAME unless client requested it */
if (sendname) {
/* Send NBD_INFO_EXPORT always */
myflags = exp->nbdflags;
- if (client->structured_reply) {
+ if (client->mode >= NBD_MODE_STRUCTURED) {
myflags |= NBD_FLAG_SEND_DF;
}
+ if (client->mode >= NBD_MODE_EXTENDED &&
+ (client->contexts.count || client->opt == NBD_OPT_INFO)) {
+ myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+ }
trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
stq_be_p(buf, exp->size);
stw_be_p(buf + 8, myflags);
client->check_align = check_align;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
blk_exp_ref(&client->exp->common);
- nbd_check_meta_export(client);
rc = 1;
}
return rc;
* Handle queries to 'base' namespace. For now, only the base:allocation
* context is available. Return true if @query has been handled.
*/
-static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta,
const char *query)
{
if (!nbd_strshift(&query, "base:")) {
* and qemu:allocation-depth contexts are available. Return true if @query
* has been handled.
*/
-static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
+static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta,
const char *query)
{
size_t i;
* Return -errno on I/O error, 0 if option was completely handled by
* sending a reply about inconsistent lengths, or 1 on success. */
static int nbd_negotiate_meta_query(NBDClient *client,
- NBDExportMetaContexts *meta, Error **errp)
+ NBDMetaContexts *meta, Error **errp)
{
int ret;
g_autofree char *query = NULL;
* Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
*
* Return -errno on I/O error, or 0 if option was completely handled. */
-static int nbd_negotiate_meta_queries(NBDClient *client,
- NBDExportMetaContexts *meta, Error **errp)
+static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp)
{
int ret;
g_autofree char *export_name = NULL;
/* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */
g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
- NBDExportMetaContexts local_meta = {0};
+ NBDMetaContexts local_meta = {0};
+ NBDMetaContexts *meta;
uint32_t nb_queries;
size_t i;
size_t count = 0;
- if (client->opt == NBD_OPT_SET_META_CONTEXT && !client->structured_reply) {
+ if (client->opt == NBD_OPT_SET_META_CONTEXT &&
+ client->mode < NBD_MODE_STRUCTURED) {
return nbd_opt_invalid(client, errp,
"request option '%s' when structured reply "
"is not negotiated",
if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
/* Only change the caller's meta on SET. */
meta = &local_meta;
+ } else {
+ meta = &client->contexts;
}
g_free(meta->bitmaps);
if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
return -EIO;
}
+ client->mode = NBD_MODE_EXPORT_NAME;
trace_nbd_negotiate_options_flags(flags);
if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
fixedNewstyle = true;
flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
+ client->mode = NBD_MODE_SIMPLE;
}
if (flags & NBD_FLAG_C_NO_ZEROES) {
no_zeroes = true;
client->optlen = length;
if (length > NBD_MAX_BUFFER_SIZE) {
- error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
+ error_setg(errp, "len (%" PRIu32 ") is larger than max len (%u)",
length, NBD_MAX_BUFFER_SIZE);
return -EINVAL;
}
case NBD_OPT_STRUCTURED_REPLY:
if (length) {
ret = nbd_reject_length(client, false, errp);
- } else if (client->structured_reply) {
+ } else if (client->mode >= NBD_MODE_EXTENDED) {
+ ret = nbd_negotiate_send_rep_err(
+ client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
+ "extended headers already negotiated");
+ } else if (client->mode >= NBD_MODE_STRUCTURED) {
ret = nbd_negotiate_send_rep_err(
client, NBD_REP_ERR_INVALID, errp,
"structured reply already negotiated");
} else {
ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
- client->structured_reply = true;
+ client->mode = NBD_MODE_STRUCTURED;
}
break;
case NBD_OPT_LIST_META_CONTEXT:
case NBD_OPT_SET_META_CONTEXT:
- ret = nbd_negotiate_meta_queries(client, &client->export_meta,
- errp);
+ ret = nbd_negotiate_meta_queries(client, errp);
+ break;
+
+ case NBD_OPT_EXTENDED_HEADERS:
+ if (length) {
+ ret = nbd_reject_length(client, false, errp);
+ } else if (client->mode >= NBD_MODE_EXTENDED) {
+ ret = nbd_negotiate_send_rep_err(
+ client, NBD_REP_ERR_INVALID, errp,
+ "extended headers already negotiated");
+ } else {
+ ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+ client->mode = NBD_MODE_EXTENDED;
+ }
break;
default:
static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
Error **errp)
{
- uint8_t buf[NBD_REQUEST_SIZE];
- uint32_t magic;
+ uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+ uint32_t magic, expect;
int ret;
+ size_t size = client->mode >= NBD_MODE_EXTENDED ?
+ NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;
- ret = nbd_read_eof(client, buf, sizeof(buf), errp);
+ ret = nbd_read_eof(client, buf, size, errp);
if (ret < 0) {
return ret;
}
return -EIO;
}
- /* Request
- [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
- [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
- [ 6 .. 7] type (NBD_CMD_READ, ...)
- [ 8 .. 15] cookie
- [16 .. 23] from
- [24 .. 27] len
+ /*
+ * Compact request
+ * [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
+ * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
+ * [ 6 .. 7] type (NBD_CMD_READ, ...)
+ * [ 8 .. 15] cookie
+ * [16 .. 23] from
+ * [24 .. 27] len
+ * Extended request
+ * [ 0 .. 3] magic (NBD_EXTENDED_REQUEST_MAGIC)
+ * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
+ * [ 6 .. 7] type (NBD_CMD_READ, ...)
+ * [ 8 .. 15] cookie
+ * [16 .. 23] from
+ * [24 .. 31] len
*/
magic = ldl_be_p(buf);
request->type = lduw_be_p(buf + 6);
request->cookie = ldq_be_p(buf + 8);
request->from = ldq_be_p(buf + 16);
- request->len = ldl_be_p(buf + 24);
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ request->len = ldq_be_p(buf + 24);
+ expect = NBD_EXTENDED_REQUEST_MAGIC;
+ } else {
+ request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
+ expect = NBD_REQUEST_MAGIC;
+ }
trace_nbd_receive_request(magic, request->flags, request->type,
request->from, request->len);
- if (magic != NBD_REQUEST_MAGIC) {
- error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
+ if (magic != expect) {
+ error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%"
+ PRIx32 ")", magic, expect);
return -EINVAL;
}
return 0;
QTAILQ_REMOVE(&client->exp->clients, client, next);
blk_exp_unref(&client->exp->common);
}
- g_free(client->export_meta.bitmaps);
+ g_free(client->contexts.bitmaps);
g_free(client);
}
}
size_t i;
int ret;
+ GLOBAL_STATE_CODE();
assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
if (!nbd_server_is_running()) {
}
exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
+ bdrv_graph_rdlock_main_loop();
+
for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
exp->nr_export_bitmaps++;
}
QTAILQ_INSERT_TAIL(&exports, exp, next);
+ bdrv_graph_rdunlock_main_loop();
+
return 0;
fail:
+ bdrv_graph_rdunlock_main_loop();
g_free(exp->export_bitmaps);
g_free(exp->name);
g_free(exp->description);
NBDRequest *request,
uint32_t error,
void *data,
- size_t len,
+ uint64_t len,
Error **errp)
{
NBDSimpleReply reply;
};
assert(!len || !nbd_err);
- assert(!client->structured_reply || request->type != NBD_CMD_READ);
+ assert(len <= NBD_MAX_BUFFER_SIZE);
+ assert(client->mode < NBD_MODE_STRUCTURED ||
+ (client->mode == NBD_MODE_STRUCTURED &&
+ request->type != NBD_CMD_READ));
trace_nbd_co_send_simple_reply(request->cookie, nbd_err,
nbd_err_lookup(nbd_err), len);
set_be_simple_reply(&reply, nbd_err, request->cookie);
size_t niov, uint16_t flags, uint16_t type,
NBDRequest *request)
{
- /* TODO - handle structured vs. extended replies */
- NBDStructuredReplyChunk *chunk = iov->iov_base;
size_t i, length = 0;
for (i = 1; i < niov; i++) {
}
assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData));
- iov[0].iov_len = sizeof(*chunk);
- stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
- stw_be_p(&chunk->flags, flags);
- stw_be_p(&chunk->type, type);
- stq_be_p(&chunk->cookie, request->cookie);
- stl_be_p(&chunk->length, length);
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ NBDExtendedReplyChunk *chunk = iov->iov_base;
+
+ iov[0].iov_len = sizeof(*chunk);
+ stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
+ stw_be_p(&chunk->flags, flags);
+ stw_be_p(&chunk->type, type);
+ stq_be_p(&chunk->cookie, request->cookie);
+ stq_be_p(&chunk->offset, request->from);
+ stq_be_p(&chunk->length, length);
+ } else {
+ NBDStructuredReplyChunk *chunk = iov->iov_base;
+
+ iov[0].iov_len = sizeof(*chunk);
+ stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+ stw_be_p(&chunk->flags, flags);
+ stw_be_p(&chunk->type, type);
+ stq_be_p(&chunk->cookie, request->cookie);
+ stl_be_p(&chunk->length, length);
+ }
}
static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client,
NBDRequest *request,
uint64_t offset,
void *data,
- size_t size,
+ uint64_t size,
bool final,
Error **errp)
{
{.iov_base = data, .iov_len = size}
};
- assert(size);
+ assert(size && size <= NBD_MAX_BUFFER_SIZE);
trace_nbd_co_send_chunk_read(request->cookie, offset, data, size);
set_be_chunk(client, iov, 3, final ? NBD_REPLY_FLAG_DONE : 0,
NBD_REPLY_TYPE_OFFSET_DATA, request);
return nbd_co_send_iov(client, iov, 3, errp);
}
-/*ebb*/
+
static int coroutine_fn nbd_co_send_chunk_error(NBDClient *client,
NBDRequest *request,
uint32_t error,
NBDRequest *request,
uint64_t offset,
uint8_t *data,
- size_t size,
+ uint64_t size,
Error **errp)
{
int ret = 0;
NBDExport *exp = client->exp;
size_t progress = 0;
+ assert(size <= NBD_MAX_BUFFER_SIZE);
while (progress < size) {
int64_t pnum;
int status = blk_co_block_status_above(exp->common.blk, NULL,
}
typedef struct NBDExtentArray {
- NBDExtent *extents;
+ NBDExtent64 *extents;
unsigned int nb_alloc;
unsigned int count;
uint64_t total_length;
+ bool extended;
bool can_add;
bool converted_to_be;
} NBDExtentArray;
-static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
+static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
+ NBDMode mode)
{
NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
+ assert(mode >= NBD_MODE_STRUCTURED);
ea->nb_alloc = nb_alloc;
- ea->extents = g_new(NBDExtent, nb_alloc);
+ ea->extents = g_new(NBDExtent64, nb_alloc);
+ ea->extended = mode >= NBD_MODE_EXTENDED;
ea->can_add = true;
return ea;
int i;
assert(!ea->converted_to_be);
+ assert(ea->extended);
ea->can_add = false;
ea->converted_to_be = true;
for (i = 0; i < ea->count; i++) {
- ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
- ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
+ ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
+ ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
}
}
+/* Further modifications of the array after conversion are abandoned */
+static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea)
+{
+ int i;
+ NBDExtent32 *extents = g_new(NBDExtent32, ea->count);
+
+ assert(!ea->converted_to_be);
+ assert(!ea->extended);
+ ea->can_add = false;
+ ea->converted_to_be = true;
+
+ for (i = 0; i < ea->count; i++) {
+ assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX);
+ extents[i].length = cpu_to_be32(ea->extents[i].length);
+ extents[i].flags = cpu_to_be32(ea->extents[i].flags);
+ }
+
+ return extents;
+}
+
/*
* Add extent to NBDExtentArray. If extent can't be added (no available space),
* return -1.
* would result in an incorrect range reported to the client)
*/
static int nbd_extent_array_add(NBDExtentArray *ea,
- uint32_t length, uint32_t flags)
+ uint64_t length, uint32_t flags)
{
assert(ea->can_add);
if (!length) {
return 0;
}
+ if (!ea->extended) {
+ assert(length <= UINT32_MAX);
+ }
/* Extend previous extent if flags are the same */
if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
- uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
+ uint64_t sum = length + ea->extents[ea->count - 1].length;
- if (sum <= UINT32_MAX) {
+ /*
+ * sum cannot overflow: the block layer bounds image size at
+ * 2^63, and ea->extents[].length comes from the block layer.
+ */
+ assert(sum >= length);
+ if (sum <= UINT32_MAX || ea->extended) {
ea->extents[ea->count - 1].length = sum;
ea->total_length += length;
return 0;
}
ea->total_length += length;
- ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
+ ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags};
ea->count++;
return 0;
bool last, uint32_t context_id, Error **errp)
{
NBDReply hdr;
- NBDStructuredMeta chunk;
- struct iovec iov[] = {
- {.iov_base = &hdr},
- {.iov_base = &chunk, .iov_len = sizeof(chunk)},
- {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
- };
+ NBDStructuredMeta meta;
+ NBDExtendedMeta meta_ext;
+ g_autofree NBDExtent32 *extents = NULL;
+ uint16_t type;
+ struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
+
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
+
+ iov[1].iov_base = &meta_ext;
+ iov[1].iov_len = sizeof(meta_ext);
+ stl_be_p(&meta_ext.context_id, context_id);
+ stl_be_p(&meta_ext.count, ea->count);
+
+ nbd_extent_array_convert_to_be(ea);
+ iov[2].iov_base = ea->extents;
+ iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
+ } else {
+ type = NBD_REPLY_TYPE_BLOCK_STATUS;
- nbd_extent_array_convert_to_be(ea);
+ iov[1].iov_base = &meta;
+ iov[1].iov_len = sizeof(meta);
+ stl_be_p(&meta.context_id, context_id);
+
+ extents = nbd_extent_array_convert_to_narrow(ea);
+ iov[2].iov_base = extents;
+ iov[2].iov_len = ea->count * sizeof(extents[0]);
+ }
trace_nbd_co_send_extents(request->cookie, ea->count, context_id,
ea->total_length, last);
- set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0,
- NBD_REPLY_TYPE_BLOCK_STATUS, request);
- stl_be_p(&chunk.context_id, context_id);
+ set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type,
+ request);
return nbd_co_send_iov(client, iov, 3, errp);
}
static int
coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request,
BlockBackend *blk, uint64_t offset,
- uint32_t length, bool dont_fragment,
+ uint64_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
int ret;
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
- g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+ g_autoptr(NBDExtentArray) ea =
+ nbd_extent_array_new(nb_extents, client->mode);
if (context_id == NBD_META_ID_BASE_ALLOCATION) {
ret = blockstatus_to_extents(blk, offset, length, ea);
int64_t start, dirty_start, dirty_count;
int64_t end = offset + length;
bool full = false;
+ int64_t bound = es->extended ? INT64_MAX : INT32_MAX;
bdrv_dirty_bitmap_lock(bitmap);
for (start = offset;
- bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
+ bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound,
&dirty_start, &dirty_count);
start = dirty_start + dirty_count)
{
NBDRequest *request,
BdrvDirtyBitmap *bitmap,
uint64_t offset,
- uint32_t length, bool dont_fragment,
+ uint64_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
- g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+ g_autoptr(NBDExtentArray) ea =
+ nbd_extent_array_new(nb_extents, client->mode);
bitmap_to_extents(bitmap, offset, length, ea);
return nbd_co_send_extents(client, request, ea, last, context_id, errp);
}
+/*
+ * nbd_co_block_status_payload_read
+ * Called when a client wants a subset of negotiated contexts via a
+ * BLOCK_STATUS payload. Check the payload for valid length and
+ * contents. On success, return 0 with request updated to effective
+ * length. If request was invalid but all payload consumed, return 0
+ * with request->len and request->contexts->count set to 0 (which will
+ * trigger an appropriate NBD_EINVAL response later on). Return
+ * negative errno if the payload was not fully consumed.
+ */
+static int
+nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
+ Error **errp)
+{
+ uint64_t payload_len = request->len;
+ g_autofree char *buf = NULL;
+ size_t count, i, nr_bitmaps;
+ uint32_t id;
+
+ if (payload_len > NBD_MAX_BUFFER_SIZE) {
+ error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
+ request->len, NBD_MAX_BUFFER_SIZE);
+ return -EINVAL;
+ }
+
+ assert(client->contexts.exp == client->exp);
+ nr_bitmaps = client->exp->nr_export_bitmaps;
+ request->contexts = g_new0(NBDMetaContexts, 1);
+ request->contexts->exp = client->exp;
+
+ if (payload_len % sizeof(uint32_t) ||
+ payload_len < sizeof(NBDBlockStatusPayload) ||
+ payload_len > (sizeof(NBDBlockStatusPayload) +
+ sizeof(id) * client->contexts.count)) {
+ goto skip;
+ }
+
+ buf = g_malloc(payload_len);
+ if (nbd_read(client->ioc, buf, payload_len,
+ "CMD_BLOCK_STATUS data", errp) < 0) {
+ return -EIO;
+ }
+ trace_nbd_co_receive_request_payload_received(request->cookie,
+ payload_len);
+ request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
+ count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
+ payload_len = 0;
+
+ for (i = 0; i < count; i++) {
+ id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
+ if (id == NBD_META_ID_BASE_ALLOCATION) {
+ if (!client->contexts.base_allocation ||
+ request->contexts->base_allocation) {
+ goto skip;
+ }
+ request->contexts->base_allocation = true;
+ } else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
+ if (!client->contexts.allocation_depth ||
+ request->contexts->allocation_depth) {
+ goto skip;
+ }
+ request->contexts->allocation_depth = true;
+ } else {
+ unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
+
+ if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
+ request->contexts->bitmaps[idx]) {
+ goto skip;
+ }
+ request->contexts->bitmaps[idx] = true;
+ }
+ }
+
+ request->len = ldq_be_p(buf);
+ request->contexts->count = count;
+ return 0;
+
+ skip:
+ trace_nbd_co_receive_block_status_payload_compliance(request->from,
+ request->len);
+ request->len = request->contexts->count = 0;
+ return nbd_drop(client->ioc, payload_len, errp);
+}
+
/* nbd_co_receive_request
* Collect a client request. Return 0 if request looks valid, -EIO to drop
* connection right away, -EAGAIN to indicate we were interrupted and the
* to the client (although the caller may still need to disconnect after
* reporting the error).
*/
-static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
+static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
+ NBDRequest *request,
Error **errp)
{
NBDClient *client = req->client;
- int valid_flags;
+ bool extended_with_payload;
+ bool check_length = false;
+ bool check_rofs = false;
+ bool allocate_buffer = false;
+ bool payload_okay = false;
+ uint64_t payload_len = 0;
+ int valid_flags = NBD_CMD_FLAG_FUA;
int ret;
g_assert(qemu_in_coroutine());
trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
nbd_cmd_lookup(request->type));
-
- if (request->type != NBD_CMD_WRITE) {
- /* No payload, we are ready to read the next request. */
- req->complete = true;
+ extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
+ request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
+ if (extended_with_payload) {
+ payload_len = request->len;
+ check_length = true;
}
- if (request->type == NBD_CMD_DISC) {
+ switch (request->type) {
+ case NBD_CMD_DISC:
/* Special case: we're going to disconnect without a reply,
* whether or not flags, from, or len are bogus */
+ req->complete = true;
return -EIO;
- }
- if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
- request->type == NBD_CMD_CACHE)
- {
- if (request->len > NBD_MAX_BUFFER_SIZE) {
- error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
- request->len, NBD_MAX_BUFFER_SIZE);
- return -EINVAL;
+ case NBD_CMD_READ:
+ if (client->mode >= NBD_MODE_STRUCTURED) {
+ valid_flags |= NBD_CMD_FLAG_DF;
+ }
+ check_length = true;
+ allocate_buffer = true;
+ break;
+
+ case NBD_CMD_WRITE:
+ if (client->mode >= NBD_MODE_EXTENDED) {
+ if (!extended_with_payload) {
+ /* The client is noncompliant. Trace it, but proceed. */
+ trace_nbd_co_receive_ext_payload_compliance(request->from,
+ request->len);
+ }
+ valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
}
+ payload_okay = true;
+ payload_len = request->len;
+ check_length = true;
+ allocate_buffer = true;
+ check_rofs = true;
+ break;
+
+ case NBD_CMD_FLUSH:
+ break;
+
+ case NBD_CMD_TRIM:
+ check_rofs = true;
+ break;
+
+ case NBD_CMD_CACHE:
+ check_length = true;
+ break;
- if (request->type != NBD_CMD_CACHE) {
- req->data = blk_try_blockalign(client->exp->common.blk,
- request->len);
- if (req->data == NULL) {
- error_setg(errp, "No memory");
- return -ENOMEM;
+ case NBD_CMD_WRITE_ZEROES:
+ valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
+ check_rofs = true;
+ break;
+
+ case NBD_CMD_BLOCK_STATUS:
+ if (extended_with_payload) {
+ ret = nbd_co_block_status_payload_read(client, request, errp);
+ if (ret < 0) {
+ return ret;
}
+ /* payload now consumed */
+ check_length = false;
+ payload_len = 0;
+ valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
+ } else {
+ request->contexts = &client->contexts;
}
+ valid_flags |= NBD_CMD_FLAG_REQ_ONE;
+ break;
+
+ default:
+ /* Unrecognized, will fail later */
+ ;
}
- if (request->type == NBD_CMD_WRITE) {
- if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
- errp) < 0)
- {
+ /* Payload and buffer handling. */
+ if (!payload_len) {
+ req->complete = true;
+ }
+ if (check_length && request->len > NBD_MAX_BUFFER_SIZE) {
+ /* READ, WRITE, CACHE */
+ error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
+ request->len, NBD_MAX_BUFFER_SIZE);
+ return -EINVAL;
+ }
+ if (payload_len && !payload_okay) {
+ /*
+ * For now, we don't support payloads on other commands; but
+ * we can keep the connection alive by ignoring the payload.
+ * We will fail the command later with NBD_EINVAL for the use
+ * of an unsupported flag (and not for access beyond bounds).
+ */
+ assert(request->type != NBD_CMD_WRITE);
+ request->len = 0;
+ }
+ if (allocate_buffer) {
+ /* READ, WRITE */
+ req->data = blk_try_blockalign(client->exp->common.blk,
+ request->len);
+ if (req->data == NULL) {
+ error_setg(errp, "No memory");
+ return -ENOMEM;
+ }
+ }
+ if (payload_len) {
+ if (payload_okay) {
+ /* WRITE */
+ assert(req->data);
+ ret = nbd_read(client->ioc, req->data, payload_len,
+ "CMD_WRITE data", errp);
+ } else {
+ ret = nbd_drop(client->ioc, payload_len, errp);
+ }
+ if (ret < 0) {
return -EIO;
}
req->complete = true;
-
trace_nbd_co_receive_request_payload_received(request->cookie,
- request->len);
+ payload_len);
}
/* Sanity checks. */
- if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
- (request->type == NBD_CMD_WRITE ||
- request->type == NBD_CMD_WRITE_ZEROES ||
- request->type == NBD_CMD_TRIM)) {
+ if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && check_rofs) {
+ /* WRITE, TRIM, WRITE_ZEROES */
error_setg(errp, "Export is read-only");
return -EROFS;
}
if (request->from > client->exp->size ||
request->len > client->exp->size - request->from) {
- error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
+ error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu64
", Size: %" PRIu64, request->from, request->len,
client->exp->size);
return (request->type == NBD_CMD_WRITE ||
request->len,
client->check_align);
}
- valid_flags = NBD_CMD_FLAG_FUA;
- if (request->type == NBD_CMD_READ && client->structured_reply) {
- valid_flags |= NBD_CMD_FLAG_DF;
- } else if (request->type == NBD_CMD_WRITE_ZEROES) {
- valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
- } else if (request->type == NBD_CMD_BLOCK_STATUS) {
- valid_flags |= NBD_CMD_FLAG_REQ_ONE;
- }
if (request->flags & ~valid_flags) {
error_setg(errp, "unsupported flags for command %s (got 0x%x)",
nbd_cmd_lookup(request->type), request->flags);
const char *error_msg,
Error **errp)
{
- if (client->structured_reply && ret < 0) {
+ if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) {
return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp);
+ } else if (client->mode >= NBD_MODE_EXTENDED) {
+ return nbd_co_send_chunk_done(client, request, errp);
} else {
return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0,
NULL, 0, errp);
NBDExport *exp = client->exp;
assert(request->type == NBD_CMD_READ);
+ assert(request->len <= NBD_MAX_BUFFER_SIZE);
/* XXX: NBD Protocol only documents use of FUA with WRITE */
if (request->flags & NBD_CMD_FLAG_FUA) {
}
}
- if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
- request->len)
+ if (client->mode >= NBD_MODE_STRUCTURED &&
+ !(request->flags & NBD_CMD_FLAG_DF) && request->len)
{
return nbd_co_send_sparse_read(client, request, request->from,
data, request->len, errp);
"reading from file failed", errp);
}
- if (client->structured_reply) {
+ if (client->mode >= NBD_MODE_STRUCTURED) {
if (request->len) {
return nbd_co_send_chunk_read(client, request, request->from, data,
request->len, true, errp);
NBDExport *exp = client->exp;
assert(request->type == NBD_CMD_CACHE);
+ assert(request->len <= NBD_MAX_BUFFER_SIZE);
ret = blk_co_preadv(exp->common.blk, request->from, request->len,
NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
if (request->flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
+ assert(request->len <= NBD_MAX_BUFFER_SIZE);
ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data,
flags);
return nbd_send_generic_reply(client, request, ret,
"discard failed", errp);
case NBD_CMD_BLOCK_STATUS:
- if (!request->len) {
- return nbd_send_generic_reply(client, request, -EINVAL,
- "need non-zero length", errp);
- }
- if (client->export_meta.count) {
+ assert(request->contexts);
+ assert(client->mode >= NBD_MODE_EXTENDED ||
+ request->len <= UINT32_MAX);
+ if (request->contexts->count) {
bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
- int contexts_remaining = client->export_meta.count;
+ int contexts_remaining = request->contexts->count;
- if (client->export_meta.base_allocation) {
+ if (!request->len) {
+ return nbd_send_generic_reply(client, request, -EINVAL,
+ "need non-zero length", errp);
+ }
+ if (request->contexts->base_allocation) {
ret = nbd_co_send_block_status(client, request,
exp->common.blk,
request->from,
}
}
- if (client->export_meta.allocation_depth) {
+ if (request->contexts->allocation_depth) {
ret = nbd_co_send_block_status(client, request,
exp->common.blk,
request->from, request->len,
}
}
+ assert(request->contexts->exp == client->exp);
for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
- if (!client->export_meta.bitmaps[i]) {
+ if (!request->contexts->bitmaps[i]) {
continue;
}
ret = nbd_co_send_bitmap(client, request,
assert(!contexts_remaining);
return 0;
+ } else if (client->contexts.count) {
+ return nbd_send_generic_reply(client, request, -EINVAL,
+ "CMD_BLOCK_STATUS payload not valid",
+ errp);
} else {
return nbd_send_generic_reply(client, request, -EINVAL,
"CMD_BLOCK_STATUS not negotiated",
} else {
ret = nbd_handle_request(client, &request, req->data, &local_err);
}
+ if (request.contexts && request.contexts != &client->contexts) {
+ assert(request.type == NBD_CMD_BLOCK_STATUS);
+ g_free(request.contexts->bitmaps);
+ g_free(request.contexts);
+ }
if (ret < 0) {
error_prepend(&local_err, "Failed to send reply: ");
goto disconnect;
}
- /* We must disconnect after NBD_CMD_WRITE if we did not
- * read the payload.
+ /*
+ * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
+ * payload if we did not read the payload.
*/
if (!req->complete) {
error_setg(&local_err, "Request handling failed in intermediate state");