X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=block.c;h=580cb77a704691da4e0bad77d6c38723af2f0e3f;hb=0d33415a4eafe532f3beef8272811d927236a353;hp=9ad725d2057d59075e3b07594641c28c315b6cf4;hpb=d45a5270d075ea589f0b0ddcf963a5fea1f500ac;p=qmiga%2Fqemu.git diff --git a/block.c b/block.c index 9ad725d205..580cb77a70 100644 --- a/block.c +++ b/block.c @@ -49,12 +49,14 @@ #include "qemu/timer.h" #include "qemu/cutils.h" #include "qemu/id.h" +#include "qemu/range.h" +#include "qemu/rcu.h" #include "block/coroutines.h" #ifdef CONFIG_BSD #include #include -#ifndef __DragonFly__ +#if defined(HAVE_SYS_DISK_H) #include #endif #endif @@ -82,22 +84,20 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, BdrvChildRole child_role, Error **errp); +static bool bdrv_recurse_has_child(BlockDriverState *bs, + BlockDriverState *child); + static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); -static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - BdrvChild **child, - Transaction *tran, - Error **errp); +static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, + BdrvChild *child, + Transaction *tran); static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, Transaction *tran); static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, - Transaction *set_backings_tran, Error **errp); + Transaction *change_child_tran, Error **errp); static void bdrv_reopen_commit(BDRVReopenState *reopen_state); static void bdrv_reopen_abort(BDRVReopenState *reopen_state); @@ -265,7 +265,7 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, * image is inactivated. */ bool bdrv_is_read_only(BlockDriverState *bs) { - return bs->read_only; + return !(bs->open_flags & BDRV_O_RDWR); } int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, @@ -317,7 +317,6 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, goto fail; } - bs->read_only = true; bs->open_flags &= ~BDRV_O_RDWR; return 0; @@ -400,7 +399,6 @@ BlockDriverState *bdrv_new(void) for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { QLIST_INIT(&bs->op_blockers[i]); } - notifier_with_return_list_init(&bs->before_write_notifiers); qemu_co_mutex_init(&bs->reqs_lock); qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; @@ -408,6 +406,9 @@ BlockDriverState *bdrv_new(void) qemu_co_queue_init(&bs->flush_queue); + qemu_co_mutex_init(&bs->bsc_modify_lock); + bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1); + for (i = 0; i < bdrv_drain_all_count; i++) { bdrv_drained_begin(bs); } @@ -1159,7 +1160,7 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) static char *bdrv_child_get_parent_desc(BdrvChild *c) { BlockDriverState *parent = c->opaque; - return g_strdup(bdrv_get_device_or_node_name(parent)); + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); } static void bdrv_child_cb_drained_begin(BdrvChild *child) @@ -1413,7 +1414,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, return 0; } -static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c) +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) { BlockDriverState *bs = c->opaque; @@ -1433,7 +1434,7 @@ const BdrvChildClass child_of_bds = { .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, .set_aio_ctx = bdrv_child_cb_set_aio_ctx, .update_filename = bdrv_child_cb_update_filename, - .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context, + .get_parent_aio_context = child_of_bds_get_parent_aio_context, }; AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) @@ -1550,7 +1551,6 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, } bs->drv = drv; - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); bs->opaque = g_malloc0(drv->instance_size); if (drv->bdrv_file_open) { @@ -1607,16 +1607,26 @@ open_failed: return ret; } -BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, - int flags, Error **errp) +/* + * Create and open a block node. + * + * @options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + */ +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp) { BlockDriverState *bs; int ret; bs = bdrv_new(); bs->open_flags = flags; - bs->explicit_options = qdict_new(); - bs->options = qdict_new(); + bs->options = options ?: qdict_new(); + bs->explicit_options = qdict_clone_shallow(bs->options); bs->opaque = NULL; update_options_from_flags(bs->options, flags); @@ -1634,6 +1644,13 @@ BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, return bs; } +/* Create and open a block node. */ +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp) +{ + return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); +} + QemuOptsList bdrv_runtime_opts = { .name = "bdrv_common", .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), @@ -1721,6 +1738,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; + bool ro; assert(bs->file == NULL); assert(options != NULL && bs->options != options); @@ -1771,17 +1789,17 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, drv->format_name); - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + ro = bdrv_is_read_only(bs); - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { + if (!ro && bdrv_is_whitelisted(drv, true)) { ret = bdrv_apply_auto_read_only(bs, NULL, NULL); } else { ret = -ENOTSUP; } if (ret < 0) { error_setg(errp, - !bs->read_only && bdrv_is_whitelisted(drv, true) + !ro && bdrv_is_whitelisted(drv, true) ? "Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); @@ -1793,7 +1811,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, assert(qatomic_read(&bs->copy_on_read) == 0); if (bs->open_flags & BDRV_O_COPY_ON_READ) { - if (!bs->read_only) { + if (!ro) { bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); @@ -2036,27 +2054,38 @@ bool bdrv_is_writable(BlockDriverState *bs) static char *bdrv_child_user_desc(BdrvChild *c) { - if (c->klass->get_parent_desc) { - return c->klass->get_parent_desc(c); - } - - return g_strdup("another user"); + return c->klass->get_parent_desc(c); } +/* + * Check that @a allows everything that @b needs. @a and @b must reference same + * child node. + */ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) { - g_autofree char *user = NULL; - g_autofree char *perm_names = NULL; + const char *child_bs_name; + g_autofree char *a_user = NULL; + g_autofree char *b_user = NULL; + g_autofree char *perms = NULL; + + assert(a->bs); + assert(a->bs == b->bs); if ((b->perm & a->shared_perm) == b->perm) { return true; } - perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); - user = bdrv_child_user_desc(a); - error_setg(errp, "Conflicts with use by %s as '%s', which does not " - "allow '%s' on %s", - user, a->name, perm_names, bdrv_get_node_name(b->bs)); + child_bs_name = bdrv_get_node_name(b->bs); + a_user = bdrv_child_user_desc(a); + b_user = bdrv_child_user_desc(b); + perms = bdrv_perm_names(b->perm & ~a->shared_perm); + + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " + "both required by %s (uses node '%s' as '%s' child) and " + "unshared by %s (uses node '%s' as '%s' child).", + child_bs_name, perms, + b_user, child_bs_name, b->name, + a_user, child_bs_name, a->name); return false; } @@ -2248,12 +2277,14 @@ static TransactionActionDrv bdrv_replace_child_drv = { }; /* - * bdrv_replace_child + * bdrv_replace_child_tran * * Note: real unref of old_bs is done only on commit. + * + * The function doesn't update permissions, caller is responsible for this. */ -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) +static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, + Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { @@ -2645,6 +2676,7 @@ static void bdrv_replace_child_noperm(BdrvChild *child, int drain_saldo; assert(!child->frozen); + assert(old_bs != new_bs); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); @@ -2761,6 +2793,14 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { /* * Common part of attaching bdrv child to bs or to blk or to job + * + * Resulting new child is returned through @child. + * At start *@child must be NULL. + * @child is saved to a new entry of @tran, so that *@child could be reverted to + * NULL on abort(). So referenced variable must live at least until transaction + * end. + * + * Function doesn't update permissions, caller is responsible for this. */ static int bdrv_attach_child_common(BlockDriverState *child_bs, const char *child_name, @@ -2776,6 +2816,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, assert(child); assert(*child == NULL); + assert(child_class->get_parent_desc); new_child = g_new(BdrvChild, 1); *new_child = (BdrvChild) { @@ -2835,6 +2876,12 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, return 0; } +/* + * Variable referenced by @child must live at least until transaction end. + * (see bdrv_attach_child_common() doc for details) + * + * Function doesn't update permissions, caller is responsible for this. + */ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, @@ -2849,6 +2896,12 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, assert(parent_bs->drv); + if (bdrv_recurse_has_child(child_bs, parent_bs)) { + error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", + child_bs->node_name, child_name, parent_bs->node_name); + return -EINVAL; + } + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); @@ -2917,12 +2970,15 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, child_role, perm, shared_perm, opaque, &child, tran, errp); if (ret < 0) { - bdrv_unref(child_bs); - return NULL; + goto out; } ret = bdrv_refresh_perms(child_bs, errp); + +out: tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); bdrv_unref(child_bs); return child; @@ -2963,6 +3019,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, out: tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); bdrv_unref(child_bs); @@ -3094,54 +3152,104 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) } /* - * Sets the bs->backing link of a BDS. A new reference is created; callers - * which don't need their own reference any more must call bdrv_unref(). + * Sets the bs->backing or bs->file link of a BDS. A new reference is created; + * callers which don't need their own reference any more must call bdrv_unref(). + * + * Function doesn't update permissions, caller is responsible for this. */ -static int bdrv_set_backing_noperm(BlockDriverState *bs, - BlockDriverState *backing_hd, - Transaction *tran, Error **errp) +static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + bool is_backing, + Transaction *tran, Error **errp) { int ret = 0; - bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && - bdrv_inherits_from_recursive(backing_hd, bs); + bool update_inherits_from = + bdrv_inherits_from_recursive(child_bs, parent_bs); + BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; + BdrvChildRole role; + + if (!parent_bs->drv) { + /* + * Node without drv is an object without a class :/. TODO: finally fix + * qcow2 driver to never clear bs->drv and implement format corruption + * handling in other way. + */ + error_setg(errp, "Node corrupted"); + return -EINVAL; + } - if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) { + if (child && child->frozen) { + error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'", + child->name, parent_bs->node_name, child->bs->node_name); return -EPERM; } - if (bs->backing) { - /* Cannot be frozen, we checked that above */ - bdrv_unset_inherits_from(bs, bs->backing, tran); - bdrv_remove_filter_or_cow_child(bs, tran); + if (is_backing && !parent_bs->drv->is_filter && + !parent_bs->drv->supports_backing) + { + error_setg(errp, "Driver '%s' of node '%s' does not support backing " + "files", parent_bs->drv->format_name, parent_bs->node_name); + return -EINVAL; } - if (!backing_hd) { + if (parent_bs->drv->is_filter) { + role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; + } else if (is_backing) { + role = BDRV_CHILD_COW; + } else { + /* + * We only can use same role as it is in existing child. We don't have + * infrastructure to determine role of file child in generic way + */ + if (!child) { + error_setg(errp, "Cannot set file child to format node without " + "file child"); + return -EINVAL; + } + role = child->role; + } + + if (child) { + bdrv_unset_inherits_from(parent_bs, child, tran); + bdrv_remove_file_or_backing_child(parent_bs, child, tran); + } + + if (!child_bs) { goto out; } - ret = bdrv_attach_child_noperm(bs, backing_hd, "backing", - &child_of_bds, bdrv_backing_role(bs), - &bs->backing, tran, errp); + ret = bdrv_attach_child_noperm(parent_bs, child_bs, + is_backing ? "backing" : "file", + &child_of_bds, role, + is_backing ? &parent_bs->backing : + &parent_bs->file, + tran, errp); if (ret < 0) { return ret; } /* - * If backing_hd was already part of bs's backing chain, and - * inherits_from pointed recursively to bs then let's update it to + * If inherits_from pointed recursively to bs then let's update it to * point directly to bs (else it will become NULL). */ if (update_inherits_from) { - bdrv_set_inherits_from(backing_hd, bs, tran); + bdrv_set_inherits_from(child_bs, parent_bs, tran); } out: - bdrv_refresh_limits(bs, tran, NULL); + bdrv_refresh_limits(parent_bs, tran, NULL); return 0; } +static int bdrv_set_backing_noperm(BlockDriverState *bs, + BlockDriverState *backing_hd, + Transaction *tran, Error **errp) +{ + return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); +} + int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp) { @@ -4019,6 +4127,19 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, NULL, 0, keep_old_opts); } +void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) +{ + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); + } + g_free(bs_queue); + } +} + /* * Reopen multiple BlockDriverStates atomically & transactionally. * @@ -4035,28 +4156,38 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, * * All affected nodes must be drained between bdrv_reopen_queue() and * bdrv_reopen_multiple(). + * + * To be called from the main thread, with all other AioContexts unlocked. */ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) { int ret = -1; BlockReopenQueueEntry *bs_entry, *next; + AioContext *ctx; Transaction *tran = tran_new(); g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bs_queue != NULL); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); ret = bdrv_flush(bs_entry->state.bs); + aio_context_release(ctx); if (ret < 0) { error_setg_errno(errp, -ret, "Error flushing drive"); - goto cleanup; + goto abort; } } QTAILQ_FOREACH(bs_entry, bs_queue, entry) { assert(bs_entry->state.bs->quiesce_counter > 0); + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); + aio_context_release(ctx); if (ret < 0) { goto abort; } @@ -4072,6 +4203,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) refresh_list = bdrv_topological_dfs(refresh_list, found, state->old_backing_bs); } + if (state->old_file_bs) { + refresh_list = bdrv_topological_dfs(refresh_list, found, + state->old_file_bs); + } } /* @@ -4095,7 +4230,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) * to first element. */ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); bdrv_reopen_commit(&bs_entry->state); + aio_context_release(ctx); } tran_commit(tran); @@ -4104,7 +4242,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) BlockDriverState *bs = bs_entry->state.bs; if (bs->drv->bdrv_reopen_commit_post) { + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); bs->drv->bdrv_reopen_commit_post(&bs_entry->state); + aio_context_release(ctx); } } @@ -4115,59 +4256,50 @@ abort: tran_abort(tran); QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { if (bs_entry->prepared) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); bdrv_reopen_abort(&bs_entry->state); + aio_context_release(ctx); } - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); } cleanup: - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - g_free(bs_entry); - } - g_free(bs_queue); + bdrv_reopen_queue_free(bs_queue); return ret; } -int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - Error **errp) +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp) { - int ret; + AioContext *ctx = bdrv_get_aio_context(bs); BlockReopenQueue *queue; - QDict *opts = qdict_new(); - - qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); + int ret; bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts, true); + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } + + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ret = bdrv_reopen_multiple(queue, errp); + + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } bdrv_subtree_drained_end(bs); return ret; } -static bool bdrv_reopen_can_attach(BlockDriverState *parent, - BdrvChild *child, - BlockDriverState *new_child, - Error **errp) +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp) { - AioContext *parent_ctx = bdrv_get_aio_context(parent); - AioContext *child_ctx = bdrv_get_aio_context(new_child); - GSList *ignore; - bool ret; + QDict *opts = qdict_new(); - ignore = g_slist_prepend(NULL, child); - ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); - g_slist_free(ignore); - if (ret) { - return ret; - } + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); - ignore = g_slist_prepend(NULL, child); - ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); - g_slist_free(ignore); - return ret; + return bdrv_reopen(bs, opts, true, errp); } /* @@ -4187,115 +4319,81 @@ static bool bdrv_reopen_can_attach(BlockDriverState *parent, * * Return 0 on success, otherwise return < 0 and set @errp. */ -static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, - Transaction *set_backings_tran, - Error **errp) +static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + bool is_backing, Transaction *tran, + Error **errp) { BlockDriverState *bs = reopen_state->bs; - BlockDriverState *overlay_bs, *below_bs, *new_backing_bs; + BlockDriverState *new_child_bs; + BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) : + child_bs(bs->file); + const char *child_name = is_backing ? "backing" : "file"; QObject *value; const char *str; - value = qdict_get(reopen_state->options, "backing"); + value = qdict_get(reopen_state->options, child_name); if (value == NULL) { return 0; } switch (qobject_type(value)) { case QTYPE_QNULL: - new_backing_bs = NULL; + assert(is_backing); /* The 'file' option does not allow a null value */ + new_child_bs = NULL; break; case QTYPE_QSTRING: str = qstring_get_str(qobject_to(QString, value)); - new_backing_bs = bdrv_lookup_bs(NULL, str, errp); - if (new_backing_bs == NULL) { + new_child_bs = bdrv_lookup_bs(NULL, str, errp); + if (new_child_bs == NULL) { return -EINVAL; - } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { - error_setg(errp, "Making '%s' a backing file of '%s' " - "would create a cycle", str, bs->node_name); + } else if (bdrv_recurse_has_child(new_child_bs, bs)) { + error_setg(errp, "Making '%s' a %s child of '%s' would create a " + "cycle", str, child_name, bs->node_name); return -EINVAL; } break; default: - /* 'backing' does not allow any other data type */ + /* + * The options QDict has been flattened, so 'backing' and 'file' + * do not allow any other data type here. + */ g_assert_not_reached(); } - /* - * Check AioContext compatibility so that the bdrv_set_backing_hd() call in - * bdrv_reopen_commit() won't fail. - */ - if (new_backing_bs) { - if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { - return -EINVAL; - } + if (old_child_bs == new_child_bs) { + return 0; } - /* - * Ensure that @bs can really handle backing files, because we are - * about to give it one (or swap the existing one) - */ - if (bs->drv->is_filter) { - /* Filters always have a file or a backing child */ - if (!bs->backing) { - error_setg(errp, "'%s' is a %s filter node that does not support a " - "backing child", bs->node_name, bs->drv->format_name); - return -EINVAL; + if (old_child_bs) { + if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) { + return 0; } - } else if (!bs->drv->supports_backing) { - error_setg(errp, "Driver '%s' of node '%s' does not support backing " - "files", bs->drv->format_name, bs->node_name); - return -EINVAL; - } - - /* - * Find the "actual" backing file by skipping all links that point - * to an implicit node, if any (e.g. a commit filter node). - * We cannot use any of the bdrv_skip_*() functions here because - * those return the first explicit node, while we are looking for - * its overlay here. - */ - overlay_bs = bs; - for (below_bs = bdrv_filter_or_cow_bs(overlay_bs); - below_bs && below_bs->implicit; - below_bs = bdrv_filter_or_cow_bs(overlay_bs)) - { - overlay_bs = below_bs; - } - - /* If we want to replace the backing file we need some extra checks */ - if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) { - int ret; - /* Check for implicit nodes between bs and its backing file */ - if (bs != overlay_bs) { - error_setg(errp, "Cannot change backing link if '%s' has " - "an implicit backing file", bs->node_name); + if (old_child_bs->implicit) { + error_setg(errp, "Cannot replace implicit %s child of %s", + child_name, bs->node_name); return -EPERM; } + } + + if (bs->drv->is_filter && !old_child_bs) { /* - * Check if the backing link that we want to replace is frozen. - * Note that - * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing, - * because we know that overlay_bs == bs, and that @bs - * either is a filter that uses ->backing or a COW format BDS - * with bs->drv->supports_backing == true. + * Filters always have a file or a backing child, so we are trying to + * change wrong child */ - if (bdrv_is_backing_chain_frozen(overlay_bs, - child_bs(overlay_bs->backing), errp)) - { - return -EPERM; - } - reopen_state->replace_backing_bs = true; - reopen_state->old_backing_bs = bs->backing ? bs->backing->bs : NULL; - ret = bdrv_set_backing_noperm(bs, new_backing_bs, set_backings_tran, - errp); - if (ret < 0) { - return ret; - } + error_setg(errp, "'%s' is a %s filter node that does not support a " + "%s child", bs->node_name, bs->drv->format_name, child_name); + return -EINVAL; } - return 0; + if (is_backing) { + reopen_state->old_backing_bs = old_child_bs; + } else { + reopen_state->old_file_bs = old_child_bs; + } + + return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, + tran, errp); } /* @@ -4317,7 +4415,7 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, */ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, - Transaction *set_backings_tran, Error **errp) + Transaction *change_child_tran, Error **errp) { int ret = -1; int old_flags; @@ -4437,12 +4535,21 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, * either a reference to an existing node (using its node name) * or NULL to simply detach the current backing file. */ - ret = bdrv_reopen_parse_backing(reopen_state, set_backings_tran, errp); + ret = bdrv_reopen_parse_file_or_backing(reopen_state, true, + change_child_tran, errp); if (ret < 0) { goto error; } qdict_del(reopen_state->options, "backing"); + /* Allow changing the 'file' option. In this case NULL is not allowed */ + ret = bdrv_reopen_parse_file_or_backing(reopen_state, false, + change_child_tran, errp); + if (ret < 0) { + goto error; + } + qdict_del(reopen_state->options, "file"); + /* Options that are not handled are only okay if they are unchanged * compared to the old state. It is expected that some options are only * used for the initial open, but not reopen (e.g. filename) */ @@ -4541,24 +4648,24 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) /* set BDS specific flags now */ qobject_unref(bs->explicit_options); qobject_unref(bs->options); + qobject_ref(reopen_state->explicit_options); + qobject_ref(reopen_state->options); bs->explicit_options = reopen_state->explicit_options; bs->options = reopen_state->options; bs->open_flags = reopen_state->flags; - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); bs->detect_zeroes = reopen_state->detect_zeroes; - if (reopen_state->replace_backing_bs) { - qdict_del(bs->explicit_options, "backing"); - qdict_del(bs->options, "backing"); - } - /* Remove child references from bs->options and bs->explicit_options. * Child options were already removed in bdrv_reopen_queue_child() */ QLIST_FOREACH(child, &bs->children, next) { qdict_del(bs->explicit_options, child->name); qdict_del(bs->options, child->name); } + /* backing is probably removed, so it's not handled by previous loop */ + qdict_del(bs->explicit_options, "backing"); + qdict_del(bs->options, "backing"); + bdrv_refresh_limits(bs, NULL, NULL); } @@ -4619,6 +4726,8 @@ static void bdrv_close(BlockDriverState *bs) bs->explicit_options = NULL; qobject_unref(bs->full_open_options); bs->full_open_options = NULL; + g_free(bs->block_status_cache); + bs->block_status_cache = NULL; bdrv_release_named_dirty_bitmaps(bs); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -4750,7 +4859,7 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) } /* - * We don't have to restore child->bs here to undo bdrv_replace_child() + * We don't have to restore child->bs here to undo bdrv_replace_child_tran() * because that function is transactionable and it registered own completion * entries in @tran, so .abort() for bdrv_replace_child_safe() will be * called automatically. @@ -4771,22 +4880,23 @@ static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { }; /* - * A function to remove backing-chain child of @bs if exists: cow child for - * format nodes (always .backing) and filter child for filters (may be .file or - * .backing) + * A function to remove backing or file child of @bs. + * Function doesn't update permissions, caller is responsible for this. */ -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran) +static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, + BdrvChild *child, + Transaction *tran) { BdrvRemoveFilterOrCowChild *s; - BdrvChild *child = bdrv_filter_or_cow_child(bs); + + assert(child == bs->backing || child == bs->file); if (!child) { return; } if (child->bs) { - bdrv_replace_child(child, NULL, tran); + bdrv_replace_child_tran(child, NULL, tran); } s = g_new(BdrvRemoveFilterOrCowChild, 1); @@ -4804,6 +4914,17 @@ static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, } } +/* + * A function to remove backing-chain child of @bs if exists: cow child for + * format nodes (always .backing) and filter child for filters (may be .file or + * .backing) + */ +static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, + Transaction *tran) +{ + bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran); +} + static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, bool auto_skip, Transaction *tran, @@ -4826,7 +4947,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, c->name, from->node_name); return -EPERM; } - bdrv_replace_child(c, to, tran); + bdrv_replace_child_tran(c, to, tran); } return 0; @@ -4850,7 +4971,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, Transaction *tran = tran_new(); g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; - BlockDriverState *to_cow_parent; + BlockDriverState *to_cow_parent = NULL; int ret; if (detach_subchain) { @@ -4961,6 +5082,37 @@ out: return ret; } +/* Not for empty child */ +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp) +{ + int ret; + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + BlockDriverState *old_bs = child->bs; + + bdrv_ref(old_bs); + bdrv_drained_begin(old_bs); + bdrv_drained_begin(new_bs); + + bdrv_replace_child_tran(child, new_bs, tran); + + found = g_hash_table_new(NULL, NULL); + refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); + refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); + + ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); + + tran_finalize(tran, ret); + + bdrv_drained_end(old_bs); + bdrv_drained_end(new_bs); + bdrv_unref(old_bs); + + return ret; +} + static void bdrv_delete(BlockDriverState *bs) { assert(bdrv_op_blocker_is_empty(bs)); @@ -4977,29 +5129,61 @@ static void bdrv_delete(BlockDriverState *bs) g_free(bs); } -BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, + +/* + * Replace @bs by newly created block node. + * + * @options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + */ +BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - BlockDriverState *new_node_bs; - Error *local_err = NULL; + ERRP_GUARD(); + int ret; + BlockDriverState *new_node_bs = NULL; + const char *drvname, *node_name; + BlockDriver *drv; - new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp); - if (new_node_bs == NULL) { + drvname = qdict_get_try_str(options, "driver"); + if (!drvname) { + error_setg(errp, "driver is not specified"); + goto fail; + } + + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver: '%s'", drvname); + goto fail; + } + + node_name = qdict_get_try_str(options, "node-name"); + + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, + errp); + options = NULL; /* bdrv_new_open_driver() eats options */ + if (!new_node_bs) { error_prepend(errp, "Could not create node: "); - return NULL; + goto fail; } bdrv_drained_begin(bs); - bdrv_replace_node(bs, new_node_bs, &local_err); + ret = bdrv_replace_node(bs, new_node_bs, errp); bdrv_drained_end(bs); - if (local_err) { - bdrv_unref(new_node_bs); - error_propagate(errp, local_err); - return NULL; + if (ret < 0) { + error_prepend(errp, "Could not replace node: "); + goto fail; } return new_node_bs; + +fail: + qobject_unref(options); + bdrv_unref(new_node_bs); + return NULL; } /* @@ -5032,7 +5216,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs, * -ENOTSUP - format driver doesn't support changing the backing file */ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, - const char *backing_fmt, bool warn) + const char *backing_fmt, bool require) { BlockDriver *drv = bs->drv; int ret; @@ -5046,10 +5230,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, return -EINVAL; } - if (warn && backing_file && !backing_fmt) { - warn_report("Deprecated use of backing file without explicit " - "backing format, use of this image requires " - "potentially unsafe format probing"); + if (require && backing_file && !backing_fmt) { + return -EINVAL; } if (drv->bdrv_change_backing_file != NULL) { @@ -6077,6 +6259,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, void bdrv_init(void) { +#ifdef CONFIG_BDRV_WHITELIST_TOOLS + use_bdrv_whitelist = 1; +#endif module_call_init(MODULE_INIT_BLOCK); } @@ -6200,6 +6385,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) { BdrvChild *child, *parent; int ret; + uint64_t cumulative_perms, cumulative_shared_perms; if (!bs->drv) { return -ENOMEDIUM; @@ -6230,6 +6416,13 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) } } + bdrv_get_cumulative_perm(bs, &cumulative_perms, + &cumulative_shared_perms); + if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + /* Our inactive parents still need write access. Inactivation failed. */ + return -EPERM; + } + bs->open_flags |= BDRV_O_INACTIVE; /* @@ -6537,9 +6730,13 @@ void bdrv_img_create(const char *filename, const char *fmt, } assert(full_backing); - /* backing files always opened read-only */ + /* + * No need to do I/O here, which allows us to open encrypted + * backing images without needing the secret + */ back_flags = flags; back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + back_flags |= BDRV_O_NO_IO; backing_options = qdict_new(); if (backing_fmt) { @@ -6555,24 +6752,11 @@ void bdrv_img_create(const char *filename, const char *fmt, goto out; } else { if (!backing_fmt) { - warn_report("Deprecated use of backing file without explicit " - "backing format (detected format of %s)", - bs->drv->format_name); - if (bs->drv != &bdrv_raw) { - /* - * A probe of raw deserves the most attention: - * leaving the backing format out of the image - * will ensure bs->probed is set (ensuring we - * don't accidentally commit into the backing - * file), and allow more spots to warn the users - * to fix their toolchain when opening this image - * later. For other images, we can safely record - * the format that we probed. - */ - backing_fmt = bs->drv->format_name; - qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt, - NULL); - } + error_setg(&local_err, + "Backing file specified without backing format"); + error_append_hint(&local_err, "Detected format of %s.", + bs->drv->format_name); + goto out; } if (size == -1) { /* Opened BS, have no size */ @@ -6589,9 +6773,9 @@ void bdrv_img_create(const char *filename, const char *fmt, } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ } else if (backing_file && !backing_fmt) { - warn_report("Deprecated use of unopened backing file without " - "explicit backing format, use of this image requires " - "potentially unsafe format probing"); + error_setg(&local_err, + "Backing file specified without backing format"); + goto out; } if (size == -1) { @@ -7574,3 +7758,76 @@ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) { return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); } + +/** + * Check whether [offset, offset + bytes) overlaps with the cached + * block-status data region. + * + * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`, + * which is what bdrv_bsc_is_data()'s interface needs. + * Otherwise, *pnum is not touched. + */ +static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *pnum) +{ + BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache); + bool overlaps; + + overlaps = + qatomic_read(&bsc->valid) && + ranges_overlap(offset, bytes, bsc->data_start, + bsc->data_end - bsc->data_start); + + if (overlaps && pnum) { + *pnum = bsc->data_end - offset; + } + + return overlaps; +} + +/** + * See block_int.h for this function's documentation. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) +{ + RCU_READ_LOCK_GUARD(); + + return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); +} + +/** + * See block_int.h for this function's documentation. + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + RCU_READ_LOCK_GUARD(); + + if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { + qatomic_set(&bs->block_status_cache->valid, false); + } +} + +/** + * See block_int.h for this function's documentation. + */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); + BdrvBlockStatusCache *old_bsc; + + *new_bsc = (BdrvBlockStatusCache) { + .valid = true, + .data_start = offset, + .data_end = offset + bytes, + }; + + QEMU_LOCK_GUARD(&bs->bsc_modify_lock); + + old_bsc = qatomic_rcu_read(&bs->block_status_cache); + qatomic_rcu_set(&bs->block_status_cache, new_bsc); + if (old_bsc) { + g_free_rcu(old_bsc, rcu); + } +}