X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=block.c;h=0dd604d0f6a8d18ab7db0130638dbcb6ec9ffeff;hb=4f81baa33ed645fc17a9908236630b8154502ae5;hp=3bd594eb2aedfb40fc395b1f176db60c007eafae;hpb=dce4fd0c0192c35b15b16b2199b719022b901a8c;p=qmiga%2Fqemu.git diff --git a/block.c b/block.c index 3bd594eb2a..0dd604d0f6 100644 --- a/block.c +++ b/block.c @@ -27,6 +27,7 @@ #include "block/trace.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "block/dirty-bitmap.h" #include "block/fuse.h" #include "block/nbd.h" #include "block/qdict.h" @@ -93,8 +94,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); static void bdrv_remove_child(BdrvChild *child, Transaction *tran); -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran); static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -278,8 +277,8 @@ bool bdrv_is_read_only(BlockDriverState *bs) return !(bs->open_flags & BDRV_O_RDWR); } -int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, - bool ignore_allow_rdw, Error **errp) +static int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, + bool ignore_allow_rdw, Error **errp) { IO_CODE(); @@ -464,12 +463,18 @@ BlockDriver *bdrv_find_format(const char *format_name) /* The driver isn't registered, maybe we need to load a module */ for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { if (!strcmp(block_driver_modules[i].format_name, format_name)) { - block_module_load_one(block_driver_modules[i].library_name); + Error *local_err = NULL; + int rv = block_module_load(block_driver_modules[i].library_name, + &local_err); + if (rv > 0) { + return bdrv_do_find_format(format_name); + } else if (rv < 0) { + error_report_err(local_err); + } break; } } - - return bdrv_do_find_format(format_name); + return NULL; } static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) @@ -522,65 +527,25 @@ typedef struct CreateCo { Error *err; } CreateCo; -static void coroutine_fn bdrv_create_co_entry(void *opaque) +int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp) { - Error *local_err = NULL; int ret; - - CreateCo *cco = opaque; - assert(cco->drv); - GLOBAL_STATE_CODE(); - - ret = cco->drv->bdrv_co_create_opts(cco->drv, - cco->filename, cco->opts, &local_err); - error_propagate(&cco->err, local_err); - cco->ret = ret; -} - -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp) -{ - int ret; - GLOBAL_STATE_CODE(); - - Coroutine *co; - CreateCo cco = { - .drv = drv, - .filename = g_strdup(filename), - .opts = opts, - .ret = NOT_DONE, - .err = NULL, - }; + ERRP_GUARD(); + assert_bdrv_graph_readable(); if (!drv->bdrv_co_create_opts) { - error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); - ret = -ENOTSUP; - goto out; + error_setg(errp, "Driver '%s' does not support image creation", + drv->format_name); + return -ENOTSUP; } - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_create_co_entry(&cco); - } else { - co = qemu_coroutine_create(bdrv_create_co_entry, &cco); - qemu_coroutine_enter(co); - while (cco.ret == NOT_DONE) { - aio_poll(qemu_get_aio_context(), true); - } + ret = drv->bdrv_co_create_opts(drv, filename, opts, errp); + if (ret < 0 && !*errp) { + error_setg_errno(errp, -ret, "Could not create image"); } - ret = cco.ret; - if (ret < 0) { - if (cco.err) { - error_propagate(errp, cco.err); - } else { - error_setg_errno(errp, -ret, "Could not create image"); - } - } - -out: - g_free(cco.filename); return ret; } @@ -693,8 +658,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, options = qdict_new(); qdict_put_str(options, "driver", drv->format_name); - blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_RESIZE, errp); + blk = blk_co_new_open(filename, NULL, options, + BDRV_O_RDWR | BDRV_O_RESIZE, errp); if (!blk) { error_prepend(errp, "Protocol driver '%s' does not support image " "creation, and opening the image failed: ", @@ -719,7 +684,8 @@ out: return ret; } -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) +int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, + Error **errp) { QemuOpts *protocol_opts; BlockDriver *drv; @@ -760,7 +726,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) goto out; } - ret = bdrv_create(drv, filename, protocol_opts, errp); + ret = bdrv_co_create(drv, filename, protocol_opts, errp); out: qemu_opts_del(protocol_opts); qobject_unref(qdict); @@ -774,6 +740,7 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) IO_CODE(); assert(bs != NULL); + assert_bdrv_graph_readable(); if (!bs->drv) { error_setg(errp, "Block node '%s' is not opened", bs->filename); @@ -981,12 +948,16 @@ BlockDriver *bdrv_find_protocol(const char *filename, for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { if (block_driver_modules[i].protocol_name && !strcmp(block_driver_modules[i].protocol_name, protocol)) { - block_module_load_one(block_driver_modules[i].library_name); + int rv = block_module_load(block_driver_modules[i].library_name, errp); + if (rv > 0) { + drv1 = bdrv_do_find_protocol(protocol); + } else if (rv < 0) { + return NULL; + } break; } } - drv1 = bdrv_do_find_protocol(protocol); if (!drv1) { error_setg(errp, "Unknown protocol '%s'", protocol); } @@ -1066,22 +1037,24 @@ static int find_image_format(BlockBackend *file, const char *filename, * Set the current 'total_sectors' value * Return 0 on success, -errno on error. */ -int refresh_total_sectors(BlockDriverState *bs, int64_t hint) +int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs, + int64_t hint) { BlockDriver *drv = bs->drv; IO_CODE(); + assert_bdrv_graph_readable(); if (!drv) { return -ENOMEDIUM; } - /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ + /* Do not attempt drv->bdrv_co_getlength() on scsi-generic devices */ if (bdrv_is_sg(bs)) return 0; /* query actual device if possible, otherwise just trust the hint */ - if (drv->bdrv_getlength) { - int64_t length = drv->bdrv_getlength(bs); + if (drv->bdrv_co_getlength) { + int64_t length = drv->bdrv_co_getlength(bs); if (length < 0) { return length; } @@ -1218,20 +1191,19 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_do_drained_begin_quiesce(bs, NULL, false); + bdrv_do_drained_begin_quiesce(bs, NULL); } static bool bdrv_child_cb_drained_poll(BdrvChild *child) { BlockDriverState *bs = child->opaque; - return bdrv_drain_poll(bs, false, NULL, false); + return bdrv_drain_poll(bs, NULL, false); } -static void bdrv_child_cb_drained_end(BdrvChild *child, - int *drained_end_counter) +static void bdrv_child_cb_drained_end(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_drained_end_no_poll(bs, drained_end_counter); + bdrv_drained_end(bs); } static int bdrv_child_cb_inactivate(BdrvChild *child) @@ -1435,11 +1407,11 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, *child_flags = flags; } -static void bdrv_child_cb_attach(BdrvChild *child) +static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; - assert_bdrv_graph_writable(bs); + assert_bdrv_graph_writable(); QLIST_INSERT_HEAD(&bs->children, child, next); if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) { /* @@ -1475,11 +1447,9 @@ static void bdrv_child_cb_attach(BdrvChild *child) assert(!bs->file); bs->file = child; } - - bdrv_apply_subtree_drain(child, bs); } -static void bdrv_child_cb_detach(BdrvChild *child) +static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -1487,9 +1457,7 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_backing_detach(child); } - bdrv_unapply_subtree_drain(child, bs); - - assert_bdrv_graph_writable(bs); + assert_bdrv_graph_writable(); QLIST_REMOVE(child, next); if (child == bs->backing) { assert(child != bs->file); @@ -1533,7 +1501,7 @@ const BdrvChildClass child_of_bds = { AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) { - GLOBAL_STATE_CODE(); + IO_CODE(); return c->klass->get_parent_aio_context(c); } @@ -1637,6 +1605,11 @@ out: g_free(gen_node_name); } +/* + * The caller must always hold @bs AioContext lock, because this function calls + * bdrv_refresh_total_sectors() which polls when called from non-coroutine + * context. + */ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, QDict *options, int open_flags, Error **errp) @@ -1688,7 +1661,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; - ret = refresh_total_sectors(bs, bs->total_sectors); + ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { error_setg_errno(errp, -ret, "Could not refresh total sector count"); return ret; @@ -1705,8 +1678,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, assert(is_power_of_2(bs->bl.request_alignment)); for (i = 0; i < bs->quiesce_counter; i++) { - if (drv->bdrv_co_drain_begin) { - drv->bdrv_co_drain_begin(bs); + if (drv->bdrv_drain_begin) { + drv->bdrv_drain_begin(bs); } } @@ -2404,6 +2377,20 @@ static void bdrv_replace_child_abort(void *opaque) GLOBAL_STATE_CODE(); /* old_bs reference is transparently moved from @s to @s->child */ + if (!s->child->bs) { + /* + * The parents were undrained when removing old_bs from the child. New + * requests can't have been made, though, because the child was empty. + * + * TODO Make bdrv_replace_child_noperm() transactionable to avoid + * undraining the parent in the first place. Once this is done, having + * new_bs drained when calling bdrv_replace_child_tran() is not a + * requirement any more. + */ + bdrv_parent_drained_begin_single(s->child); + assert(!bdrv_parent_drained_poll_single(s->child)); + } + assert(s->child->quiesced_parent); bdrv_replace_child_noperm(s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2419,12 +2406,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * Note: real unref of old_bs is done only on commit. * + * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be + * kept drained until the transaction is completed. + * * The function doesn't update permissions, caller is responsible for this. */ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); + + assert(child->quiesced_parent); + assert(!new_bs || new_bs->quiesce_counter); + *s = (BdrvReplaceChildState) { .child = child, .old_bs = child->bs, @@ -2513,8 +2507,12 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, return 0; } -static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, - Transaction *tran, Error **errp) +/* + * @list is a product of bdrv_topological_dfs() (may be called several times) - + * a topologically sorted subgraph. + */ +static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) { int ret; BlockDriverState *bs; @@ -2536,6 +2534,24 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, return 0; } +/* + * @list is any list of nodes. List is completed by all subtrees and + * topologically sorted. It's not a problem if some node occurs in the @list + * several times. + */ +static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) +{ + g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL); + g_autoptr(GSList) refresh_list = NULL; + + for ( ; list; list = list->next) { + refresh_list = bdrv_topological_dfs(refresh_list, found, list->data); + } + + return bdrv_do_refresh_perms(refresh_list, q, tran, errp); +} + void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, uint64_t *shared_perm) { @@ -2583,15 +2599,24 @@ char *bdrv_perm_names(uint64_t perm) } -static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) +/* @tran is allowed to be NULL. In this case no rollback is possible */ +static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, + Error **errp) { int ret; - Transaction *tran = tran_new(); + Transaction *local_tran = NULL; g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); GLOBAL_STATE_CODE(); - ret = bdrv_list_refresh_perms(list, NULL, tran, errp); - tran_finalize(tran, ret); + if (!tran) { + tran = local_tran = tran_new(); + } + + ret = bdrv_do_refresh_perms(list, NULL, tran, errp); + + if (local_tran) { + tran_finalize(local_tran, ret); + } return ret; } @@ -2607,7 +2632,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, bdrv_child_set_perm(c, perm, shared, tran); - ret = bdrv_refresh_perms(c->bs, &local_err); + ret = bdrv_refresh_perms(c->bs, tran, &local_err); tran_finalize(tran, ret); @@ -2816,14 +2841,41 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } +/* + * Replaces the node that a BdrvChild points to without updating permissions. + * + * If @new_bs is non-NULL, the parent of @child must already be drained through + * @child. + */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; - int drain_saldo; assert(!child->frozen); + + /* + * If we want to change the BdrvChild to point to a drained node as its new + * child->bs, we need to make sure that its new parent is drained, too. In + * other words, either child->quiesce_parent must already be true or we must + * be able to set it and keep the parent's quiesce_counter consistent with + * that, but without polling or starting new requests (this function + * guarantees that it doesn't poll, and starting new requests would be + * against the invariants of drain sections). + * + * To keep things simple, we pick the first option (child->quiesce_parent + * must already be true). We also generalise the rule a bit to make it + * easier to verify in callers and more likely to be covered in test cases: + * The parent must be quiesced through this child even if new_bs isn't + * currently drained. + * + * The only exception is for callers that always pass new_bs == NULL. In + * this case, we obviously never need to consider the case of a drained + * new_bs, so we can keep the callers simpler by allowing them not to drain + * the parent. + */ + assert(!new_bs || child->quiesced_parent); assert(old_bs != new_bs); GLOBAL_STATE_CODE(); @@ -2831,59 +2883,33 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; - - /* - * If the new child node is drained but the old one was not, flush - * all outstanding requests to the old child node. - */ - while (drain_saldo > 0 && child->klass->drained_begin) { - bdrv_parent_drained_begin_single(child, true); - drain_saldo--; - } - + /* TODO Pull this up into the callers to avoid polling here */ + bdrv_graph_wrlock(); if (old_bs) { - /* Detach first so that the recursive drain sections coming from @child - * are already gone and we only end the drain sections that came from - * elsewhere. */ if (child->klass->detach) { child->klass->detach(child); } - assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } child->bs = new_bs; if (new_bs) { - assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - - /* - * Detaching the old node may have led to the new node's - * quiesce_counter having been decreased. Not a problem, we - * just need to recognize this here and then invoke - * drained_end appropriately more often. - */ - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); - drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - - /* Attach only after starting new drained sections, so that recursive - * drain sections coming from @child don't get an extra .drained_begin - * callback. */ if (child->klass->attach) { child->klass->attach(child); } } + bdrv_graph_wrunlock(); /* - * If the old child node was drained but the new one is not, allow - * requests to come in only after the new node has been attached. + * If the parent was drained through this BdrvChild previously, but new_bs + * is not drained, allow requests to come in only after the new node has + * been attached. */ - while (drain_saldo < 0 && child->klass->drained_end) { + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (!new_bs_quiesce_counter && child->quiesced_parent) { bdrv_parent_drained_end_single(child); - drain_saldo++; } } @@ -3016,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); + /* + * Let every new BdrvChild start with a drained parent. Inserting the child + * in the graph with bdrv_replace_child_noperm() will undrain it if + * @child_bs is not drained. + * + * The child was only just created and is not yet visible in global state + * until bdrv_replace_child_noperm() inserts it into the graph, so nobody + * could have sent requests and polling is not necessary. + * + * Note that this means that the parent isn't fully drained yet, we only + * stop new requests from coming in. This is fine, we don't care about the + * old requests here, they are not for this child. If another place enters a + * drain section for the same parent, but wants it to be fully quiesced, it + * will not run most of the the code in .drained_begin() again (which is not + * a problem, we already did this), but it will still poll until the parent + * is fully quiesced, so it will not be negatively affected either. + */ + bdrv_parent_drained_begin_single(new_child); bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); @@ -3060,30 +3104,6 @@ static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, tran, errp); } -static void bdrv_detach_child(BdrvChild *child) -{ - BlockDriverState *old_bs = child->bs; - - GLOBAL_STATE_CODE(); - bdrv_replace_child_noperm(child, NULL); - bdrv_child_free(child); - - if (old_bs) { - /* - * Update permissions for old node. We're just taking a parent away, so - * we're loosening restrictions. Errors of permission update are not - * fatal in this case, ignore them. - */ - bdrv_refresh_perms(old_bs, NULL); - - /* - * When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext - */ - bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, NULL); - } -} - /* * This function steals the reference to child_bs from the caller. * That reference is later dropped by bdrv_root_unref_child(). @@ -3115,7 +3135,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, goto out; } - ret = bdrv_refresh_perms(child_bs, errp); + ret = bdrv_refresh_perms(child_bs, tran, errp); out: tran_finalize(tran, ret); @@ -3156,7 +3176,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, goto out; } - ret = bdrv_refresh_perms(parent_bs, errp); + ret = bdrv_refresh_perms(parent_bs, tran, errp); if (ret < 0) { goto out; } @@ -3172,12 +3192,28 @@ out: /* Callers must ensure that child->frozen is false. */ void bdrv_root_unref_child(BdrvChild *child) { - BlockDriverState *child_bs; + BlockDriverState *child_bs = child->bs; GLOBAL_STATE_CODE(); + bdrv_replace_child_noperm(child, NULL); + bdrv_child_free(child); + + if (child_bs) { + /* + * Update permissions for old node. We're just taking a parent away, so + * we're loosening restrictions. Errors of permission update are not + * fatal in this case, ignore them. + */ + bdrv_refresh_perms(child_bs, NULL, NULL); + + /* + * When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext + */ + bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, + NULL); + } - child_bs = child->bs; - bdrv_detach_child(child); bdrv_unref(child_bs); } @@ -3396,24 +3432,35 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); } -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp) +int bdrv_set_backing_hd_drained(BlockDriverState *bs, + BlockDriverState *backing_hd, + Error **errp) { int ret; Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); if (ret < 0) { goto out; } - ret = bdrv_refresh_perms(bs, errp); + ret = bdrv_refresh_perms(bs, tran, errp); out: tran_finalize(tran, ret); + return ret; +} + +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) +{ + int ret; + GLOBAL_STATE_CODE(); + bdrv_drained_begin(bs); + ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); bdrv_drained_end(bs); return ret; @@ -3758,14 +3805,16 @@ out: * The reference parameter may be used to specify an existing block device which * should be opened. If specified, neither options nor a filename may be given, * nor can an existing BDS be reused (that is, *pbs has to be NULL). + * + * The caller must always hold @filename AioContext lock, because this + * function eventually calls bdrv_refresh_total_sectors() which polls + * when called from non-coroutine context. */ -static BlockDriverState *bdrv_open_inherit(const char *filename, - const char *reference, - QDict *options, int flags, - BlockDriverState *parent, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - Error **errp) +static BlockDriverState * no_coroutine_fn +bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + int flags, BlockDriverState *parent, + const BdrvChildClass *child_class, BdrvChildRole child_role, + Error **errp) { int ret; BlockBackend *file = NULL; @@ -3781,6 +3830,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, assert(!child_class || !flags); assert(!child_class == !parent); GLOBAL_STATE_CODE(); + assert(!qemu_in_coroutine()); if (reference) { bool options_non_empty = options ? qdict_size(options) : false; @@ -4046,6 +4096,11 @@ close_and_fail: return NULL; } +/* + * The caller must always hold @filename AioContext lock, because this + * function eventually calls bdrv_refresh_total_sectors() which polls + * when called from non-coroutine context. + */ BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp) { @@ -4143,7 +4198,9 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, * returns a pointer to bs_queue, which is either the newly allocated * bs_queue, or the existing bs_queue being used. * - * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). + * bs is drained here and undrained by bdrv_reopen_queue_free(). + * + * To be called with bs->aio_context locked. */ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, @@ -4163,12 +4220,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, int flags; QemuOpts *opts; - /* Make sure that the caller remembered to use a drained section. This is - * important to avoid graph changes between the recursive queuing here and - * bdrv_reopen_multiple(). */ - assert(bs->quiesce_counter > 0); GLOBAL_STATE_CODE(); + bdrv_drained_begin(bs); + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QTAILQ_INIT(bs_queue); @@ -4302,6 +4357,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, return bs_queue; } +/* To be called with bs->aio_context locked */ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) @@ -4318,6 +4374,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); + + aio_context_acquire(ctx); + bdrv_drained_end(bs_entry->state.bs); + aio_context_release(ctx); + qobject_unref(bs_entry->state.explicit_options); qobject_unref(bs_entry->state.options); g_free(bs_entry); @@ -4351,7 +4413,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) BlockReopenQueueEntry *bs_entry, *next; AioContext *ctx; Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; assert(qemu_get_current_aio_context() == qemu_get_aio_context()); @@ -4381,18 +4442,15 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) bs_entry->prepared = true; } - found = g_hash_table_new(NULL, NULL); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { BDRVReopenState *state = &bs_entry->state; - refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); + refresh_list = g_slist_prepend(refresh_list, state->bs); if (state->old_backing_bs) { - refresh_list = bdrv_topological_dfs(refresh_list, found, - state->old_backing_bs); + refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs); } if (state->old_file_bs) { - refresh_list = bdrv_topological_dfs(refresh_list, found, - state->old_file_bs); + refresh_list = g_slist_prepend(refresh_list, state->old_file_bs); } } @@ -4465,18 +4523,16 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, GLOBAL_STATE_CODE(); - bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + if (ctx != qemu_get_aio_context()) { aio_context_release(ctx); } - - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ret = bdrv_reopen_multiple(queue, errp); if (ctx != qemu_get_aio_context()) { aio_context_acquire(ctx); } - bdrv_subtree_drained_end(bs); return ret; } @@ -5057,23 +5113,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) } if (child->bs) { + BlockDriverState *bs = child->bs; + bdrv_drained_begin(bs); bdrv_replace_child_tran(child, NULL, tran); + bdrv_drained_end(bs); } tran_add(tran, &bdrv_remove_child_drv, child); } -/* - * A function to remove backing-chain child of @bs if exists: cow child for - * format nodes (always .backing) and filter child for filters (may be .file or - * .backing) - */ -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran) +static void undrain_on_clean_cb(void *opaque) { - bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); + bdrv_drained_end(opaque); } +static TransactionActionDrv undrain_on_clean = { + .clean = undrain_on_clean_cb, +}; + static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, bool auto_skip, Transaction *tran, @@ -5083,6 +5140,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, GLOBAL_STATE_CODE(); + bdrv_drained_begin(from); + bdrv_drained_begin(to); + tran_add(tran, &undrain_on_clean, from); + tran_add(tran, &undrain_on_clean, to); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { @@ -5120,7 +5182,6 @@ static int bdrv_replace_node_common(BlockDriverState *from, Error **errp) { Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; BlockDriverState *to_cow_parent = NULL; int ret; @@ -5158,13 +5219,11 @@ static int bdrv_replace_node_common(BlockDriverState *from, } if (detach_subchain) { - bdrv_remove_filter_or_cow_child(to_cow_parent, tran); + bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); } - found = g_hash_table_new(NULL, NULL); - - refresh_list = bdrv_topological_dfs(refresh_list, found, to); - refresh_list = bdrv_topological_dfs(refresh_list, found, from); + refresh_list = g_slist_prepend(refresh_list, to); + refresh_list = g_slist_prepend(refresh_list, from); ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); if (ret < 0) { @@ -5209,6 +5268,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) * child. * * This function does not create any image files. + * + * The caller must hold the AioContext lock for @bs_top. */ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, Error **errp) @@ -5216,11 +5277,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, int ret; BdrvChild *child; Transaction *tran = tran_new(); + AioContext *old_context, *new_context = NULL; GLOBAL_STATE_CODE(); assert(!bs_new->backing); + old_context = bdrv_get_aio_context(bs_top); + child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", &child_of_bds, bdrv_backing_role(bs_new), tran, errp); @@ -5229,17 +5293,35 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, goto out; } + /* + * bdrv_attach_child_noperm could change the AioContext of bs_top. + * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily + * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE + * that assumes the new lock is taken. + */ + new_context = bdrv_get_aio_context(bs_top); + + if (old_context != new_context) { + aio_context_release(old_context); + aio_context_acquire(new_context); + } + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); if (ret < 0) { goto out; } - ret = bdrv_refresh_perms(bs_new, errp); + ret = bdrv_refresh_perms(bs_new, tran, errp); out: tran_finalize(tran, ret); bdrv_refresh_limits(bs_top, NULL, NULL); + if (new_context && old_context != new_context) { + aio_context_release(new_context); + aio_context_acquire(old_context); + } + return ret; } @@ -5249,7 +5331,6 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, { int ret; Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; BlockDriverState *old_bs = child->bs; @@ -5261,9 +5342,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_replace_child_tran(child, new_bs, tran); - found = g_hash_table_new(NULL, NULL); - refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); - refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); + refresh_list = g_slist_prepend(refresh_list, old_bs); + refresh_list = g_slist_prepend(refresh_list, new_bs); ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); @@ -5363,6 +5443,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { IO_CODE(); + assert_bdrv_graph_readable(); if (bs->drv == NULL) { return -ENOMEDIUM; } @@ -5585,7 +5666,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, GLOBAL_STATE_CODE(); bdrv_ref(top); - bdrv_subtree_drained_begin(top); + bdrv_drained_begin(base); if (!top->drv || !base->drv) { goto exit; @@ -5658,13 +5739,13 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, ret = 0; exit: - bdrv_subtree_drained_end(top); + bdrv_drained_end(base); bdrv_unref(top); return ret; } /** - * Implementation of BlockDriver.bdrv_get_allocated_file_size() that + * Implementation of BlockDriver.bdrv_co_get_allocated_file_size() that * sums the size of all data-bearing children. (This excludes backing * children.) */ @@ -5677,7 +5758,7 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)) { - child_size = bdrv_get_allocated_file_size(child->bs); + child_size = bdrv_co_get_allocated_file_size(child->bs); if (child_size < 0) { return child_size; } @@ -5692,7 +5773,7 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) * Length of a allocated file in bytes. Sparse files are counted by actual * allocated space. Return < 0 if error or unknown. */ -int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) +int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs) { BlockDriver *drv = bs->drv; IO_CODE(); @@ -5700,8 +5781,8 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) if (!drv) { return -ENOMEDIUM; } - if (drv->bdrv_get_allocated_file_size) { - return drv->bdrv_get_allocated_file_size(bs); + if (drv->bdrv_co_get_allocated_file_size) { + return drv->bdrv_co_get_allocated_file_size(bs); } if (drv->bdrv_file_open) { @@ -5713,7 +5794,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) return -ENOTSUP; } else if (drv->is_filter) { /* Filter drivers default to the size of their filtered child */ - return bdrv_get_allocated_file_size(bdrv_filter_bs(bs)); + return bdrv_co_get_allocated_file_size(bdrv_filter_bs(bs)); } else { /* Other drivers default to summing their children's sizes */ return bdrv_sum_allocated_file_size(bs); @@ -5759,16 +5840,17 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, /** * Return number of sectors on success, -errno on error. */ -int64_t bdrv_nb_sectors(BlockDriverState *bs) +int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs) { BlockDriver *drv = bs->drv; IO_CODE(); + assert_bdrv_graph_readable(); if (!drv) return -ENOMEDIUM; if (drv->has_variable_length) { - int ret = refresh_total_sectors(bs, bs->total_sectors); + int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { return ret; } @@ -5780,11 +5862,13 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs) * Return length in bytes on success, -errno on error. * The length is always a multiple of BDRV_SECTOR_SIZE. */ -int64_t bdrv_getlength(BlockDriverState *bs) +int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs) { - int64_t ret = bdrv_nb_sectors(bs); + int64_t ret; IO_CODE(); + assert_bdrv_graph_readable(); + ret = bdrv_co_nb_sectors(bs); if (ret < 0) { return ret; } @@ -6244,7 +6328,7 @@ void bdrv_get_backing_filename(BlockDriverState *bs, pstrcpy(filename, filename_size, bs->backing_file); } -int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { int ret; BlockDriver *drv = bs->drv; @@ -6253,15 +6337,15 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) if (!drv) { return -ENOMEDIUM; } - if (!drv->bdrv_get_info) { + if (!drv->bdrv_co_get_info) { BlockDriverState *filtered = bdrv_filter_bs(bs); if (filtered) { - return bdrv_get_info(filtered, bdi); + return bdrv_co_get_info(filtered, bdi); } return -ENOTSUP; } memset(bdi, 0, sizeof(*bdi)); - ret = drv->bdrv_get_info(bs, bdi); + ret = drv->bdrv_co_get_info(bs, bdi); if (ret < 0) { return ret; } @@ -6294,14 +6378,14 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) return drv->bdrv_get_specific_stats(bs); } -void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) +void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event) { IO_CODE(); - if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { + if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) { return; } - bs->drv->bdrv_debug_event(bs, event); + bs->drv->bdrv_co_debug_event(bs, event); } static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) @@ -6534,7 +6618,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp) */ if (bs->open_flags & BDRV_O_INACTIVE) { bs->open_flags &= ~BDRV_O_INACTIVE; - ret = bdrv_refresh_perms(bs, errp); + ret = bdrv_refresh_perms(bs, NULL, errp); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; return ret; @@ -6550,7 +6634,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp) bdrv_dirty_bitmap_skip_store(bm, false); } - ret = refresh_total_sectors(bs, bs->total_sectors); + ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; error_setg_errno(errp, -ret, "Could not refresh total sector count"); @@ -6578,6 +6662,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) IO_CODE(); assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert_bdrv_graph_readable(); if (bs->drv->bdrv_co_invalidate_cache) { bs->drv->bdrv_co_invalidate_cache(bs, &local_err); @@ -6679,7 +6764,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) * We only tried to loosen restrictions, so errors are not fatal, ignore * them. */ - bdrv_refresh_perms(bs, NULL); + bdrv_refresh_perms(bs, NULL, NULL); /* Recursively inactivate children */ QLIST_FOREACH(child, &bs->children, next) { @@ -6740,20 +6825,21 @@ out: /** * Return TRUE if the media is present */ -bool bdrv_is_inserted(BlockDriverState *bs) +bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs) { BlockDriver *drv = bs->drv; BdrvChild *child; IO_CODE(); + assert_bdrv_graph_readable(); if (!drv) { return false; } - if (drv->bdrv_is_inserted) { - return drv->bdrv_is_inserted(bs); + if (drv->bdrv_co_is_inserted) { + return drv->bdrv_co_is_inserted(bs); } QLIST_FOREACH(child, &bs->children, next) { - if (!bdrv_is_inserted(child->bs)) { + if (!bdrv_co_is_inserted(child->bs)) { return false; } } @@ -6763,13 +6849,14 @@ bool bdrv_is_inserted(BlockDriverState *bs) /** * If eject_flag is TRUE, eject the media. Otherwise, close the tray */ -void bdrv_eject(BlockDriverState *bs, bool eject_flag) +void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag) { BlockDriver *drv = bs->drv; IO_CODE(); + assert_bdrv_graph_readable(); - if (drv && drv->bdrv_eject) { - drv->bdrv_eject(bs, eject_flag); + if (drv && drv->bdrv_co_eject) { + drv->bdrv_co_eject(bs, eject_flag); } } @@ -6777,14 +6864,15 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag) * Lock or unlock the media (if it is locked, the user won't be able * to eject it manually). */ -void bdrv_lock_medium(BlockDriverState *bs, bool locked) +void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked) { BlockDriver *drv = bs->drv; IO_CODE(); + assert_bdrv_graph_readable(); trace_bdrv_lock_medium(bs, locked); - if (drv && drv->bdrv_lock_medium) { - drv->bdrv_lock_medium(bs, locked); + if (drv && drv->bdrv_co_lock_medium) { + drv->bdrv_co_lock_medium(bs, locked); } } @@ -6884,6 +6972,10 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } +/* + * Must not be called while holding the lock of an AioContext other than the + * current one. + */ void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags, bool quiet, @@ -7132,12 +7224,6 @@ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) } } -void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) -{ - IO_CODE(); - aio_co_enter(bdrv_get_aio_context(bs), co); -} - static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) { GLOBAL_STATE_CODE(); @@ -7171,7 +7257,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); } - assert_bdrv_graph_writable(bs); bs->aio_context = NULL; } @@ -7185,7 +7270,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, aio_disable_external(new_context); } - assert_bdrv_graph_writable(bs); bs->aio_context = new_context; if (bs->drv && bs->drv->bdrv_attach_aio_context) { @@ -7266,7 +7350,6 @@ static void bdrv_set_aio_context_commit(void *opaque) BlockDriverState *bs = (BlockDriverState *) state->bs; AioContext *new_context = state->new_ctx; AioContext *old_context = bdrv_get_aio_context(bs); - assert_bdrv_graph_writable(bs); /* * Take the old AioContex when detaching it from bs.