OSDN Git Service

qcow2: Mark qcow2_signal_corruption() and callers GRAPH_RDLOCK
[qmiga/qemu.git] / block / replication.c
1 /*
2  * Replication Block filter
3  *
4  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
5  * Copyright (c) 2016 Intel Corporation
6  * Copyright (c) 2016 FUJITSU LIMITED
7  *
8  * Author:
9  *   Wen Congyang <wency@cn.fujitsu.com>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2 or later.
12  * See the COPYING file in the top-level directory.
13  */
14
15 #include "qemu/osdep.h"
16 #include "qemu/module.h"
17 #include "qemu/option.h"
18 #include "block/nbd.h"
19 #include "block/blockjob.h"
20 #include "block/block_int.h"
21 #include "block/block_backup.h"
22 #include "sysemu/block-backend.h"
23 #include "qapi/error.h"
24 #include "qapi/qmp/qdict.h"
25 #include "block/replication.h"
26
/* Life-cycle stage of one replication filter node. */
typedef enum {
    /* block replication is not started */
    BLOCK_REPLICATION_NONE,
    /* block replication is running */
    BLOCK_REPLICATION_RUNNING,
    /* failover is running in background */
    BLOCK_REPLICATION_FAILOVER,
    /* failover failed */
    BLOCK_REPLICATION_FAILOVER_FAILED,
    /* block replication is done */
    BLOCK_REPLICATION_DONE,
} ReplicationStage;
34
35 typedef struct BDRVReplicationState {
36     ReplicationMode mode;
37     ReplicationStage stage;
38     BlockJob *commit_job;
39     BdrvChild *hidden_disk;
40     BdrvChild *secondary_disk;
41     BlockJob *backup_job;
42     char *top_id;
43     ReplicationState *rs;
44     Error *blocker;
45     bool orig_hidden_read_only;
46     bool orig_secondary_read_only;
47     int error;
48 } BDRVReplicationState;
49
50 static void replication_start(ReplicationState *rs, ReplicationMode mode,
51                               Error **errp);
52 static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
53 static void replication_get_error(ReplicationState *rs, Error **errp);
54 static void replication_stop(ReplicationState *rs, bool failover,
55                              Error **errp);
56
57 #define REPLICATION_MODE        "mode"
58 #define REPLICATION_TOP_ID      "top-id"
59 static QemuOptsList replication_runtime_opts = {
60     .name = "replication",
61     .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
62     .desc = {
63         {
64             .name = REPLICATION_MODE,
65             .type = QEMU_OPT_STRING,
66         },
67         {
68             .name = REPLICATION_TOP_ID,
69             .type = QEMU_OPT_STRING,
70         },
71         { /* end of list */ }
72     },
73 };
74
75 static ReplicationOps replication_ops = {
76     .start = replication_start,
77     .checkpoint = replication_do_checkpoint,
78     .get_error = replication_get_error,
79     .stop = replication_stop,
80 };
81
82 static int replication_open(BlockDriverState *bs, QDict *options,
83                             int flags, Error **errp)
84 {
85     int ret;
86     BDRVReplicationState *s = bs->opaque;
87     QemuOpts *opts = NULL;
88     const char *mode;
89     const char *top_id;
90
91     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
92     if (ret < 0) {
93         return ret;
94     }
95
96     ret = -EINVAL;
97     opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
98     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
99         goto fail;
100     }
101
102     mode = qemu_opt_get(opts, REPLICATION_MODE);
103     if (!mode) {
104         error_setg(errp, "Missing the option mode");
105         goto fail;
106     }
107
108     if (!strcmp(mode, "primary")) {
109         s->mode = REPLICATION_MODE_PRIMARY;
110         top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
111         if (top_id) {
112             error_setg(errp,
113                        "The primary side does not support option top-id");
114             goto fail;
115         }
116     } else if (!strcmp(mode, "secondary")) {
117         s->mode = REPLICATION_MODE_SECONDARY;
118         top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
119         s->top_id = g_strdup(top_id);
120         if (!s->top_id) {
121             error_setg(errp, "Missing the option top-id");
122             goto fail;
123         }
124     } else {
125         error_setg(errp,
126                    "The option mode's value should be primary or secondary");
127         goto fail;
128     }
129
130     s->rs = replication_new(bs, &replication_ops);
131
132     ret = 0;
133
134 fail:
135     qemu_opts_del(opts);
136     return ret;
137 }
138
139 static void replication_close(BlockDriverState *bs)
140 {
141     BDRVReplicationState *s = bs->opaque;
142     Job *commit_job;
143     GLOBAL_STATE_CODE();
144
145     if (s->stage == BLOCK_REPLICATION_RUNNING) {
146         replication_stop(s->rs, false, NULL);
147     }
148     if (s->stage == BLOCK_REPLICATION_FAILOVER) {
149         commit_job = &s->commit_job->job;
150         assert(commit_job->aio_context == qemu_get_current_aio_context());
151         job_cancel_sync(commit_job, false);
152     }
153
154     if (s->mode == REPLICATION_MODE_SECONDARY) {
155         g_free(s->top_id);
156     }
157
158     replication_remove(s->rs);
159 }
160
161 static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
162                                    BdrvChildRole role,
163                                    BlockReopenQueue *reopen_queue,
164                                    uint64_t perm, uint64_t shared,
165                                    uint64_t *nperm, uint64_t *nshared)
166 {
167     if (role & BDRV_CHILD_PRIMARY) {
168         *nperm = BLK_PERM_CONSISTENT_READ;
169     } else {
170         *nperm = 0;
171     }
172
173     if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
174         *nperm |= BLK_PERM_WRITE;
175     }
176     *nshared = BLK_PERM_CONSISTENT_READ
177                | BLK_PERM_WRITE
178                | BLK_PERM_WRITE_UNCHANGED;
179     return;
180 }
181
182 static int64_t coroutine_fn GRAPH_RDLOCK
183 replication_co_getlength(BlockDriverState *bs)
184 {
185     return bdrv_co_getlength(bs->file->bs);
186 }
187
188 static int replication_get_io_status(BDRVReplicationState *s)
189 {
190     switch (s->stage) {
191     case BLOCK_REPLICATION_NONE:
192         return -EIO;
193     case BLOCK_REPLICATION_RUNNING:
194         return 0;
195     case BLOCK_REPLICATION_FAILOVER:
196         return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0;
197     case BLOCK_REPLICATION_FAILOVER_FAILED:
198         return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1;
199     case BLOCK_REPLICATION_DONE:
200         /*
201          * active commit job completes, and active disk and secondary_disk
202          * is swapped, so we can operate bs->file directly
203          */
204         return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0;
205     default:
206         abort();
207     }
208 }
209
210 static int replication_return_value(BDRVReplicationState *s, int ret)
211 {
212     if (s->mode == REPLICATION_MODE_SECONDARY) {
213         return ret;
214     }
215
216     if (ret < 0) {
217         s->error = ret;
218         ret = 0;
219     }
220
221     return ret;
222 }
223
224 static int coroutine_fn GRAPH_RDLOCK
225 replication_co_readv(BlockDriverState *bs, int64_t sector_num,
226                      int remaining_sectors, QEMUIOVector *qiov)
227 {
228     BDRVReplicationState *s = bs->opaque;
229     int ret;
230
231     if (s->mode == REPLICATION_MODE_PRIMARY) {
232         /* We only use it to forward primary write requests */
233         return -EIO;
234     }
235
236     ret = replication_get_io_status(s);
237     if (ret < 0) {
238         return ret;
239     }
240
241     ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
242                          remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
243
244     return replication_return_value(s, ret);
245 }
246
247 static int coroutine_fn GRAPH_RDLOCK
248 replication_co_writev(BlockDriverState *bs, int64_t sector_num,
249                       int remaining_sectors, QEMUIOVector *qiov, int flags)
250 {
251     BDRVReplicationState *s = bs->opaque;
252     QEMUIOVector hd_qiov;
253     uint64_t bytes_done = 0;
254     BdrvChild *top = bs->file;
255     BdrvChild *base = s->secondary_disk;
256     BdrvChild *target;
257     int ret;
258     int64_t n;
259
260     ret = replication_get_io_status(s);
261     if (ret < 0) {
262         goto out;
263     }
264
265     if (ret == 0) {
266         ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE,
267                               remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
268         return replication_return_value(s, ret);
269     }
270
271     /*
272      * Failover failed, only write to active disk if the sectors
273      * have already been allocated in active disk/hidden disk.
274      */
275     qemu_iovec_init(&hd_qiov, qiov->niov);
276     while (remaining_sectors > 0) {
277         int64_t count;
278
279         ret = bdrv_co_is_allocated_above(top->bs, base->bs, false,
280                                          sector_num * BDRV_SECTOR_SIZE,
281                                          remaining_sectors * BDRV_SECTOR_SIZE,
282                                          &count);
283         if (ret < 0) {
284             goto out1;
285         }
286
287         assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
288         n = count >> BDRV_SECTOR_BITS;
289         qemu_iovec_reset(&hd_qiov);
290         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
291
292         target = ret ? top : base;
293         ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE,
294                               n * BDRV_SECTOR_SIZE, &hd_qiov, 0);
295         if (ret < 0) {
296             goto out1;
297         }
298
299         remaining_sectors -= n;
300         sector_num += n;
301         bytes_done += count;
302     }
303
304 out1:
305     qemu_iovec_destroy(&hd_qiov);
306 out:
307     return ret;
308 }
309
310 static void GRAPH_UNLOCKED
311 secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
312 {
313     BDRVReplicationState *s = bs->opaque;
314     BdrvChild *active_disk = bs->file;
315     Error *local_err = NULL;
316     int ret;
317
318     GRAPH_RDLOCK_GUARD_MAINLOOP();
319
320     if (!s->backup_job) {
321         error_setg(errp, "Backup job was cancelled unexpectedly");
322         return;
323     }
324
325     backup_do_checkpoint(s->backup_job, &local_err);
326     if (local_err) {
327         error_propagate(errp, local_err);
328         return;
329     }
330
331     if (!active_disk->bs->drv) {
332         error_setg(errp, "Active disk %s is ejected",
333                    active_disk->bs->node_name);
334         return;
335     }
336
337     ret = bdrv_make_empty(active_disk, errp);
338     if (ret < 0) {
339         return;
340     }
341
342     if (!s->hidden_disk->bs->drv) {
343         error_setg(errp, "Hidden disk %s is ejected",
344                    s->hidden_disk->bs->node_name);
345         return;
346     }
347
348     ret = bdrv_make_empty(s->hidden_disk, errp);
349     if (ret < 0) {
350         return;
351     }
352 }
353
354 /* This function is supposed to be called twice:
355  * first with writable = true, then with writable = false.
356  * The first call puts s->hidden_disk and s->secondary_disk in
357  * r/w mode, and the second puts them back in their original state.
358  */
359 static void reopen_backing_file(BlockDriverState *bs, bool writable,
360                                 Error **errp)
361 {
362     BDRVReplicationState *s = bs->opaque;
363     BdrvChild *hidden_disk, *secondary_disk;
364     BlockReopenQueue *reopen_queue = NULL;
365
366     /*
367      * s->hidden_disk and s->secondary_disk may not be set yet, as they will
368      * only be set after the children are writable.
369      */
370     hidden_disk = bs->file->bs->backing;
371     secondary_disk = hidden_disk->bs->backing;
372
373     if (writable) {
374         s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
375         s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
376     }
377
378     if (s->orig_hidden_read_only) {
379         QDict *opts = qdict_new();
380         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
381         reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs,
382                                          opts, true);
383     }
384
385     if (s->orig_secondary_read_only) {
386         QDict *opts = qdict_new();
387         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
388         reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs,
389                                          opts, true);
390     }
391
392     if (reopen_queue) {
393         AioContext *ctx = bdrv_get_aio_context(bs);
394         if (ctx != qemu_get_aio_context()) {
395             aio_context_release(ctx);
396         }
397         bdrv_reopen_multiple(reopen_queue, errp);
398         if (ctx != qemu_get_aio_context()) {
399             aio_context_acquire(ctx);
400         }
401     }
402 }
403
404 static void backup_job_cleanup(BlockDriverState *bs)
405 {
406     BDRVReplicationState *s = bs->opaque;
407     BlockDriverState *top_bs;
408
409     s->backup_job = NULL;
410
411     top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
412     if (!top_bs) {
413         return;
414     }
415     bdrv_op_unblock_all(top_bs, s->blocker);
416     error_free(s->blocker);
417     reopen_backing_file(bs, false, NULL);
418 }
419
420 static void backup_job_completed(void *opaque, int ret)
421 {
422     BlockDriverState *bs = opaque;
423     BDRVReplicationState *s = bs->opaque;
424
425     if (s->stage != BLOCK_REPLICATION_FAILOVER) {
426         /* The backup job is cancelled unexpectedly */
427         s->error = -EIO;
428     }
429
430     backup_job_cleanup(bs);
431 }
432
433 static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
434 {
435     BdrvChild *child;
436
437     /* The bs itself is the top_bs */
438     if (top_bs == bs) {
439         return true;
440     }
441
442     /* Iterate over top_bs's children */
443     QLIST_FOREACH(child, &top_bs->children, next) {
444         if (child->bs == bs || check_top_bs(child->bs, bs)) {
445             return true;
446         }
447     }
448
449     return false;
450 }
451
452 static void replication_start(ReplicationState *rs, ReplicationMode mode,
453                               Error **errp)
454 {
455     BlockDriverState *bs = rs->opaque;
456     BDRVReplicationState *s;
457     BlockDriverState *top_bs;
458     BdrvChild *active_disk, *hidden_disk, *secondary_disk;
459     int64_t active_length, hidden_length, disk_length;
460     AioContext *aio_context;
461     Error *local_err = NULL;
462     BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
463
464     GLOBAL_STATE_CODE();
465
466     aio_context = bdrv_get_aio_context(bs);
467     aio_context_acquire(aio_context);
468     s = bs->opaque;
469
470     if (s->stage == BLOCK_REPLICATION_DONE ||
471         s->stage == BLOCK_REPLICATION_FAILOVER) {
472         /*
473          * This case happens when a secondary is promoted to primary.
474          * Ignore the request because the secondary side of replication
475          * doesn't have to do anything anymore.
476          */
477         aio_context_release(aio_context);
478         return;
479     }
480
481     if (s->stage != BLOCK_REPLICATION_NONE) {
482         error_setg(errp, "Block replication is running or done");
483         aio_context_release(aio_context);
484         return;
485     }
486
487     if (s->mode != mode) {
488         error_setg(errp, "The parameter mode's value is invalid, needs %d,"
489                    " but got %d", s->mode, mode);
490         aio_context_release(aio_context);
491         return;
492     }
493
494     switch (s->mode) {
495     case REPLICATION_MODE_PRIMARY:
496         break;
497     case REPLICATION_MODE_SECONDARY:
498         active_disk = bs->file;
499         if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
500             error_setg(errp, "Active disk doesn't have backing file");
501             aio_context_release(aio_context);
502             return;
503         }
504
505         hidden_disk = active_disk->bs->backing;
506         if (!hidden_disk->bs || !hidden_disk->bs->backing) {
507             error_setg(errp, "Hidden disk doesn't have backing file");
508             aio_context_release(aio_context);
509             return;
510         }
511
512         bdrv_graph_rdlock_main_loop();
513         secondary_disk = hidden_disk->bs->backing;
514         if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
515             error_setg(errp, "The secondary disk doesn't have block backend");
516             bdrv_graph_rdunlock_main_loop();
517             aio_context_release(aio_context);
518             return;
519         }
520         bdrv_graph_rdunlock_main_loop();
521
522         /* verify the length */
523         active_length = bdrv_getlength(active_disk->bs);
524         hidden_length = bdrv_getlength(hidden_disk->bs);
525         disk_length = bdrv_getlength(secondary_disk->bs);
526         if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
527             active_length != hidden_length || hidden_length != disk_length) {
528             error_setg(errp, "Active disk, hidden disk, secondary disk's length"
529                        " are not the same");
530             aio_context_release(aio_context);
531             return;
532         }
533
534         /* Must be true, or the bdrv_getlength() calls would have failed */
535         assert(active_disk->bs->drv && hidden_disk->bs->drv);
536
537         bdrv_graph_rdlock_main_loop();
538         if (!active_disk->bs->drv->bdrv_make_empty ||
539             !hidden_disk->bs->drv->bdrv_make_empty) {
540             error_setg(errp,
541                        "Active disk or hidden disk doesn't support make_empty");
542             aio_context_release(aio_context);
543             bdrv_graph_rdunlock_main_loop();
544             return;
545         }
546         bdrv_graph_rdunlock_main_loop();
547
548         /* reopen the backing file in r/w mode */
549         reopen_backing_file(bs, true, &local_err);
550         if (local_err) {
551             error_propagate(errp, local_err);
552             aio_context_release(aio_context);
553             return;
554         }
555
556         bdrv_graph_wrlock(bs);
557
558         bdrv_ref(hidden_disk->bs);
559         s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
560                                            &child_of_bds, BDRV_CHILD_DATA,
561                                            &local_err);
562         if (local_err) {
563             error_propagate(errp, local_err);
564             bdrv_graph_wrunlock();
565             aio_context_release(aio_context);
566             return;
567         }
568
569         bdrv_ref(secondary_disk->bs);
570         s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
571                                               "secondary disk", &child_of_bds,
572                                               BDRV_CHILD_DATA, &local_err);
573         if (local_err) {
574             error_propagate(errp, local_err);
575             bdrv_graph_wrunlock();
576             aio_context_release(aio_context);
577             return;
578         }
579
580         /* start backup job now */
581         error_setg(&s->blocker,
582                    "Block device is in use by internal backup job");
583
584         top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
585         if (!top_bs || !bdrv_is_root_node(top_bs) ||
586             !check_top_bs(top_bs, bs)) {
587             error_setg(errp, "No top_bs or it is invalid");
588             bdrv_graph_wrunlock();
589             reopen_backing_file(bs, false, NULL);
590             aio_context_release(aio_context);
591             return;
592         }
593         bdrv_op_block_all(top_bs, s->blocker);
594         bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
595
596         bdrv_graph_wrunlock();
597
598         s->backup_job = backup_job_create(
599                                 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
600                                 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
601                                 &perf,
602                                 BLOCKDEV_ON_ERROR_REPORT,
603                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
604                                 backup_job_completed, bs, NULL, &local_err);
605         if (local_err) {
606             error_propagate(errp, local_err);
607             backup_job_cleanup(bs);
608             aio_context_release(aio_context);
609             return;
610         }
611         job_start(&s->backup_job->job);
612         break;
613     default:
614         aio_context_release(aio_context);
615         abort();
616     }
617
618     s->stage = BLOCK_REPLICATION_RUNNING;
619
620     if (s->mode == REPLICATION_MODE_SECONDARY) {
621         secondary_do_checkpoint(bs, errp);
622     }
623
624     s->error = 0;
625     aio_context_release(aio_context);
626 }
627
628 static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
629 {
630     BlockDriverState *bs = rs->opaque;
631     BDRVReplicationState *s;
632     AioContext *aio_context;
633
634     aio_context = bdrv_get_aio_context(bs);
635     aio_context_acquire(aio_context);
636     s = bs->opaque;
637
638     if (s->stage == BLOCK_REPLICATION_DONE ||
639         s->stage == BLOCK_REPLICATION_FAILOVER) {
640         /*
641          * This case happens when a secondary was promoted to primary.
642          * Ignore the request because the secondary side of replication
643          * doesn't have to do anything anymore.
644          */
645         aio_context_release(aio_context);
646         return;
647     }
648
649     if (s->mode == REPLICATION_MODE_SECONDARY) {
650         secondary_do_checkpoint(bs, errp);
651     }
652     aio_context_release(aio_context);
653 }
654
655 static void replication_get_error(ReplicationState *rs, Error **errp)
656 {
657     BlockDriverState *bs = rs->opaque;
658     BDRVReplicationState *s;
659     AioContext *aio_context;
660
661     aio_context = bdrv_get_aio_context(bs);
662     aio_context_acquire(aio_context);
663     s = bs->opaque;
664
665     if (s->stage == BLOCK_REPLICATION_NONE) {
666         error_setg(errp, "Block replication is not running");
667         aio_context_release(aio_context);
668         return;
669     }
670
671     if (s->error) {
672         error_setg(errp, "I/O error occurred");
673         aio_context_release(aio_context);
674         return;
675     }
676     aio_context_release(aio_context);
677 }
678
679 static void replication_done(void *opaque, int ret)
680 {
681     BlockDriverState *bs = opaque;
682     BDRVReplicationState *s = bs->opaque;
683
684     if (ret == 0) {
685         s->stage = BLOCK_REPLICATION_DONE;
686
687         bdrv_graph_wrlock(NULL);
688         bdrv_unref_child(bs, s->secondary_disk);
689         s->secondary_disk = NULL;
690         bdrv_unref_child(bs, s->hidden_disk);
691         s->hidden_disk = NULL;
692         bdrv_graph_wrunlock();
693
694         s->error = 0;
695     } else {
696         s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
697         s->error = -EIO;
698     }
699 }
700
701 static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
702 {
703     BlockDriverState *bs = rs->opaque;
704     BDRVReplicationState *s;
705     AioContext *aio_context;
706
707     aio_context = bdrv_get_aio_context(bs);
708     aio_context_acquire(aio_context);
709     s = bs->opaque;
710
711     if (s->stage == BLOCK_REPLICATION_DONE ||
712         s->stage == BLOCK_REPLICATION_FAILOVER) {
713         /*
714          * This case happens when a secondary was promoted to primary.
715          * Ignore the request because the secondary side of replication
716          * doesn't have to do anything anymore.
717          */
718         aio_context_release(aio_context);
719         return;
720     }
721
722     if (s->stage != BLOCK_REPLICATION_RUNNING) {
723         error_setg(errp, "Block replication is not running");
724         aio_context_release(aio_context);
725         return;
726     }
727
728     switch (s->mode) {
729     case REPLICATION_MODE_PRIMARY:
730         s->stage = BLOCK_REPLICATION_DONE;
731         s->error = 0;
732         break;
733     case REPLICATION_MODE_SECONDARY:
734         /*
735          * This BDS will be closed, and the job should be completed
736          * before the BDS is closed, because we will access hidden
737          * disk, secondary disk in backup_job_completed().
738          */
739         if (s->backup_job) {
740             aio_context_release(aio_context);
741             job_cancel_sync(&s->backup_job->job, true);
742             aio_context_acquire(aio_context);
743         }
744
745         if (!failover) {
746             secondary_do_checkpoint(bs, errp);
747             s->stage = BLOCK_REPLICATION_DONE;
748             aio_context_release(aio_context);
749             return;
750         }
751
752         s->stage = BLOCK_REPLICATION_FAILOVER;
753         s->commit_job = commit_active_start(
754                             NULL, bs->file->bs, s->secondary_disk->bs,
755                             JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
756                             NULL, replication_done, bs, true, errp);
757         break;
758     default:
759         aio_context_release(aio_context);
760         abort();
761     }
762     aio_context_release(aio_context);
763 }
764
765 static const char *const replication_strong_runtime_opts[] = {
766     REPLICATION_MODE,
767     REPLICATION_TOP_ID,
768
769     NULL
770 };
771
772 static BlockDriver bdrv_replication = {
773     .format_name                = "replication",
774     .instance_size              = sizeof(BDRVReplicationState),
775
776     .bdrv_open                  = replication_open,
777     .bdrv_close                 = replication_close,
778     .bdrv_child_perm            = replication_child_perm,
779
780     .bdrv_co_getlength          = replication_co_getlength,
781     .bdrv_co_readv              = replication_co_readv,
782     .bdrv_co_writev             = replication_co_writev,
783
784     .is_filter                  = true,
785
786     .strong_runtime_opts        = replication_strong_runtime_opts,
787 };
788
789 static void bdrv_replication_init(void)
790 {
791     bdrv_register(&bdrv_replication);
792 }
793
794 block_init(bdrv_replication_init);