Skip to content

Commit

Permalink
block: Fix AioContext switch for bs->drv == NULL
Browse files Browse the repository at this point in the history
Even for block nodes with bs->drv == NULL, we can't just ignore a
bdrv_set_aio_context() call. Leaving the node in its old context can
mean that it's still in an iothread context in bdrv_close_all() during
shutdown, resulting in an attempted unlock of the AioContext lock which
we don't hold.

This is an example stack trace of a related crash:

 #0  0x00007ffff59da57f in raise () at /lib64/libc.so.6
 #1  0x00007ffff59c4895 in abort () at /lib64/libc.so.6
 #2  0x0000555555b97b1e in error_exit (err=<optimized out>, msg=msg@entry=0x555555d386d0 <__func__.19059> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36
 qemu#3  0x0000555555b97f7f in qemu_mutex_unlock_impl (mutex=mutex@entry=0x5555568002f0, file=file@entry=0x555555d378df "util/async.c", line=line@entry=507) at util/qemu-thread-posix.c:97
 qemu#4  0x0000555555b92f55 in aio_context_release (ctx=ctx@entry=0x555556800290) at util/async.c:507
 qemu#5  0x0000555555b05cf8 in bdrv_prwv_co (child=child@entry=0x7fffc80012f0, offset=offset@entry=131072, qiov=qiov@entry=0x7fffffffd4f0, is_write=is_write@entry=true, flags=flags@entry=0)
         at block/io.c:833
 qemu#6  0x0000555555b060a9 in bdrv_pwritev (qiov=0x7fffffffd4f0, offset=131072, child=0x7fffc80012f0) at block/io.c:990
 qemu#7  0x0000555555b060a9 in bdrv_pwrite (child=0x7fffc80012f0, offset=131072, buf=<optimized out>, bytes=<optimized out>) at block/io.c:990
 qemu#8  0x0000555555ae172b in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568cc740, i=i@entry=0) at block/qcow2-cache.c:51
 qemu#9  0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568cc740) at block/qcow2-cache.c:248
 qemu#10 0x0000555555ae15de in qcow2_cache_flush (bs=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259
 qemu#11 0x0000555555ae16b1 in qcow2_cache_flush_dependency (c=0x5555568a1700, c=0x5555568a1700, bs=0x555556810680) at block/qcow2-cache.c:194
 qemu#12 0x0000555555ae16b1 in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568a1700, i=i@entry=0) at block/qcow2-cache.c:194
 qemu#13 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568a1700) at block/qcow2-cache.c:248
 qemu#14 0x0000555555ae15de in qcow2_cache_flush (bs=bs@entry=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259
 qemu#15 0x0000555555ad242c in qcow2_inactivate (bs=bs@entry=0x555556810680) at block/qcow2.c:2124
 qemu#16 0x0000555555ad2590 in qcow2_close (bs=0x555556810680) at block/qcow2.c:2153
 qemu#17 0x0000555555ab0c62 in bdrv_close (bs=0x555556810680) at block.c:3358
 qemu#18 0x0000555555ab0c62 in bdrv_delete (bs=0x555556810680) at block.c:3542
 qemu#19 0x0000555555ab0c62 in bdrv_unref (bs=0x555556810680) at block.c:4598
 qemu#20 0x0000555555af4d72 in blk_remove_bs (blk=blk@entry=0x5555568103d0) at block/block-backend.c:785
 qemu#21 0x0000555555af4dbb in blk_remove_all_bs () at block/block-backend.c:483
 qemu#22 0x0000555555aae02f in bdrv_close_all () at block.c:3412
 qemu#23 0x00005555557f9796 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4776

The reproducer I used is a qcow2 image on gluster volume, where the
virtual disk size (4 GB) is larger than the gluster volume size (64M),
so we can easily trigger an ENOSPC. This backend is assigned to a
virtio-blk device using an iothread, and then from the guest a
'dd if=/dev/zero of=/dev/vda bs=1G count=1' causes the VM to stop
because of an I/O error. qemu_gluster_co_flush_to_disk() sets
bs->drv = NULL on error, so when virtio-blk stops the dataplane, the
block nodes stay in the iothread AioContext. A 'quit' monitor command
issued from this paused state crashes the process.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1631227
Cc: [email protected]
Signed-off-by: Kevin Wolf <[email protected]>
Reviewed-by: Eric Blake <[email protected]>
Reviewed-by: Max Reitz <[email protected]>
Reviewed-by: Stefano Garzarella <[email protected]>
  • Loading branch information
kevmw committed Apr 30, 2019
1 parent 36b9986 commit 1bffe1a
Showing 1 changed file with 2 additions and 10 deletions.
12 changes: 2 additions & 10 deletions block.c
Original file line number Diff line number Diff line change
Expand Up @@ -5672,10 +5672,6 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
BdrvAioNotifier *baf, *baf_tmp;
BdrvChild *child;

if (!bs->drv) {
return;
}

assert(!bs->walking_aio_notifiers);
bs->walking_aio_notifiers = true;
QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
Expand All @@ -5690,7 +5686,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
*/
bs->walking_aio_notifiers = false;

if (bs->drv->bdrv_detach_aio_context) {
if (bs->drv && bs->drv->bdrv_detach_aio_context) {
bs->drv->bdrv_detach_aio_context(bs);
}
QLIST_FOREACH(child, &bs->children, next) {
Expand All @@ -5709,10 +5705,6 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
BdrvAioNotifier *ban, *ban_tmp;
BdrvChild *child;

if (!bs->drv) {
return;
}

if (bs->quiesce_counter) {
aio_disable_external(new_context);
}
Expand All @@ -5722,7 +5714,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
QLIST_FOREACH(child, &bs->children, next) {
bdrv_attach_aio_context(child->bs, new_context);
}
if (bs->drv->bdrv_attach_aio_context) {
if (bs->drv && bs->drv->bdrv_attach_aio_context) {
bs->drv->bdrv_attach_aio_context(bs, new_context);
}

Expand Down

0 comments on commit 1bffe1a

Please sign in to comment.