Skip to content

Commit

Permalink
Fix synchronous behavior in __vdev_disk_physio()
Browse files Browse the repository at this point in the history
Commit b39c22b set the READ_SYNC and WRITE_SYNC flags for a bio
based on the ZIO_PRIORITY_* flag passed in.  This had the unnoticed
side-effect of making the vdev_disk_io_start() synchronous for
certain I/Os.

This in turn resulted in vdev_disk_io_start() being able to
re-dispatch zio's which would result in a RCU stalls when a disk
was removed from the system.  Additionally, this could negatively
impact performance and may explain the performance regressions
reported in both openzfs#3829 and openzfs#3780.

This patch resolves the issue by making the blocking behavior
dependant on a 'wait' flag being passed rather than overloading
the passed bio flags.

Finally, the WRITE_SYNC and READ_SYNC behavior is restricted to
non-rotational devices where there is no benefit to queuing to
aggregate the I/O.

Signed-off-by: Brian Behlendorf <[email protected]>
Issue openzfs#3780
Issue openzfs#3829
Issue openzfs#3652
  • Loading branch information
behlendorf committed Sep 24, 2015
1 parent 56b3986 commit 393ee23
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 81 deletions.
50 changes: 0 additions & 50 deletions config/kernel-bio-rw-syncio.m4

This file was deleted.

3 changes: 0 additions & 3 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
ZFS_AC_KERNEL_BIO_RW_BARRIER
ZFS_AC_KERNEL_BIO_RW_DISCARD
ZFS_AC_KERNEL_BIO_RW_SYNC
ZFS_AC_KERNEL_BIO_RW_SYNCIO
ZFS_AC_KERNEL_REQ_SYNC
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
Expand Down
36 changes: 8 additions & 28 deletions module/zfs/vdev_disk.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,27 +369,6 @@ vdev_disk_dio_free(dio_request_t *dr)
sizeof (struct bio *) * dr->dr_bio_count);
}

static int
vdev_disk_dio_is_sync(dio_request_t *dr)
{
#ifdef HAVE_BIO_RW_SYNC
/* BIO_RW_SYNC preferred interface from 2.6.12-2.6.29 */
return (dr->dr_rw & (1 << BIO_RW_SYNC));
#else
#ifdef HAVE_BIO_RW_SYNCIO
/* BIO_RW_SYNCIO preferred interface from 2.6.30-2.6.35 */
return (dr->dr_rw & (1 << BIO_RW_SYNCIO));
#else
#ifdef HAVE_REQ_SYNC
/* REQ_SYNC preferred interface from 2.6.36-2.6.xx */
return (dr->dr_rw & REQ_SYNC);
#else
#error "Unable to determine bio sync flag"
#endif /* HAVE_REQ_SYNC */
#endif /* HAVE_BIO_RW_SYNC */
#endif /* HAVE_BIO_RW_SYNCIO */
}

static void
vdev_disk_dio_get(dio_request_t *dr)
{
Expand Down Expand Up @@ -444,7 +423,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, error)
rc = vdev_disk_dio_put(dr);

/* Wake up synchronous waiter this is the last outstanding bio */
if ((rc == 1) && vdev_disk_dio_is_sync(dr))
if (rc == 1)
complete(&dr->dr_comp);

BIO_END_IO_RETURN(0);
Expand Down Expand Up @@ -514,7 +493,7 @@ vdev_submit_bio(int rw, struct bio *bio)

static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
size_t kbuf_size, uint64_t kbuf_offset, int flags)
size_t kbuf_size, uint64_t kbuf_offset, int flags, int wait)
{
dio_request_t *dr;
caddr_t bio_ptr;
Expand Down Expand Up @@ -605,7 +584,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
* only synchronous consumer is vdev_disk_read_rootlabel() all other
* IO originating from vdev_disk_io_start() is asynchronous.
*/
if (vdev_disk_dio_is_sync(dr)) {
if (wait) {
wait_for_completion(&dr->dr_comp);
error = dr->dr_error;
ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
Expand All @@ -621,7 +600,7 @@ vdev_disk_physio(struct block_device *bdev, caddr_t kbuf,
size_t size, uint64_t offset, int flags)
{
bio_set_flags_failfast(bdev, &flags);
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags));
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags, 1));
}

BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc)
Expand Down Expand Up @@ -672,6 +651,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
zio_priority_t pri = zio->io_priority;
int flags, error;

switch (zio->io_type) {
Expand Down Expand Up @@ -711,14 +691,14 @@ vdev_disk_io_start(zio_t *zio)
zio_execute(zio);
return;
case ZIO_TYPE_WRITE:
if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot))
flags = WRITE_SYNC;
else
flags = WRITE;
break;

case ZIO_TYPE_READ:
if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot))
flags = READ_SYNC;
else
flags = READ;
Expand All @@ -731,7 +711,7 @@ vdev_disk_io_start(zio_t *zio)
}

error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
zio->io_size, zio->io_offset, flags);
zio->io_size, zio->io_offset, flags, 0);
if (error) {
zio->io_error = error;
zio_interrupt(zio);
Expand Down

0 comments on commit 393ee23

Please sign in to comment.