Introduce module parameters for handling IO timeouts (WIP) #2387

Status: Closed (wants to merge 1 commit)
include/sys/vdev_disk.h (9 additions, 0 deletions)

@@ -31,10 +31,19 @@
 #ifdef _KERNEL
 #include <sys/vdev.h>
 
+#ifdef __linux__
+#include <linux/blkdev.h>
+#endif
+
 typedef struct vdev_disk {
         ddi_devid_t vd_devid;
         char *vd_minor;
+#ifdef __linux__
         struct block_device *vd_bdev;
+        rq_timed_out_fn *vd_rq_timed_out_fn;
+        unsigned int vd_rq_timeout;
+        atomic_t vd_rq_timeout_ticks;
+#endif
 } vdev_disk_t;
 
 extern int vdev_disk_physio(struct block_device *, caddr_t,
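The new fields pair up: vd_rq_timeout and vd_rq_timed_out_fn hold the queue's original settings so they can be restored when the vdev is closed, while vd_rq_timeout_ticks counts consecutive timeouts. A minimal userspace sketch of the save/override/restore idea behind the first two fields, using hypothetical stand-in types that are not part of the patch:

#include <stdio.h>

typedef int (timeout_fn)(void *req);

struct queue {                          /* stand-in for request_queue */
        unsigned int timeout;
        timeout_fn *timed_out_fn;
};

struct vdev {                           /* stand-in for vdev_disk_t */
        unsigned int saved_timeout;
        timeout_fn *saved_timed_out_fn;
};

static int
my_handler(void *req)
{
        (void) req;
        return (0);
}

static void
open_vdev(struct vdev *vd, struct queue *q)
{
        vd->saved_timeout = q->timeout;         /* remember originals... */
        vd->saved_timed_out_fn = q->timed_out_fn;
        q->timeout = 250;                       /* ...then install ours */
        q->timed_out_fn = my_handler;
}

static void
close_vdev(struct vdev *vd, struct queue *q)
{
        q->timeout = vd->saved_timeout;         /* put originals back */
        q->timed_out_fn = vd->saved_timed_out_fn;
}

int main(void)
{
        struct queue q = { 30, NULL };
        struct vdev vd;

        open_vdev(&vd, &q);
        close_vdev(&vd, &q);
        printf("restored timeout: %u\n", q.timeout);
        return (0);
}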
man/man5/zfs-module-parameters.5 (22 additions, 0 deletions)

@@ -1014,6 +1014,28 @@ Total size of the per-disk cache
 Default value: \fB0\fR.
 .RE
 
+.sp
+.ne 2
+.na
+\fBzfs_vdev_disk_timeout_ms\fR (uint)
+.ad
+.RS 12n
+At vdev open time, ZFS configures the block layer to time out IOs to this device after this many milliseconds.
+.sp
+Default value: \fB1000\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBzfs_vdev_disk_timeout_ticks\fR (uint)
+.ad
+.RS 12n
+Number of consecutive IO timeouts tolerated before the block layer marks the device as bad.
+.sp
+Default value: \fB30\fR.
+.RE
+
 .sp
 .ne 2
 .na
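Taken together, the two defaults imply a worst-case detection latency of roughly 30 seconds: a device must miss 30 consecutive 1000 ms deadlines before it is marked bad. A trivial check of that arithmetic, using only the default values above:

#include <stdio.h>

int main(void)
{
        unsigned int timeout_ms = 1000; /* zfs_vdev_disk_timeout_ms */
        unsigned int ticks = 30;        /* zfs_vdev_disk_timeout_ticks */

        /* 30 consecutive 1000 ms timeouts => ~30 s until marked bad */
        printf("worst-case detection: ~%u ms\n", timeout_ms * ticks);
        return (0);
}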
module/zfs/vdev_disk.c (50 additions, 2 deletions)

@@ -36,6 +36,8 @@

 char *zfs_vdev_scheduler = VDEV_SCHEDULER;
 static void *zfs_vdev_holder = VDEV_HOLDER;
+static unsigned int vdev_disk_timeout_ms = 1000;
+static unsigned int vdev_disk_timeout_ticks = 30;
 
 /*
  * Virtual device vector for disks.
@@ -106,6 +108,24 @@ vdev_disk_error(zio_t *zio)
 #endif
 }
 
+static enum blk_eh_timer_return
+vdev_disk_time_out_handler(struct request *req)
+{
+        dio_request_t *dr = req->bio->bi_private;
+        vdev_disk_t *vd = dr->dr_zio->io_vd->vdev_tsd;
+
+        dr->dr_error = ETIME;
+
+        if (atomic_inc_return(&vd->vd_rq_timeout_ticks)
+            > vdev_disk_timeout_ticks)
+                return (BLK_EH_HANDLED);
+
+        if (vd->vd_rq_timed_out_fn)
+                return (vd->vd_rq_timed_out_fn(req));
+
+        return (BLK_EH_NOT_HANDLED);
+}
+
 /*
  * Use the Linux 'noop' elevator for zfs managed block devices. This
  * strikes the ideal balance by allowing the zfs elevator to do all
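For context: the block layer invokes a queue's rq_timed_out_fn when a request has been outstanding longer than rq_timeout. On kernels of this era the handler returns BLK_EH_HANDLED to say the driver has dealt with the request, or BLK_EH_NOT_HANDLED to fall back to normal timeout processing. The handler above gives up on the device only after a run of consecutive timeouts; a successful completion (see the completion hunk below) resets the streak. A minimal userspace sketch of that accounting, using C11 atomics in place of the kernel's atomic_t, with hypothetical names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static unsigned int timeout_ticks_limit = 30;   /* zfs_vdev_disk_timeout_ticks */

struct fake_vdev {
        atomic_uint timeout_ticks;      /* consecutive timeouts so far */
};

/* IO timed out: bump the streak; true means "give up on the device". */
static bool
on_io_timeout(struct fake_vdev *vd)
{
        /* atomic_fetch_add() returns the old value, so add 1 back,
         * mirroring the kernel's atomic_inc_return() */
        return (atomic_fetch_add(&vd->timeout_ticks, 1) + 1 >
            timeout_ticks_limit);
}

/* IO completed cleanly: the streak is broken. */
static void
on_io_success(struct fake_vdev *vd)
{
        atomic_store(&vd->timeout_ticks, 0);
}

int main(void)
{
        struct fake_vdev vd = { 0 };

        /* 30 timeouts are tolerated; the 31st crosses the limit. */
        for (unsigned int i = 0; i < 31; i++)
                if (on_io_timeout(&vd))
                        printf("limit crossed at timeout %u\n", i + 1);
        on_io_success(&vd);
        return (0);
}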
@@ -294,6 +314,14 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
         v->vdev_tsd = vd;
         vd->vd_bdev = bdev;
 
+        /* Save the queue's IO timeout settings and install our own */
+        vd->vd_rq_timeout = bdev->bd_queue->rq_timeout;
+        bdev->bd_queue->rq_timeout = vdev_disk_timeout_ms * HZ / 1000;
+
+        vd->vd_rq_timed_out_fn = bdev->bd_queue->rq_timed_out_fn;
+        bdev->bd_queue->rq_timed_out_fn = &vdev_disk_time_out_handler;
+
+        atomic_set(&vd->vd_rq_timeout_ticks, 0);
 skip_open:
         /* Determine the physical block size */
         block_size = vdev_bdev_block_size(vd->vd_bdev);
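One detail worth noting in the hunk above: vdev_disk_timeout_ms * HZ / 1000 converts milliseconds to jiffies with truncating integer division (in-kernel, the helper msecs_to_jiffies() does the same job with rounding and overflow handling). A userspace illustration of the conversion, with a hypothetical HZ value:

#include <stdio.h>

#define HZ 250  /* hypothetical CONFIG_HZ; common values are 100/250/1000 */

int main(void)
{
        unsigned int timeout_ms = 1000; /* zfs_vdev_disk_timeout_ms */

        /* 1000 ms * 250 ticks/s / 1000 ms/s = 250 jiffies */
        printf("rq_timeout = %u jiffies\n", timeout_ms * HZ / 1000);
        return (0);
}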
@@ -320,13 +348,19 @@ static void
 vdev_disk_close(vdev_t *v)
 {
         vdev_disk_t *vd = v->vdev_tsd;
+        struct block_device *bdev;
 
         if (v->vdev_reopening || vd == NULL)
                 return;
 
-        if (vd->vd_bdev != NULL)
-                vdev_bdev_close(vd->vd_bdev,
+        bdev = vd->vd_bdev;
+        if (vd->vd_bdev != NULL) {
+                bdev->bd_queue->rq_timeout = vd->vd_rq_timeout;
+                bdev->bd_queue->rq_timed_out_fn = vd->vd_rq_timed_out_fn;
+
+                vdev_bdev_close(bdev,
                     vdev_bdev_mode(spa_mode(v->vdev_spa)));
+        }
 
         kmem_free(vd, sizeof (vdev_disk_t));
         v->vdev_tsd = NULL;
@@ -446,6 +480,12 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, error)
         if (dr->dr_error == 0)
                 dr->dr_error = -error;
 
+        /* A successful completion resets the timeout tick count */
+        if (dr->dr_error == 0) {
+                vdev_disk_t *vd = dr->dr_zio->io_vd->vdev_tsd;
+                atomic_set(&vd->vd_rq_timeout_ticks, 0);
+        }
+
         /* Drop reference acquired by __vdev_disk_physio */
         rc = vdev_disk_dio_put(dr);
 
@@ -845,5 +885,13 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
         return (0);
 }
 
+module_param_named(zfs_vdev_disk_timeout_ms, vdev_disk_timeout_ms, uint, 0644);
+MODULE_PARM_DESC(zfs_vdev_disk_timeout_ms, "Disk IO timeout in milliseconds");
+
+module_param_named(zfs_vdev_disk_timeout_ticks, vdev_disk_timeout_ticks, uint,
+    0644);
+MODULE_PARM_DESC(zfs_vdev_disk_timeout_ticks, "Consecutive timeout limit");
+
+
 module_param(zfs_vdev_scheduler, charp, 0644);
 MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
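Since both parameters are registered with mode 0644, they would surface as root-writable nodes under /sys/module/zfs/parameters/ once a module built with this patch is loaded. A small sketch of reading one of them at runtime (the path assumes this WIP patch were merged):

#include <stdio.h>

int main(void)
{
        /* Writable at runtime too, e.g. via a root shell redirect. */
        const char *p =
            "/sys/module/zfs/parameters/zfs_vdev_disk_timeout_ms";
        unsigned int ms;
        FILE *f = fopen(p, "r");

        if (f == NULL) {
                perror(p);
                return (1);
        }
        if (fscanf(f, "%u", &ms) == 1)
                printf("zfs_vdev_disk_timeout_ms = %u\n", ms);
        fclose(f);
        return (0);
}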