From f023cc302751d3093d22216c5ecc7fb6aaaec68c Mon Sep 17 00:00:00 2001 From: Giuseppe Di Natale <dinatale2@llnl.gov> Date: Thu, 29 Jun 2017 16:57:00 -0700 Subject: [PATCH] Enforce request limits on zvols ZVOLs do not handle heavy random IO workloads. ZVOLs should limit the number of outstanding in-flight IO requests. This should improve performance. Signed-off-by: Giuseppe Di Natale <dinatale2@llnl.gov> --- include/linux/blkdev_compat.h | 6 ++++++ module/zfs/zvol.c | 40 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 428664a0b6b3..5b2f0af8b909 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -139,6 +139,12 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages) #endif } +static inline unsigned long +blk_queue_nr_requests(struct request_queue *q) +{ + return (q->nr_requests); +} + #ifndef HAVE_GET_DISK_RO static inline int get_disk_ro(struct gendisk *disk) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 60fab5cc68bf..bbffa5b4e098 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -131,6 +131,10 @@ struct zvol_state { kmutex_t zv_state_lock; /* protects zvol_state_t */ atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ + kcondvar_t zv_write_cv; /* write queue wait */ + unsigned long zv_writes; /* in-flight writes */ + kcondvar_t zv_read_cv; /* read queue wait */ + unsigned long zv_reads; /* in-flight reads */ }; typedef enum { @@ -786,6 +790,11 @@ zvol_write(void *arg) generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif); BIO_END_IO(bio, -error); kmem_free(zvr, sizeof (zv_request_t)); + + mutex_enter(&zv->zv_state_lock); + zv->zv_writes--; + cv_signal(&zv->zv_write_cv); + mutex_exit(&zv->zv_state_lock); } /* @@ -873,6 +882,11 @@ zvol_discard(void *arg) generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif); BIO_END_IO(bio, -error); 
kmem_free(zvr, sizeof (zv_request_t)); + + mutex_enter(&zv->zv_state_lock); + zv->zv_writes--; + cv_signal(&zv->zv_write_cv); + mutex_exit(&zv->zv_state_lock); } static void @@ -914,6 +928,11 @@ zvol_read(void *arg) generic_end_io_acct(READ, &zv->zv_disk->part0, start_jif); BIO_END_IO(bio, -error); kmem_free(zvr, sizeof (zv_request_t)); + + mutex_enter(&zv->zv_state_lock); + zv->zv_reads--; + cv_signal(&zv->zv_read_cv); + mutex_exit(&zv->zv_state_lock); } static MAKE_REQUEST_FN_RET @@ -962,6 +981,12 @@ zvol_request(struct request_queue *q, struct bio *bio) goto out; } + mutex_enter(&zv->zv_state_lock); + while (zv->zv_writes >= blk_queue_nr_requests(zv->zv_queue)) + cv_wait(&zv->zv_write_cv, &zv->zv_state_lock); + zv->zv_writes++; + mutex_exit(&zv->zv_state_lock); + zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP); zvr->zv = zv; zvr->bio = bio; @@ -994,6 +1019,12 @@ zvol_request(struct request_queue *q, struct bio *bio) zvol_write(zvr); } } else { + mutex_enter(&zv->zv_state_lock); + while (zv->zv_reads >= blk_queue_nr_requests(zv->zv_queue)) + cv_wait(&zv->zv_read_cv, &zv->zv_state_lock); + zv->zv_reads++; + mutex_exit(&zv->zv_state_lock); + zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP); zvr->zv = zv; zvr->bio = bio; @@ -1655,6 +1686,10 @@ zvol_alloc(dev_t dev, const char *name) list_link_init(&zv->zv_next); mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zv->zv_write_cv, NULL, CV_DEFAULT, NULL); + cv_init(&zv->zv_read_cv, NULL, CV_DEFAULT, NULL); + zv->zv_reads = 0; + zv->zv_writes = 0; zv->zv_queue = blk_alloc_queue(GFP_ATOMIC); if (zv->zv_queue == NULL) @@ -1741,7 +1776,12 @@ zvol_free(void *arg) ida_simple_remove(&zvol_ida, MINOR(zv->zv_dev) >> ZVOL_MINOR_BITS); + ASSERT0(zv->zv_reads); + ASSERT0(zv->zv_writes); + mutex_destroy(&zv->zv_state_lock); + cv_destroy(&zv->zv_write_cv); + cv_destroy(&zv->zv_read_cv); kmem_free(zv, sizeof (zvol_state_t)); }