From f023cc302751d3093d22216c5ecc7fb6aaaec68c Mon Sep 17 00:00:00 2001
From: Giuseppe Di Natale <dinatale2@llnl.gov>
Date: Thu, 29 Jun 2017 16:57:00 -0700
Subject: [PATCH] Enforce request limits on zvols

ZVOLs do not handle heavy random IO workloads well. ZVOLs
should limit the number of outstanding in-flight IO requests.
This should improve performance.

Signed-off-by: Giuseppe Di Natale <dinatale2@llnl.gov>
---
 include/linux/blkdev_compat.h |  6 ++++++
 module/zfs/zvol.c             | 40 +++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
index 428664a0b6b3..5b2f0af8b909 100644
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@@ -139,6 +139,12 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
 #endif
 }
 
+static inline unsigned long
+blk_queue_nr_requests(struct request_queue *q)
+{
+	return (q->nr_requests);
+}
+
 #ifndef HAVE_GET_DISK_RO
 static inline int
 get_disk_ro(struct gendisk *disk)
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 60fab5cc68bf..bbffa5b4e098 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -131,6 +131,10 @@ struct zvol_state {
 	kmutex_t		zv_state_lock;	/* protects zvol_state_t */
 	atomic_t		zv_suspend_ref;	/* refcount for suspend */
 	krwlock_t		zv_suspend_lock;	/* suspend lock */
+	kcondvar_t		zv_write_cv;	/* write queue wait */
+	unsigned long		zv_writes;	/* in-flight writes */
+	kcondvar_t		zv_read_cv;	/* read queue wait */
+	unsigned long		zv_reads;	/* in-flight reads */
 };
 
 typedef enum {
@@ -786,6 +790,11 @@ zvol_write(void *arg)
 	generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif);
 	BIO_END_IO(bio, -error);
 	kmem_free(zvr, sizeof (zv_request_t));
+
+	mutex_enter(&zv->zv_state_lock);
+	zv->zv_writes--;
+	cv_signal(&zv->zv_write_cv);
+	mutex_exit(&zv->zv_state_lock);
 }
 
 /*
@@ -873,6 +882,11 @@ zvol_discard(void *arg)
 	generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif);
 	BIO_END_IO(bio, -error);
 	kmem_free(zvr, sizeof (zv_request_t));
+
+	mutex_enter(&zv->zv_state_lock);
+	zv->zv_writes--;
+	cv_signal(&zv->zv_write_cv);
+	mutex_exit(&zv->zv_state_lock);
 }
 
 static void
@@ -914,6 +928,11 @@ zvol_read(void *arg)
 	generic_end_io_acct(READ, &zv->zv_disk->part0, start_jif);
 	BIO_END_IO(bio, -error);
 	kmem_free(zvr, sizeof (zv_request_t));
+
+	mutex_enter(&zv->zv_state_lock);
+	zv->zv_reads--;
+	cv_signal(&zv->zv_read_cv);
+	mutex_exit(&zv->zv_state_lock);
 }
 
 static MAKE_REQUEST_FN_RET
@@ -962,6 +981,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			goto out;
 		}
 
+		mutex_enter(&zv->zv_state_lock);
+		while (zv->zv_writes >= blk_queue_nr_requests(zv->zv_queue))
+			cv_wait(&zv->zv_write_cv, &zv->zv_state_lock);
+		zv->zv_writes++;
+		mutex_exit(&zv->zv_state_lock);
+
 		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
 		zvr->zv = zv;
 		zvr->bio = bio;
@@ -994,6 +1019,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
 				zvol_write(zvr);
 		}
 	} else {
+		mutex_enter(&zv->zv_state_lock);
+		while (zv->zv_reads >= blk_queue_nr_requests(zv->zv_queue))
+			cv_wait(&zv->zv_read_cv, &zv->zv_state_lock);
+		zv->zv_reads++;
+		mutex_exit(&zv->zv_state_lock);
+
 		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
 		zvr->zv = zv;
 		zvr->bio = bio;
@@ -1655,6 +1686,10 @@ zvol_alloc(dev_t dev, const char *name)
 	list_link_init(&zv->zv_next);
 
 	mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&zv->zv_write_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&zv->zv_read_cv, NULL, CV_DEFAULT, NULL);
+	zv->zv_reads = 0;
+	zv->zv_writes = 0;
 
 	zv->zv_queue = blk_alloc_queue(GFP_ATOMIC);
 	if (zv->zv_queue == NULL)
@@ -1741,7 +1776,12 @@ zvol_free(void *arg)
 
 	ida_simple_remove(&zvol_ida, MINOR(zv->zv_dev) >> ZVOL_MINOR_BITS);
 
+	ASSERT0(zv->zv_reads);
+	ASSERT0(zv->zv_writes);
+
 	mutex_destroy(&zv->zv_state_lock);
+	cv_destroy(&zv->zv_write_cv);
+	cv_destroy(&zv->zv_read_cv);
 
 	kmem_free(zv, sizeof (zvol_state_t));
 }