Skip to content

Commit

Permalink
zvol: Support blk-mq for better performance
Browse files Browse the repository at this point in the history
Add support for the kernel's block multiqueue (blk-mq) interface in
the zvol block driver.  blk-mq creates multiple request queues on
different CPUs rather than having a single request queue.  This can
improve zvol performance with multithreaded reads/writes.

This implementation uses the blk-mq interfaces on 4.13 or newer
kernels.  Building against older kernels will fall back to the
older BIO interfaces.

Note that you must set the `zvol_use_blk_mq` module param to
enable the blk-mq API.  It is disabled by default.

Signed-off-by: Tony Hutter <[email protected]>
Issue #12483
  • Loading branch information
tonyhutter committed Jan 6, 2022
1 parent 1135d0a commit 2c5b268
Show file tree
Hide file tree
Showing 12 changed files with 814 additions and 110 deletions.
32 changes: 32 additions & 0 deletions config/kernel-blk-queue.m4
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
])
])

dnl #
dnl # See if kernel supports block multi-queue and blk_status_t.
dnl # blk_status_t represents the new status codes introduced in the 4.13
dnl # kernel patch:
dnl #
dnl # block: introduce new block status code type
dnl #
dnl # We do not currently support the "old" block multi-queue interfaces from
dnl # prior kernels.
dnl #
dnl # ZFS_AC_KERNEL_SRC_BLK_MQ supplies the source for the "blk_mq" test:
dnl # it includes <linux/blk-mq.h>, calls blk_mq_alloc_tag_set() on a
dnl # zero-initialized struct blk_mq_tag_set, and returns BLK_STS_OK.  The
dnl # snippet therefore references both the blk-mq tag-set API and the
dnl # blk_status_t status codes, so it can only build when both exist.
dnl # $NO_UNUSED_BUT_SET_VARIABLE is handed to ZFS_LINUX_TEST_SRC as its
dnl # last argument (presumably extra compiler flags -- confirm against
dnl # the ZFS_LINUX_TEST_SRC definition).
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
ZFS_LINUX_TEST_SRC([blk_mq], [
#include <linux/blk-mq.h>
], [
struct blk_mq_tag_set tag_set = {0};
(void) blk_mq_alloc_tag_set(&tag_set);
return BLK_STS_OK;
], [$NO_UNUSED_BUT_SET_VARIABLE])
])

dnl #
dnl # Report the result of the "blk_mq" test.  When the test passed, print
dnl # "yes" and define HAVE_BLK_MQ to 1; otherwise print "no" and define
dnl # nothing.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available])
ZFS_LINUX_TEST_RESULT([blk_mq], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
], [
AC_MSG_RESULT(no)
])
])

AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
Expand All @@ -326,6 +356,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
ZFS_AC_KERNEL_SRC_BLK_MQ
])

AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
Expand All @@ -339,4 +370,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
ZFS_AC_KERNEL_BLK_MQ
])
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/zpool_influxdb/Makefile
tests/zfs-tests/tests/functional/zvol/Makefile
tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/Makefile
tests/zfs-tests/tests/functional/zvol/zvol_stress/Makefile
tests/zfs-tests/tests/functional/zvol/zvol_cli/Makefile
tests/zfs-tests/tests/functional/zvol/zvol_misc/Makefile
tests/zfs-tests/tests/functional/zvol/zvol_swap/Makefile
Expand Down
18 changes: 11 additions & 7 deletions include/os/linux/kernel/linux/blkdev_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,30 +230,34 @@ errno_to_bi_status(int error)
#ifdef HAVE_BIO_BI_STATUS
#define BIO_END_IO_ERROR(bio) bi_status_to_errno(bio->bi_status)
#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x)
#define BIO_END_IO(bio, error) bio_set_bi_status(bio, error)
#define BIO_END_IO(bio, error, finalize) bio_set_bi_status(bio, error, \
finalize)
static inline void
bio_set_bi_status(struct bio *bio, int error)
bio_set_bi_status(struct bio *bio, int error, int finalize)
{
ASSERT3S(error, <=, 0);
bio->bi_status = errno_to_bi_status(-error);
bio_endio(bio);
if (finalize)
bio_endio(bio);
}
#else
#define BIO_END_IO_ERROR(bio) (-(bio->bi_error))
#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x)
#define BIO_END_IO(bio, error) bio_set_bi_error(bio, error)
#define BIO_END_IO(bio, error, finalize) bio_set_bi_error(bio, error, \
finalize)
static inline void
bio_set_bi_error(struct bio *bio, int error)
bio_set_bi_error(struct bio *bio, int error, int finalize)
{
ASSERT3S(error, <=, 0);
bio->bi_error = error;
bio_endio(bio);
if (finalize)
bio_endio(bio);
}
#endif /* HAVE_BIO_BI_STATUS */

#else
#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x, int z)
#define BIO_END_IO(bio, error) bio_endio(bio, error);
#define BIO_END_IO(bio, error, finalize) bio_endio(bio, error);
#endif /* HAVE_1ARG_BIO_END_IO_T */

/*
Expand Down
52 changes: 49 additions & 3 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -2184,9 +2184,55 @@ for each I/O submitter.
When unset, requests are handled asynchronously by a thread pool.
The number of requests which can be handled concurrently is controlled by
.Sy zvol_threads .
.
.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
Max number of threads which can handle zvol I/O requests concurrently.
.Sy zvol_request_sync
is ignored when running on a kernel that supports block multiqueue
.Pq Li blk-mq .
.
.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
The number of threads to use for processing zvol block IOs.
On older
.No non- Ns Li blk-mq
kernels,
.Sy zvol_threads
is the total number of threads to use for all zvols.
On kernels that support
.Li blk-mq ,
.Sy zvol_threads
is the total number of threads per zvol.
If set to
.Sy 0
(the default),
.Sy zvol_threads
is internally set to the number of CPUs present.
.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
Set to
.Sy 1
to use the
.Li blk-mq
API for zvols.
Set to
.Sy 0
(the default) to use the legacy zvol APIs.
This setting can give better or worse zvol performance depending on
the workload.
This parameter will only appear if your kernel supports
.Li blk-mq
and is only read and assigned to a zvol at zvol load time.
.
.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
The queue_depth value for the zvol
.Li blk-mq
interface.
This parameter will only appear if your kernel supports
.Li blk-mq
and is only read at zvol load time.
If set to
.Sy 0
(the default), the kernel's default queue depth is used.
If you set
.Sy zvol_blk_mq_queue_depth
lower than the kernel's minimum queue depth, it will be internally
raised to the kernel's minimum queue depth (currently 4 on 5.15 kernels).
.
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
Defines zvol block devices behaviour when
Expand Down
Loading

0 comments on commit 2c5b268

Please sign in to comment.