From 6c2fc5691615e478d1360101b3ec22906e8ce7a8 Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Tue, 1 Aug 2023 11:27:58 -0400
Subject: [PATCH 01/37] Linux 6.5 compat: register_sysctl_table removed

Additionally, the .child element of ctl_table has been removed in 6.5.
This change adds a new test for the pre-6.5 register_sysctl_table()
function, and uses the old code in that case. If it isn't found, then
the parentage entries in the tables are removed, and the register_sysctl
call is provided the paths of "kernel/spl", "kernel/spl/kmem", and
"kernel/spl/kstat" directly, to populate each subdirectory over three
calls, as is the new API.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15098
---
 config/kernel-register_sysctl_table.m4 | 27 ++++++++++++++++++++++++++
 config/kernel.m4                       |  2 ++
 module/os/linux/spl/spl-proc.c         | 26 ++++++++++++++++++++++---
 3 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 config/kernel-register_sysctl_table.m4

diff --git a/config/kernel-register_sysctl_table.m4 b/config/kernel-register_sysctl_table.m4
new file mode 100644
index 000000000000..f18316b32b6d
--- /dev/null
+++ b/config/kernel-register_sysctl_table.m4
@@ -0,0 +1,27 @@
+dnl #
+dnl # Linux 6.5 removes register_sysctl_table
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
+	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
+		#include <linux/sysctl.h>
+
+		static struct ctl_table dummy_table[] = {
+			{}
+		};
+
+    ],[
+		struct ctl_table_header *h
+			__attribute((unused)) = register_sysctl_table(dummy_table);
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
+	AC_MSG_CHECKING([whether register_sysctl_table exists])
+	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_SYSCTL_REGISTER_TABLE, 1,
+			[sysctl_register_table exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 1487fa2e7793..28bd361d33ff 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -160,6 +160,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_FILEMAP
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
 	ZFS_AC_KERNEL_SRC_RECLAIMED
+	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -299,6 +300,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_FILEMAP
 	ZFS_AC_KERNEL_WRITEPAGE_T
 	ZFS_AC_KERNEL_RECLAIMED
+	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index 01f5619e1893..bcc356ae55b6 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -624,6 +624,7 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dohostid,
 	},
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	{
 		.procname	= "kmem",
 		.mode		= 0555,
@@ -634,9 +635,11 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0555,
 		.child		= spl_kstat_table,
 	},
+#endif
 	{},
 };
 
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 static struct ctl_table spl_dir[] = {
 	{
 		.procname	= "spl",
@@ -648,21 +651,38 @@ static struct ctl_table spl_dir[] = {
 
 static struct ctl_table spl_root[] = {
 	{
-	.procname = "kernel",
-	.mode = 0555,
-	.child = spl_dir,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= spl_dir,
 	},
 	{}
 };
+#endif
 
 int
 spl_proc_init(void)
 {
 	int rc = 0;
 
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	spl_header = register_sysctl_table(spl_root);
 	if (spl_header == NULL)
 		return (-EUNATCH);
+#else
+	spl_header = register_sysctl("kernel/spl", spl_table);
+	if (spl_header == NULL)
+		return (-EUNATCH);
+
+	if (register_sysctl("kernel/spl/kmem", spl_kmem_table) == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+
+	if (register_sysctl("kernel/spl/kstat", spl_kstat_table) == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+#endif
 
 	proc_spl = proc_mkdir("spl", NULL);
 	if (proc_spl == NULL) {

From c0f075c06b914b02e175f8de670b7b440630c7bc Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Tue, 1 Aug 2023 11:32:38 -0400
Subject: [PATCH 02/37] Linux 6.5 compat: use disk_check_media_change when it
 exists

When disk_check_media_change() exists, then define
zfs_check_media_change() to simply call disk_check_media_change() on
the bd_disk member of its argument. Since disk_check_media_change()
is newer than when revalidate_disk was present in bops, we should
be able to safely do this via a macro, instead of recreating a new
implementation of the inline function that forces revalidation.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15101
---
 include/os/linux/kernel/linux/blkdev_compat.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h
index e0f20ba32008..1641dd92a918 100644
--- a/include/os/linux/kernel/linux/blkdev_compat.h
+++ b/include/os/linux/kernel/linux/blkdev_compat.h
@@ -347,6 +347,7 @@ zfs_check_media_change(struct block_device *bdev)
 #define	vdev_bdev_reread_part(bdev)	zfs_check_media_change(bdev)
 #elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
 #define	vdev_bdev_reread_part(bdev)	disk_check_media_change(bdev->bd_disk)
+#define	zfs_check_media_change(bdev)	disk_check_media_change(bdev->bd_disk)
 #else
 /*
  * This is encountered if check_disk_change() and bdev_check_media_change()

From d76de9fb170d58f81edac5729c365fc3fd60a22f Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Tue, 1 Aug 2023 11:37:20 -0400
Subject: [PATCH 03/37] Linux 6.5 compat: blkdev changes

Multiple changes to the blkdev API were introduced in Linux 6.5. This
includes passing (void* holder) to blkdev_put, adding a new
blk_holder_ops* arg to blkdev_get_by_path, adding a new blk_mode_t type
that replaces uses of fmode_t, and removing an argument from the release
handler on block_device_operations that we weren't using. The open
function definition has also changed to take gendisk* and blk_mode_t, so
update it accordingly, too.

Implement local wrappers for blkdev_get_by_path() and
vdev_blkdev_put() so that the in-line calls are cleaner, and place the
conditionally-compiled implementation details inside of both of these
local wrappers. Both calls are exclusively used within vdev_disk.c, at
this time.

Add blk_mode_is_open_write() to test FMODE_WRITE / BLK_OPEN_WRITE
The wrapper function is now used for testing using the appropriate
method for the kernel, whether the open mode is writable or not.

Emphasize fmode_t arg in zvol_release is not used

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15099
---
 config/kernel-blkdev.m4                       | 84 ++++++++++++++++++-
 config/kernel-block-device-operations.m4      | 35 +++++++-
 include/os/linux/kernel/linux/blkdev_compat.h |  6 ++
 module/os/linux/zfs/vdev_disk.c               | 65 ++++++++++++--
 module/os/linux/zfs/zfs_vnops_os.c            |  2 +-
 module/os/linux/zfs/zpl_ctldir.c              |  2 +-
 module/os/linux/zfs/zvol_os.c                 | 28 ++++++-
 7 files changed, 203 insertions(+), 19 deletions(-)

diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4
index 887acee670ba..e04a2bd2c3b6 100644
--- a/config/kernel-blkdev.m4
+++ b/config/kernel-blkdev.m4
@@ -16,12 +16,63 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [
 	])
 ])
 
+dnl #
+dnl # 6.5.x API change,
+dnl # blkdev_get_by_path() takes 4 args
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [
+	ZFS_LINUX_TEST_SRC([blkdev_get_by_path_4arg], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		const char *path = "path";
+		fmode_t mode = 0;
+		void *holder = NULL;
+		struct blk_holder_ops h;
+
+		bdev = blkdev_get_by_path(path, mode, holder, &h);
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
-	AC_MSG_CHECKING([whether blkdev_get_by_path() exists])
+	AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
 	ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
 		AC_MSG_RESULT(yes)
 	], [
-		ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 4 args])
+		ZFS_LINUX_TEST_RESULT([blkdev_get_by_path_4arg], [
+			AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH_4ARG, 1,
+				[blkdev_get_by_path() exists and takes 4 args])
+			AC_MSG_RESULT(yes)
+		], [
+			ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
+		])
+	])
+])
+
+dnl #
+dnl # 6.5.x API change
+dnl # blk_mode_t was added as a type to supercede some places where fmode_t
+dnl # is used
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T], [
+	ZFS_LINUX_TEST_SRC([blk_mode_t], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		blk_mode_t m __attribute((unused)) = (blk_mode_t)0;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
+	AC_MSG_CHECKING([whether blk_mode_t is defined])
+	ZFS_LINUX_TEST_RESULT([blk_mode_t], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_MODE_T, 1, [blk_mode_t is defined])
+	], [
+		AC_MSG_RESULT(no)
 	])
 ])
 
@@ -41,12 +92,35 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT], [
 	])
 ])
 
+dnl #
+dnl # 6.5.x API change.
+dnl # blkdev_put() takes (void* holder) as arg 2
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER], [
+	ZFS_LINUX_TEST_SRC([blkdev_put_holder], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		void *holder = NULL;
+
+		blkdev_put(bdev, holder);
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
 	AC_MSG_CHECKING([whether blkdev_put() exists])
 	ZFS_LINUX_TEST_RESULT([blkdev_put], [
 		AC_MSG_RESULT(yes)
 	], [
-		ZFS_LINUX_TEST_ERROR([blkdev_put()])
+		AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
+		ZFS_LINUX_TEST_RESULT([blkdev_put_holder], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLKDEV_PUT_HOLDER, 1,
+				[blkdev_put() accepts void* as arg 2])
+		], [
+			ZFS_LINUX_TEST_ERROR([blkdev_put()])
+		])
 	])
 ])
 
@@ -495,7 +569,9 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
+	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
 	ZFS_AC_KERNEL_SRC_BLKDEV_PUT
+	ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
 	ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART
 	ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV
 	ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV
@@ -510,6 +586,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
 	ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
 	ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
+	ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -530,4 +607,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
 	ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
 	ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
 	ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
+	ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
 ])
diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4
index 84e39dc8a2f6..d13c1337b1fb 100644
--- a/config/kernel-block-device-operations.m4
+++ b/config/kernel-block-device-operations.m4
@@ -49,12 +49,42 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
 	], [], [])
 ])
 
+dnl #
+dnl # 5.9.x API change
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [
+	ZFS_LINUX_TEST_SRC([block_device_operations_release_void_1arg], [
+		#include <linux/blkdev.h>
+
+		void blk_release(struct gendisk *g) {
+			(void) g;
+			return;
+		}
+
+		static const struct block_device_operations
+		    bops __attribute__ ((unused)) = {
+			.open		= NULL,
+			.release	= blk_release,
+			.ioctl		= NULL,
+			.compat_ioctl	= NULL,
+		};
+	], [], [])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
-	AC_MSG_CHECKING([whether bops->release() is void])
+	AC_MSG_CHECKING([whether bops->release() is void and takes 2 args])
 	ZFS_LINUX_TEST_RESULT([block_device_operations_release_void], [
 		AC_MSG_RESULT(yes)
 	],[
-		ZFS_LINUX_TEST_ERROR([bops->release()])
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether bops->release() is void and takes 1 arg])
+		ZFS_LINUX_TEST_RESULT([block_device_operations_release_void_1arg], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE([HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [1],
+				[Define if release() in block_device_operations takes 1 arg])
+		],[
+			ZFS_LINUX_TEST_ERROR([bops->release()])
+		])
 	])
 ])
 
@@ -92,6 +122,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
+	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 ])
 
diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h
index 1641dd92a918..f111e648ccf7 100644
--- a/include/os/linux/kernel/linux/blkdev_compat.h
+++ b/include/os/linux/kernel/linux/blkdev_compat.h
@@ -398,6 +398,12 @@ vdev_lookup_bdev(const char *path, dev_t *dev)
 #endif
 }
 
+#if defined(HAVE_BLK_MODE_T)
+#define	blk_mode_is_open_write(flag)	((flag) & BLK_OPEN_WRITE)
+#else
+#define	blk_mode_is_open_write(flag)	((flag) & FMODE_WRITE)
+#endif
+
 /*
  * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
  */
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 925ee9d9fe9c..48ac55f07034 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -80,9 +80,22 @@ typedef struct dio_request {
 
 static unsigned int zfs_vdev_failfast_mask = 1;
 
+#ifdef HAVE_BLK_MODE_T
+static blk_mode_t
+#else
 static fmode_t
+#endif
 vdev_bdev_mode(spa_mode_t spa_mode)
 {
+#ifdef HAVE_BLK_MODE_T
+	blk_mode_t mode = 0;
+
+	if (spa_mode & SPA_MODE_READ)
+		mode |= BLK_OPEN_READ;
+
+	if (spa_mode & SPA_MODE_WRITE)
+		mode |= BLK_OPEN_WRITE;
+#else
 	fmode_t mode = 0;
 
 	if (spa_mode & SPA_MODE_READ)
@@ -90,6 +103,7 @@ vdev_bdev_mode(spa_mode_t spa_mode)
 
 	if (spa_mode & SPA_MODE_WRITE)
 		mode |= FMODE_WRITE;
+#endif
 
 	return (mode);
 }
@@ -197,12 +211,47 @@ vdev_disk_kobj_evt_post(vdev_t *v)
 	}
 }
 
+#if !defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
+/*
+ * Define a dummy struct blk_holder_ops for kernel versions
+ * prior to 6.5.
+ */
+struct blk_holder_ops {};
+#endif
+
+static struct block_device *
+vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder,
+    const struct blk_holder_ops *hops)
+{
+#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG
+	return (blkdev_get_by_path(path,
+	    vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops));
+#else
+	return (blkdev_get_by_path(path,
+	    vdev_bdev_mode(mode) | FMODE_EXCL, holder));
+#endif
+}
+
+static void
+vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder)
+{
+#ifdef HAVE_BLKDEV_PUT_HOLDER
+	return (blkdev_put(bdev, holder));
+#else
+	return (blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL));
+#endif
+}
+
 static int
 vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
     uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	struct block_device *bdev;
+#ifdef HAVE_BLK_MODE_T
+	blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+#else
 	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+#endif
 	hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
 	vdev_disk_t *vd;
 
@@ -252,15 +301,15 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 					reread_part = B_TRUE;
 			}
 
-			blkdev_put(bdev, mode | FMODE_EXCL);
+			vdev_blkdev_put(bdev, mode, zfs_vdev_holder);
 		}
 
 		if (reread_part) {
-			bdev = blkdev_get_by_path(disk_name, mode | FMODE_EXCL,
-			    zfs_vdev_holder);
+			bdev = vdev_blkdev_get_by_path(disk_name, mode,
+			    zfs_vdev_holder, NULL);
 			if (!IS_ERR(bdev)) {
 				int error = vdev_bdev_reread_part(bdev);
-				blkdev_put(bdev, mode | FMODE_EXCL);
+				vdev_blkdev_put(bdev, mode, zfs_vdev_holder);
 				if (error == 0) {
 					timeout = MSEC2NSEC(
 					    zfs_vdev_open_timeout_ms * 2);
@@ -305,8 +354,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 	hrtime_t start = gethrtime();
 	bdev = ERR_PTR(-ENXIO);
 	while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
-		bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
-		    zfs_vdev_holder);
+		bdev = vdev_blkdev_get_by_path(v->vdev_path, mode,
+		    zfs_vdev_holder, NULL);
 		if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
 			/*
 			 * There is no point of waiting since device is removed
@@ -382,8 +431,8 @@ vdev_disk_close(vdev_t *v)
 		return;
 
 	if (vd->vd_bdev != NULL) {
-		blkdev_put(vd->vd_bdev,
-		    vdev_bdev_mode(spa_mode(v->vdev_spa)) | FMODE_EXCL);
+		vdev_blkdev_put(vd->vd_bdev, spa_mode(v->vdev_spa),
+		    zfs_vdev_holder);
 	}
 
 	rw_destroy(&vd->vd_lock);
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index 234c4d5ef0e0..33baac9db06b 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -186,7 +186,7 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 		return (error);
 
 	/* Honor ZFS_APPENDONLY file attribute */
-	if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
+	if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & O_APPEND) == 0)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c
index 68a7de78f471..7786444fea35 100644
--- a/module/os/linux/zfs/zpl_ctldir.c
+++ b/module/os/linux/zfs/zpl_ctldir.c
@@ -42,7 +42,7 @@
 static int
 zpl_common_open(struct inode *ip, struct file *filp)
 {
-	if (filp->f_mode & FMODE_WRITE)
+	if (blk_mode_is_open_write(filp->f_mode))
 		return (-EACCES);
 
 	return (generic_file_open(ip, filp));
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index 38bc8e2c4eeb..7a95b54bdf0d 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -671,7 +671,11 @@ zvol_request(struct request_queue *q, struct bio *bio)
 }
 
 static int
+#ifdef HAVE_BLK_MODE_T
+zvol_open(struct gendisk *disk, blk_mode_t flag)
+#else
 zvol_open(struct block_device *bdev, fmode_t flag)
+#endif
 {
 	zvol_state_t *zv;
 	int error = 0;
@@ -686,10 +690,14 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 	/*
 	 * Obtain a copy of private_data under the zvol_state_lock to make
 	 * sure that either the result of zvol free code path setting
-	 * bdev->bd_disk->private_data to NULL is observed, or zvol_os_free()
+	 * disk->private_data to NULL is observed, or zvol_os_free()
 	 * is not called on this zv because of the positive zv_open_count.
 	 */
+#ifdef HAVE_BLK_MODE_T
+	zv = disk->private_data;
+#else
 	zv = bdev->bd_disk->private_data;
+#endif
 	if (zv == NULL) {
 		rw_exit(&zvol_state_lock);
 		return (SET_ERROR(-ENXIO));
@@ -769,14 +777,15 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 			}
 		}
 
-		error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
+		error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));
 
 		if (drop_namespace)
 			mutex_exit(&spa_namespace_lock);
 	}
 
 	if (error == 0) {
-		if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
+		if ((blk_mode_is_open_write(flag)) &&
+		    (zv->zv_flags & ZVOL_RDONLY)) {
 			if (zv->zv_open_count == 0)
 				zvol_last_close(zv);
 
@@ -791,14 +800,25 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 		rw_exit(&zv->zv_suspend_lock);
 
 	if (error == 0)
+#ifdef HAVE_BLK_MODE_T
+		disk_check_media_change(disk);
+#else
 		zfs_check_media_change(bdev);
+#endif
 
 	return (error);
 }
 
 static void
-zvol_release(struct gendisk *disk, fmode_t mode)
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
+zvol_release(struct gendisk *disk)
+#else
+zvol_release(struct gendisk *disk, fmode_t unused)
+#endif
 {
+#if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG)
+	(void) unused;
+#endif
 	zvol_state_t *zv;
 	boolean_t drop_suspend = B_TRUE;
 

From 0bf2c5365ed3afa65545393d8da2317699f18b30 Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Sun, 23 Jul 2023 01:34:29 -0400
Subject: [PATCH 04/37] Linux 6.4 compat: iter_iov() function now used to get
 old iov member

The iov_iter->iov member is now iov_iter->__iov and must be accessed via
the accessor function iter_iov(). Create a wrapper that is conditionally
compiled to use the access method appropriate for the target kernel
version.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15100
---
 config/kernel-vfs-iov_iter.m4  | 23 +++++++++++++++++++++++
 include/os/linux/spl/sys/uio.h |  6 ++++++
 module/os/linux/zfs/zpl_file.c |  8 +++-----
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4
index e0617faab02c..cc5a7ab0c237 100644
--- a/config/kernel-vfs-iov_iter.m4
+++ b/config/kernel-vfs-iov_iter.m4
@@ -93,6 +93,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
 		struct iov_iter iter = { 0 };
 		__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
 	])
+
+	ZFS_LINUX_TEST_SRC([iter_iov], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		__attribute__((unused)) const struct iovec *iov = iter_iov(&iter);
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
@@ -201,4 +209,19 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
 		AC_DEFINE(HAVE_VFS_IOV_ITER, 1,
 		    [All required iov_iter interfaces are available])
 	])
+
+	dnl #
+	dnl # Kernel 6.5 introduces the iter_iov() function that returns the
+	dnl # __iov member of an iov_iter*. The iov member was renamed to this
+	dnl # __iov member, and is intended to be accessed via the helper
+	dnl # function now.
+	dnl #
+	AC_MSG_CHECKING([whether iter_iov() is available])
+	ZFS_LINUX_TEST_RESULT([iter_iov], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_ITER_IOV, 1,
+		    [iter_iov() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
 ])
diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h
index fe2b5c07a018..082e930e46b3 100644
--- a/include/os/linux/spl/sys/uio.h
+++ b/include/os/linux/spl/sys/uio.h
@@ -173,4 +173,10 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
 }
 #endif
 
+#if defined(HAVE_ITER_IOV)
+#define	zfs_uio_iter_iov(iter)	iter_iov((iter))
+#else
+#define	zfs_uio_iter_iov(iter)	(iter)->iov
+#endif
+
 #endif /* SPL_UIO_H */
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index 73526db731c4..aedafd6002b9 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -300,17 +300,15 @@ zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
 {
 #if defined(HAVE_VFS_IOV_ITER)
 	zfs_uio_iov_iter_init(uio, to, pos, count, skip);
-#else
-#ifdef HAVE_IOV_ITER_TYPE
-	zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
+#elif defined(HAVE_IOV_ITER_TYPE)
+	zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
 	    iov_iter_type(to) & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
 	    count, skip);
 #else
-	zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
+	zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
 	    to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
 	    count, skip);
 #endif
-#endif
 }
 
 static ssize_t

From 8be6308e85cffb2247753033fbfc3641f731af51 Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Sun, 30 Jul 2023 15:23:47 -0400
Subject: [PATCH 05/37] Linux 4.20 compat: wrapper function for iov_iter type
 access

An iov_iter_type() function to access the "type" member of the struct
iov_iter was added at one point. Move the conditional logic to decide
which method to use for accessing it into a macro and simplify the
zpl_uio_init code.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15100
---
 include/os/linux/spl/sys/uio.h | 6 ++++++
 module/os/linux/zfs/zpl_file.c | 7 ++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h
index 082e930e46b3..cce097e16fbc 100644
--- a/include/os/linux/spl/sys/uio.h
+++ b/include/os/linux/spl/sys/uio.h
@@ -179,4 +179,10 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
 #define	zfs_uio_iter_iov(iter)	(iter)->iov
 #endif
 
+#if defined(HAVE_IOV_ITER_TYPE)
+#define	zfs_uio_iov_iter_type(iter)	iov_iter_type((iter))
+#else
+#define	zfs_uio_iov_iter_type(iter)	(iter)->type
+#endif
+
 #endif /* SPL_UIO_H */
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index aedafd6002b9..f6af2ebd1163 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -300,13 +300,10 @@ zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
 {
 #if defined(HAVE_VFS_IOV_ITER)
 	zfs_uio_iov_iter_init(uio, to, pos, count, skip);
-#elif defined(HAVE_IOV_ITER_TYPE)
-	zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
-	    iov_iter_type(to) & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
-	    count, skip);
 #else
 	zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
-	    to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
+	    zfs_uio_iov_iter_type(to) & ITER_KVEC ?
+	    UIO_SYSSPACE : UIO_USERSPACE,
 	    count, skip);
 #endif
 }

From 3a68f3c50f82ec87d74b255e3a0db504ff366211 Mon Sep 17 00:00:00 2001
From: Brian Atkinson <batkinson@lanl.gov>
Date: Tue, 1 Aug 2023 17:48:19 -0400
Subject: [PATCH 06/37] Revert "Linux 6.5 compat: register_sysctl_table
 removed"

This reverts commit b35374fd6474603170fd9a3c7503da6eb13ac712 as there
are error messages when loading the SPL module. Errors seemed to be tied
to duplicate a duplicate entry.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
Closes #15134
---
 config/kernel-register_sysctl_table.m4 | 27 --------------------------
 config/kernel.m4                       |  2 --
 module/os/linux/spl/spl-proc.c         | 26 +++----------------------
 3 files changed, 3 insertions(+), 52 deletions(-)
 delete mode 100644 config/kernel-register_sysctl_table.m4

diff --git a/config/kernel-register_sysctl_table.m4 b/config/kernel-register_sysctl_table.m4
deleted file mode 100644
index f18316b32b6d..000000000000
--- a/config/kernel-register_sysctl_table.m4
+++ /dev/null
@@ -1,27 +0,0 @@
-dnl #
-dnl # Linux 6.5 removes register_sysctl_table
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
-	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
-		#include <linux/sysctl.h>
-
-		static struct ctl_table dummy_table[] = {
-			{}
-		};
-
-    ],[
-		struct ctl_table_header *h
-			__attribute((unused)) = register_sysctl_table(dummy_table);
-    ])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
-	AC_MSG_CHECKING([whether register_sysctl_table exists])
-	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
-		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_SYSCTL_REGISTER_TABLE, 1,
-			[sysctl_register_table exists])
-	],[
-		AC_MSG_RESULT([no])
-	])
-])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 28bd361d33ff..1487fa2e7793 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -160,7 +160,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_FILEMAP
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
 	ZFS_AC_KERNEL_SRC_RECLAIMED
-	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -300,7 +299,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_FILEMAP
 	ZFS_AC_KERNEL_WRITEPAGE_T
 	ZFS_AC_KERNEL_RECLAIMED
-	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index bcc356ae55b6..01f5619e1893 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -624,7 +624,6 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dohostid,
 	},
-#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	{
 		.procname	= "kmem",
 		.mode		= 0555,
@@ -635,11 +634,9 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0555,
 		.child		= spl_kstat_table,
 	},
-#endif
 	{},
 };
 
-#ifdef HAVE_REGISTER_SYSCTL_TABLE
 static struct ctl_table spl_dir[] = {
 	{
 		.procname	= "spl",
@@ -651,38 +648,21 @@ static struct ctl_table spl_dir[] = {
 
 static struct ctl_table spl_root[] = {
 	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= spl_dir,
+	.procname = "kernel",
+	.mode = 0555,
+	.child = spl_dir,
 	},
 	{}
 };
-#endif
 
 int
 spl_proc_init(void)
 {
 	int rc = 0;
 
-#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	spl_header = register_sysctl_table(spl_root);
 	if (spl_header == NULL)
 		return (-EUNATCH);
-#else
-	spl_header = register_sysctl("kernel/spl", spl_table);
-	if (spl_header == NULL)
-		return (-EUNATCH);
-
-	if (register_sysctl("kernel/spl/kmem", spl_kmem_table) == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-
-	if (register_sysctl("kernel/spl/kstat", spl_kstat_table) == NULL) {
-		rc = -EUNATCH;
-		goto out;
-	}
-#endif
 
 	proc_spl = proc_mkdir("spl", NULL);
 	if (proc_spl == NULL) {

From 31a4673c05ea942498a278d9dd519f251b501db1 Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Wed, 2 Aug 2023 17:05:46 -0400
Subject: [PATCH 07/37] Linux 6.5 compat: register_sysctl_table removed

Additionally, the .child element of ctl_table has been removed in 6.5.
This change adds a new test for the pre-6.5 register_sysctl_table()
function, and uses the old code in that case. If it isn't found, then
the parentage entries in the tables are removed, and the register_sysctl
call is provided the paths of "kernel/spl", "kernel/spl/kmem", and
"kernel/spl/kstat" directly, to populate each subdirectory over three
calls, as is the new API.

Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15138
---
 config/kernel-register_sysctl_table.m4 | 27 ++++++++++++++++++++++++++
 config/kernel.m4                       |  2 ++
 module/os/linux/spl/spl-proc.c         | 26 ++++++++++++++++++++++---
 3 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 config/kernel-register_sysctl_table.m4

diff --git a/config/kernel-register_sysctl_table.m4 b/config/kernel-register_sysctl_table.m4
new file mode 100644
index 000000000000..a5e934f56d29
--- /dev/null
+++ b/config/kernel-register_sysctl_table.m4
@@ -0,0 +1,27 @@
+dnl #
+dnl # Linux 6.5 removes register_sysctl_table
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
+	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
+		#include <linux/sysctl.h>
+
+		static struct ctl_table dummy_table[] = {
+			{}
+		};
+
+    ],[
+		struct ctl_table_header *h
+			__attribute((unused)) = register_sysctl_table(dummy_table);
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
+	AC_MSG_CHECKING([whether register_sysctl_table exists])
+	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_REGISTER_SYSCTL_TABLE, 1,
+			[register_sysctl_table exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 1487fa2e7793..28bd361d33ff 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -160,6 +160,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_FILEMAP
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
 	ZFS_AC_KERNEL_SRC_RECLAIMED
+	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -299,6 +300,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_FILEMAP
 	ZFS_AC_KERNEL_WRITEPAGE_T
 	ZFS_AC_KERNEL_RECLAIMED
+	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index 01f5619e1893..bcc356ae55b6 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -624,6 +624,7 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dohostid,
 	},
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	{
 		.procname	= "kmem",
 		.mode		= 0555,
@@ -634,9 +635,11 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0555,
 		.child		= spl_kstat_table,
 	},
+#endif
 	{},
 };
 
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 static struct ctl_table spl_dir[] = {
 	{
 		.procname	= "spl",
@@ -648,21 +651,38 @@ static struct ctl_table spl_dir[] = {
 
 static struct ctl_table spl_root[] = {
 	{
-	.procname = "kernel",
-	.mode = 0555,
-	.child = spl_dir,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= spl_dir,
 	},
 	{}
 };
+#endif
 
 int
 spl_proc_init(void)
 {
 	int rc = 0;
 
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	spl_header = register_sysctl_table(spl_root);
 	if (spl_header == NULL)
 		return (-EUNATCH);
+#else
+	spl_header = register_sysctl("kernel/spl", spl_table);
+	if (spl_header == NULL)
+		return (-EUNATCH);
+
+	if (register_sysctl("kernel/spl/kmem", spl_kmem_table) == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+
+	if (register_sysctl("kernel/spl/kstat", spl_kstat_table) == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+#endif
 
 	proc_spl = proc_mkdir("spl", NULL);
 	if (proc_spl == NULL) {

From 5a22de144abf2829bc8112e17a7a7e542da53dc5 Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Mon, 7 Aug 2023 18:47:46 -0400
Subject: [PATCH 08/37] Linux 6.5 compat: replace generic_file_splice_read with
 filemap_splice_read

The generic_file_splice_read function was removed in Linux 6.5 in favor
of filemap_splice_read. Add an autoconf test for filemap_splice_read and
use it if it is found as the handler for .splice_read in the
file_operations struct. Additionally, ITER_PIPE was removed in 6.5. This
change removes the ITER_* macros that OpenZFS doesn't use from being
tested in config/kernel-vfs-iov_iter.m4. The removal of ITER_PIPE was
causing the test to fail, which also affected the code responsible for
setting the .splice_read handler, above. That behavior caused run-time
panics on Linux 6.5.

Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15155
---
 config/kernel-filemap-splice-read.m4 | 25 +++++++++++++++++++++++++
 config/kernel-vfs-iov_iter.m4        |  3 +--
 config/kernel.m4                     |  2 ++
 module/os/linux/zfs/zpl_file.c       |  4 ++++
 4 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 config/kernel-filemap-splice-read.m4

diff --git a/config/kernel-filemap-splice-read.m4 b/config/kernel-filemap-splice-read.m4
new file mode 100644
index 000000000000..5199b7373e4d
--- /dev/null
+++ b/config/kernel-filemap-splice-read.m4
@@ -0,0 +1,25 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP_SPLICE_READ], [
+	dnl #
+	dnl # Kernel 6.5 - generic_file_splice_read was removed in favor
+	dnl # of filemap_splice_read for the .splice_read member of the
+	dnl # file_operations struct.
+	dnl #
+	ZFS_LINUX_TEST_SRC([has_filemap_splice_read], [
+		#include <linux/fs.h>
+
+		struct file_operations fops __attribute__((unused)) = {
+			.splice_read = filemap_splice_read,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILEMAP_SPLICE_READ], [
+	AC_MSG_CHECKING([whether filemap_splice_read() exists])
+	ZFS_LINUX_TEST_RESULT([has_filemap_splice_read], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FILEMAP_SPLICE_READ, 1,
+		    [filemap_splice_read exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4
index cc5a7ab0c237..ff560ff3eef0 100644
--- a/config/kernel-vfs-iov_iter.m4
+++ b/config/kernel-vfs-iov_iter.m4
@@ -6,8 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
-		int type __attribute__ ((unused)) =
-		    ITER_IOVEC | ITER_KVEC | ITER_BVEC | ITER_PIPE;
+		int type __attribute__ ((unused)) = ITER_KVEC;
 	])
 
 	ZFS_LINUX_TEST_SRC([iov_iter_advance], [
diff --git a/config/kernel.m4 b/config/kernel.m4
index 28bd361d33ff..309f1819be48 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -161,6 +161,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
 	ZFS_AC_KERNEL_SRC_RECLAIMED
 	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_SRC_FILEMAP_SPLICE_READ
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -301,6 +302,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_WRITEPAGE_T
 	ZFS_AC_KERNEL_RECLAIMED
 	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_FILEMAP_SPLICE_READ
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index f6af2ebd1163..24cc1064a8fc 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -1323,7 +1323,11 @@ const struct file_operations zpl_file_operations = {
 	.read_iter	= zpl_iter_read,
 	.write_iter	= zpl_iter_write,
 #ifdef HAVE_VFS_IOV_ITER
+#ifdef HAVE_FILEMAP_SPLICE_READ
+	.splice_read	= filemap_splice_read,
+#else
 	.splice_read	= generic_file_splice_read,
+#endif
 	.splice_write	= iter_file_splice_write,
 #endif
 #else

From 58a707375fef23697cb029fbad0e7e92bc78025b Mon Sep 17 00:00:00 2001
From: Coleman Kane <ckane@colemankane.org>
Date: Tue, 8 Aug 2023 18:42:32 -0400
Subject: [PATCH 09/37] Linux 6.5 compat: Use copy_splice_read instead of
 filemap_splice_read

Using the filemap_splice_read function for the splice_read handler was
leading to occasional data corruption under certain circumstances. Favor
using copy_splice_read instead, which does not demonstrate the same
erroneous behavior under the tested failure cases.

Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Coleman Kane <ckane@colemankane.org>
Closes #15164
---
 config/kernel-filemap-splice-read.m4 | 18 +++++++++---------
 config/kernel.m4                     |  4 ++--
 module/os/linux/zfs/zpl_file.c       |  4 ++--
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/config/kernel-filemap-splice-read.m4 b/config/kernel-filemap-splice-read.m4
index 5199b7373e4d..4c83b31d738a 100644
--- a/config/kernel-filemap-splice-read.m4
+++ b/config/kernel-filemap-splice-read.m4
@@ -1,24 +1,24 @@
-AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP_SPLICE_READ], [
+AC_DEFUN([ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ], [
 	dnl #
 	dnl # Kernel 6.5 - generic_file_splice_read was removed in favor
-	dnl # of filemap_splice_read for the .splice_read member of the
+	dnl # of copy_splice_read for the .splice_read member of the
 	dnl # file_operations struct.
 	dnl #
-	ZFS_LINUX_TEST_SRC([has_filemap_splice_read], [
+	ZFS_LINUX_TEST_SRC([has_copy_splice_read], [
 		#include <linux/fs.h>
 
 		struct file_operations fops __attribute__((unused)) = {
-			.splice_read = filemap_splice_read,
+			.splice_read = copy_splice_read,
 		};
 	],[])
 ])
 
-AC_DEFUN([ZFS_AC_KERNEL_FILEMAP_SPLICE_READ], [
-	AC_MSG_CHECKING([whether filemap_splice_read() exists])
-	ZFS_LINUX_TEST_RESULT([has_filemap_splice_read], [
+AC_DEFUN([ZFS_AC_KERNEL_COPY_SPLICE_READ], [
+	AC_MSG_CHECKING([whether copy_splice_read() exists])
+	ZFS_LINUX_TEST_RESULT([has_copy_splice_read], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_FILEMAP_SPLICE_READ, 1,
-		    [filemap_splice_read exists])
+		AC_DEFINE(HAVE_COPY_SPLICE_READ, 1,
+		    [copy_splice_read exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 309f1819be48..df194ec72207 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -161,7 +161,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
 	ZFS_AC_KERNEL_SRC_RECLAIMED
 	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
-	ZFS_AC_KERNEL_SRC_FILEMAP_SPLICE_READ
+	ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -302,7 +302,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_WRITEPAGE_T
 	ZFS_AC_KERNEL_RECLAIMED
 	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
-	ZFS_AC_KERNEL_FILEMAP_SPLICE_READ
+	ZFS_AC_KERNEL_COPY_SPLICE_READ
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index 24cc1064a8fc..3caa0fc6c214 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -1323,8 +1323,8 @@ const struct file_operations zpl_file_operations = {
 	.read_iter	= zpl_iter_read,
 	.write_iter	= zpl_iter_write,
 #ifdef HAVE_VFS_IOV_ITER
-#ifdef HAVE_FILEMAP_SPLICE_READ
-	.splice_read	= filemap_splice_read,
+#ifdef HAVE_COPY_SPLICE_READ
+	.splice_read	= copy_splice_read,
 #else
 	.splice_read	= generic_file_splice_read,
 #endif

From c7ee59a160f74049de1e459b3c3c63671784703f Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Sat, 2 Sep 2023 02:21:40 +0200
Subject: [PATCH 10/37] Linux 6.5 compat: safe cleanup in spl_proc_fini()

If we fail to create a proc entry in spl_proc_init() we may end up
calling unregister_sysctl_table() twice: one in the failure path of
spl_proc_init() and another time during spl_proc_fini().

Avoid the double call to unregister_sysctl_table() and while at it
refactor the code a bit to reduce code duplication.

This was accidentally introduced when the spl code was
updated for Linux 6.5 compatibility.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Closes #15234
Closes #15235
---
 module/os/linux/spl/spl-proc.c | 36 +++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index bcc356ae55b6..5cb5a6dadb05 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -659,6 +659,21 @@ static struct ctl_table spl_root[] = {
 };
 #endif
 
+static void spl_proc_cleanup(void)
+{
+	remove_proc_entry("kstat", proc_spl);
+	remove_proc_entry("slab", proc_spl_kmem);
+	remove_proc_entry("kmem", proc_spl);
+	remove_proc_entry("taskq-all", proc_spl);
+	remove_proc_entry("taskq", proc_spl);
+	remove_proc_entry("spl", NULL);
+
+	if (spl_header) {
+		unregister_sysctl_table(spl_header);
+		spl_header = NULL;
+	}
+}
+
 int
 spl_proc_init(void)
 {
@@ -723,15 +738,8 @@ spl_proc_init(void)
 		goto out;
 	}
 out:
-	if (rc) {
-		remove_proc_entry("kstat", proc_spl);
-		remove_proc_entry("slab", proc_spl_kmem);
-		remove_proc_entry("kmem", proc_spl);
-		remove_proc_entry("taskq-all", proc_spl);
-		remove_proc_entry("taskq", proc_spl);
-		remove_proc_entry("spl", NULL);
-		unregister_sysctl_table(spl_header);
-	}
+	if (rc)
+		spl_proc_cleanup();
 
 	return (rc);
 }
@@ -739,13 +747,5 @@ spl_proc_init(void)
 void
 spl_proc_fini(void)
 {
-	remove_proc_entry("kstat", proc_spl);
-	remove_proc_entry("slab", proc_spl_kmem);
-	remove_proc_entry("kmem", proc_spl);
-	remove_proc_entry("taskq-all", proc_spl);
-	remove_proc_entry("taskq", proc_spl);
-	remove_proc_entry("spl", NULL);
-
-	ASSERT(spl_header != NULL);
-	unregister_sysctl_table(spl_header);
+	spl_proc_cleanup();
 }

From cacc599aa20b6aba0ac8173386cea2f8b435068e Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Thu, 7 Sep 2023 23:36:32 +0200
Subject: [PATCH 11/37] Linux 6.5 compat: spl: properly unregister sysctl
 entries

When register_sysctl_table() is unavailable we fail to properly
unregister sysctl entries under "kernel/spl".

This leads to errors like the following when spl is unloaded/reloaded,
making impossible to properly reload the spl module:

[  746.995704] sysctl duplicate entry: /kernel/spl/kmem/slab_kvmem_total

Fix by cleaning up all the sub-entries inside "kernel/spl" when the
spl module is unloaded.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Closes #15239
---
 module/os/linux/spl/spl-proc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index 5cb5a6dadb05..f0f929d3ce90 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -47,6 +47,10 @@ static unsigned long table_min = 0;
 static unsigned long table_max = ~0;
 
 static struct ctl_table_header *spl_header = NULL;
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+static struct ctl_table_header *spl_kmem = NULL;
+static struct ctl_table_header *spl_kstat = NULL;
+#endif
 static struct proc_dir_entry *proc_spl = NULL;
 static struct proc_dir_entry *proc_spl_kmem = NULL;
 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
@@ -668,6 +672,16 @@ static void spl_proc_cleanup(void)
 	remove_proc_entry("taskq", proc_spl);
 	remove_proc_entry("spl", NULL);
 
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+	if (spl_kstat) {
+		unregister_sysctl_table(spl_kstat);
+		spl_kstat = NULL;
+	}
+	if (spl_kmem) {
+		unregister_sysctl_table(spl_kmem);
+		spl_kmem = NULL;
+	}
+#endif
 	if (spl_header) {
 		unregister_sysctl_table(spl_header);
 		spl_header = NULL;
@@ -688,12 +702,13 @@ spl_proc_init(void)
 	if (spl_header == NULL)
 		return (-EUNATCH);
 
-	if (register_sysctl("kernel/spl/kmem", spl_kmem_table) == NULL) {
+	spl_kmem = register_sysctl("kernel/spl/kmem", spl_kmem_table);
+	if (spl_kmem == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
-
-	if (register_sysctl("kernel/spl/kstat", spl_kstat_table) == NULL) {
+	spl_kstat = register_sysctl("kernel/spl/kstat", spl_kstat_table);
+	if (spl_kstat == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}

From c011ef8c917473405a769d2b0bab1ead2e49dcc1 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Tue, 12 Sep 2023 12:51:11 -0700
Subject: [PATCH 12/37] Linux 6.5 compat: META (#15265)

Update the META file to reflect compatibility with the 6.5
kernel.

Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 META | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/META b/META
index 0953cc51922f..9ffe90458dbd 100644
--- a/META
+++ b/META
@@ -6,5 +6,5 @@ Release:       rc4
 Release-Tags:  relext
 License:       CDDL
 Author:        OpenZFS
-Linux-Maximum: 6.4
+Linux-Maximum: 6.5
 Linux-Minimum: 3.10

From 11943656f9233086260236e9eef5d752d9abe84c Mon Sep 17 00:00:00 2001
From: George Amanakis <gamanakis@gmail.com>
Date: Tue, 19 Sep 2023 02:06:35 +0200
Subject: [PATCH 13/37] Update the MOS directory on spa_upgrade_errlog()

spa_upgrade_errlog() does not update the MOS directory when the
head_errlog feature is enabled. In this case if spa_errlog_sync() is not
called, the MOS dir references the old errlog_last and errlog_sync
objects. Thus when doing a scrub a panic will occur:

Call Trace:
 dump_stack+0x6d/0x8b
 panic+0x101/0x2e3
 spl_panic+0xcf/0x102 [spl]
 delete_errlog+0x124/0x130 [zfs]
 spa_errlog_sync+0x256/0x260 [zfs]
 spa_sync_iterate_to_convergence+0xe5/0x250 [zfs]
 spa_sync+0x2f7/0x670 [zfs]
 txg_sync_thread+0x22d/0x2d0 [zfs]
 thread_generic_wrapper+0x83/0xa0 [spl]
 kthread+0x104/0x140
 ret_from_fork+0x1f/0x40

Fix this by updating the related MOS directory objects in
spa_upgrade_errlog().

Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #15279
Closes #15277
---
 module/zfs/spa_errlog.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c
index 2e5c22c11490..5dd08f597f33 100644
--- a/module/zfs/spa_errlog.c
+++ b/module/zfs/spa_errlog.c
@@ -930,12 +930,21 @@ spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx)
 	if (spa->spa_errlog_last != 0) {
 		sync_upgrade_errlog(spa, spa->spa_errlog_last, &newobj, tx);
 		spa->spa_errlog_last = newobj;
+
+		(void) zap_update(spa->spa_meta_objset,
+		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
+		    sizeof (uint64_t), 1, &spa->spa_errlog_last, tx);
 	}
 
 	if (spa->spa_errlog_scrub != 0) {
 		sync_upgrade_errlog(spa, spa->spa_errlog_scrub, &newobj, tx);
 		spa->spa_errlog_scrub = newobj;
+
+		(void) zap_update(spa->spa_meta_objset,
+		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
+		    sizeof (uint64_t), 1, &spa->spa_errlog_scrub, tx);
 	}
+
 	mutex_exit(&spa->spa_errlog_lock);
 }
 

From b9b9cdcdb148b50b8b647eb4bc300ec6c4ffd916 Mon Sep 17 00:00:00 2001
From: ednadolski-ix <137826107+ednadolski-ix@users.noreply.github.com>
Date: Sat, 9 Sep 2023 11:23:29 -0600
Subject: [PATCH 14/37] update max_variance limit in zdb_block_size_histogram
 test for CI

Commit 2d7843401a628ef8c483229742dd58bca70bc27e had previously
updated this hardcoded limit to allow for CI testing. As there
is no deterministic pass/fail value, the need has arisen for
one more small increase.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Edmund Nadolski <edmund.nadolski@ixsystems.com>
Closes #15252
---
 .../functional/cli_root/zdb/zdb_block_size_histogram.ksh  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh
index 0a4d24fa695a..cfa26f54b11f 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh
@@ -204,11 +204,11 @@ function histo_check_test_pool
 	# 4096 blocksize count for asize.   For verification we stick
 	# to just lsize counts.
 	#
-	# The max_variance is hard-coded here at 12% to leave us some
-	# margin.  Testing has shown this normally to be in the range
-	# of 2%-8%, but it may be as large as 11%.
+	# Variances are expected since this test does not account for
+	# metadata. The hardcoded limit here is empirical and should
+	# not be construed as deterministic.
 	###################
-	let max_variance=12
+	let max_variance=15
 	let fail_value=0
 	let error_count=0
 	log_note "Comparisons for ${pool}"

From 228b064d1b0acb991b7e27502f59f2ce22de8c4b Mon Sep 17 00:00:00 2001
From: Laura Hild <lsh@jlab.org>
Date: Mon, 11 Sep 2023 17:58:19 -0400
Subject: [PATCH 15/37] Remove implication that child `disk`s aren't vdevs in
 zpoolconcepts(7)

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Laura Hild <lsh@jlab.org>
Closes #15247
---
 man/man7/zpoolconcepts.7 | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7
index db3fd4926236..98f3ee7cd660 100644
--- a/man/man7/zpoolconcepts.7
+++ b/man/man7/zpoolconcepts.7
@@ -203,11 +203,9 @@ For more information, see the
 section.
 .El
 .Pp
-Virtual devices cannot be nested, so a mirror or raidz virtual device can only
-contain files or disks.
-Mirrors of mirrors
-.Pq or other combinations
-are not allowed.
+Virtual devices cannot be nested arbitrarily.
+A mirror, raidz or draid virtual device can only be created with files or disks.
+Mirrors of mirrors or other such combinations are not allowed.
 .Pp
 A pool can have any number of virtual devices at the top of the configuration
 .Po known as

From 0ce7a068e9ec894cabef8f87c8603a06df055dd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 18 Sep 2023 18:08:41 +0200
Subject: [PATCH 16/37] check-zstd-symbols: also ignore __pfx_ symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b341b20d648bb7e9a3307c33163e7399f0913e66

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #15282
Closes #15284
---
 module/Makefile.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/module/Makefile.in b/module/Makefile.in
index 5b71e1abf79e..9b34b3dfaec7 100644
--- a/module/Makefile.in
+++ b/module/Makefile.in
@@ -168,4 +168,4 @@ gen-zstd-symbols:
 	for obj in $(addprefix zstd/,$(ZSTD_UPSTREAM_OBJS)); do echo; echo "/* $${obj#zstd/}: */"; @OBJDUMP@ -t $$obj | awk '$$2 == "g" && !/ zfs_/ {print "#define\t" $$6 " zfs_" $$6}' | sort; done >> zstd/include/zstd_compat_wrapper.h
 
 check-zstd-symbols:
-	@OBJDUMP@ -t $(addprefix zstd/,$(ZSTD_UPSTREAM_OBJS)) | awk '/file format/ {print}  $$2 == "g" && !/ zfs_/ {++ret; print}  END {exit ret}'
+	@OBJDUMP@ -t $(addprefix zstd/,$(ZSTD_UPSTREAM_OBJS)) | awk '/file format/ {print}  $$2 == "g" && (!/ zfs_/ && !/ __pfx_zfs_/) {++ret; print}  END {exit ret}'

From 54c6fbd378eaa402eff34acf6a91c02d6cf9da11 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Mon, 18 Sep 2023 16:25:58 -0700
Subject: [PATCH 17/37] zed: Allow autoreplace and fault LEDs for removed vdevs

Allow zed to autoreplace vdevs marked as REMOVED.  Also update
statechange-led zedlet to toggle fault LEDs for REMOVED vdevs.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15281
---
 cmd/zed/agents/zfs_mod.c         | 1 +
 cmd/zed/zed.d/statechange-led.sh | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index a8d084bb4bd3..2f040ff7582c 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -372,6 +372,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	/* Only autoreplace bad disks */
 	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
 	    (vs->vs_state != VDEV_STATE_FAULTED) &&
+	    (vs->vs_state != VDEV_STATE_REMOVED) &&
 	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
 		zed_log_msg(LOG_INFO, "  not autoreplacing since disk isn't in "
 		    "a bad state (currently %llu)", vs->vs_state);
diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh
index 46bfc1b866f1..40cb61f17307 100755
--- a/cmd/zed/zed.d/statechange-led.sh
+++ b/cmd/zed/zed.d/statechange-led.sh
@@ -121,7 +121,7 @@ state_to_val()
 {
 	state="$1"
 	case "$state" in
-		FAULTED|DEGRADED|UNAVAIL)
+		FAULTED|DEGRADED|UNAVAIL|REMOVED)
 			echo 1
 			;;
 		ONLINE)

From f7a07d76ee5a6b698540c6873f194350539a7065 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Tue, 19 Sep 2023 01:53:33 +0200
Subject: [PATCH 18/37] Retire z_nr_znodes

Added in ab26409db753 ("Linux 3.1 compat, super_block->s_shrink"), with
the only consumer which needed the count getting retired in 066e82522101
("Linux compat: Minimum kernel version 3.10").

The counter gets in the way of not maintaining the list to begin with.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #15274
---
 include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 1 -
 include/os/linux/zfs/sys/zfs_vfsops_os.h   | 1 -
 module/os/freebsd/zfs/zfs_vfsops.c         | 8 +++-----
 module/os/freebsd/zfs/zfs_znode.c          | 2 --
 module/os/linux/zfs/zfs_ctldir.c           | 1 -
 module/os/linux/zfs/zfs_vfsops.c           | 7 +++----
 module/os/linux/zfs/zfs_znode.c            | 2 --
 7 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index f765d38dbac8..24bb03575f33 100644
--- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -93,7 +93,6 @@ struct zfsvfs {
 	zfs_teardown_lock_t z_teardown_lock;
 	zfs_teardown_inactive_lock_t z_teardown_inactive_lock;
 	list_t		z_all_znodes;	/* all vnodes in the fs */
-	uint64_t	z_nr_znodes;	/* number of znodes in the fs */
 	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
 	struct zfsctl_root	*z_ctldir;	/* .zfs directory pointer */
 	boolean_t	z_show_ctldir;	/* expose .zfs in the root dir */
diff --git a/include/os/linux/zfs/sys/zfs_vfsops_os.h b/include/os/linux/zfs/sys/zfs_vfsops_os.h
index e320b8de4222..b4d5db21f5e5 100644
--- a/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -105,7 +105,6 @@ struct zfsvfs {
 	rrmlock_t	z_teardown_lock;
 	krwlock_t	z_teardown_inactive_lock;
 	list_t		z_all_znodes;	/* all znodes in the fs */
-	uint64_t	z_nr_znodes;	/* number of znodes in the fs */
 	unsigned long	z_rollback_time; /* last online rollback time */
 	unsigned long	z_snap_defer_time; /* last snapshot unmount deferral */
 	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index 33759fa26169..e8b9ada1316b 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -1154,7 +1154,6 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
 
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	mutex_destroy(&zfsvfs->z_lock);
-	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
 	list_destroy(&zfsvfs->z_all_znodes);
 	ZFS_TEARDOWN_DESTROY(zfsvfs);
 	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
@@ -1558,12 +1557,11 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 		 * may add the parents of dir-based xattrs to the taskq
 		 * so we want to wait for these.
 		 *
-		 * We can safely read z_nr_znodes without locking because the
-		 * VFS has already blocked operations which add to the
-		 * z_all_znodes list and thus increment z_nr_znodes.
+		 * We can safely check z_all_znodes for being empty because the
+		 * VFS has already blocked operations which add to it.
 		 */
 		int round = 0;
-		while (zfsvfs->z_nr_znodes > 0) {
+		while (!list_is_empty(&zfsvfs->z_all_znodes)) {
 			taskq_wait_outstanding(dsl_pool_zrele_taskq(
 			    dmu_objset_pool(zfsvfs->z_os)), 0);
 			if (++round > 1 && !unmounting)
diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c
index c4f2b722ef4e..0d4c94555c6b 100644
--- a/module/os/freebsd/zfs/zfs_znode.c
+++ b/module/os/freebsd/zfs/zfs_znode.c
@@ -537,7 +537,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
 	zp->z_zfsvfs = zfsvfs;
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
@@ -1286,7 +1285,6 @@ zfs_znode_free(znode_t *zp)
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	POINTER_INVALIDATE(&zp->z_zfsvfs);
 	list_remove(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes--;
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 #if __FreeBSD_version >= 1300139
diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c
index c45a3eb5a4eb..02cb379ea840 100644
--- a/module/os/linux/zfs/zfs_ctldir.c
+++ b/module/os/linux/zfs/zfs_ctldir.c
@@ -537,7 +537,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
 	membar_producer();
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 464c12e1108d..a1db5c57c18b 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -1330,12 +1330,11 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 		 * may add the parents of dir-based xattrs to the taskq
 		 * so we want to wait for these.
 		 *
-		 * We can safely read z_nr_znodes without locking because the
-		 * VFS has already blocked operations which add to the
-		 * z_all_znodes list and thus increment z_nr_znodes.
+		 * We can safely check z_all_znodes for being empty because the
+		 * VFS has already blocked operations which add to it.
 		 */
 		int round = 0;
-		while (zfsvfs->z_nr_znodes > 0) {
+		while (!list_is_empty(&zfsvfs->z_all_znodes)) {
 			taskq_wait_outstanding(dsl_pool_zrele_taskq(
 			    dmu_objset_pool(zfsvfs->z_os)), 0);
 			if (++round > 1 && !unmounting)
diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c
index 335ae3460c58..52c8e51df659 100644
--- a/module/os/linux/zfs/zfs_znode.c
+++ b/module/os/linux/zfs/zfs_znode.c
@@ -390,7 +390,6 @@ zfs_inode_destroy(struct inode *ip)
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	if (list_link_active(&zp->z_link_node)) {
 		list_remove(&zfsvfs->z_all_znodes, zp);
-		zfsvfs->z_nr_znodes--;
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
@@ -641,7 +640,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
-	zfsvfs->z_nr_znodes++;
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	if (links > 0)

From 472335781e50d7864c802184cfaaf35ad60d6b5d Mon Sep 17 00:00:00 2001
From: Umer Saleem <usaleem@ixsystems.com>
Date: Thu, 24 Aug 2023 20:11:59 +0500
Subject: [PATCH 19/37] Do not enable zfs-share.service

zfs-share.service executes `zfs share` on every boot to share any
filesystem/s, that are shared over SMB and/or NFS using the
sharesmb and sharenfs properties.

Since we do not rely on these properties to share over SMB and NFS
and the service fails on boot on TrueNAS if sharesmb and/or
sharenfs properties are set, and we rely on middleware to control
the SMB and NFS shares, zfs-share.service should be disabled for
TrueNAS SCALE.

Signed-off-by: Umer Saleem <usaleem@ixsystems.com>
---
 contrib/debian/rules.in                 | 1 -
 etc/systemd/system/50-zfs.preset        | 2 +-
 etc/systemd/system/zfs-share.service.in | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/contrib/debian/rules.in b/contrib/debian/rules.in
index 9ff28b663b5f..e8cb5aa0ce3d 100755
--- a/contrib/debian/rules.in
+++ b/contrib/debian/rules.in
@@ -168,7 +168,6 @@ override_dh_installinit:
 	dh_installinit -r --no-restart-after-upgrade --name zfs-import
 	dh_installinit -r --no-restart-after-upgrade --name zfs-mount
 	dh_installinit -r --no-restart-after-upgrade --name zfs-load-key
-	dh_installinit -R --name zfs-share
 	dh_installinit -R --name zfs-zed
 
 override_dh_installsystemd:
diff --git a/etc/systemd/system/50-zfs.preset b/etc/systemd/system/50-zfs.preset
index e4056a92cd98..d2da85d330eb 100644
--- a/etc/systemd/system/50-zfs.preset
+++ b/etc/systemd/system/50-zfs.preset
@@ -3,7 +3,7 @@ enable zfs-import-cache.service
 disable zfs-import-scan.service
 enable zfs-import.target
 enable zfs-mount.service
-enable zfs-share.service
+disable zfs-share.service
 enable zfs-zed.service
 enable zfs-volume-wait.service
 enable zfs.target
diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in
index 1a6342a06fec..7c90f74a684a 100644
--- a/etc/systemd/system/zfs-share.service.in
+++ b/etc/systemd/system/zfs-share.service.in
@@ -17,4 +17,4 @@ EnvironmentFile=-@initconfdir@/zfs
 ExecStart=@sbindir@/zfs share -a
 
 [Install]
-WantedBy=zfs.target
+WantedBy=multi-user.target

From 86c1cb8c82477876a2a462341687bb0e7328cc5f Mon Sep 17 00:00:00 2001
From: Umer Saleem <usaleem@ixsystems.com>
Date: Tue, 5 Sep 2023 13:27:53 +0500
Subject: [PATCH 20/37] Update the behavior of mountpoint property

There are some inconsistencies in the handling of mountpoint
property. This commit updates the behavior and makes it
consistent.

If mountpoint property is set when dataset is unmounted, this
would update the mountpoint property. The mountpoint could be
valid or invalid in this case. Setting the mountpoint property
would result in success in this case. Dataset would still be
unmounted here.

On the other hand, if dataset is mounted and mountpoint
property is updated to something invalid where mount cannot be
successful, for example, setting the mountpoint inside a readonly
directory. This would unmount the dataset, set the mountpoint
property to requested value and tries to mount the dataset. The
mount operation returns error and this error is treated as
overall failure of setting the property while the property is
actually set.

To make the behavior consistent in case dataset is mounted or
unmounted, we should try to mount the dataset whenever mountpoint
property is updated. This would result in mounting the datasets
if canmount property is set to on, regardless if the dataset was
previously unmounted.

The failure in mount operation while setting the mountpoint
property should not be treated as failure, since the property is
actually set now to user requested value.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Signed-off-by: Umer Saleem <usaleem@ixsystems.com>
Closes #15240
---
 cmd/zfs/zfs_main.c                                |  7 ++++---
 lib/libzfs/libzfs_changelist.c                    |  8 ++++----
 .../cli_root/zfs_mount/zfs_mount_006_pos.ksh      |  2 +-
 .../cli_root/zfs_mount/zfs_mount_008_pos.ksh      |  4 ++--
 .../cli_root/zfs_mount/zfs_mount_012_pos.ksh      | 15 +++++++--------
 .../cli_root/zfs_set/mountpoint_002_pos.ksh       | 12 +++++++++---
 .../cli_root/zfs_set/zfs_set_003_neg.ksh          | 10 +++++++---
 7 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 5ed25d1ea720..b0f5d7024951 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -4198,8 +4198,9 @@ static int
 set_callback(zfs_handle_t *zhp, void *data)
 {
 	nvlist_t *props = data;
+	int ret = zfs_prop_set_list(zhp, props);
 
-	if (zfs_prop_set_list(zhp, props) != 0) {
+	if (ret != 0 || libzfs_errno(g_zfs) != EZFS_SUCCESS) {
 		switch (libzfs_errno(g_zfs)) {
 		case EZFS_MOUNTFAILED:
 			(void) fprintf(stderr, gettext("property may be set "
@@ -4208,11 +4209,11 @@ set_callback(zfs_handle_t *zhp, void *data)
 		case EZFS_SHARENFSFAILED:
 			(void) fprintf(stderr, gettext("property may be set "
 			    "but unable to reshare filesystem\n"));
+			ret = 1;
 			break;
 		}
-		return (1);
 	}
-	return (0);
+	return (ret);
 }
 
 static int
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
index dd14c570ec03..4b0f66964346 100644
--- a/lib/libzfs/libzfs_changelist.c
+++ b/lib/libzfs/libzfs_changelist.c
@@ -244,13 +244,13 @@ changelist_postfix(prop_changelist_t *clp)
 		    zfs_is_mounted(cn->cn_handle, NULL);
 
 		if (!mounted && !needs_key && (cn->cn_mounted ||
-		    ((sharenfs || sharesmb || clp->cl_waslegacy) &&
+		    (((clp->cl_prop == ZFS_PROP_MOUNTPOINT &&
+		    clp->cl_prop == clp->cl_realprop) ||
+		    sharenfs || sharesmb || clp->cl_waslegacy) &&
 		    (zfs_prop_get_int(cn->cn_handle,
 		    ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {
 
-			if (zfs_mount(cn->cn_handle, NULL, 0) != 0)
-				errors++;
-			else
+			if (zfs_mount(cn->cn_handle, NULL, 0) == 0)
 				mounted = TRUE;
 		}
 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh
index 2a2466f65c02..e9ab472795eb 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_006_pos.ksh
@@ -94,7 +94,7 @@ while (( depth < MAXDEPTH )); do
 done
 
 log_must zfs set mountpoint=$mtpt $TESTPOOL/$TESTFS
-log_must zfs $mountcmd $TESTPOOL/$TESTFS
+log_must ismounted $TESTPOOL/$TESTFS
 
 log_must zfs set overlay=off $TESTPOOL/$TESTFS
 if ! is_illumos; then
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh
index 2c1029d551cf..0437c61a2c40 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_008_pos.ksh
@@ -71,7 +71,7 @@ log_must mkfile 1M $testfile $testfile1
 
 log_must zfs unmount $fs1
 log_must zfs set mountpoint=$mntpnt $fs1
-log_must zfs mount $fs1
+log_must ismounted $fs1
 log_must zfs unmount $fs1
 log_must zfs mount -O $fs1
 
@@ -85,7 +85,7 @@ log_must ls $mntpnt/$TESTFILE1 $mntpnt/$TESTFILE2
 # Verify $TESTFILE2 was created in $fs1, rather than $fs
 log_must zfs unmount $fs1
 log_must zfs set mountpoint=$mntpnt1 $fs1
-log_must zfs mount $fs1
+log_must ismounted $fs1
 log_must ls $testfile1 $mntpnt1/$TESTFILE2
 
 # Verify $TESTFILE2 was not created in $fs, and $fs is accessible again.
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh
index 5ff094d2c479..66958f2f0884 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_pos.ksh
@@ -25,13 +25,12 @@
 # STRATEGY:
 # 1. Unmount the dataset
 # 2. Create a new empty directory
-# 3. Set the dataset's mountpoint
-# 4. Attempt to mount the dataset
-# 5. Verify the mount succeeds
-# 6. Unmount the dataset
-# 7. Create a file in the directory created in step 2
-# 8. Attempt to mount the dataset
-# 9. Verify the mount succeeds
+# 3. Set the dataset's mountpoint, this should mount the dataset
+# 4. Verify the mount succeeds
+# 5. Unmount the dataset
+# 6. Create a file in the directory created in step 2
+# 7. Attempt to mount the dataset
+# 8. Verify the mount succeeds
 #
 
 verify_runnable "both"
@@ -43,7 +42,7 @@ fs=$TESTPOOL/$TESTFS
 log_must zfs umount $fs
 log_must mkdir -p $TESTDIR
 log_must zfs set mountpoint=$TESTDIR $fs
-log_must zfs mount $fs
+log_must ismounted $fs
 log_must zfs umount $fs
 log_must touch $TESTDIR/testfile.$$
 log_must zfs mount $fs
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh
index a5785226e02e..c227a6fb8aa8 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/mountpoint_002_pos.ksh
@@ -35,7 +35,9 @@
 #
 # DESCRIPTION:
 #	If ZFS is currently managing the file system but it is currently unmounted,
-#	and the mountpoint property is changed, the file system remains unmounted.
+#	and the mountpoint property is changed, the file system should be mounted
+#	if it is a valid mountpoint and canmount allows to mount, otherwise it
+#	should not be mounted.
 #
 # STRATEGY:
 # 1. Setup a pool and create fs, ctr within it.
@@ -62,7 +64,7 @@ function cleanup
 }
 
 log_assert "Setting a valid mountpoint for an unmounted file system, \
-	it remains unmounted."
+	it gets mounted."
 log_onexit cleanup
 
 old_fs_mpt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
@@ -83,7 +85,11 @@ while (( i < ${#dataset[@]} )); do
 	while (( j < ${#values[@]} )); do
 		set_n_check_prop "${values[j]}" "mountpoint" \
 			"${dataset[i]}"
-		log_mustnot ismounted ${dataset[i]}
+		if [ "${dataset[i]}" = "$TESTPOOL/$TESTFS" ]; then
+			log_must ismounted ${dataset[i]}
+		else
+			log_mustnot ismounted ${dataset[i]}
+		fi
 		(( j += 1 ))
 	done
 	cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh
index 3afb0eb7010e..5901ba7dc461 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg.ksh
@@ -33,7 +33,9 @@
 
 #
 # DESCRIPTION:
-# 'zfs set mountpoint/sharenfs' should fail when the mountpoint is invalid
+# 'zfs set mountpoint/sharenfs' should set the property when mountpoint
+#  is invalid. Setting the property should be successful, but dataset
+# should not be mounted, as mountpoint is invalid.
 #
 # STRATEGY:
 # 1. Create invalid scenarios
@@ -62,10 +64,12 @@ longpath=$(gen_dataset_name 1030 "abcdefg")
 log_must zfs create -o mountpoint=legacy $TESTPOOL/foo
 
 # Do the negative testing about "property may be set but unable to remount filesystem"
-log_mustnot eval "zfs set mountpoint=$badpath $TESTPOOL/foo >/dev/null 2>&1"
+set_n_check_prop "$badpath" "mountpoint" "$TESTPOOL/foo"
+log_mustnot ismounted $TESTPOOL/foo
 
 # Do the negative testing about "property may be set but unable to reshare filesystem"
-log_mustnot eval "zfs set sharenfs=on $TESTPOOL/foo >/dev/null 2>&1"
+set_n_check_prop "on" "sharenfs" "$TESTPOOL/foo"
+log_mustnot ismounted $TESTPOOL/foo
 
 # Do the negative testing about "sharenfs property can not be set to null"
 log_mustnot eval "zfs set sharenfs= $TESTPOOL/foo >/dev/null 2>&1"

From 22da5ae59fbf8902f4897c2d417f5673591cc8e7 Mon Sep 17 00:00:00 2001
From: Umer Saleem <usaleem@ixsystems.com>
Date: Tue, 5 Sep 2023 13:33:58 +0500
Subject: [PATCH 21/37] Improve the handling of sharesmb,sharenfs properties

For sharesmb and sharenfs properties, the status of setting the
property is tied with whether we succeed to share the dataset or
not. In case sharing the dataset is not successful, this is
treated as overall failure of setting the property. In this case,
if we check the property after the failure, it is set to on.

This commit updates this behavior and the status of setting the
share properties is not returned as failure, when we fail to
share the dataset.

For sharenfs property, if access list is provided, the syntax
errors in access list/host adresses are not validated until after
setting the property during postfix phase while trying to
share the dataset. This is not correct, since the property has
already been set when we reach there.

Syntax errors in access list/host addresses are validated while
validating the property list, before setting the property and
failure is returned to user in this case when there are errors
in access list.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Signed-off-by: Umer Saleem <usaleem@ixsystems.com>
Closes #15240
---
 cmd/zfs/zfs_main.c             |  1 -
 lib/libshare/os/freebsd/nfs.c  |  3 ++-
 lib/libshare/os/linux/nfs.c    | 47 +++++++++++++++++++++++++++++++---
 lib/libzfs/libzfs_changelist.c | 11 ++++----
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index b0f5d7024951..22c28740e41b 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -4209,7 +4209,6 @@ set_callback(zfs_handle_t *zhp, void *data)
 		case EZFS_SHARENFSFAILED:
 			(void) fprintf(stderr, gettext("property may be set "
 			    "but unable to reshare filesystem\n"));
-			ret = 1;
 			break;
 		}
 	}
diff --git a/lib/libshare/os/freebsd/nfs.c b/lib/libshare/os/freebsd/nfs.c
index 521631c51f07..d9fc66106369 100644
--- a/lib/libshare/os/freebsd/nfs.c
+++ b/lib/libshare/os/freebsd/nfs.c
@@ -161,7 +161,8 @@ nfs_is_shared(sa_share_impl_t impl_share)
 static int
 nfs_validate_shareopts(const char *shareopts)
 {
-	(void) shareopts;
+	if (strlen(shareopts) == 0)
+		return (SA_SYNTAX_ERR);
 	return (SA_OK);
 }
 
diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
index c27e5564c1e1..004946b0cfe4 100644
--- a/lib/libshare/os/linux/nfs.c
+++ b/lib/libshare/os/linux/nfs.c
@@ -319,12 +319,49 @@ get_linux_shareopts_cb(const char *key, const char *value, void *cookie)
 	    "wdelay" };
 
 	char **plinux_opts = (char **)cookie;
+	char *host, *val_dup, *literal, *next;
 
-	/* host-specific options, these are taken care of elsewhere */
-	if (strcmp(key, "ro") == 0 || strcmp(key, "rw") == 0 ||
-	    strcmp(key, "sec") == 0)
+	if (strcmp(key, "sec") == 0)
 		return (SA_OK);
 
+	if (strcmp(key, "ro") == 0 || strcmp(key, "rw") == 0) {
+		if (value == NULL || strlen(value) == 0)
+			return (SA_OK);
+		val_dup = strdup(value);
+		host = val_dup;
+		if (host == NULL)
+			return (SA_NO_MEMORY);
+		do {
+			if (*host == '[') {
+				host++;
+				literal = strchr(host, ']');
+				if (literal == NULL) {
+					free(val_dup);
+					return (SA_SYNTAX_ERR);
+				}
+				if (literal[1] == '\0')
+					next = NULL;
+				else if (literal[1] == '/') {
+					next = strchr(literal + 2, ':');
+					if (next != NULL)
+						++next;
+				} else if (literal[1] == ':')
+					next = literal + 2;
+				else {
+					free(val_dup);
+					return (SA_SYNTAX_ERR);
+				}
+			} else {
+				next = strchr(host, ':');
+				if (next != NULL)
+					++next;
+			}
+			host = next;
+		} while (host != NULL);
+		free(val_dup);
+		return (SA_OK);
+	}
+
 	if (strcmp(key, "anon") == 0)
 		key = "anonuid";
 
@@ -472,6 +509,10 @@ static int
 nfs_validate_shareopts(const char *shareopts)
 {
 	char *linux_opts = NULL;
+
+	if (strlen(shareopts) == 0)
+		return (SA_SYNTAX_ERR);
+
 	int error = get_linux_shareopts(shareopts, &linux_opts);
 	if (error != SA_OK)
 		return (error);
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
index 4b0f66964346..efe1c0c06035 100644
--- a/lib/libzfs/libzfs_changelist.c
+++ b/lib/libzfs/libzfs_changelist.c
@@ -174,7 +174,6 @@ changelist_postfix(prop_changelist_t *clp)
 	prop_changenode_t *cn;
 	uu_avl_walk_t *walk;
 	char shareopts[ZFS_MAXPROPLEN];
-	int errors = 0;
 	boolean_t commit_smb_shares = B_FALSE;
 	boolean_t commit_nfs_shares = B_FALSE;
 
@@ -262,19 +261,19 @@ changelist_postfix(prop_changelist_t *clp)
 		const enum sa_protocol nfs[] =
 		    {SA_PROTOCOL_NFS, SA_NO_PROTOCOL};
 		if (sharenfs && mounted) {
-			errors += zfs_share(cn->cn_handle, nfs);
+			zfs_share(cn->cn_handle, nfs);
 			commit_nfs_shares = B_TRUE;
 		} else if (cn->cn_shared || clp->cl_waslegacy) {
-			errors += zfs_unshare(cn->cn_handle, NULL, nfs);
+			zfs_unshare(cn->cn_handle, NULL, nfs);
 			commit_nfs_shares = B_TRUE;
 		}
 		const enum sa_protocol smb[] =
 		    {SA_PROTOCOL_SMB, SA_NO_PROTOCOL};
 		if (sharesmb && mounted) {
-			errors += zfs_share(cn->cn_handle, smb);
+			zfs_share(cn->cn_handle, smb);
 			commit_smb_shares = B_TRUE;
 		} else if (cn->cn_shared || clp->cl_waslegacy) {
-			errors += zfs_unshare(cn->cn_handle, NULL, smb);
+			zfs_unshare(cn->cn_handle, NULL, smb);
 			commit_smb_shares = B_TRUE;
 		}
 	}
@@ -288,7 +287,7 @@ changelist_postfix(prop_changelist_t *clp)
 	zfs_commit_shares(proto);
 	uu_avl_walk_end(walk);
 
-	return (errors ? -1 : 0);
+	return (0);
 }
 
 /*

From 62677576a75e94396e945c4ecd9372f5d34e50cb Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Wed, 20 Sep 2023 14:17:11 -0400
Subject: [PATCH 22/37] ZIL: Fix potential race on flush deferring.

zil_lwb_set_zio_dependency() can not set write ZIO dependency on
previous LWB's write ZIO if one is already in done handler and set
state to LWB_STATE_WRITE_DONE.  So theoretically done handler of
next LWB's write ZIO may run before done handler of previous LWB
write ZIO completes.  In such case we can not defer flushes, since
the flush issue process is not locked.

This may fix some reported assertions of lwb_vdev_tree not being
empty inside zil_free_lwb().

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #15278
---
 module/zfs/zil.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index b30676b42d88..9e9c9c22549d 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -1550,7 +1550,16 @@ zil_lwb_write_done(zio_t *zio)
 	lwb->lwb_state = LWB_STATE_WRITE_DONE;
 	lwb->lwb_child_zio = NULL;
 	lwb->lwb_write_zio = NULL;
+
+	/*
+	 * If nlwb is not yet issued, zil_lwb_set_zio_dependency() is not
+	 * called for it yet, and when it will be, it won't be able to make
+	 * its write ZIO a parent this ZIO.  In such case we can not defer
+	 * our flushes or below may be a race between the done callbacks.
+	 */
 	nlwb = list_next(&zilog->zl_lwb_list, lwb);
+	if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED)
+		nlwb = NULL;
 	mutex_exit(&zilog->zl_lock);
 
 	if (avl_numnodes(t) == 0)

From 1c2aee7a5222b1b65fea07dd7ee18ae14c3f7be2 Mon Sep 17 00:00:00 2001
From: Rob N <robn@despairlabs.com>
Date: Wed, 20 Sep 2023 01:48:02 +1000
Subject: [PATCH 23/37] tests: install missing PAM tests

'pam_change_unmounted' and 'pam_recursive' both exist and are referenced
by the test run config, but weren't being installed and so are excluded.
This gets them installed so they will run as expected.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Kay Pedersen <mail@mkwg.de>
Signed-off-by: Rob Norris <robn@despairlabs.com>
Closes #15291
---
 tests/zfs-tests/tests/Makefile.am | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 66aff5026f8f..1a58e6f774e9 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1588,7 +1588,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/online_offline/setup.ksh \
 	functional/pam/cleanup.ksh \
 	functional/pam/pam_basic.ksh \
+	functional/pam/pam_change_unmounted.ksh \
 	functional/pam/pam_nounmount.ksh \
+	functional/pam/pam_recursive.ksh \
 	functional/pam/pam_short_password.ksh \
 	functional/pam/setup.ksh \
 	functional/pool_checkpoint/checkpoint_after_rewind.ksh \

From cc75c816c539d22c56aa912d5db19827a16dcbf3 Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Tue, 19 Sep 2023 08:58:14 -0700
Subject: [PATCH 24/37] Fix l2arc_apply_transforms ztest crash

In #13375 we modified the allocation size of the buffer that we use
to apply l2arc transforms to be the size of the arc hdr we're using,
rather than the allocation size that will be in place on the disk,
because sometimes the hdr size is larger. Unfortunately, sometimes
the allocation size is larger, which means that we overflow the buffer
in that case. This change modifies the allocation to be the max of
the two values

Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15177
Closes #15248
---
 module/zfs/arc.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 7023f448182a..22dc0ed5e3b6 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -9092,15 +9092,16 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
 		 * write things before deciding to fail compression in nearly
 		 * every case.)
 		 */
-		cabd = abd_alloc_for_io(size, ismd);
-		tmp = abd_borrow_buf(cabd, size);
+		uint64_t bufsize = MAX(size, asize);
+		cabd = abd_alloc_for_io(bufsize, ismd);
+		tmp = abd_borrow_buf(cabd, bufsize);
 
 		psize = zio_compress_data(compress, to_write, &tmp, size,
 		    hdr->b_complevel);
 
 		if (psize >= asize) {
 			psize = HDR_GET_PSIZE(hdr);
-			abd_return_buf_copy(cabd, tmp, size);
+			abd_return_buf_copy(cabd, tmp, bufsize);
 			HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
 			to_write = cabd;
 			abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
@@ -9110,9 +9111,9 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
 		}
 		ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
 		if (psize < asize)
-			memset((char *)tmp + psize, 0, asize - psize);
+			memset((char *)tmp + psize, 0, bufsize - psize);
 		psize = HDR_GET_PSIZE(hdr);
-		abd_return_buf_copy(cabd, tmp, size);
+		abd_return_buf_copy(cabd, tmp, bufsize);
 		to_write = cabd;
 	}
 

From 9aa1a2878ea98f707e871833743d074a59ff2115 Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Tue, 19 Sep 2023 09:02:23 -0700
Subject: [PATCH 25/37] Fix incorrect expected error in ztest

There is an occasional ztest failure that looks like ztest: attach
(/var/tmp/zloop-run/ztest.13a 570425344, draid1-1-0 532152320, 1)
returned 22, expected 95. This is because the value that we return
is EINVAL, but expected_error is set incorrectly.

Change the expected_error value to match both the comment and the
actual error value.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15295
---
 cmd/ztest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/ztest.c b/cmd/ztest.c
index 398c519cfc35..59c4be225f93 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -3767,7 +3767,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	else if (ashift > oldvd->vdev_top->vdev_ashift)
 		expected_error = EDOM;
 	else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd))
-		expected_error = ENOTSUP;
+		expected_error = EINVAL;
 	else
 		expected_error = 0;
 

From 3af63683fe07465fd96827cab0d946cbdaba6f73 Mon Sep 17 00:00:00 2001
From: Rob N <robn@despairlabs.com>
Date: Wed, 20 Sep 2023 02:06:47 +1000
Subject: [PATCH 26/37] cmd: add 'help' subcommand to zpool and zfs

'program help subcommand' is a reasonably common pattern for
multifunction command-line programs. This commit adds support for that
style to the zpool and zfs commands.

When run as 'zpool help [<topic>]' or 'zfs help [<topic>]', executes the
'man' program on the PATH with the most likely manpage name for the
requested topic: "zpool-<topic>" or "zfs-<topic>" for subcommands, or
"zpool<topic>" or "zfs<topic>" for the "concepts" and "props" topics.
If no topic is supplied, uses the top "zpool" or "zfs" pages.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Kay Pedersen <mail@mkwg.de>
Signed-off-by: Rob Norris <robn@despairlabs.com>
Closes #15288
---
 cmd/zfs/zfs_main.c                            | 30 ++++++++++++++++++
 cmd/zpool/zpool_main.c                        | 31 +++++++++++++++++++
 .../cli_root/zfs_program/zfs_program_json.ksh |  4 ++-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 5ed25d1ea720..62802de23e5f 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -132,6 +132,8 @@ static int zfs_do_zone(int argc, char **argv);
 static int zfs_do_unzone(int argc, char **argv);
 #endif
 
+static int zfs_do_help(int argc, char **argv);
+
 /*
  * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
  */
@@ -606,6 +608,9 @@ usage(boolean_t requested)
 		(void) fprintf(fp,
 		    gettext("\nFor the delegated permission list, run: %s\n"),
 		    "zfs allow|unallow");
+		(void) fprintf(fp,
+		    gettext("\nFor further help on a command or topic, "
+		    "run: %s\n"), "zfs help [<topic>]");
 	}
 
 	/*
@@ -8726,6 +8731,25 @@ zfs_do_version(int argc, char **argv)
 	return (zfs_version_print() != 0);
 }
 
+/* Display documentation */
+static int
+zfs_do_help(int argc, char **argv)
+{
+	char page[MAXNAMELEN];
+	if (argc < 3 || strcmp(argv[2], "zfs") == 0)
+		strcpy(page, "zfs");
+	else if (strcmp(argv[2], "concepts") == 0 ||
+	    strcmp(argv[2], "props") == 0)
+		snprintf(page, sizeof (page), "zfs%s", argv[2]);
+	else
+		snprintf(page, sizeof (page), "zfs-%s", argv[2]);
+
+	execlp("man", "man", page, NULL);
+
+	fprintf(stderr, "couldn't run man program: %s", strerror(errno));
+	return (-1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -8781,6 +8805,12 @@ main(int argc, char **argv)
 	if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0))
 		return (zfs_do_version(argc, argv));
 
+	/*
+	 * Special case 'help'
+	 */
+	if (strcmp(cmdname, "help") == 0)
+		return (zfs_do_help(argc, argv));
+
 	if ((g_zfs = libzfs_init()) == NULL) {
 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
 		return (1);
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 6d0dae8d8b05..d64fdfa5ba4c 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -126,6 +126,8 @@ static int zpool_do_version(int, char **);
 
 static int zpool_do_wait(int, char **);
 
+static int zpool_do_help(int argc, char **argv);
+
 static zpool_compat_status_t zpool_do_load_compat(
     const char *, boolean_t *);
 
@@ -538,6 +540,10 @@ usage(boolean_t requested)
 				(void) fprintf(fp, "%s",
 				    get_usage(command_table[i].usage));
 		}
+
+		(void) fprintf(fp,
+		    gettext("\nFor further help on a command or topic, "
+		    "run: %s\n"), "zpool help [<topic>]");
 	} else {
 		(void) fprintf(fp, gettext("usage:\n"));
 		(void) fprintf(fp, "%s", get_usage(current_command->usage));
@@ -11051,6 +11057,25 @@ zpool_do_version(int argc, char **argv)
 	return (zfs_version_print() != 0);
 }
 
+/* Display documentation */
+static int
+zpool_do_help(int argc, char **argv)
+{
+	char page[MAXNAMELEN];
+	if (argc < 3 || strcmp(argv[2], "zpool") == 0)
+		strcpy(page, "zpool");
+	else if (strcmp(argv[2], "concepts") == 0 ||
+	    strcmp(argv[2], "props") == 0)
+		snprintf(page, sizeof (page), "zpool%s", argv[2]);
+	else
+		snprintf(page, sizeof (page), "zpool-%s", argv[2]);
+
+	execlp("man", "man", page, NULL);
+
+	fprintf(stderr, "couldn't run man program: %s", strerror(errno));
+	return (-1);
+}
+
 /*
  * Do zpool_load_compat() and print error message on failure
  */
@@ -11118,6 +11143,12 @@ main(int argc, char **argv)
 	if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0))
 		return (zpool_do_version(argc, argv));
 
+	/*
+	 * Special case 'help'
+	 */
+	if (strcmp(cmdname, "help") == 0)
+		return (zpool_do_help(argc, argv));
+
 	if ((g_zfs = libzfs_init()) == NULL) {
 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
 		return (1);
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
index b0265c5ee4a1..2241b77bf806 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
@@ -117,7 +117,9 @@ usage:
 
 For the property list, run: zfs set|get
 
-For the delegated permission list, run: zfs allow|unallow")
+For the delegated permission list, run: zfs allow|unallow
+
+For further help on a command or topic, run: zfs help [<topic>]")
 cnt=0
 for cmd in ${neg_cmds[@]}; do
 	log_mustnot zfs program $cmd $TESTPOOL $TESTZCP $TESTDS 2>&1

From 729507d309d4573868855eca04f8522cc120f7d5 Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Wed, 20 Sep 2023 16:39:38 -0700
Subject: [PATCH 27/37] Fix occasional rsend test crashes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have occasional crashes in the rsend tests. Debugging revealed
that this is because the send_worker thread is getting EINTR from
splice(). This happens when a non-fatal signal is received during
the syscall. We should retry the syscall, rather than exiting failure.
Tweak the loop to only break if the splice is finished or we receive
a non-EINTR error.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15273
---
 lib/libzfs_core/libzfs_core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index c63a16de5ab6..01d803e21db0 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -650,10 +650,12 @@ send_worker(void *arg)
 	unsigned int bufsiz = max_pipe_buffer(ctx->from);
 	ssize_t rd;
 
-	while ((rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
-	    SPLICE_F_MOVE | SPLICE_F_MORE)) > 0)
-		;
-
+	for (;;) {
+		rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
+		    SPLICE_F_MOVE | SPLICE_F_MORE);
+		if ((rd == -1 && errno != EINTR) || rd == 0)
+			break;
+	}
 	int err = (rd == -1) ? errno : 0;
 	close(ctx->from);
 	return ((void *)(uintptr_t)err);

From a199cac6cdd751c02e1bd99fb02cd4623356140d Mon Sep 17 00:00:00 2001
From: Rob N <robn@despairlabs.com>
Date: Thu, 21 Sep 2023 09:56:45 +1000
Subject: [PATCH 28/37] status: report pool suspension state under
 failmode=continue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When failmode=continue is set and the pool suspends, both 'zpool status'
and the 'zfs/pool/state' kstat ignore it and report the normal vdev tree
state. There's no clear indicator that the pool is suspended. This is
unlike suspend in failmode=wait, or suspend due to MMP check failure,
which both report "SUSPENDED" explicitly.

This commit changes it so SUSPENDED is reported for failmode=continue
the same as for other modes.

Rationale:

The historical behaviour of failmode=continue is roughly, "press on as
though all is well". To this end, the fact that the pool had suspended
was not shown, to maintain the façade that all is well.

Its unclear why hiding this information was considered appropriate. One
possibility is that it was expected that a true pool fault would always
be reported as DEGRADED or FAULTED, and that the pool could not suspend
without these happening.

That is not necessarily true, as vdev health and suspend state are only
loosely connected, such that a pool in (apparent) good health can be
suspended for good reasons, and of course a degraded pool does not lead
to suspension. Even if that expectation were true, there's still a
difference in urgency - a degraded pool may not need to be attended to
for hours, while a suspended pool is most often unusable until an
operator intervenes.

An operator that has set failmode=continue has presumably done so
because their workload is one that can continue to operate in a useful
way when the pool suspends. In this case the operator still needs a
clear indicator that there is a problem that needs attending to.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #15297
---
 lib/libzfs/libzfs_pool.c | 3 ++-
 module/zfs/spa_misc.c    | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 85564edfd862..4ebd112f452f 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -29,7 +29,7 @@
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
  * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
- * Copyright (c) 2021, Klara Inc.
+ * Copyright (c) 2021, 2023, Klara Inc.
  */
 
 #include <errno.h>
@@ -255,6 +255,7 @@ zpool_get_state_str(zpool_handle_t *zhp)
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
 		str = gettext("FAULTED");
 	} else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
+	    status == ZPOOL_STATUS_IO_FAILURE_CONTINUE ||
 	    status == ZPOOL_STATUS_IO_FAILURE_MMP) {
 		str = gettext("SUSPENDED");
 	} else {
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 3b355e0debcc..72b690162d64 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -27,6 +27,7 @@
  * Copyright (c) 2017 Datto Inc.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
  */
 
 #include <sys/zfs_context.h>
@@ -2756,8 +2757,7 @@ spa_state_to_name(spa_t *spa)
 	vdev_state_t state = rvd->vdev_state;
 	vdev_aux_t aux = rvd->vdev_stat.vs_aux;
 
-	if (spa_suspended(spa) &&
-	    (spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE))
+	if (spa_suspended(spa))
 		return ("SUSPENDED");
 
 	switch (state) {

From 5f30698670dec3fc2085663677021f0a592cf081 Mon Sep 17 00:00:00 2001
From: Rob N <rob.norris@klarasystems.com>
Date: Fri, 22 Sep 2023 10:54:15 +1000
Subject: [PATCH 29/37] tests/block_cloning: try harder to stay on same txg in
 fallback test

We've observed this test failing intermittently. When it does, the
"same block" check shows that both files have the same content, that is,
the file was cloned.

The only way this could have happened is if the open txg moved between
the dd and clonefile calls. That's possible because although we set
zfs_txg_timeout to be large, that only affects the wait time in the sync
thread at the start of a new txg; it doesn't change anything if its
currently waiting or working.

So here we just force the txgs to move immediately before, which should
get both operations onto the same txg as intented.

Sponsored-By: OpenDrives Inc.
Sponsored-By: Klara Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris Rob Norris <rob.norris@klarasystems.com>
Closes #15303
---
 .../block_cloning_copyfilerange_fallback_same_txg.ksh            | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
index a10545bc0769..74c5a5bece60 100755
--- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
+++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
@@ -51,6 +51,7 @@ log_onexit cleanup
 log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
 
 log_must set_tunable64 TXG_TIMEOUT 5000
+log_must sync_pool $TESTPOOL true
 
 log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
 log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288

From 0aabd6b48228845462090c9137a340ca062a3cb9 Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Thu, 21 Sep 2023 21:40:13 -0400
Subject: [PATCH 30/37] ZIL: Avoid dbuf_read() in ztest_get_data()

While working on similar patches for zfs and zvol in #15153 I've
forgot about ztest.  Update it also so that we test the same code
paths as use in production.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #15301
---
 cmd/ztest.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cmd/ztest.c b/cmd/ztest.c
index 59c4be225f93..9c5a6944035c 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -2457,8 +2457,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
 		zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd,
 		    object, offset, size, RL_READER);
 
-		error = dmu_buf_hold(os, object, offset, zgd, &db,
-		    DMU_READ_NO_PREFETCH);
+		error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
 
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;

From 0ce1b2ca19302df59a8c4fa8383f35cf549874de Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Fri, 22 Sep 2023 16:08:51 -0700
Subject: [PATCH 31/37] Invoke zdb by guid to avoid import errors

The problem that was occurring is basically that a device was removed
by ztest and replaced with another device. It was then reguided. The
import then failed because there were two possible imports with the
same name; one with the new guid, and one with the old. This can
happen because the label writes from the device removal/replacement
can be subject to ztest's error injection.

The other ways to fix this would be to change the error injection to
not trigger on removals (which may not be technically feasible), or
to change the import code to not report configurations that are so
short on devices (which would potentially have unpleasant end-user
effects when trying to recover from data losses/device configuration
issues).

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15298
---
 cmd/ztest.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/cmd/ztest.c b/cmd/ztest.c
index 9c5a6944035c..8cfbdfe1c2e2 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -6378,6 +6378,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
 	spa_t *spa = ztest_spa;
 	uint64_t orig, load;
 	int error;
+	ztest_shared_t *zs = ztest_shared;
 
 	if (ztest_opts.zo_mmp_test)
 		return;
@@ -6387,6 +6388,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
 
 	(void) pthread_rwlock_wrlock(&ztest_name_lock);
 	error = spa_change_guid(spa);
+	zs->zs_guid = spa_guid(spa);
 	(void) pthread_rwlock_unlock(&ztest_name_lock);
 
 	if (error != 0)
@@ -6916,7 +6918,7 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
  * Verify pool integrity by running zdb.
  */
 static void
-ztest_run_zdb(const char *pool)
+ztest_run_zdb(uint64_t guid)
 {
 	int status;
 	char *bin;
@@ -6940,13 +6942,13 @@ ztest_run_zdb(const char *pool)
 	free(set_gvars_args);
 
 	size_t would = snprintf(zdb, len,
-	    "%s -bcc%s%s -G -d -Y -e -y %s -p %s %s",
+	    "%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64,
 	    bin,
 	    ztest_opts.zo_verbose >= 3 ? "s" : "",
 	    ztest_opts.zo_verbose >= 4 ? "v" : "",
 	    set_gvars_args_joined,
 	    ztest_opts.zo_dir,
-	    pool);
+	    guid);
 	ASSERT3U(would, <, len);
 
 	umem_free(set_gvars_args_joined, strlen(set_gvars_args_joined) + 1);
@@ -7524,14 +7526,15 @@ ztest_import(ztest_shared_t *zs)
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	zs->zs_metaslab_sz =
 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
+	zs->zs_guid = spa_guid(spa);
 	spa_close(spa, FTAG);
 
 	kernel_fini();
 
 	if (!ztest_opts.zo_mmp_test) {
-		ztest_run_zdb(ztest_opts.zo_pool);
+		ztest_run_zdb(zs->zs_guid);
 		ztest_freeze();
-		ztest_run_zdb(ztest_opts.zo_pool);
+		ztest_run_zdb(zs->zs_guid);
 	}
 
 	(void) pthread_rwlock_destroy(&ztest_name_lock);
@@ -7602,7 +7605,6 @@ ztest_run(ztest_shared_t *zs)
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 	dmu_objset_fast_stat(os, &dds);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
-	zs->zs_guid = dds.dds_guid;
 	dmu_objset_disown(os, B_TRUE, FTAG);
 
 	/*
@@ -7873,14 +7875,15 @@ ztest_init(ztest_shared_t *zs)
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	zs->zs_metaslab_sz =
 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
+	zs->zs_guid = spa_guid(spa);
 	spa_close(spa, FTAG);
 
 	kernel_fini();
 
 	if (!ztest_opts.zo_mmp_test) {
-		ztest_run_zdb(ztest_opts.zo_pool);
+		ztest_run_zdb(zs->zs_guid);
 		ztest_freeze();
-		ztest_run_zdb(ztest_opts.zo_pool);
+		ztest_run_zdb(zs->zs_guid);
 	}
 
 	(void) pthread_rwlock_destroy(&ztest_name_lock);
@@ -8303,7 +8306,7 @@ main(int argc, char **argv)
 		}
 
 		if (!ztest_opts.zo_mmp_test)
-			ztest_run_zdb(ztest_opts.zo_pool);
+			ztest_run_zdb(zs->zs_guid);
 	}
 
 	if (ztest_opts.zo_verbose >= 1) {

From 8526b12f3d6f751ec575ab582fd925bf0084a27e Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Mon, 25 Sep 2023 11:14:00 -0700
Subject: [PATCH 32/37] Set timeout before creating pool in test

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Rob Norris <rob.norris@klarasystems.com>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15309
---
 .../block_cloning_copyfilerange_fallback_same_txg.ksh         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
index 74c5a5bece60..17ce5c9ba784 100755
--- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
+++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
@@ -48,11 +48,11 @@ function cleanup
 
 log_onexit cleanup
 
-log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
-
 log_must set_tunable64 TXG_TIMEOUT 5000
 log_must sync_pool $TESTPOOL true
 
+log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
+
 log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
 log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
 

From ba4dbbdae7d8bc168f4cb247db187d46c41c7742 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Mon, 25 Sep 2023 11:15:32 -0700
Subject: [PATCH 33/37] ZTS: Add additional exceptions

"zfs_share_concurrent_shares" may fail on FreeBSD and some Linux
distributions (fedora).  Move it to the common list.

"zfs_allow_010_pos" has been observed to fail on FreeBSD 13.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Kay Pedersen <mail@mkwg.de>
Reviewed-by: Umer Saleem <usaleem@ixsystems.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #15306
---
 tests/test-runner/bin/zts-report.py.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in
index e1bbe063ab4c..558e4b57279d 100755
--- a/tests/test-runner/bin/zts-report.py.in
+++ b/tests/test-runner/bin/zts-report.py.in
@@ -214,6 +214,7 @@ maybe = {
     'cli_root/zfs_get/zfs_get_009_pos': ['SKIP', 5479],
     'cli_root/zfs_rollback/zfs_rollback_001_pos': ['FAIL', known_reason],
     'cli_root/zfs_rollback/zfs_rollback_002_pos': ['FAIL', known_reason],
+    'cli_root/zfs_share/zfs_share_concurrent_shares': ['FAIL', known_reason],
     'cli_root/zfs_snapshot/zfs_snapshot_002_neg': ['FAIL', known_reason],
     'cli_root/zfs_unshare/zfs_unshare_006_pos': ['SKIP', na_reason],
     'cli_root/zpool_add/zpool_add_004_pos': ['FAIL', known_reason],
@@ -259,10 +260,9 @@ if sys.platform.startswith('freebsd'):
     maybe.update({
         'cli_root/zfs_copies/zfs_copies_002_pos': ['FAIL', known_reason],
         'cli_root/zfs_inherit/zfs_inherit_001_neg': ['FAIL', known_reason],
-        'cli_root/zfs_share/zfs_share_concurrent_shares':
-            ['FAIL', known_reason],
         'cli_root/zpool_import/zpool_import_012_pos': ['FAIL', known_reason],
         'delegate/zfs_allow_003_pos': ['FAIL', known_reason],
+        'delegate/zfs_allow_010_pos': ['FAIL', known_reason],
         'inheritance/inherit_001_pos': ['FAIL', 11829],
         'resilver/resilver_restart_001': ['FAIL', known_reason],
         'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622],

From 99dc1fc340670a070aba58aa660d0d72c774093c Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Tue, 26 Sep 2023 14:37:28 -0700
Subject: [PATCH 34/37] ZTS: Fix introduced test bug in
 block_cloning_copyfilerange

Reviewed-by: John Wren Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15316
---
 .../block_cloning_copyfilerange_fallback_same_txg.ksh            | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
index 17ce5c9ba784..2cd2f4763a73 100755
--- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
+++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh
@@ -49,7 +49,6 @@ function cleanup
 log_onexit cleanup
 
 log_must set_tunable64 TXG_TIMEOUT 5000
-log_must sync_pool $TESTPOOL true
 
 log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS
 

From d38f4664a6ef5cfff8f005016e10b9583d04de6b Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Wed, 27 Sep 2023 12:06:24 -0700
Subject: [PATCH 35/37] Reduce trim min size even lower for tests to reduce
 flakiness

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15315
---
 .../tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh | 2 +-
 .../cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh           | 2 +-
 tests/zfs-tests/tests/functional/trim/autotrim_config.ksh       | 2 +-
 tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh    | 2 +-
 .../zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh | 2 +-
 tests/zfs-tests/tests/functional/trim/trim_config.ksh           | 2 +-
 tests/zfs-tests/tests/functional/trim/trim_integrity.ksh        | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
index bdbf3db53336..9342cbe880ae 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
@@ -60,7 +60,7 @@ log_must set_tunable64 VDEV_MIN_MS_COUNT 64
 
 # Minimum trim size is decreased to verify all trim sizes.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 log_must mkdir "$TESTDIR"
 log_must truncate -s $LARGESIZE "$LARGEFILE"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh
index d5aaf49aebc5..41d1decd13ce 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh
@@ -52,7 +52,7 @@ LARGEFILE="$TESTDIR/largefile"
 
 # Reduce trim size to allow for tighter tolerance below when checking.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 log_must mkdir "$TESTDIR"
 log_must truncate -s $LARGESIZE "$LARGEFILE"
diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
index c97772585737..4dba4616e947 100755
--- a/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
+++ b/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh
@@ -57,7 +57,7 @@ log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 # Reduced TRIM_TXG_BATCH to make trimming more frequent.
 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh
index e25390339b6c..e139809105df 100755
--- a/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh
+++ b/tests/zfs-tests/tests/functional/trim/autotrim_integrity.ksh
@@ -54,7 +54,7 @@ log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 # Reduced TRIM_TXG_BATCH to make trimming more frequent.
 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
diff --git a/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh
index ae7ad8d73dd8..53de485b1a8e 100755
--- a/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh
+++ b/tests/zfs-tests/tests/functional/trim/autotrim_trim_integrity.ksh
@@ -55,7 +55,7 @@ log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 # Reduced TRIM_TXG_BATCH to make trimming more frequent.
 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
diff --git a/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/tests/zfs-tests/tests/functional/trim/trim_config.ksh
index 6a187a05b579..c08b5edb5698 100755
--- a/tests/zfs-tests/tests/functional/trim/trim_config.ksh
+++ b/tests/zfs-tests/tests/functional/trim/trim_config.ksh
@@ -57,7 +57,7 @@ log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 # Reduced TRIM_TXG_BATCH to make trimming more frequent.
 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)
diff --git a/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh b/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh
index 2dff0924f7b1..c67697521ce8 100755
--- a/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh
+++ b/tests/zfs-tests/tests/functional/trim/trim_integrity.ksh
@@ -54,7 +54,7 @@ log_onexit cleanup
 
 # Minimum trim size is decreased to verify all trim sizes.
 typeset trim_extent_bytes_min=$(get_tunable TRIM_EXTENT_BYTES_MIN)
-log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 4096
+log_must set_tunable64 TRIM_EXTENT_BYTES_MIN 512
 
 # Reduced TRIM_TXG_BATCH to make trimming more frequent.
 typeset trim_txg_batch=$(get_tunable TRIM_TXG_BATCH)

From 9e36c5769fe142873c8a2984335d11007fd66987 Mon Sep 17 00:00:00 2001
From: Paul Dagnelie <pcd@delphix.com>
Date: Thu, 28 Sep 2023 14:08:52 -0700
Subject: [PATCH 36/37] Don't allocate from new metaslabs

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15307
Closes #15308
---
 module/zfs/metaslab.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 20dc934593f1..cdf599b17924 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -3208,6 +3208,15 @@ metaslab_segment_weight(metaslab_t *msp)
 static boolean_t
 metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
 {
+	/*
+	 * This case will usually but not always get caught by the checks below;
+	 * metaslabs can be loaded by various means, including the trim and
+	 * initialize code. Once that happens, without this check they are
+	 * allocatable even before they finish their first txg sync.
+	 */
+	if (unlikely(msp->ms_new))
+		return (B_FALSE);
+
 	/*
 	 * If the metaslab is loaded, ms_max_size is definitive and we can use
 	 * the fast check. If it's not, the ms_max_size is a lower bound (once

From 229ca7d738ccbf4c55076977467ee93e20b6f01b Mon Sep 17 00:00:00 2001
From: Akash B <akash-b@hpe.com>
Date: Fri, 29 Sep 2023 02:40:07 +0530
Subject: [PATCH 37/37] Fix ENOSPC for extended quota

When unlinking multiple files from a pool at 100% capacity, it
was possible for ENOSPC to be returned after the first few unlinks.
This issue was fixed previously by PR #13172 but then this was
again introduced by PR #13839.

This is resolved using the existing mechanism of returning ERESTART
when over quota as long as we know enough space will shortly be
available after processing the pending deferred frees.

Also, updated the existing testcase which reliably reproduced the
issue without this patch.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Dipak Ghosh <dipak.ghosh@hpe.com>
Signed-off-by: Akash B <akash-b@hpe.com>
Closes #15312
---
 module/zfs/dsl_dir.c                          | 22 +++++++------------
 .../tests/functional/no_space/enospc_rm.ksh   | 12 +++++++++-
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index bbe6a03d620f..baf970121a61 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -26,6 +26,7 @@
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
  */
 
 #include <sys/dmu.h>
@@ -1358,30 +1359,23 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
 		ext_quota = 0;
 
 	if (used_on_disk >= quota) {
+		if (retval == ENOSPC && (used_on_disk - quota) <
+		    dsl_pool_deferred_space(dd->dd_pool)) {
+			retval = SET_ERROR(ERESTART);
+		}
 		/* Quota exceeded */
 		mutex_exit(&dd->dd_lock);
 		DMU_TX_STAT_BUMP(dmu_tx_quota);
 		return (retval);
 	} else if (used_on_disk + est_inflight >= quota + ext_quota) {
-		if (est_inflight > 0 || used_on_disk < quota) {
-			retval = SET_ERROR(ERESTART);
-		} else {
-			ASSERT3U(used_on_disk, >=, quota);
-
-			if (retval == ENOSPC && (used_on_disk - quota) <
-			    dsl_pool_deferred_space(dd->dd_pool)) {
-				retval = SET_ERROR(ERESTART);
-			}
-		}
-
 		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
-		    "quota=%lluK tr=%lluK err=%d\n",
+		    "quota=%lluK tr=%lluK\n",
 		    (u_longlong_t)used_on_disk>>10,
 		    (u_longlong_t)est_inflight>>10,
-		    (u_longlong_t)quota>>10, (u_longlong_t)asize>>10, retval);
+		    (u_longlong_t)quota>>10, (u_longlong_t)asize>>10);
 		mutex_exit(&dd->dd_lock);
 		DMU_TX_STAT_BUMP(dmu_tx_quota);
-		return (retval);
+		return (SET_ERROR(ERESTART));
 	}
 
 	/* We need to up our estimated delta before dropping dd_lock */
diff --git a/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh b/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh
index d0f4ff4a08fe..d9582fbe2a76 100755
--- a/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh
+++ b/tests/zfs-tests/tests/functional/no_space/enospc_rm.ksh
@@ -17,6 +17,7 @@
 #
 # Copyright (c) 2014, 2016 by Delphix. All rights reserved.
 # Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
+# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -51,11 +52,20 @@ log_must zfs create $TESTPOOL/$TESTFS
 log_must zfs set mountpoint=$TESTDIR $TESTPOOL/$TESTFS
 log_must zfs set compression=off $TESTPOOL/$TESTFS
 
-log_note "Writing files until ENOSPC."
+log_note "Writing Big(1G) files until ENOSPC."
 log_mustnot_expect "No space left on device" fio --name=test \
     --fallocate=none --rw=write --bs=1M --size=1G --numjobs=4 \
     --sync=1 --directory=$TESTDIR/ --group_reporting
 
+log_must rm $TESTDIR/test.*
+log_must test -z "$(ls -A $TESTDIR)"
+sync_pool $TESTPOOL true
+
+log_note "Writing small(10M) files until ENOSPC."
+log_mustnot_expect "No space left on device" fio --name=test \
+    --fallocate=none --rw=write --bs=1M --size=10M --numjobs=200 \
+    --sync=1 --directory=$TESTDIR/ --group_reporting
+
 log_must rm $TESTDIR/test.*
 log_must test -z "$(ls -A $TESTDIR)"