diff --git a/appveryor.yml b/appveryor.yml new file mode 100644 index 0000000000..725f8fd87e --- /dev/null +++ b/appveryor.yml @@ -0,0 +1,12 @@ +version: 1.0.{build} +branches: + only: + - macos +image: macOS +build_script: +- sh: >- + pwd + ls -l + sh autoconf.sh + ./configure CPPFLAGS="-I/usr/local/opt/gettext/include -I/usr/local/opt/openssl@1.1/include" LDFLAGS="-L/usr/local/opt/gettext/lib/ -L/usr/local/opt/openssl@1.1/lib" + make diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am index 8b2d0c2002..57d24aa206 100644 --- a/cmd/zed/zed.d/Makefile.am +++ b/cmd/zed/zed.d/Makefile.am @@ -20,6 +20,8 @@ dist_zedexec_SCRIPTS = \ scrub_finish-notify.sh \ statechange-led.sh \ statechange-notify.sh \ + snapshot_mount.sh \ + snapshot_unmount.sh \ vdev_clear-led.sh \ vdev_attach-led.sh \ pool_import-led.sh \ @@ -38,6 +40,8 @@ zedconfdefaults = \ scrub_finish-notify.sh \ statechange-led.sh \ statechange-notify.sh \ + snapshot_mount.sh \ + snapshot_unmount.sh \ vdev_clear-led.sh \ vdev_attach-led.sh \ pool_import-led.sh \ diff --git a/cmd/zed/zed.d/snapshot_mount.sh b/cmd/zed/zed.d/snapshot_mount.sh new file mode 100644 index 0000000000..5cf807aa99 --- /dev/null +++ b/cmd/zed/zed.d/snapshot_mount.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# Helper to mount and unmount snapshots when asked to by kernel. +# +# Mostly used in macOS. +# +set -ef + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +[ -n "${ZEVENT_SNAPSHOT_NAME}" ] || exit 1 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 2 + +if [ "${ZEVENT_SUBCLASS}" = "snapshot_mount" ]; then + action="mount" +elif [ "${ZEVENT_SUBCLASS}" = "snapshot_unmount" ]; then + action="unmount" +else + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 3 +fi + +zed_exit_if_ignoring_this_event +zed_check_cmd "${ZFS}" || exit 4 + +"${ZFS}" "${action}" "${ZEVENT_SNAPSHOT_NAME}" + +finished diff --git a/cmd/zed/zed.d/snapshot_unmount.sh b/cmd/zed/zed.d/snapshot_unmount.sh new file mode 120000 index 0000000000..9f74a29e61 --- /dev/null +++ b/cmd/zed/zed.d/snapshot_unmount.sh @@ -0,0 +1 @@ +snapshot_mount.sh \ No newline at end of file diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 2f4e075064..062bd625bf 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -766,11 +766,11 @@ zfs_mount_and_share(libzfs_handle_t *hdl, const char *dataset, zfs_type_t type) } else if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not mounted\n")); - ret = 1; + ret = 0; } else if (zfs_share(zhp) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not shared\n")); - ret = 1; + ret = 0; } zfs_commit_all_shares(); } @@ -6973,7 +6973,7 @@ share_mount(int op, int argc, char **argv) } } else { -#if defined (__APPLE__) +#if defined(__APPLE__) /* * OsX can not mount from kernel, users are expected to mount * by hand using "zfs mount dataset@snapshot". 
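The hunks above add a zedlet that reacts to snapshot_mount/snapshot_unmount events by shelling out to the zfs command, and the zfs_main.c comment notes that on macOS snapshots are mounted from userland rather than by the kernel. A minimal libzfs sketch of that userland path is shown below; the pool/snapshot name is a placeholder, and it assumes the macOS port accepts snapshot handles in zfs_mount()/zfs_unmount(), which is what the new "zfs mount dataset@snapshot" support implies.

#include <libzfs.h>

/*
 * Illustrative only: mount and later unmount a snapshot from userland,
 * roughly what "zfs mount tank/fs@backup" does on the macOS port.
 * "tank/fs@backup" is a hypothetical snapshot name.
 */
static int
mount_snapshot_example(void)
{
        libzfs_handle_t *hdl;
        zfs_handle_t *zhp;
        int err;

        if ((hdl = libzfs_init()) == NULL)
                return (1);

        zhp = zfs_open(hdl, "tank/fs@backup", ZFS_TYPE_SNAPSHOT);
        if (zhp == NULL) {
                libzfs_fini(hdl);
                return (1);
        }

        err = zfs_mount(zhp, NULL, 0);          /* mount the snapshot */
        if (err == 0)
                err = zfs_unmount(zhp, NULL, 0);

        zfs_close(zhp);
        libzfs_fini(hdl);
        return (err);
}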
@@ -6997,8 +6997,8 @@ share_mount(int op, int argc, char **argv) } else { - ret = share_mount_one(zhp, op, flags, NULL, B_TRUE, - options); + ret = share_mount_one(zhp, op, flags, NULL, + B_TRUE, options); } zfs_close(zhp); @@ -7400,7 +7400,7 @@ unshare_unmount(int op, int argc, char **argv) return (unshare_unmount_path(op, argv[0], flags, B_FALSE)); -#if defined (__APPLE__) +#if defined(__APPLE__) /* Temporarily, allow mounting snapshots on OS X */ if ((zhp = zfs_open(g_zfs, argv[0], diff --git a/cmd/zpool/os/macos/zpool_vdev_os.c b/cmd/zpool/os/macos/zpool_vdev_os.c new file mode 100644 index 0000000000..b95e4deea6 --- /dev/null +++ b/cmd/zpool/os/macos/zpool_vdev_os.c @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zpool_util.h" +#include + +#include +#include +#include +#include +#include + +boolean_t +check_sector_size_database(char *path, int *sector_size) +{ + return (B_FALSE); +} + +void +zpool_vdev_enable_file(struct stat64 *statbuf, boolean_t *wholedisk) +{ + if (S_ISCHR(statbuf->st_mode)) { + statbuf->st_mode &= ~S_IFCHR; + statbuf->st_mode |= S_IFBLK; + *wholedisk = B_FALSE; + } +} + +int +check_device(const char *name, boolean_t force, + boolean_t isspare, boolean_t iswholedisk) +{ + char path[MAXPATHLEN]; + + if (strncmp(name, _PATH_DEV, sizeof (_PATH_DEV) - 1) != 0) + snprintf(path, sizeof (path), "%s%s", _PATH_DEV, name); + else + strlcpy(path, name, sizeof (path)); + + return (check_file(path, force, isspare)); +} diff --git a/configure.ac b/configure.ac index 6e5a333c30..db5cb79269 100644 --- a/configure.ac +++ b/configure.ac @@ -139,6 +139,8 @@ AC_CONFIG_FILES([ include/os/macos/spl/Makefile include/os/macos/spl/rpc/Makefile include/os/macos/spl/sys/Makefile + include/os/macos/zfs/Makefile + include/os/macos/zfs/sys/Makefile include/sys/Makefile include/sys/crypto/Makefile include/sys/fm/Makefile @@ -161,6 +163,12 @@ AC_CONFIG_FILES([ lib/libspl/include/os/freebsd/sys/Makefile lib/libspl/include/os/linux/Makefile lib/libspl/include/os/linux/sys/Makefile + lib/libspl/include/os/macos/Makefile + lib/libspl/include/os/macos/ia32/Makefile + lib/libspl/include/os/macos/ia32/sys/Makefile + lib/libspl/include/os/macos/mach/Makefile + lib/libspl/include/os/macos/rpc/Makefile + lib/libspl/include/os/macos/sys/Makefile lib/libspl/include/rpc/Makefile lib/libspl/include/sys/Makefile lib/libspl/include/sys/dktp/Makefile diff --git a/include/os/macos/Makefile.am b/include/os/macos/Makefile.am new file mode 100644 index 0000000000..a9564c3e3c --- /dev/null +++ b/include/os/macos/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = spl zfs \ No newline at end of file diff --git 
a/include/os/macos/spl/Makefile.am b/include/os/macos/spl/Makefile.am new file mode 100644 index 0000000000..75cad0836e --- /dev/null +++ b/include/os/macos/spl/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = sys rpc diff --git a/include/os/macos/spl/ia32/sys/asm_linkage.h b/include/os/macos/spl/ia32/sys/asm_linkage.h new file mode 100644 index 0000000000..0009705ad6 --- /dev/null +++ b/include/os/macos/spl/ia32/sys/asm_linkage.h @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _IA32_SYS_ASM_LINKAGE_H +#define _IA32_SYS_ASM_LINKAGE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _ASM /* The remainder of this file is only for assembly files */ + +/* + * make annoying differences in assembler syntax go away + */ + +/* + * D16 and A16 are used to insert instructions prefixes; the + * macros help the assembler code be slightly more portable. + */ +#if !defined(__GNUC_AS__) +/* + * /usr/ccs/bin/as prefixes are parsed as separate instructions + */ +#define D16 data16; +#define A16 addr16; + +/* + * (There are some weird constructs in constant expressions) + */ +#define _CONST(const) [const] +#define _BITNOT(const) -1!_CONST(const) +#define _MUL(a, b) _CONST(a \* b) + +#else +/* + * Why not use the 'data16' and 'addr16' prefixes .. well, the + * assembler doesn't quite believe in real mode, and thus argues with + * us about what we're trying to do. + */ +#define D16 .byte 0x66; +#define A16 .byte 0x67; + +#define _CONST(const) (const) +#define _BITNOT(const) ~_CONST(const) +#define _MUL(a, b) _CONST(a * b) + +#endif + +/* + * C pointers are different sizes between i386 and amd64. + * These constants can be used to compute offsets into pointer arrays. + */ +#if defined(__amd64) +#define CLONGSHIFT 3 +#define CLONGSIZE 8 +#define CLONGMASK 7 +#elif defined(__i386) +#define CLONGSHIFT 2 +#define CLONGSIZE 4 +#define CLONGMASK 3 +#endif + +/* + * Since we know we're either ILP32 or LP64 .. 
+ */ +#define CPTRSHIFT CLONGSHIFT +#define CPTRSIZE CLONGSIZE +#define CPTRMASK CLONGMASK + +#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) +#error "inconsistent shift constants" +#endif + +#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) +#error "inconsistent mask constants" +#endif + +#define ASM_ENTRY_ALIGN 4, 0x90 + +/* + * SSE register alignment and save areas + */ + +#define XMM_SIZE 16 +#define XMM_ALIGN 16 +#define XMM_ALIGN_LOG 4, 0x90 + +#if defined(__amd64) + +#define SAVE_XMM_PROLOG(sreg, nreg) \ + subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \ + movq %rsp, sreg + +#define RSTOR_XMM_EPILOG(sreg, nreg) \ + addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp + +#elif defined(__i386) + +#define SAVE_XMM_PROLOG(sreg, nreg) \ + subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \ + movl %esp, sreg; \ + addl $XMM_ALIGN, sreg; \ + andl $_BITNOT(XMM_ALIGN-1), sreg + +#define RSTOR_XMM_EPILOG(sreg, nreg) \ + addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; + +#endif /* __i386 */ + +/* + * profiling causes definitions of the MCOUNT and RTMCOUNT + * particular to the type + */ +#ifdef GPROF + +#define MCOUNT(x) \ + pushl %ebp; \ + movl %esp, %ebp; \ + call _mcount; \ + popl %ebp + +#endif /* GPROF */ + +#ifdef PROF + +#define MCOUNT(x) \ +/* CSTYLED */ \ + .lcomm .L_/**/x/**/1, 4, 4; \ + pushl %ebp; \ + movl %esp, %ebp; \ +/* CSTYLED */ \ + movl $.L_/**/x/**/1, %edx; \ + call _mcount; \ + popl %ebp + +#endif /* PROF */ + +/* + * if we are not profiling, MCOUNT should be defined to nothing + */ +#if !defined(PROF) && !defined(GPROF) +#define MCOUNT(x) +#endif /* !defined(PROF) && !defined(GPROF) */ + +#define RTMCOUNT(x) MCOUNT(x) + +/* + * Macro to define weak symbol aliases. These are similar to the ANSI-C + * #pragma weak name = _name + * except a compiler can determine type. The assembler must be told. Hence, + * the second parameter must be the type of the symbol (i.e.: function,...) + */ +#define ANSI_PRAGMA_WEAK(sym, stype) \ + .weak sym; \ +/* CSTYLED */ \ +sym = _/**/sym + +/* + * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in: + * #pragma weak sym1 = sym2 + */ +#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \ + .weak sym1; \ +sym1 = sym2 + +/* + * ENTRY provides the standard procedure entry code and an easy way to + * insert the calls to mcount for profiling. ENTRY_NP is identical, but + * never calls mcount. + */ +#define ENTRY(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl _##x; \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: MCOUNT(x) + +#define ENTRY_NP(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: + +#define RTENTRY(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: RTMCOUNT(x) + +/* + * ENTRY2 is identical to ENTRY but provides two labels for the entry point. + */ +#define ENTRY2(x, y) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x, y; \ +/* CSTYLED */ \ +x:; \ +y: MCOUNT(x) + +#define ENTRY_NP2(x, y) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x, y; \ +/* CSTYLED */ \ +x:; \ +y: + + +/* + * ALTENTRY provides for additional entry points. + */ +#define ALTENTRY(x) \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: + +/* + * DGDEF and DGDEF2 provide global data declarations. + * + * DGDEF provides a word aligned word of storage. + * + * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This + * implies this macro is best used for byte arrays. + * + * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. 
+ */ +#define DGDEF2(name, sz) \ + .data; \ + .globl name; \ +name: + +#define DGDEF3(name, sz, algn) \ + .data; \ + .align algn; \ + .globl name; \ +name: + +#define DGDEF(name) DGDEF3(name, 4, 4) + +/* + * SET_SIZE trails a function and set the size for the ELF symbol table. + */ +#define SET_SIZE(x) + +/* + * NWORD provides native word value. + */ +#if defined(__amd64) + +/*CSTYLED*/ +#define NWORD quad + +#elif defined(__i386) + +#define NWORD long + +#endif /* __i386 */ + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/include/os/macos/spl/libkern/libkern.h b/include/os/macos/spl/libkern/libkern.h new file mode 100644 index 0000000000..5d4fa410b7 --- /dev/null +++ b/include/os/macos/spl/libkern/libkern.h @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2020 Jorgen Lundman + * + */ + +#ifndef _SPL_LIBKERN_H +#define _SPL_LIBKERN_H + +/* + * We wrap this header to handle that copyinstr()'s final argument is + * mandatory on OSX. Wrap it to call our ddi_copyinstr to make it optional. + */ +#include_next +#undef copyinstr +#define copyinstr(U, K, L, D) ddi_copyinstr((U), (K), (L), (D)) + +#endif diff --git a/include/os/macos/spl/linux/init.h b/include/os/macos/spl/linux/init.h new file mode 100644 index 0000000000..4ab1523c16 --- /dev/null +++ b/include/os/macos/spl/linux/init.h @@ -0,0 +1,26 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LINUX_INIT_H +#define _LINUX_INIT_H + + +#endif diff --git a/include/os/macos/spl/linux/kernel.h b/include/os/macos/spl/linux/kernel.h new file mode 100644 index 0000000000..73a2b2eaad --- /dev/null +++ b/include/os/macos/spl/linux/kernel.h @@ -0,0 +1,25 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LINUX_KERNEL_H +#define _LINUX_KERNEL_H + +#endif diff --git a/include/os/macos/spl/linux/module.h b/include/os/macos/spl/linux/module.h new file mode 100644 index 0000000000..264d6c058d --- /dev/null +++ b/include/os/macos/spl/linux/module.h @@ -0,0 +1,28 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LINUX_MODULE_H +#define _LINUX_MODULE_H + +#include +#include + +#endif diff --git a/include/os/macos/spl/rpc/Makefile.am b/include/os/macos/spl/rpc/Makefile.am new file mode 100644 index 0000000000..770d26812e --- /dev/null +++ b/include/os/macos/spl/rpc/Makefile.am @@ -0,0 +1,3 @@ +KERNEL_H = \ + $(top_srcdir)/include/os/macos/spl/rpc/types.h \ + $(top_srcdir)/include/os/macos/spl/rpc/xdr.h diff --git a/include/os/macos/spl/rpc/types.h b/include/os/macos/spl/rpc/types.h new file mode 100644 index 0000000000..e089e0ed8c --- /dev/null +++ b/include/os/macos/spl/rpc/types.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ +#ifndef _SPL_RPC_TYPES_H +#define _SPL_RPC_TYPES_H + +typedef int bool_t; + +#endif /* SPL_RPC_TYPES_H */ diff --git a/include/os/macos/spl/rpc/xdr.h b/include/os/macos/spl/rpc/xdr.h new file mode 100644 index 0000000000..7b8074b05c --- /dev/null +++ b/include/os/macos/spl/rpc/xdr.h @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright (c) 1989, 2011, Oracle and/or its affiliates. All rights reserved. + */ +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ +/* + * Portions of this source code were derived from Berkeley + * 4.3 BSD under license from the Regents of the University of + * California. + */ + +/* + * xdr.h, External Data Representation Serialization Routines. + * + */ + +#ifndef _SPL_RPC_XDR_H +#define _SPL_RPC_XDR_H + + +#include +#include + +/* + * XDR enums and types. + */ +enum xdr_op { + XDR_ENCODE, + XDR_DECODE +}; + +struct xdr_ops; + +typedef struct { + struct xdr_ops *x_ops; /* Also used to let caller know if */ + /* xdrmem_create() succeeds (sigh..) */ + caddr_t x_addr; /* Current buffer addr */ + caddr_t x_addr_end; /* End of the buffer */ + enum xdr_op x_op; /* Stream direction */ +} XDR; + +typedef bool_t (*xdrproc_t)(XDR *xdrs, void *ptr); + +struct xdr_ops { + bool_t (*xdr_control)(XDR *, int, void *); + + bool_t (*xdr_char)(XDR *, char *); + bool_t (*xdr_u_short)(XDR *, unsigned short *); + bool_t (*xdr_u_int)(XDR *, unsigned *); + bool_t (*xdr_u_longlong_t)(XDR *, u_longlong_t *); + + bool_t (*xdr_opaque)(XDR *, caddr_t, const uint_t); + bool_t (*xdr_string)(XDR *, char **, const uint_t); + bool_t (*xdr_array)(XDR *, caddr_t *, uint_t *, const uint_t, + const uint_t, const xdrproc_t); +}; + +/* + * XDR control operator. + */ +#define XDR_GET_BYTES_AVAIL 1 + +struct xdr_bytesrec { + bool_t xc_is_last_record; + size_t xc_num_avail; +}; + +/* + * XDR functions. + */ +void xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size, + const enum xdr_op op); +#define xdr_destroy(xdrs) ((void) 0) + +#define xdr_control(xdrs, req, info) \ + (xdrs)->x_ops->xdr_control((xdrs), (req), (info)) + +/* + * For precaution, the following are defined as static inlines instead of macros + * to get some amount of type safety. + * + * Also, macros wouldn't work in the case where typecasting is done, because it + * must be possible to reference the functions' addresses by these names. 
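The comment above explains why the xdr_* entry points are static inline functions rather than macros: callers get some type checking, and each function still has an address that can be stored in an xdrproc_t. A hedged sketch of how kernel code might drive this interface follows; the buffer and values are hypothetical, and only the declarations shown in this header are assumed.

/*
 * Illustrative only: encode two values into a buffer, then decode them
 * back, using the XDR stream and inline wrappers declared above.
 */
static int
xdr_roundtrip_example(void)
{
        char buf[64];
        XDR xw, xr;
        unsigned in_val = 42, out_val = 0;
        u_longlong_t in_big = 1ULL << 40, out_big = 0;

        xdrmem_create(&xw, buf, sizeof (buf), XDR_ENCODE);
        if (!xdr_u_int(&xw, &in_val) || !xdr_u_longlong_t(&xw, &in_big))
                return (-1);

        xdrmem_create(&xr, buf, sizeof (buf), XDR_DECODE);
        if (!xdr_u_int(&xr, &out_val) || !xdr_u_longlong_t(&xr, &out_big))
                return (-1);

        /* Because these are real functions, their addresses can be taken. */
        xdrproc_t proc = (xdrproc_t)xdr_u_int;
        (void) proc;

        return ((in_val == out_val && in_big == out_big) ? 0 : -1);
}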
+ */ +static inline bool_t +xdr_char(XDR *xdrs, char *cp) +{ + return (xdrs->x_ops->xdr_char(xdrs, cp)); +} + +static inline bool_t +xdr_u_short(XDR *xdrs, unsigned short *usp) +{ + return (xdrs->x_ops->xdr_u_short(xdrs, usp)); +} + +static inline bool_t +xdr_short(XDR *xdrs, short *sp) +{ + return (xdrs->x_ops->xdr_u_short(xdrs, (unsigned short *) sp)); +} + +static inline bool_t +xdr_u_int(XDR *xdrs, unsigned *up) +{ + return (xdrs->x_ops->xdr_u_int(xdrs, up)); +} + +static inline bool_t +xdr_int(XDR *xdrs, int *ip) +{ + return (xdrs->x_ops->xdr_u_int(xdrs, (unsigned *)ip)); +} + +static inline bool_t +xdr_u_longlong_t(XDR *xdrs, u_longlong_t *ullp) +{ + return (xdrs->x_ops->xdr_u_longlong_t(xdrs, ullp)); +} + +static inline bool_t +xdr_longlong_t(XDR *xdrs, longlong_t *llp) +{ + return (xdrs->x_ops->xdr_u_longlong_t(xdrs, (u_longlong_t *)llp)); +} + +/* + * Fixed-length opaque data. + */ +static inline bool_t +xdr_opaque(XDR *xdrs, caddr_t cp, const uint_t cnt) +{ + return (xdrs->x_ops->xdr_opaque(xdrs, cp, cnt)); +} + +/* + * Variable-length string. + * The *sp buffer must have (maxsize + 1) bytes. + */ +static inline bool_t +xdr_string(XDR *xdrs, char **sp, const uint_t maxsize) +{ + return (xdrs->x_ops->xdr_string(xdrs, sp, maxsize)); +} + +/* + * Variable-length arrays. + */ +static inline bool_t xdr_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, + const uint_t maxsize, const uint_t elsize, const xdrproc_t elproc) +{ + return (xdrs->x_ops->xdr_array(xdrs, arrp, sizep, maxsize, elsize, + elproc)); +} + +#endif /* SPL_RPC_XDR_H */ diff --git a/include/os/macos/spl/sys/Makefile.am b/include/os/macos/spl/sys/Makefile.am new file mode 100644 index 0000000000..5703774569 --- /dev/null +++ b/include/os/macos/spl/sys/Makefile.am @@ -0,0 +1,49 @@ +KERNEL_H = \ + $(top_srcdir)/include/os/macos/spl/sys/atomic.h \ + $(top_srcdir)/include/os/macos/spl/sys/byteorder.h \ + $(top_srcdir)/include/os/macos/spl/sys/callb.h \ + $(top_srcdir)/include/os/macos/spl/sys/cmn_err.h \ + $(top_srcdir)/include/os/macos/spl/sys/condvar.h \ + $(top_srcdir)/include/os/macos/spl/sys/console.h \ + $(top_srcdir)/include/os/macos/spl/sys/cred.h \ + $(top_srcdir)/include/os/macos/spl/sys/debug.h \ + $(top_srcdir)/include/os/macos/spl/sys/errno.h \ + $(top_srcdir)/include/os/macos/spl/sys/fcntl.h \ + $(top_srcdir)/include/os/macos/spl/sys/file.h \ + $(top_srcdir)/include/os/macos/spl/sys/inttypes.h \ + $(top_srcdir)/include/os/macos/spl/sys/isa_defs.h \ + $(top_srcdir)/include/os/macos/spl/sys/kmem.h \ + $(top_srcdir)/include/os/macos/spl/sys/kmem_impl.h \ + $(top_srcdir)/include/os/macos/spl/sys/kstat.h \ + $(top_srcdir)/include/os/macos/spl/sys/list.h \ + $(top_srcdir)/include/os/macos/spl/sys/mod_os.h \ + $(top_srcdir)/include/os/macos/spl/sys/mutex.h \ + $(top_srcdir)/include/os/macos/spl/sys/param.h \ + $(top_srcdir)/include/os/macos/spl/sys/policy.h \ + $(top_srcdir)/include/os/macos/spl/sys/priv.h \ + $(top_srcdir)/include/os/macos/spl/sys/proc.h \ + $(top_srcdir)/include/os/macos/spl/sys/processor.h \ + $(top_srcdir)/include/os/macos/spl/sys/random.h \ + $(top_srcdir)/include/os/macos/spl/sys/rwlock.h \ + $(top_srcdir)/include/os/macos/spl/sys/seg_kmem.h \ + $(top_srcdir)/include/os/macos/spl/sys/signal.h \ + $(top_srcdir)/include/os/macos/spl/sys/stropts.h \ + $(top_srcdir)/include/os/macos/spl/sys/sunddi.h \ + $(top_srcdir)/include/os/macos/spl/sys/sysmacros.h \ + $(top_srcdir)/include/os/macos/spl/sys/systeminfo.h \ + $(top_srcdir)/include/os/macos/spl/sys/systm.h \ + 
$(top_srcdir)/include/os/macos/spl/sys/taskq.h \ + $(top_srcdir)/include/os/macos/spl/sys/taskq_impl.h \ + $(top_srcdir)/include/os/macos/spl/sys/thread.h \ + $(top_srcdir)/include/os/macos/spl/sys/time.h \ + $(top_srcdir)/include/os/macos/spl/sys/timer.h \ + $(top_srcdir)/include/os/macos/spl/sys/tsd.h \ + $(top_srcdir)/include/os/macos/spl/sys/types.h \ + $(top_srcdir)/include/os/macos/spl/sys/utsname.h \ + $(top_srcdir)/include/os/macos/spl/sys/varargs.h \ + $(top_srcdir)/include/os/macos/spl/sys/vfs.h \ + $(top_srcdir)/include/os/macos/spl/sys/vmem.h \ + $(top_srcdir)/include/os/macos/spl/sys/vmem_impl.h \ + $(top_srcdir)/include/os/macos/spl/sys/vmsystm.h \ + $(top_srcdir)/include/os/macos/spl/sys/vnode.h \ + $(top_srcdir)/include/os/macos/spl/sys/zone.h diff --git a/include/os/macos/spl/sys/acl.h b/include/os/macos/spl/sys/acl.h new file mode 100644 index 0000000000..840ba7f43c --- /dev/null +++ b/include/os/macos/spl/sys/acl.h @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SPL_ACL_H +#define _SPL_ACL_H + +#include + +typedef struct ace { + uid_t a_who; + uint32_t a_access_mask; + uint16_t a_flags; + uint16_t a_type; +} ace_t; + +typedef struct ace_object { + uid_t a_who; /* uid or gid */ + uint32_t a_access_mask; /* read,write,... 
*/ + uint16_t a_flags; /* see below */ + uint16_t a_type; /* allow or deny */ + uint8_t a_obj_type[16]; /* obj type */ + uint8_t a_inherit_obj_type[16]; /* inherit obj */ +} ace_object_t; + +#define MAX_ACL_ENTRIES 1024 + +#define ACE_READ_DATA 0x00000001 +#define ACE_LIST_DIRECTORY 0x00000001 +#define ACE_WRITE_DATA 0x00000002 +#define ACE_ADD_FILE 0x00000002 +#define ACE_APPEND_DATA 0x00000004 +#define ACE_ADD_SUBDIRECTORY 0x00000004 +#define ACE_READ_NAMED_ATTRS 0x00000008 +#define ACE_WRITE_NAMED_ATTRS 0x00000010 +#define ACE_EXECUTE 0x00000020 +#define ACE_DELETE_CHILD 0x00000040 +#define ACE_READ_ATTRIBUTES 0x00000080 +#define ACE_WRITE_ATTRIBUTES 0x00000100 +#define ACE_DELETE 0x00010000 +#define ACE_READ_ACL 0x00020000 +#define ACE_WRITE_ACL 0x00040000 +#define ACE_WRITE_OWNER 0x00080000 +#define ACE_SYNCHRONIZE 0x00100000 + +#define ACE_FILE_INHERIT_ACE 0x0001 +#define ACE_DIRECTORY_INHERIT_ACE 0x0002 +#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004 +#define ACE_INHERIT_ONLY_ACE 0x0008 +#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010 +#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020 +#define ACE_IDENTIFIER_GROUP 0x0040 +#define ACE_INHERITED_ACE 0x0080 +#define ACE_OWNER 0x1000 +#define ACE_GROUP 0x2000 +#define ACE_EVERYONE 0x4000 + +#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000 +#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001 +#define ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002 +#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003 + +#define ACL_AUTO_INHERIT 0x0001 +#define ACL_PROTECTED 0x0002 +#define ACL_DEFAULTED 0x0004 +#define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED|ACL_DEFAULTED) + +#define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 +#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 +#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 +#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 +#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 +#define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 +#define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A +#define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B +#define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C +#define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D +#define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E +#define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F +#define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 + +#define ACE_ALL_TYPES 0x001F + +#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE|ACE_IDENTIFIER_GROUP) + +#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS|\ + ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES|\ + ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \ + ACE_WRITE_OWNER|ACE_SYNCHRONIZE) + +#define VSA_ACE 0x0010 +#define VSA_ACECNT 0x0020 +#define VSA_ACE_ALLTYPES 0x0040 +#define VSA_ACE_ACLFLAGS 0x0080 + +typedef struct trivial_acl { + uint32_t allow0; /* allow mask for bits only in owner */ + uint32_t deny1; /* deny mask for bits not in owner */ + uint32_t deny2; /* deny mask for bits not in group */ + uint32_t owner; /* allow mask matching mode */ + uint32_t group; /* allow mask matching mode */ + uint32_t everyone; /* allow mask matching mode */ +} trivial_acl_t; + +#endif /* _SPL_ACL_H */ diff --git a/include/os/macos/spl/sys/atomic.h b/include/os/macos/spl/sys/atomic.h new file mode 100644 index 0000000000..0fcc072680 --- /dev/null +++ b/include/os/macos/spl/sys/atomic.h @@ -0,0 +1,288 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution 
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * OSX Atomic functions using clang builtins. + * + * Jorgen Lundman + * + */ + +#ifndef _SPL_ATOMIC_H +#define _SPL_ATOMIC_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Increment target + */ +static inline void +atomic_inc_8(volatile uint8_t *target) +{ + __sync_fetch_and_add(target, 1); +} + +static inline void +atomic_inc_16(volatile uint16_t *target) +{ + __sync_fetch_and_add(target, 1); +} + +static inline void +atomic_inc_32(volatile uint32_t *target) +{ + __sync_fetch_and_add(target, 1); +} + +static inline void +atomic_inc_64(volatile uint64_t *target) +{ + __sync_fetch_and_add(target, 1); +} + +static inline int32_t +atomic_inc_32_nv(volatile uint32_t *target) +{ + return (__sync_add_and_fetch(target, 1)); +} + +static inline int64_t +atomic_inc_64_nv(volatile uint64_t *target) +{ + return (__sync_add_and_fetch(target, 1)); +} + + + +/* + * Decrement target + */ +static inline void +atomic_dec_8(volatile uint8_t *target) +{ + __sync_fetch_and_sub(target, 1); +} + +static inline void +atomic_dec_16(volatile uint16_t *target) +{ + __sync_fetch_and_sub(target, 1); +} + +static inline void +atomic_dec_32(volatile uint32_t *target) +{ + __sync_fetch_and_sub(target, 1); +} + +static inline void +atomic_dec_64(volatile uint64_t *target) +{ + __sync_fetch_and_sub(target, 1); +} + +static inline int32_t +atomic_dec_32_nv(volatile uint32_t *target) +{ + return (__sync_sub_and_fetch(target, 1)); +} + +static inline int64_t +atomic_dec_64_nv(volatile uint64_t *target) +{ + return (__sync_sub_and_fetch(target, 1)); +} + +/* + * Add delta to target + */ +static inline void +atomic_add_8(volatile uint8_t *target, int8_t delta) +{ + __sync_add_and_fetch(target, delta); +} + +static inline void +atomic_add_16(volatile uint16_t *target, int16_t delta) +{ + __sync_add_and_fetch(target, delta); +} + +static inline void +atomic_add_32(volatile uint32_t *target, int32_t delta) +{ + __sync_add_and_fetch(target, delta); +} + +static inline uint32_t +atomic_add_32_nv(volatile uint32_t *target, int32_t delta) +{ + return (__sync_add_and_fetch(target, delta)); +} + +static inline void +atomic_add_64(volatile uint64_t *target, int64_t delta) +{ + __sync_add_and_fetch(target, delta); +} + +static inline uint64_t +atomic_add_64_nv(volatile uint64_t *target, int64_t delta) +{ + return (__sync_add_and_fetch(target, delta)); +} + + +/* + * Subtract delta to target + */ +static inline void +atomic_sub_8(volatile uint8_t *target, int8_t delta) +{ + __sync_sub_and_fetch(target, delta); +} + +static inline void +atomic_sub_16(volatile uint16_t *target, int16_t delta) +{ + __sync_sub_and_fetch(target, delta); +} + +static inline void +atomic_sub_32(volatile uint32_t *target, int32_t delta) +{ + __sync_sub_and_fetch(target, delta); +} + +static inline void 
+atomic_sub_64(volatile uint64_t *target, int64_t delta) +{ + __sync_sub_and_fetch(target, delta); +} + +static inline uint64_t +atomic_sub_64_nv(volatile uint64_t *target, int64_t delta) +{ + return (__sync_sub_and_fetch(target, delta)); +} + +/* + * logical OR bits with target + */ +static inline void +atomic_or_8(volatile uint8_t *target, uint8_t mask) +{ + __sync_or_and_fetch(target, mask); +} + +static inline void +atomic_or_16(volatile uint16_t *target, uint16_t mask) +{ + __sync_or_and_fetch(target, mask); +} + +static inline void +atomic_or_32(volatile uint32_t *target, uint32_t mask) +{ + __sync_or_and_fetch(target, mask); +} + +/* + * logical AND bits with target + */ +static inline void +atomic_and_8(volatile uint8_t *target, uint8_t mask) +{ + __sync_and_and_fetch(target, mask); +} + +static inline void +atomic_and_16(volatile uint16_t *target, uint16_t mask) +{ + __sync_and_and_fetch(target, mask); +} + +static inline void +atomic_and_32(volatile uint32_t *target, uint32_t mask) +{ + __sync_and_and_fetch(target, mask); +} + +/* + * Compare And Set + * if *arg1 == arg2, then set *arg1 = arg3; return old value. + */ +static inline uint8_t +atomic_cas_8(volatile uint8_t *_target, uint8_t _cmp, uint8_t _new) +{ + return (__sync_val_compare_and_swap(_target, _cmp, _new)); +} + +static inline uint16_t +atomic_cas_16(volatile uint16_t *_target, uint16_t _cmp, uint16_t _new) +{ + return (__sync_val_compare_and_swap(_target, _cmp, _new)); +} + +static inline uint32_t +atomic_cas_32(volatile uint32_t *_target, uint32_t _cmp, uint32_t _new) +{ + return (__sync_val_compare_and_swap(_target, _cmp, _new)); +} + +static inline uint64_t +atomic_cas_64(volatile uint64_t *_target, uint64_t _cmp, uint64_t _new) +{ + return (__sync_val_compare_and_swap(_target, _cmp, _new)); +} + +static inline uint32_t +atomic_swap_32(volatile uint32_t *_target, uint32_t _new) +{ + return (__sync_lock_test_and_set(_target, _new)); +} + +static inline uint64_t +atomic_swap_64(volatile uint64_t *_target, uint64_t _new) +{ + return (__sync_lock_test_and_set(_target, _new)); +} + +extern void *atomic_cas_ptr(volatile void *_target, void *_cmp, void *_new); + +static inline void +membar_producer(void) +{ + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _SPL_ATOMIC_H */ diff --git a/include/os/macos/spl/sys/byteorder.h b/include/os/macos/spl/sys/byteorder.h new file mode 100644 index 0000000000..1a97baefc0 --- /dev/null +++ b/include/os/macos/spl/sys/byteorder.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
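The atomic.h shim above maps the Solaris atomic_*() API directly onto the compiler's __sync builtins. The difference between the void forms (fetch-and-add, old value discarded), the *_nv forms (add-and-fetch, new value returned) and the old-value return of atomic_cas_*() comes straight from those builtins, as this small standalone userland sketch shows (plain C, using the same builtins the wrappers rely on).

#include <stdio.h>
#include <stdint.h>

/* Demonstrates the builtin semantics the SPL wrappers above rely on. */
int
main(void)
{
        volatile uint64_t v = 5;

        /* atomic_inc_64(): fetch-and-add, old value is discarded. */
        (void) __sync_fetch_and_add(&v, 1);             /* v == 6 */

        /* atomic_inc_64_nv(): add-and-fetch, returns the new value. */
        uint64_t nv = __sync_add_and_fetch(&v, 1);      /* nv == 7 */

        /* atomic_cas_64(): swap only if *target == cmp, return old value. */
        uint64_t old = __sync_val_compare_and_swap(&v, 7, 100);

        printf("new=%llu old=%llu final=%llu\n",
            (unsigned long long)nv, (unsigned long long)old,
            (unsigned long long)v);                     /* new=7 old=7 final=100 */
        return (0);
}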
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_BYTEORDER_H +#define _SPL_BYTEORDER_H + +#include +#include + +#define LE_16(x) OSSwapHostToLittleInt16(x) +#define LE_32(x) OSSwapHostToLittleInt32(x) +#define LE_64(x) OSSwapHostToLittleInt64(x) +#define BE_16(x) OSSwapHostToBigInt16(x) +#define BE_32(x) OSSwapHostToBigInt32(x) +#define BE_64(x) OSSwapHostToBigInt64(x) + +#define BE_IN8(xa) \ + *((uint8_t *)(xa)) + +#define BE_IN16(xa) \ + (((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1)) + +#define BE_IN32(xa) \ + (((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2)) + + +/* 10.8 is lacking in htonll */ +#if !defined(htonll) +#define htonll(x) __DARWIN_OSSwapInt64(x) +#endif +#if !defined(ntohll) +#define ntohll(x) __DARWIN_OSSwapInt64(x) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define _LITTLE_ENDIAN +#endif + +#ifdef __BIG_ENDIAN__ +#define _BIG_ENDIAN +#endif + +#endif /* SPL_BYTEORDER_H */ diff --git a/include/os/macos/spl/sys/callb.h b/include/os/macos/spl/sys/callb.h new file mode 100644 index 0000000000..3d86b9d41c --- /dev/null +++ b/include/os/macos/spl/sys/callb.h @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SPL_CALLB_H +#define _SPL_CALLB_H + +#include + +#define CALLB_CPR_ASSERT(cp) ASSERT(MUTEX_HELD((cp)->cc_lockp)); + +typedef struct callb_cpr { + kmutex_t *cc_lockp; +} callb_cpr_t; + +#define CALLB_CPR_INIT(cp, lockp, func, name) { \ + (cp)->cc_lockp = lockp; \ +} + +#define CALLB_CPR_SAFE_BEGIN(cp) { \ + CALLB_CPR_ASSERT(cp); \ +} + +#define CALLB_CPR_SAFE_END(cp, lockp) { \ + CALLB_CPR_ASSERT(cp); \ +} + +#define CALLB_CPR_EXIT(cp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ + mutex_exit((cp)->cc_lockp); \ +} + + +#define CALLOUT_FLAG_ROUNDUP 0x1 +#define CALLOUT_FLAG_ABSOLUTE 0x2 +#define CALLOUT_FLAG_HRESTIME 0x4 +#define CALLOUT_FLAG_32BIT 0x8 + +/* Move me to more correct "sys/callo.h" file when convenient. 
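byteorder.h above expresses the Solaris LE_*/BE_* conversions in terms of the OSSwapHostTo*Int* primitives and builds BE_IN16/BE_IN32 by shifting individual bytes. A short hedged sketch of how a consumer typically uses them, for example when reading a big-endian on-disk field (the buffer contents are hypothetical):

/*
 * Illustrative only: read big-endian fields from a raw buffer and
 * convert a host value for a little-endian on-disk format.
 */
static void
byteorder_example(void)
{
        uint8_t raw[4] = { 0x12, 0x34, 0x56, 0x78 };

        uint16_t hi  = BE_IN16(raw);            /* 0x1234 */
        uint32_t all = BE_IN32(raw);            /* 0x12345678 */

        uint64_t host = 0xCAFEULL;
        uint64_t ondisk_le = LE_64(host);       /* swapped only on big-endian hosts */

        (void) hi; (void) all; (void) ondisk_le;
}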
*/ +#define CALLOUT_NORMAL 1 +typedef uint64_t callout_id_t; +callout_id_t timeout_generic(int, void (*)(void *), void *, hrtime_t, + hrtime_t, int); + +#endif /* _SPL_CALLB_H */ diff --git a/include/os/macos/spl/sys/cmn_err.h b/include/os/macos/spl/sys/cmn_err.h new file mode 100644 index 0000000000..e4343a97a7 --- /dev/null +++ b/include/os/macos/spl/sys/cmn_err.h @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + */ + +#ifndef _SPL_CMN_ERR_H +#define _SPL_CMN_ERR_H + +#include +#include + +#define CE_CONT 0 /* continuation */ +#define CE_NOTE 1 /* notice */ +#define CE_WARN 2 /* warning */ +#define CE_PANIC 3 /* panic */ +#define CE_IGNORE 4 /* print nothing */ + +#ifdef _KERNEL + +extern void vcmn_err(int, const char *, __va_list); +extern void cmn_err(int, const char *, ...); + +#endif /* _KERNEL */ + +#define fm_panic panic + +#endif /* SPL_CMN_ERR_H */ diff --git a/include/os/macos/spl/sys/condvar.h b/include/os/macos/spl/sys/condvar.h new file mode 100644 index 0000000000..2955a4c581 --- /dev/null +++ b/include/os/macos/spl/sys/condvar.h @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef OSX_CONDVAR_H +#define OSX_CONDVAR_H + +#include +#include + +#define hz 10 /* frequency when using gethrtime() >> 23 for lbolt */ + +typedef enum { + CV_DEFAULT, + CV_DRIVER +} kcv_type_t; + + +struct cv { + uint64_t pad; +}; + +typedef struct cv kcondvar_t; + +void spl_cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg); +void spl_cv_destroy(kcondvar_t *cvp); +void spl_cv_signal(kcondvar_t *cvp); +void spl_cv_broadcast(kcondvar_t *cvp); +int spl_cv_wait(kcondvar_t *cvp, kmutex_t *mp, int flags, const char *msg); +int spl_cv_timedwait(kcondvar_t *, kmutex_t *, clock_t, int, const char *msg); +int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, + hrtime_t tim, hrtime_t res, int flag); + +/* + * Use these wrapper macros to obtain the CV variable + * name to make ZFS more gdb debugging friendly! + * This name shows up as a thread's wait_event string. + */ +#define cv_wait(cvp, mp) \ + (void) spl_cv_wait((cvp), (mp), PRIBIO, #cvp) + +#define cv_wait_io(cvp, mp) \ + (void) spl_cv_wait((cvp), (mp), PRIBIO, #cvp) + +#define cv_timedwait(cvp, mp, tim) \ + spl_cv_timedwait((cvp), (mp), (tim), PRIBIO, #cvp) + +#define cv_timedwait_io(cvp, mp, tim) \ + spl_cv_timedwait((cvp), (mp), (tim), PRIBIO, #cvp) + +#define cv_wait_interruptible(cvp, mp) \ + (void) spl_cv_wait((cvp), (mp), PRIBIO|PCATCH, #cvp) + +#define cv_timedwait_interruptible(cvp, mp, tim) \ + spl_cv_timedwait((cvp), (mp), (tim), PRIBIO|PCATCH, #cvp) + +/* cv_wait_sig is the correct name for cv_wait_interruptible */ +#define cv_wait_sig(cvp, mp) \ + spl_cv_wait((cvp), (mp), PRIBIO|PCATCH, #cvp) + +#define cv_wait_io_sig(cvp, mp) \ + spl_cv_wait((cvp), (mp), PRIBIO|PCATCH, #cvp) + +#define cv_timedwait_sig(cvp, mp, tim) \ + spl_cv_timedwait((cvp), (mp), (tim), PRIBIO|PCATCH, #cvp) + +#define TICK_TO_NSEC(tick) ((hrtime_t)(tick) * 1000000000 / hz) +#define cv_reltimedwait(cvp, mp, tim, type) \ + cv_timedwait_hires((cvp), (mp), TICK_TO_NSEC((tim)), 0, 0) + +#define cv_timedwait_sig_hires(cvp, mp, tim, res, flag) \ + cv_timedwait_hires(cvp, mp, tim, res, (flag)|PCATCH) + +#define cv_init spl_cv_init +#define cv_destroy spl_cv_destroy +#define cv_broadcast spl_cv_broadcast +#define cv_signal spl_cv_signal + +#endif diff --git a/include/os/macos/spl/sys/console.h b/include/os/macos/spl/sys/console.h new file mode 100644 index 0000000000..57c9622105 --- /dev/null +++ b/include/os/macos/spl/sys/console.h @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
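The condvar wrappers above deliberately pass the stringified variable name (#cvp) down to spl_cv_wait(), so a blocked thread's wait_event string names the condition variable it is sleeping on. A hedged sketch of the usual wait/signal pattern these macros appear in follows; the structure and field names are hypothetical, and mutex_enter()/mutex_exit() are assumed from the companion mutex.h listed in the Makefile.am above.

/*
 * Illustrative only: classic producer/consumer handshake using the SPL
 * kcondvar_t/kmutex_t API as wrapped above.
 */
typedef struct work_queue {
        kmutex_t        wq_lock;
        kcondvar_t      wq_cv;
        int             wq_ready;
} work_queue_t;

static void
consumer_wait(work_queue_t *wq)
{
        mutex_enter(&wq->wq_lock);
        while (!wq->wq_ready)
                cv_wait(&wq->wq_cv, &wq->wq_lock);  /* waits as "&wq->wq_cv" */
        wq->wq_ready = 0;
        mutex_exit(&wq->wq_lock);
}

static void
producer_post(work_queue_t *wq)
{
        mutex_enter(&wq->wq_lock);
        wq->wq_ready = 1;
        cv_signal(&wq->wq_cv);
        mutex_exit(&wq->wq_lock);
}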
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SPL_SYS_CONSOLE_H +#define _SPL_SYS_CONSOLE_H + +static inline void +console_vprintf(const char *fmt, va_list args) +{ + vprintf(fmt, args); +} + +static inline void +console_printf(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + console_vprintf(fmt, args); + va_end(args); +} + +#endif /* _SPL_SYS_CONSOLE_H */ diff --git a/include/os/macos/spl/sys/cred.h b/include/os/macos/spl/sys/cred.h new file mode 100644 index 0000000000..9dd1640b5b --- /dev/null +++ b/include/os/macos/spl/sys/cred.h @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_CRED_H +#define _SPL_CRED_H + +#include +#include +#include + +typedef struct ucred cred_t; + +#define kcred (cred_t *)NOCRED +#define CRED() (cred_t *)kauth_cred_get() +#define KUID_TO_SUID(x) (x) +#define KGID_TO_SGID(x) (x) + +#include + +// Older OSX API +#if !(MAC_OS_X_VERSION_MIN_REQUIRED >= 1070) +#define kauth_cred_getruid(x) (x)->cr_ruid +#define kauth_cred_getrgid(x) (x)->cr_rgid +#define kauth_cred_getsvuid(x) (x)->cr_svuid +#define kauth_cred_getsvgid(x) (x)->cr_svgid +#endif + + +extern void crhold(cred_t *cr); +extern void crfree(cred_t *cr); +extern uid_t crgetuid(const cred_t *cr); +extern uid_t crgetruid(const cred_t *cr); +extern uid_t crgetsuid(const cred_t *cr); +extern uid_t crgetfsuid(const cred_t *cr); +extern gid_t crgetgid(const cred_t *cr); +extern gid_t crgetrgid(const cred_t *cr); +extern gid_t crgetsgid(const cred_t *cr); +extern gid_t crgetfsgid(const cred_t *cr); +extern int crgetngroups(const cred_t *cr); +extern gid_t *crgetgroups(const cred_t *cr); +extern void crgetgroupsfree(gid_t *gids); +extern int spl_cred_ismember_gid(cred_t *cr, gid_t gid); + +#define crgetsid(cred, i) (NULL) + +#endif /* _SPL_CRED_H */ diff --git a/include/os/macos/spl/sys/ctype.h b/include/os/macos/spl/sys/ctype.h new file mode 100644 index 0000000000..7455487330 --- /dev/null +++ b/include/os/macos/spl/sys/ctype.h @@ -0,0 +1,27 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
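cred.h above maps cred_t onto the XNU ucred/kauth machinery and declares the usual crget*() accessors along with spl_cred_ismember_gid() for supplementary-group checks. A hedged sketch of a typical permission-style check using that API (the policy and parameter names are purely illustrative):

/*
 * Illustrative only: allow an operation for root or for members of a
 * given group, using the cred accessors declared above.
 */
static int
cred_allows_example(cred_t *cr, gid_t admin_gid)
{
        if (crgetuid(cr) == 0)
                return (1);             /* root is always allowed */

        if (crgetgid(cr) == admin_gid)
                return (1);             /* primary group matches */

        /* Fall back to the supplementary group list. */
        return (spl_cred_ismember_gid(cr, admin_gid));
}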
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_CTYPE_H +#define _SPL_CTYPE_H + +#define iscntrl(C) (uchar(C) <= 0x1f || uchar(C) == 0x7f) + +#endif diff --git a/include/os/macos/spl/sys/debug.h b/include/os/macos/spl/sys/debug.h new file mode 100644 index 0000000000..61d39d035a --- /dev/null +++ b/include/os/macos/spl/sys/debug.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2020 iXsystems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Available Solaris debug functions. All of the ASSERT() macros will be + * compiled out when NDEBUG is defined, this is the default behavior for + * the SPL. To enable assertions use the --enable-debug with configure. + * The VERIFY() functions are never compiled out and cannot be disabled. + * + * PANIC() - Panic the node and print message. + * ASSERT() - Assert X is true, if not panic. + * ASSERT3B() - Assert boolean X OP Y is true, if not panic. + * ASSERT3S() - Assert signed X OP Y is true, if not panic. + * ASSERT3U() - Assert unsigned X OP Y is true, if not panic. + * ASSERT3P() - Assert pointer X OP Y is true, if not panic. + * ASSERT0() - Assert value is zero, if not panic. + * VERIFY() - Verify X is true, if not panic. + * VERIFY3B() - Verify boolean X OP Y is true, if not panic. + * VERIFY3S() - Verify signed X OP Y is true, if not panic. + * VERIFY3U() - Verify unsigned X OP Y is true, if not panic. + * VERIFY3P() - Verify pointer X OP Y is true, if not panic. + * VERIFY0() - Verify value is zero, if not panic. + */ + +#ifndef _SPL_DEBUG_H +#define _SPL_DEBUG_H + +#include + +/* + * Common DEBUG functionality. 
+ */ +int spl_panic(const char *file, const char *func, int line, + const char *fmt, ...); +void spl_dumpstack(void); + +void spl_backtrace(char *thesignal); +int getpcstack(uintptr_t *pcstack, int pcstack_limit); +void print_symbol(uintptr_t symbol); + +#ifndef expect +#define expect(expr, value) (__builtin_expect((expr), (value))) +#endif +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) +#endif + +/* BEGIN CSTYLED */ +#define PANIC(fmt, a...) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, fmt, ## a) + +#define VERIFY(cond) \ + (void) (unlikely(!(cond)) && \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "%s", "VERIFY(" #cond ") failed\n")) + +#define VERIFY3B(LEFT, OP, RIGHT) do { \ + boolean_t _verify3_left = (boolean_t)(LEFT); \ + boolean_t _verify3_right = (boolean_t)(RIGHT); \ + if (!(_verify3_left OP _verify3_right)) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ + "failed (%d " #OP " %d)\n", \ + (boolean_t) (_verify3_left), \ + (boolean_t) (_verify3_right)); \ + } while (0) + +#define VERIFY3S(LEFT, OP, RIGHT) do { \ + int64_t _verify3_left = (int64_t)(LEFT); \ + int64_t _verify3_right = (int64_t)(RIGHT); \ + if (!(_verify3_left OP _verify3_right)) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ + "failed (%lld " #OP " %lld)\n", \ + (long long) (_verify3_left), \ + (long long) (_verify3_right)); \ + } while (0) + +#define VERIFY3U(LEFT, OP, RIGHT) do { \ + uint64_t _verify3_left = (uint64_t)(LEFT); \ + uint64_t _verify3_right = (uint64_t)(RIGHT); \ + if (!(_verify3_left OP _verify3_right)) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ + "failed (%llu " #OP " %llu)\n", \ + (unsigned long long) (_verify3_left), \ + (unsigned long long) (_verify3_right)); \ + } while (0) + +#define VERIFY3P(LEFT, OP, RIGHT) do { \ + uintptr_t _verify3_left = (uintptr_t)(LEFT); \ + uintptr_t _verify3_right = (uintptr_t)(RIGHT); \ + if (!(_verify3_left OP _verify3_right)) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ + "failed (%px " #OP " %px)\n", \ + (void *) (_verify3_left), \ + (void *) (_verify3_right)); \ + } while (0) + +#define VERIFY0(RIGHT) do { \ + int64_t _verify3_left = (int64_t)(0); \ + int64_t _verify3_right = (int64_t)(RIGHT); \ + if (!(_verify3_left == _verify3_right)) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY3(0 == " #RIGHT ") " \ + "failed (0 == %lld)\n", \ + (long long) (_verify3_right)); \ + } while (0) + +#define CTASSERT_GLOBAL(x) _CTASSERT(x, __LINE__) +#define CTASSERT(x) { _CTASSERT(x, __LINE__); } +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) \ + typedef char __attribute__ ((unused)) \ + __compile_time_assertion__ ## y[(x) ? 
1 : -1] + + + +/* + * Debugging disabled (--disable-debug) + */ +#ifdef NDEBUG + +#define ASSERT(x) ((void)0) +#define ASSERT3B(x,y,z) ((void)0) +#define ASSERT3S(x,y,z) ((void)0) +#define ASSERT3U(x,y,z) ((void)0) +#define ASSERT3P(x,y,z) ((void)0) +#define ASSERT0(x) ((void)0) +#define ASSERTV(x) ((void)0) +#define IMPLY(A, B) ((void)0) +#define EQUIV(A, B) ((void)0) + +/* + * Debugging enabled (--enable-debug) + */ +#else + +#define ASSERT3B VERIFY3B +#define ASSERT3S VERIFY3S +#define ASSERT3U VERIFY3U +#define ASSERT3P VERIFY3P +#define ASSERT0 VERIFY0 +#define ASSERT VERIFY +#define ASSERTV(X) X +#define IMPLY(A, B) \ + ((void)(((!(A)) || (B)) || \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "(" #A ") implies (" #B ")"))) +#define EQUIV(A, B) \ + ((void)((!!(A) == !!(B)) || \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "(" #A ") is equivalent to (" #B ")"))) +/* END CSTYLED */ + +#endif /* NDEBUG */ + +#endif /* SPL_DEBUG_H */ diff --git a/include/os/macos/spl/sys/disp.h b/include/os/macos/spl/sys/disp.h new file mode 100644 index 0000000000..3b1bcbb25c --- /dev/null +++ b/include/os/macos/spl/sys/disp.h @@ -0,0 +1,25 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_DISP_H +#define _SPL_DISP_H + +#endif diff --git a/include/os/macos/spl/sys/dkio.h b/include/os/macos/spl/sys/dkio.h new file mode 100644 index 0000000000..d10314b3e4 --- /dev/null +++ b/include/os/macos/spl/sys/dkio.h @@ -0,0 +1,527 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _OPENSOLARIS_SYS_DKIO_H_ +#define _OPENSOLARIS_SYS_DKIO_H_ + +#include /* Needed for NDKMAP define */ +#include /* Needed for NDKMAP define */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_SUNOS_VTOC_16) +#define NDKMAP 16 /* # of logical partitions */ +#define DK_LABEL_LOC 1 /* location of disk label */ +#elif defined(_SUNOS_VTOC_8) +#define NDKMAP 8 /* # of logical partitions */ +#define DK_LABEL_LOC 0 /* location of disk label */ +#else +#error "No VTOC format defined." +#endif + +/* + * Structures and definitions for disk io control commands + */ + +/* + * Structures used as data by ioctl calls. + */ + +#define DK_DEVLEN 16 /* device name max length, including */ + /* unit # & NULL (ie - "xyc1") */ + +/* + * Used for controller info + */ +struct dk_cinfo { + char dki_cname[DK_DEVLEN]; /* controller name (no unit #) */ + ushort_t dki_ctype; /* controller type */ + ushort_t dki_flags; /* flags */ + ushort_t dki_cnum; /* controller number */ + uint_t dki_addr; /* controller address */ + uint_t dki_space; /* controller bus type */ + uint_t dki_prio; /* interrupt priority */ + uint_t dki_vec; /* interrupt vector */ + char dki_dname[DK_DEVLEN]; /* drive name (no unit #) */ + uint_t dki_unit; /* unit number */ + uint_t dki_slave; /* slave number */ + ushort_t dki_partition; /* partition number */ + ushort_t dki_maxtransfer; /* max. transfer size in DEV_BSIZE */ +}; + +/* + * Controller types + */ +#define DKC_UNKNOWN 0 +#define DKC_CDROM 1 /* CD-ROM, SCSI or otherwise */ +#define DKC_WDC2880 2 +#define DKC_XXX_0 3 /* unassigned */ +#define DKC_XXX_1 4 /* unassigned */ +#define DKC_DSD5215 5 +#define DKC_ACB4000 7 +#define DKC_MD21 8 +#define DKC_XXX_2 9 /* unassigned */ +#define DKC_NCRFLOPPY 10 +#define DKC_SMSFLOPPY 12 +#define DKC_SCSI_CCS 13 /* SCSI CCS compatible */ +#define DKC_INTEL82072 14 /* native floppy chip */ +#define DKC_MD 16 /* meta-disk (virtual-disk) driver */ +#define DKC_INTEL82077 19 /* 82077 floppy disk controller */ +#define DKC_DIRECT 20 /* Intel direct attached device i.e. 
IDE */ +#define DKC_PCMCIA_MEM 21 /* PCMCIA memory disk-like type */ +#define DKC_PCMCIA_ATA 22 /* PCMCIA AT Attached type */ +#define DKC_VBD 23 /* virtual block device */ + +/* + * Sun reserves up through 1023 + */ + +#define DKC_CUSTOMER_BASE 1024 + +/* + * Flags + */ +#define DKI_BAD144 0x01 /* use DEC std 144 bad sector fwding */ +#define DKI_MAPTRK 0x02 /* controller does track mapping */ +#define DKI_FMTTRK 0x04 /* formats only full track at a time */ +#define DKI_FMTVOL 0x08 /* formats only full volume at a time */ +#define DKI_FMTCYL 0x10 /* formats only full cylinders at a time */ +#define DKI_HEXUNIT 0x20 /* unit number is printed as 3 hex digits */ +#define DKI_PCMCIA_PFD 0x40 /* PCMCIA pseudo-floppy memory card */ + +/* + * partition headers: section 1 + * Returned in struct dk_allmap by ioctl DKIOC[SG]APART (dkio(7I)) + */ +struct dk_map { + uint64_t dkl_cylno; /* starting cylinder */ + uint64_t dkl_nblk; /* number of blocks; if == 0, */ + /* partition is undefined */ +}; + +/* + * Used for all partitions + */ +struct dk_allmap { + struct dk_map dka_map[NDKMAP]; +}; + +#if defined(_SYSCALL32) +struct dk_allmap32 { + struct dk_map32 dka_map[NDKMAP]; +}; +#endif /* _SYSCALL32 */ + +/* + * Definition of a disk's geometry + */ +struct dk_geom { + unsigned short dkg_ncyl; /* # of data cylinders */ + unsigned short dkg_acyl; /* # of alternate cylinders */ + unsigned short dkg_bcyl; /* cyl offset (for fixed head area) */ + unsigned short dkg_nhead; /* # of heads */ + unsigned short dkg_obs1; /* obsolete */ + unsigned short dkg_nsect; /* # of data sectors per track */ + unsigned short dkg_intrlv; /* interleave factor */ + unsigned short dkg_obs2; /* obsolete */ + unsigned short dkg_obs3; /* obsolete */ + unsigned short dkg_apc; /* alternates per cyl (SCSI only) */ + unsigned short dkg_rpm; /* revolutions per minute */ + unsigned short dkg_pcyl; /* # of physical cylinders */ + unsigned short dkg_write_reinstruct; /* # sectors to skip, writes */ + unsigned short dkg_read_reinstruct; /* # sectors to skip, reads */ + unsigned short dkg_extra[7]; /* for compatible expansion */ +}; + +/* + * These defines are for historic compatibility with old drivers. + */ +#define dkg_bhead dkg_obs1 /* used to be head offset */ +#define dkg_gap1 dkg_obs2 /* used to be gap1 */ +#define dkg_gap2 dkg_obs3 /* used to be gap2 */ + +/* + * Disk io control commands + * Warning: some other ioctls with the DIOC prefix exist elsewhere. + * The Generic DKIOC numbers are from 0 - 50. + * The Floppy Driver uses 51 - 100. + * The Hard Disk (except SCSI) 101 - 106. (these are obsolete) + * The CDROM Driver 151 - 200. + * The USCSI ioctl 201 - 250. + */ +#define DKIOC (0x04 << 8) + +/* + * The following ioctls are generic in nature and need to be + * supported as appropriate by all disk drivers + */ +#define DKIOCGGEOM (DKIOC|1) /* Get geometry */ +#define DKIOCINFO (DKIOC|3) /* Get info */ +#define DKIOCGVTOC (DKIOC|11) /* Get VTOC */ +#define DKIOCSVTOC (DKIOC|12) /* Set VTOC & Write to Disk */ + +/* + * Disk Cache Controls. These ioctls should be supported by + * all disk drivers. + * + * DKIOCFLUSHWRITECACHE when used from user-mode ignores the ioctl + * argument, but it should be passed as NULL to allow for future + * reinterpretation. From user-mode, this ioctl request is synchronous. + * + * When invoked from within the kernel, the arg can be NULL to indicate + * a synchronous request or can be the address of a struct dk_callback + * to request an asynchronous callback when the flush request is complete. 
+ * In this case, the flag to the ioctl must include FKIOCTL and the + * dkc_callback field of the pointed to struct must be non-null or the + * request is made synchronously. + * + * In the callback case: if the ioctl returns 0, a callback WILL be performed. + * If the ioctl returns non-zero, a callback will NOT be performed. + * NOTE: In some cases, the callback may be done BEFORE the ioctl call + * returns. The caller's locking strategy should be prepared for this case. + */ +#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */ + +struct dk_callback { + void (*dkc_callback)(void *dkc_cookie, int error); + void *dkc_cookie; + int dkc_flag; +}; + +/* bit flag definitions for dkc_flag */ +#define FLUSH_VOLATILE 0x1 /* Bit 0: if set, only flush */ + /* volatile cache; otherwise, flush */ + /* volatile and non-volatile cache */ + +#define DKIOCGETWCE (DKIOC|36) /* Get current write cache */ + /* enablement status */ +#define DKIOCSETWCE (DKIOC|37) /* Enable/Disable write cache */ + +/* + * The following ioctls are used by Sun drivers to communicate + * with their associated format routines. Support of these ioctls + * is not required of foreign drivers + */ +#define DKIOCSGEOM (DKIOC|2) /* Set geometry */ +#define DKIOCSAPART (DKIOC|4) /* Set all partitions */ +#define DKIOCGAPART (DKIOC|5) /* Get all partitions */ +#define DKIOCG_PHYGEOM (DKIOC|32) /* get physical geometry */ +#define DKIOCG_VIRTGEOM (DKIOC|33) /* get virtual geometry */ + +/* + * The following ioctl's are removable media support + */ +#define DKIOCLOCK (DKIOC|7) /* Generic 'lock' */ +#define DKIOCUNLOCK (DKIOC|8) /* Generic 'unlock' */ +#define DKIOCSTATE (DKIOC|13) /* Inquire insert/eject state */ +#define DKIOCREMOVABLE (DKIOC|16) /* is media removable */ + + +/* + * ioctl for hotpluggable devices + */ +#define DKIOCHOTPLUGGABLE (DKIOC|35) /* is hotpluggable */ + +/* + * Ioctl to force driver to re-read the alternate partition and rebuild + * the internal defect map. + */ +#define DKIOCADDBAD (DKIOC|20) /* Re-read the alternate map (IDE) */ +#define DKIOCGETDEF (DKIOC|21) /* read defect list (IDE) */ + +/* + * Used by applications to get disk defect information from IDE + * drives. + */ +#ifdef _SYSCALL32 +struct defect_header32 { + int head; + caddr32_t buffer; +}; +#endif /* _SYSCALL32 */ + +struct defect_header { + int head; + caddr_t buffer; +}; + +#define DKIOCPARTINFO (DKIOC|22) /* Get partition or slice parameters */ + +/* + * Used by applications to get partition or slice information + */ +#ifdef _SYSCALL32 +struct part_info32 { + uint32_t p_start; + int p_length; +}; +#endif /* _SYSCALL32 */ + +struct part_info { + uint64_t p_start; + int p_length; +}; + +/* The following ioctls are for Optical Memory Device */ +#define DKIOC_EBP_ENABLE (DKIOC|40) /* enable by pass erase on write */ +#define DKIOC_EBP_DISABLE (DKIOC|41) /* disable by pass erase on write */ + +/* + * This state enum is the argument passed to the DKIOCSTATE ioctl. + */ +enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE }; + +#define DKIOCGMEDIAINFO (DKIOC|42) /* get information about the media */ + +/* + * ioctls to read/write mboot info. + */ +#define DKIOCGMBOOT (DKIOC|43) /* get mboot info */ +#define DKIOCSMBOOT (DKIOC|44) /* set mboot info */ + +/* + * ioctl to get the device temperature. + */ +#define DKIOCGTEMPERATURE (DKIOC|45) /* get temperature */ + +/* + * Used for providing the temperature. 
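+ *
+ * Illustrative only (assumed userland caller, not part of this header):
+ *
+ *	struct dk_temperature dkt = { 0 };
+ *	if (ioctl(fd, DKIOCGTEMPERATURE, &dkt) == 0)
+ *		printf("current temp: %d\n", dkt.dkt_cur_temp);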
+ */ + +struct dk_temperature { + uint_t dkt_flags; /* Flags */ + short dkt_cur_temp; /* Current disk temperature */ + short dkt_ref_temp; /* reference disk temperature */ +}; + +#define DKT_BYPASS_PM 0x1 +#define DKT_INVALID_TEMP 0xFFFF + + +/* + * Media types or profiles known + */ +#define DK_UNKNOWN 0x00 /* Media inserted - type unknown */ + + +/* + * SFF 8090 Specification Version 3, media types 0x01 - 0xfffe are retained to + * maintain compatibility with SFF8090. The following define the + * optical media type. + */ +#define DK_REMOVABLE_DISK 0x02 /* Removable Disk */ +#define DK_MO_ERASABLE 0x03 /* MO Erasable */ +#define DK_MO_WRITEONCE 0x04 /* MO Write once */ +#define DK_AS_MO 0x05 /* AS MO */ +#define DK_CDROM 0x08 /* CDROM */ +#define DK_CDR 0x09 /* CD-R */ +#define DK_CDRW 0x0A /* CD-RW */ +#define DK_DVDROM 0x10 /* DVD-ROM */ +#define DK_DVDR 0x11 /* DVD-R */ +#define DK_DVDRAM 0x12 /* DVD_RAM or DVD-RW */ + +/* + * Media types for other rewritable magnetic media + */ +#define DK_FIXED_DISK 0x10001 /* Fixed disk SCSI or otherwise */ +#define DK_FLOPPY 0x10002 /* Floppy media */ +#define DK_ZIP 0x10003 /* IOMEGA ZIP media */ +#define DK_JAZ 0x10004 /* IOMEGA JAZ media */ + +#define DKIOCSETEFI (DKIOC|17) /* Set EFI info */ +#define DKIOCGETEFI (DKIOC|18) /* Get EFI info */ + +#define DKIOCPARTITION (DKIOC|9) /* Get partition info */ + +/* + * Ioctls to get/set volume capabilities related to Logical Volume Managers. + * They include the ability to get/set capabilities and to issue a read to a + * specific underlying device of a replicated device. + */ + +#define DKIOCGETVOLCAP (DKIOC | 25) /* Get volume capabilities */ +#define DKIOCSETVOLCAP (DKIOC | 26) /* Set volume capabilities */ +#define DKIOCDMR (DKIOC | 27) /* Issue a directed read */ + +typedef uint_t volcapinfo_t; + +typedef uint_t volcapset_t; + +#define DKV_ABR_CAP 0x00000001 /* Support Appl.Based Recovery */ +#define DKV_DMR_CAP 0x00000002 /* Support Directed Mirror Read */ + +typedef struct volcap { + volcapinfo_t vc_info; /* Capabilities available */ + volcapset_t vc_set; /* Capabilities set */ +} volcap_t; + +#define VOL_SIDENAME 256 + +typedef struct vol_directed_rd { + int vdr_flags; + offset_t vdr_offset; + size_t vdr_nbytes; + size_t vdr_bytesread; + void *vdr_data; + int vdr_side; + char vdr_side_name[VOL_SIDENAME]; +} vol_directed_rd_t; + +#define DKV_SIDE_INIT (-1) +#define DKV_DMR_NEXT_SIDE 0x00000001 +#define DKV_DMR_DONE 0x00000002 +#define DKV_DMR_ERROR 0x00000004 +#define DKV_DMR_SUCCESS 0x00000008 +#define DKV_DMR_SHORT 0x00000010 + +#ifdef _MULTI_DATAMODEL +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack(4) +#endif +typedef struct vol_directed_rd32 { + int32_t vdr_flags; + offset_t vdr_offset; /* 64-bit element on 32-bit alignment */ + size32_t vdr_nbytes; + size32_t vdr_bytesread; + caddr32_t vdr_data; + int32_t vdr_side; + char vdr_side_name[VOL_SIDENAME]; +} vol_directed_rd32_t; +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack() +#endif +#endif /* _MULTI_DATAMODEL */ + +/* + * The ioctl is used to fetch disk's device type, vendor ID, + * model number/product ID, firmware revision and serial number together. + * + * Currently there are two device types - DKD_ATA_TYPE which means the + * disk is driven by cmdk/ata or dad/uata driver, and DKD_SCSI_TYPE + * which means the disk is driven by sd/scsi hba driver. 
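+ *
+ * Illustrative only (assumed caller, not part of this header):
+ *
+ *	dk_disk_id_t id = { 0 };
+ *	if (ioctl(fd, DKIOC_GETDISKID, &id) == 0 &&
+ *	    id.dkd_dtype == DKD_SCSI_TYPE)
+ *		printf("%.*s\n", DKD_SCSI_PRODUCT,
+ *		    id.disk_id.scsi_disk_id.dkd_sproduct);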
+ */ +#define DKIOC_GETDISKID (DKIOC|46) + +/* These two labels are for dkd_dtype of dk_disk_id_t */ +#define DKD_ATA_TYPE 0x01 /* ATA disk or legacy mode SATA disk */ +#define DKD_SCSI_TYPE 0x02 /* SCSI disk or native mode SATA disk */ + +#define DKD_ATA_MODEL 40 /* model number length */ +#define DKD_ATA_FWVER 8 /* firmware revision length */ +#define DKD_ATA_SERIAL 20 /* serial number length */ + +#define DKD_SCSI_VENDOR 8 /* vendor ID length */ +#define DKD_SCSI_PRODUCT 16 /* product ID length */ +#define DKD_SCSI_REVLEVEL 4 /* revision level length */ +#define DKD_SCSI_SERIAL 12 /* serial number length */ + +/* + * The argument type for DKIOC_GETDISKID ioctl. + */ +typedef struct dk_disk_id { + uint_t dkd_dtype; + union { + struct { + char dkd_amodel[DKD_ATA_MODEL]; /* 40 bytes */ + char dkd_afwver[DKD_ATA_FWVER]; /* 8 bytes */ + char dkd_aserial[DKD_ATA_SERIAL]; /* 20 bytes */ + } ata_disk_id; + struct { + char dkd_svendor[DKD_SCSI_VENDOR]; /* 8 bytes */ + char dkd_sproduct[DKD_SCSI_PRODUCT]; /* 16 bytes */ + char dkd_sfwver[DKD_SCSI_REVLEVEL]; /* 4 bytes */ + char dkd_sserial[DKD_SCSI_SERIAL]; /* 12 bytes */ + } scsi_disk_id; + } disk_id; +} dk_disk_id_t; + +/* + * The ioctl is used to update the firmware of device. + */ +#define DKIOC_UPDATEFW (DKIOC|47) + +/* The argument type for DKIOC_UPDATEFW ioctl */ +typedef struct dk_updatefw { + caddr_t dku_ptrbuf; /* pointer to firmware buf */ + uint_t dku_size; /* firmware buf length */ + uint8_t dku_type; /* firmware update type */ +} dk_updatefw_t; + +#ifdef _SYSCALL32 +typedef struct dk_updatefw_32 { + caddr32_t dku_ptrbuf; /* pointer to firmware buf */ + uint_t dku_size; /* firmware buf length */ + uint8_t dku_type; /* firmware update type */ +} dk_updatefw_32_t; +#endif /* _SYSCALL32 */ + +/* + * firmware update type - temporary or permanent use + */ +#define FW_TYPE_TEMP 0x0 /* temporary use */ +#define FW_TYPE_PERM 0x1 /* permanent use */ + +#define DKIOC (0x04 << 8) +#define DKIOCTRIM (DKIOC | 35) + +/* + * ioctl to free space (e.g. SCSI UNMAP) off a disk. + * Pass a dkioc_free_list_t containing a list of extents to be freed. + */ +#define DKIOCFREE (DKIOC|50) + +#define DF_WAIT_SYNC 0x00000001 /* Wait for full write-out of free. */ + +typedef struct dkioc_free_list_ext_s { + uint64_t dfle_start; + uint64_t dfle_length; +} dkioc_free_list_ext_t; + +typedef struct dkioc_free_list_s { + uint64_t dfl_flags; + uint64_t dfl_num_exts; + uint64_t dfl_offset; + dkioc_free_list_ext_t dfl_exts[1]; +} dkioc_free_list_t; +#define DFL_SZ(num_exts) \ + (sizeof (dkioc_free_list_t) + \ + (num_exts - 1) * sizeof (dkioc_free_list_ext_t)) + +/* Frees a variable-length dkioc_free_list_t structure. */ +static inline void +dfl_free(dkioc_free_list_t *dfl) +{ + kmem_free(dfl, DFL_SZ(dfl->dfl_num_exts)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _OPENSOLARIS_SYS_DKIO_H_ */ diff --git a/include/os/macos/spl/sys/errno.h b/include/os/macos/spl/sys/errno.h new file mode 100644 index 0000000000..67574721cc --- /dev/null +++ b/include/os/macos/spl/sys/errno.h @@ -0,0 +1,29 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include_next + +#define EBADE EBADMACHO +#define ECKSUM EBADE +#define EFRAGS EIDRM +#define EREMOTEIO ENOLINK +#define ENOTACTIVE ENOPOLICY +#define ECHRNG EMULTIHOP diff --git a/include/os/macos/spl/sys/fcntl.h b/include/os/macos/spl/sys/fcntl.h new file mode 100644 index 0000000000..5f13d304fc --- /dev/null +++ b/include/os/macos/spl/sys/fcntl.h @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_FCNTL_H +#define _SPL_FCNTL_H + +#include_next + +#define F_FREESP 11 + +#define O_LARGEFILE 0 +#define O_RSYNC 0 +#define O_DIRECT 0 + +#endif /* _SPL_FCNTL_H */ diff --git a/include/os/macos/spl/sys/file.h b/include/os/macos/spl/sys/file.h new file mode 100644 index 0000000000..136e8f3bfb --- /dev/null +++ b/include/os/macos/spl/sys/file.h @@ -0,0 +1,64 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_FILE_H +#define _SPL_FILE_H + +#define FIGNORECASE 0x00080000 +#define FKIOCTL 0x80000000 +#define ED_CASE_CONFLICT 0x10 + +#include + +/* + * XNU has all the proc structs as opaque and with no functions we + * are allowed to call, so we implement file IO from within the kernel + * as vnode operations. + * The second mode is when we are given a "fd" from userland, which we + * map in here, using getf()/releasef(). + * When it comes to IO, if "fd" is set, we use it (fo_rdwr()) as it + * can handle both files, and pipes. + * In kernel space file ops, we use vn_rdwr on the vnode. 
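+ *
+ * Illustrative only (assumed in-kernel consumer, not part of this header):
+ *
+ *	struct spl_fileproc *fp = getf(fd);
+ *	if (fp != NULL) {
+ *		struct vnode *vp = getf_vnode(fp);
+ *		... read/write via fo_rdwr(), or vn_rdwr() against vp ...
+ *		releasef(fd);
+ *	}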
+ */ +struct spl_fileproc { + void *f_vnode; /* underlying vnode */ + list_node_t f_next; /* * next getf() link for releasef() */ + int f_fd; /* * userland file descriptor */ + off_t f_offset; /* offset for stateful IO */ + void *f_proc; /* opaque */ + void *f_fp; /* opaque */ + int f_writes; /* did write? for close sync */ + minor_t f_file; /* minor of the file */ + void *f_private; /* zfsdev_state_t */ +}; +/* Members with '*' are not used when 'fd' is not given */ + +void *getf(int fd); +void releasef(int fd); +struct vnode *getf_vnode(void *fp); + +#endif /* SPL_FILE_H */ diff --git a/include/os/macos/spl/sys/inttypes.h b/include/os/macos/spl/sys/inttypes.h new file mode 100644 index 0000000000..c9f6a316aa --- /dev/null +++ b/include/os/macos/spl/sys/inttypes.h @@ -0,0 +1,31 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_INTTYPES_H +#define _SPL_INTTYPES_H + +#endif /* SPL_INTTYPES_H */ diff --git a/include/os/macos/spl/sys/isa_defs.h b/include/os/macos/spl/sys/isa_defs.h new file mode 100644 index 0000000000..f702dc51e1 --- /dev/null +++ b/include/os/macos/spl/sys/isa_defs.h @@ -0,0 +1,690 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ISA_DEFS_H +#define _SYS_ISA_DEFS_H + +/* + * This header file serves to group a set of well known defines and to + * set these for each instruction set architecture. These defines may + * be divided into two groups; characteristics of the processor and + * implementation choices for Solaris on a processor. + * + * Processor Characteristics: + * + * _LITTLE_ENDIAN / _BIG_ENDIAN: + * The natural byte order of the processor. A pointer to an int points + * to the least/most significant byte of that int. 
+ * + * _STACK_GROWS_UPWARD / _STACK_GROWS_DOWNWARD: + * The processor specific direction of stack growth. A push onto the + * stack increases/decreases the stack pointer, so it stores data at + * successively higher/lower addresses. (Stackless machines ignored + * without regrets). + * + * _LONG_LONG_HTOL / _LONG_LONG_LTOH: + * A pointer to a long long points to the most/least significant long + * within that long long. + * + * _BIT_FIELDS_HTOL / _BIT_FIELDS_LTOH: + * The C compiler assigns bit fields from the high/low to the low/high end + * of an int (most to least significant vs. least to most significant). + * + * _IEEE_754: + * The processor (or supported implementations of the processor) + * supports the ieee-754 floating point standard. No other floating + * point standards are supported (or significant). Any other supported + * floating point formats are expected to be cased on the ISA processor + * symbol. + * + * _CHAR_IS_UNSIGNED / _CHAR_IS_SIGNED: + * The C Compiler implements objects of type `char' as `unsigned' or + * `signed' respectively. This is really an implementation choice of + * the compiler writer, but it is specified in the ABI and tends to + * be uniform across compilers for an instruction set architecture. + * Hence, it has the properties of a processor characteristic. + * + * _CHAR_ALIGNMENT / _SHORT_ALIGNMENT / _INT_ALIGNMENT / _LONG_ALIGNMENT / + * _LONG_LONG_ALIGNMENT / _DOUBLE_ALIGNMENT / _LONG_DOUBLE_ALIGNMENT / + * _POINTER_ALIGNMENT / _FLOAT_ALIGNMENT: + * The ABI defines alignment requirements of each of the primitive + * object types. Some, if not all, may be hardware requirements as + * well. The values are expressed in "byte-alignment" units. + * + * _MAX_ALIGNMENT: + * The most stringent alignment requirement as specified by the ABI. + * Equal to the maximum of all the above _XXX_ALIGNMENT values. + * + * _ALIGNMENT_REQUIRED: + * True or false (1 or 0) whether or not the hardware requires the ABI + * alignment. + * + * _LONG_LONG_ALIGNMENT_32 + * The 32-bit ABI supported by a 64-bit kernel may have different + * alignment requirements for primitive object types. The value of this + * identifier is expressed in "byte-alignment" units. + * + * _HAVE_CPUID_INSN + * This indicates that the architecture supports the 'cpuid' + * instruction as defined by Intel. (Intel allows other vendors + * to extend the instruction for their own purposes.) + * + * + * Implementation Choices: + * + * _ILP32 / _LP64: + * This specifies the compiler data type implementation as specified in + * the relevant ABI. The choice between these is strongly influenced + * by the underlying hardware, but is not absolutely tied to it. + * Currently only two data type models are supported: + * + * _ILP32: + * Int/Long/Pointer are 32 bits. This is the historical UNIX + * and Solaris implementation. Due to its historical standing, + * this is the default case. + * + * _LP64: + * Long/Pointer are 64 bits, Int is 32 bits. This is the chosen + * implementation for 64-bit ABIs such as SPARC V9. + * + * _I32LPx: + * A compilation environment where 'int' is 32-bit, and + * longs and pointers are simply the same size. + * + * In all cases, Char is 8 bits and Short is 16 bits. + * + * _SUNOS_VTOC_8 / _SUNOS_VTOC_16 / _SVR4_VTOC_16: + * This specifies the form of the disk VTOC (or label): + * + * _SUNOS_VTOC_8: + * This is a VTOC form which is upwardly compatible with the + * SunOS 4.x disk label and allows 8 partitions per disk. 
+ * + * _SUNOS_VTOC_16: + * In this format the incore vtoc image matches the ondisk + * version. It allows 16 slices per disk, and is not + * compatible with the SunOS 4.x disk label. + * + * Note that these are not the only two VTOC forms possible and + * additional forms may be added. One possible form would be the + * SVr4 VTOC form. The symbol for that is reserved now, although + * it is not implemented. + * + * _SVR4_VTOC_16: + * This VTOC form is compatible with the System V Release 4 + * VTOC (as implemented on the SVr4 Intel and 3b ports) with + * 16 partitions per disk. + * + * + * _DMA_USES_PHYSADDR / _DMA_USES_VIRTADDR + * This describes the type of addresses used by system DMA: + * + * _DMA_USES_PHYSADDR: + * This type of DMA, used in the x86 implementation, + * requires physical addresses for DMA buffers. The 24-bit + * addresses used by some legacy boards is the source of the + * "low-memory" (<16MB) requirement for some devices using DMA. + * + * _DMA_USES_VIRTADDR: + * This method of DMA allows the use of virtual addresses for + * DMA transfers. + * + * _FIRMWARE_NEEDS_FDISK / _NO_FDISK_PRESENT + * This indicates the presence/absence of an fdisk table. + * + * _FIRMWARE_NEEDS_FDISK + * The fdisk table is required by system firmware. If present, + * it allows a disk to be subdivided into multiple fdisk + * partitions, each of which is equivalent to a separate, + * virtual disk. This enables the co-existence of multiple + * operating systems on a shared hard disk. + * + * _NO_FDISK_PRESENT + * If the fdisk table is absent, it is assumed that the entire + * media is allocated for a single operating system. + * + * _HAVE_TEM_FIRMWARE + * Defined if this architecture has the (fallback) option of + * using prom_* calls for doing I/O if a suitable kernel driver + * is not available to do it. + * + * _DONT_USE_1275_GENERIC_NAMES + * Controls whether or not device tree node names should + * comply with the IEEE 1275 "Generic Names" Recommended + * Practice. With _DONT_USE_GENERIC_NAMES, device-specific + * names identifying the particular device will be used. + * + * __i386_COMPAT + * This indicates whether the i386 ABI is supported as a *non-native* + * mode for the platform. When this symbol is defined: + * - 32-bit xstat-style system calls are enabled + * - 32-bit xmknod-style system calls are enabled + * - 32-bit system calls use i386 sizes -and- alignments + * + * Note that this is NOT defined for the i386 native environment! + * + * __x86 + * This is ONLY a synonym for defined(__i386) || defined(__amd64) + * which is useful only insofar as these two architectures share + * common attributes. Analogous to __sparc. + * + * _PSM_MODULES + * This indicates whether or not the implementation uses PSM + * modules for processor support, reading /etc/mach from inside + * the kernel to extract a list. + * + * _RTC_CONFIG + * This indicates whether or not the implementation uses /etc/rtc_config + * to configure the real-time clock in the kernel. + * + * _UNIX_KRTLD + * This indicates that the implementation uses a dynamically + * linked unix + krtld to form the core kernel image at boot + * time, or (in the absence of this symbol) a prelinked kernel image. + * + * _OBP + * This indicates the firmware interface is OBP. + * + * _SOFT_HOSTID + * This indicates that the implementation obtains the hostid + * from the file /etc/hostid, rather than from hardware. 
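+ *
+ * Illustrative only: consumers key off these symbols with ordinary
+ * preprocessor tests, e.g. the sys/dkio.h header in this same patch
+ * selects the partition count from the VTOC form:
+ *
+ *	#if defined(_SUNOS_VTOC_16)
+ *	#define NDKMAP 16
+ *	#elif defined(_SUNOS_VTOC_8)
+ *	#define NDKMAP 8
+ *	#endif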
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The following set of definitions characterize Solaris on AMD's + * 64-bit systems. + */ +#if defined(__x86_64) || defined(__amd64) + +#if !defined(__amd64) +#define __amd64 /* preferred guard */ +#endif + +#if !defined(__x86) +#define __x86 +#endif + +/* + * Define the appropriate "processor characteristics" + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 8 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 16 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 16 +#define _ALIGNMENT_REQUIRED 1 + +/* + * Different alignment constraints for the i386 ABI in compatibility mode + */ +#define _LONG_LONG_ALIGNMENT_32 4 + +/* + * Define the appropriate "implementation choices". + */ +#if !defined(_LP64) +#error "_LP64 not defined" +#endif +#if !defined(_I32LPx) +#define _I32LPx +#endif +#define _MULTI_DATAMODEL +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define __i386_COMPAT +#define _PSM_MODULES +#define _RTC_CONFIG +#define _SOFT_HOSTID +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +/* + * The feature test macro __i386 is generic for all processors implementing + * the Intel 386 instruction set or a superset of it. Specifically, this + * includes all members of the 386, 486, and Pentium family of processors. + */ +#elif defined(__i386) || defined(__i386__) + +#if !defined(__i386) +#define __i386 +#endif + +#if !defined(__x86) +#define __x86 +#endif + +/* + * Define the appropriate "processor characteristics" + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 4 +#define _LONG_LONG_ALIGNMENT 4 +#define _DOUBLE_ALIGNMENT 4 +#define _DOUBLE_COMPLEX_ALIGNMENT 4 +#define _LONG_DOUBLE_ALIGNMENT 4 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 +#define _POINTER_ALIGNMENT 4 +#define _MAX_ALIGNMENT 4 +#define _ALIGNMENT_REQUIRED 0 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices". 
+ */ +#if !defined(_ILP32) +#define _ILP32 +#endif +#if !defined(_I32LPx) +#define _I32LPx +#endif +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define _PSM_MODULES +#define _RTC_CONFIG +#define _SOFT_HOSTID +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +#elif defined(__aarch64__) + +/* + * Define the appropriate "processor characteristics" + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_UNSIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 8 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 16 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 16 +#define _ALIGNMENT_REQUIRED 1 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices" + */ +#if !defined(_LP64) +#error "_LP64 not defined" +#endif +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define _PSM_MODULES +#define _RTC_CONFIG +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +#elif defined(__riscv) + +/* + * Define the appropriate "processor characteristics" + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_UNSIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 8 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 16 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 16 +#define _ALIGNMENT_REQUIRED 1 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices" + */ +#if !defined(_LP64) +#define _LP64 +#endif +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define _PSM_MODULES +#define _RTC_CONFIG +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +#elif defined(__arm__) + +/* + * Define the appropriate "processor characteristics" + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_ALIGNMENT 4 +#define _LONG_LONG_ALIGNMENT 4 +#define _DOUBLE_ALIGNMENT 4 +#define _DOUBLE_COMPLEX_ALIGNMENT 4 +#define _LONG_DOUBLE_ALIGNMENT 4 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 +#define _POINTER_ALIGNMENT 4 +#define _MAX_ALIGNMENT 4 +#define _ALIGNMENT_REQUIRED 0 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices". 
+ */ +#if !defined(_ILP32) +#define _ILP32 +#endif +#if !defined(_I32LPx) +#define _I32LPx +#endif +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define _PSM_MODULES +#define _RTC_CONFIG +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +#elif defined(__mips__) + +/* + * Define the appropriate "processor characteristics" + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_LTOH +#define _BIT_FIELDS_LTOH +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#if defined(__mips_n64) +#define _LONG_ALIGNMENT 8 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 8 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 8 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 8 +#define _ALIGNMENT_REQUIRED 0 + +#define _LONG_LONG_ALIGNMENT_32 _INT_ALIGNMENT +/* + * Define the appropriate "implementation choices". + */ +#if !defined(_LP64) +#error "_LP64 not defined" +#endif +#else +#define _LONG_ALIGNMENT 4 +#define _LONG_LONG_ALIGNMENT 4 +#define _DOUBLE_ALIGNMENT 4 +#define _DOUBLE_COMPLEX_ALIGNMENT 4 +#define _LONG_DOUBLE_ALIGNMENT 4 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4 +#define _POINTER_ALIGNMENT 4 +#define _MAX_ALIGNMENT 4 +#define _ALIGNMENT_REQUIRED 0 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices". + */ +#if !defined(_ILP32) +#define _ILP32 +#endif +#if !defined(_I32LPx) +#define _I32LPx +#endif +#endif +#define _SUNOS_VTOC_16 +#define _DMA_USES_PHYSADDR +#define _FIRMWARE_NEEDS_FDISK +#define _PSM_MODULES +#define _RTC_CONFIG +#define _DONT_USE_1275_GENERIC_NAMES +#define _HAVE_CPUID_INSN + +#elif defined(__powerpc__) + +#if defined(__BIG_ENDIAN__) +#define _BIT_FIELDS_HTOL +#else +#define _BIT_FIELDS_LTOH +#endif + +/* + * The following set of definitions characterize the Solaris on SPARC systems. + * + * The symbol __sparc indicates any of the SPARC family of processor + * architectures. This includes SPARC V7, SPARC V8 and SPARC V9. + * + * The symbol __sparcv8 indicates the 32-bit SPARC V8 architecture as defined + * by Version 8 of the SPARC Architecture Manual. (SPARC V7 is close enough + * to SPARC V8 for the former to be subsumed into the latter definition.) + * + * The symbol __sparcv9 indicates the 64-bit SPARC V9 architecture as defined + * by Version 9 of the SPARC Architecture Manual. + * + * The symbols __sparcv8 and __sparcv9 are mutually exclusive, and are only + * relevant when the symbol __sparc is defined. + */ +/* + * XXX Due to the existence of 5110166, "defined(__sparcv9)" needs to be added + * to support backwards builds. This workaround should be removed in s10_71. + */ +#elif defined(__sparc) || defined(__sparcv9) || defined(__sparc__) +#if !defined(__sparc) +#define __sparc +#endif + +/* + * You can be 32-bit or 64-bit, but not both at the same time. + */ +#if defined(__sparcv8) && defined(__sparcv9) +#error "SPARC Versions 8 and 9 are mutually exclusive choices" +#endif + +/* + * Existing compilers do not set __sparcv8. Years will transpire before + * the compilers can be depended on to set the feature test macro. In + * the interim, we'll set it here on the basis of historical behaviour; + * if you haven't asked for SPARC V9, then you must've meant SPARC V8. 
+ */ +#if !defined(__sparcv9) && !defined(__sparcv8) +#define __sparcv8 +#endif + +/* + * Define the appropriate "processor characteristics" shared between + * all Solaris on SPARC systems. + */ +#define _STACK_GROWS_DOWNWARD +#define _LONG_LONG_HTOL +#define _BIT_FIELDS_HTOL +#define _IEEE_754 +#define _CHAR_IS_SIGNED +#define _BOOL_ALIGNMENT 1 +#define _CHAR_ALIGNMENT 1 +#define _SHORT_ALIGNMENT 2 +#define _INT_ALIGNMENT 4 +#define _FLOAT_ALIGNMENT 4 +#define _FLOAT_COMPLEX_ALIGNMENT 4 +#define _LONG_LONG_ALIGNMENT 8 +#define _DOUBLE_ALIGNMENT 8 +#define _DOUBLE_COMPLEX_ALIGNMENT 8 +#define _ALIGNMENT_REQUIRED 1 + +/* + * Define the appropriate "implementation choices" shared between versions. + */ +#define _SUNOS_VTOC_8 +#define _DMA_USES_VIRTADDR +#define _NO_FDISK_PRESENT +#define _HAVE_TEM_FIRMWARE +#define _OBP + +/* + * The following set of definitions characterize the implementation of + * 32-bit Solaris on SPARC V8 systems. + */ +#if defined(__sparcv8) + +/* + * Define the appropriate "processor characteristics" + */ +#define _LONG_ALIGNMENT 4 +#define _LONG_DOUBLE_ALIGNMENT 8 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 8 +#define _POINTER_ALIGNMENT 4 +#define _MAX_ALIGNMENT 8 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices" + */ +#define _ILP32 +#if !defined(_I32LPx) +#define _I32LPx +#endif + +/* + * The following set of definitions characterize the implementation of + * 64-bit Solaris on SPARC V9 systems. + */ +#elif defined(__sparcv9) + +/* + * Define the appropriate "processor characteristics" + */ +#define _LONG_ALIGNMENT 8 +#define _LONG_DOUBLE_ALIGNMENT 16 +#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16 +#define _POINTER_ALIGNMENT 8 +#define _MAX_ALIGNMENT 16 + +#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT + +/* + * Define the appropriate "implementation choices" + */ +#if !defined(_LP64) +#error "_LP64 not defined" +#endif +#if !defined(_I32LPx) +#define _I32LPx +#endif +#define _MULTI_DATAMODEL + +#else +#error "unknown SPARC version" +#endif + +/* + * #error is strictly ansi-C, but works as well as anything for K&R systems. + */ +#else +#error "ISA not supported" +#endif + +#if defined(_ILP32) && defined(_LP64) +#error "Both _ILP32 and _LP64 are defined" +#endif + +#define ____cacheline_aligned __attribute__((aligned(64))) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ISA_DEFS_H */ diff --git a/include/os/macos/spl/sys/kmem.h b/include/os/macos/spl/sys/kmem.h new file mode 100644 index 0000000000..86287060cc --- /dev/null +++ b/include/os/macos/spl/sys/kmem.h @@ -0,0 +1,155 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS Project + * Copyright (C) 2013 Jorgen Lundman + * Copyright (C) 2017 Sean Doran + * + */ + +#ifndef _SPL_KMEM_H +#define _SPL_KMEM_H + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// XNU total amount of memory +extern uint64_t physmem; + +#define KM_SLEEP 0x0000 /* can block for memory; success guaranteed */ +#define KM_NOSLEEP 0x0001 /* cannot block for memory; may fail */ +#define KM_PANIC 0x0002 /* if memory cannot be allocated, panic */ +#define KM_PUSHPAGE 0x0004 /* can block for memory; may use reserve */ +#define KM_NORMALPRI 0x0008 /* with KM_NOSLEEP, lower priority allocation */ +#define KM_NODEBUG 0x0010 /* NOT IMPLEMENTED ON OSX */ +#define KM_NO_VBA 0x0020 /* OSX: don't descend to the bucket layer */ +#define KM_VMFLAGS 0x00ff /* flags that must match VM_* flags */ + +#define KM_FLAGS 0xffff /* all settable kmem flags */ + +/* + * Kernel memory allocator: DDI interfaces. + * See kmem_alloc(9F) for details. + */ + +// Work around symbol collisions in XNU +#define kmem_alloc(size, kmflags) zfs_kmem_alloc((size), (kmflags)) +#define kmem_zalloc(size, kmflags) zfs_kmem_zalloc((size), (kmflags)) +#define kmem_free(buf, size) zfs_kmem_free((buf), (size)) + +void *zfs_kmem_alloc(size_t size, int kmflags); +void *zfs_kmem_zalloc(size_t size, int kmflags); +void zfs_kmem_free(void *buf, size_t size); + +void spl_kmem_init(uint64_t); +void spl_kmem_thread_init(void); +void spl_kmem_mp_init(void); +void spl_kmem_thread_fini(void); +void spl_kmem_fini(void); + +size_t kmem_size(void); +size_t kmem_used(void); +int64_t kmem_avail(void); +size_t kmem_num_pages_wanted(void); +int spl_vm_pool_low(void); +int32_t spl_minimal_physmem_p(void); +int64_t spl_adjust_pressure(int64_t); +int64_t spl_free_wrapper(void); +int64_t spl_free_manual_pressure_wrapper(void); +boolean_t spl_free_fast_pressure_wrapper(void); +void spl_free_set_pressure(int64_t); +void spl_free_set_fast_pressure(boolean_t); +uint64_t spl_free_last_pressure_wrapper(void); + +#define KMC_NOTOUCH 0x00010000 +#define KMC_NODEBUG 0x00020000 +#define KMC_NOMAGAZINE 0x00040000 +#define KMC_NOHASH 0x00080000 +#define KMC_QCACHE 0x00100000 +#define KMC_KMEM_ALLOC 0x00200000 /* internal use only */ +#define KMC_IDENTIFIER 0x00400000 /* internal use only */ +#define KMC_PREFILL 0x00800000 +#define KMC_ARENA_SLAB 0x01000000 /* use a bigger kmem cache */ + +struct kmem_cache; + +typedef struct kmem_cache kmem_cache_t; + +/* Client response to kmem move callback */ +typedef enum kmem_cbrc { + KMEM_CBRC_YES, + KMEM_CBRC_NO, + KMEM_CBRC_LATER, + KMEM_CBRC_DONT_NEED, + KMEM_CBRC_DONT_KNOW +} kmem_cbrc_t; + +#define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) +#define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) + +kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align, + int (*constructor)(void *, void *, int), + void (*destructor)(void *, void *), + void (*reclaim)(void *), + void *_private, vmem_t *vmp, int cflags); +void kmem_cache_destroy(kmem_cache_t *cache); +void *kmem_cache_alloc(kmem_cache_t *cache, int flags); +void kmem_cache_free(kmem_cache_t *cache, void *buf); +void kmem_cache_free_to_slab(kmem_cache_t *cache, void *buf); +extern boolean_t 
kmem_cache_reap_active(void); +void kmem_cache_reap_now(kmem_cache_t *cache); +void kmem_depot_ws_zero(kmem_cache_t *cache); +void kmem_reap(void); +void kmem_reap_idspace(void); +kmem_cache_t *kmem_cache_buf_in_cache(kmem_cache_t *, void *); + +int kmem_debugging(void); +void kmem_cache_set_move(kmem_cache_t *, + kmem_cbrc_t (*)(void *, void *, size_t, void *)); + +char *kmem_asprintf(const char *fmt, ...); +extern char *kmem_strdup(const char *str); +extern void kmem_strfree(char *str); +char *kmem_vasprintf(const char *fmt, va_list ap); +char *kmem_strstr(const char *in, const char *str); +void strident_canon(char *s, size_t n); + +boolean_t spl_arc_no_grow(size_t, boolean_t, kmem_cache_t **); + +extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache); +extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache); + +#ifdef __cplusplus +} +#endif + +#endif /* _SPL_KMEM_H */ diff --git a/include/os/macos/spl/sys/kmem_cache.h b/include/os/macos/spl/sys/kmem_cache.h new file mode 100644 index 0000000000..2dc08b1712 --- /dev/null +++ b/include/os/macos/spl/sys/kmem_cache.h @@ -0,0 +1,25 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_KMEM_CACHE_H +#define _SPL_KMEM_CACHE_H + +#endif diff --git a/include/os/macos/spl/sys/kmem_impl.h b/include/os/macos/spl/sys/kmem_impl.h new file mode 100644 index 0000000000..2f3fa7f9da --- /dev/null +++ b/include/os/macos/spl/sys/kmem_impl.h @@ -0,0 +1,494 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_KMEM_IMPL_H +#define _SYS_KMEM_IMPL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * kernel memory allocator: implementation-private data structures + * + * Lock order: + * 1. cache_lock + * 2. cc_lock in order by CPU ID + * 3. 
cache_depot_lock + * + * Do not call kmem_cache_alloc() or taskq_dispatch() while holding any of the + * above locks. + */ + +#define KMF_AUDIT 0x00000001 /* transaction auditing */ +#define KMF_DEADBEEF 0x00000002 /* deadbeef checking */ +#define KMF_REDZONE 0x00000004 /* redzone checking */ +#define KMF_CONTENTS 0x00000008 /* freed-buffer content logging */ +#define KMF_STICKY 0x00000010 /* if set, override /etc/system */ +#define KMF_NOMAGAZINE 0x00000020 /* disable per-cpu magazines */ +#define KMF_FIREWALL 0x00000040 /* put all bufs before unmapped pages */ +#define KMF_LITE 0x00000100 /* lightweight debugging */ + +#define KMF_HASH 0x00000200 /* cache has hash table */ +#define KMF_RANDOMIZE 0x00000400 /* randomize other kmem_flags */ + +#define KMF_DUMPDIVERT 0x00001000 /* use alternate memory at dump time */ +#define KMF_DUMPUNSAFE 0x00002000 /* flag caches used at dump time */ +#define KMF_PREFILL 0x00004000 /* Prefill the slab when created. */ + +#define KMF_BUFTAG (KMF_DEADBEEF | KMF_REDZONE) +#define KMF_TOUCH (KMF_BUFTAG | KMF_LITE | KMF_CONTENTS) +#define KMF_RANDOM (KMF_TOUCH | KMF_AUDIT | KMF_NOMAGAZINE) +#define KMF_DEBUG (KMF_RANDOM | KMF_FIREWALL) + +#define KMEM_STACK_DEPTH 15 + +#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL +#define KMEM_UNINITIALIZED_PATTERN 0xbaddcafebaddcafeULL +#define KMEM_REDZONE_PATTERN 0xfeedfacefeedfaceULL +#define KMEM_REDZONE_BYTE 0xbb + +/* + * Upstream platforms handle size == 0 as valid alloc, we + * can not return NULL, as that invalidates KM_SLEEP. So + * we return a valid hardcoded address, instead of actually taking up + * memory by fudging size to 1 byte. If read/writes are + * attempted, we will get page fault (which is correct, they + * asked for zero bytes after all) + */ +#define KMEM_ZERO_SIZE_PTR ((void *)16) + +/* + * Redzone size encodings for kmem_alloc() / kmem_free(). We encode the + * allocation size, rather than storing it directly, so that kmem_free() + * can distinguish frees of the wrong size from redzone violations. + * + * A size of zero is never valid. + */ +#define KMEM_SIZE_ENCODE(x) (251 * (x) + 1) +#define KMEM_SIZE_DECODE(x) ((x) / 251) +#define KMEM_SIZE_VALID(x) ((x) % 251 == 1 && (x) != 1) + + +#define KMEM_ALIGN 8 /* min guaranteed alignment */ +#define KMEM_ALIGN_SHIFT 3 /* log2(KMEM_ALIGN) */ +#define KMEM_VOID_FRACTION 8 /* never waste more than 1/8 of slab */ + +#define KMEM_SLAB_IS_PARTIAL(sp) \ + ((sp)->slab_refcnt > 0 && (sp)->slab_refcnt < (sp)->slab_chunks) +#define KMEM_SLAB_IS_ALL_USED(sp) \ + ((sp)->slab_refcnt == (sp)->slab_chunks) + +/* + * The bufctl (buffer control) structure keeps some minimal information + * about each buffer: its address, its slab, and its current linkage, + * which is either on the slab's freelist (if the buffer is free), or + * on the cache's buf-to-bufctl hash table (if the buffer is allocated). + * In the case of non-hashed, or "raw", caches (the common case), only + * the freelist linkage is necessary: the buffer address is at a fixed + * offset from the bufctl address, and the slab is at the end of the page. + * + * NOTE: bc_next must be the first field; raw buffers have linkage only. + */ +typedef struct kmem_bufctl { + struct kmem_bufctl *bc_next; /* next bufctl struct */ + void *bc_addr; /* address of buffer */ + struct kmem_slab *bc_slab; /* controlling slab */ +} kmem_bufctl_t; + +/* + * The KMF_AUDIT version of the bufctl structure. The beginning of this + * structure must be identical to the normal bufctl structure so that + * pointers are interchangeable. 
+ */ +typedef struct kmem_bufctl_audit { + struct kmem_bufctl *bc_next; /* next bufctl struct */ + void *bc_addr; /* address of buffer */ + struct kmem_slab *bc_slab; /* controlling slab */ + kmem_cache_t *bc_cache; /* controlling cache */ + hrtime_t bc_timestamp; /* transaction time */ + kthread_t *bc_thread; /* thread doing transaction */ + struct kmem_bufctl *bc_lastlog; /* last log entry */ + void *bc_contents; /* contents at last free */ + int bc_depth; /* stack depth */ + pc_t bc_stack[KMEM_STACK_DEPTH]; /* pc stack */ +} kmem_bufctl_audit_t; + +/* + * A kmem_buftag structure is appended to each buffer whenever any of the + * KMF_BUFTAG flags (KMF_DEADBEEF, KMF_REDZONE, KMF_VERIFY) are set. + */ +typedef struct kmem_buftag { + uint64_t bt_redzone; /* 64-bit redzone pattern */ + kmem_bufctl_t *bt_bufctl; /* bufctl */ + intptr_t bt_bxstat; /* bufctl ^ (alloc/free) */ +} kmem_buftag_t; + +/* + * A variant of the kmem_buftag structure used for KMF_LITE caches. + * Previous callers are stored in reverse chronological order. (i.e. most + * recent first) + */ +typedef struct kmem_buftag_lite { + kmem_buftag_t bt_buftag; /* a normal buftag */ + pc_t bt_history[1]; /* zero or more callers */ +} kmem_buftag_lite_t; + +#define KMEM_BUFTAG_LITE_SIZE(f) \ + (offsetof(kmem_buftag_lite_t, bt_history[f])) + +#define KMEM_BUFTAG(cp, buf) \ + ((kmem_buftag_t *)((char *)(buf) + (cp)->cache_buftag)) + +#define KMEM_BUFCTL(cp, buf) \ + ((kmem_bufctl_t *)((char *)(buf) + (cp)->cache_bufctl)) + +#define KMEM_BUF(cp, bcp) \ + ((void *)((char *)(bcp) - (cp)->cache_bufctl)) + +#define KMEM_SLAB(cp, buf) \ + ((kmem_slab_t *)P2END((uintptr_t)(buf), (cp)->cache_slabsize) - 1) + +/* + * Test for using alternate memory at dump time. + */ +#define KMEM_DUMP(cp) ((cp)->cache_flags & KMF_DUMPDIVERT) +#define KMEM_DUMPCC(ccp) ((ccp)->cc_flags & KMF_DUMPDIVERT) + +/* + * The "CPU" macro loads a cpu_t that refers to the cpu that the current + * thread is running on at the time the macro is executed. A context switch + * may occur immediately after loading this data structure, leaving this + * thread pointing at the cpu_t for the previous cpu. This is not a problem; + * we'd just end up checking the previous cpu's per-cpu cache, and then check + * the other layers of the kmem cache if need be. + * + * It's not even a problem if the old cpu gets DR'ed out during the context + * switch. The cpu-remove DR operation bzero()s the cpu_t, but doesn't free + * it. So the cpu_t's cpu_cache_offset would read as 0, causing us to use + * cpu 0's per-cpu cache. + * + * So, there is no need to disable kernel preemption while using the CPU macro + * below since if we have been context switched, there will not be any + * correctness problem, just a momentary use of a different per-cpu cache. 
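+ *
+ * Simplified, assumed sketch of the allocation fast path this enables
+ * (illustrative, not the verbatim implementation):
+ *
+ *	kmem_cpu_cache_t *ccp = KMEM_CPU_CACHE(cp);
+ *	mutex_enter(&ccp->cc_lock);
+ *	if (ccp->cc_rounds > 0) {
+ *		buf = ccp->cc_loaded->mag_round[--ccp->cc_rounds];
+ *		ccp->cc_alloc++;
+ *	}
+ *	mutex_exit(&ccp->cc_lock);
+ *
+ * Even if the thread migrated right after KMEM_CPU_CACHE(), cc_lock
+ * still serializes access to whichever per-cpu cache was picked.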
+ */ + +#define KMEM_CPU_CACHE(cp) \ + (&cp->cache_cpu[cpu_number()]) + +#define KMOM_MAGAZINE_VALID(cp, mp) \ + (((kmem_slab_t *)P2END((uintptr_t)(mp), PAGESIZE) - 1)->slab_cache == \ + (cp)->cache_magtype->mt_cache) + +#define KMEM_MAGAZINE_VALID(cp, mp) \ + (((kmem_slab_t *)P2END((uintptr_t)(mp), PAGESIZE) - 1)->slab_cache == \ + (cp)->cache_magtype->mt_cache) + +#define KMEM_SLAB_OFFSET(sp, buf) \ + ((size_t)((uintptr_t)(buf) - (uintptr_t)((sp)->slab_base))) + +#define KMEM_SLAB_MEMBER(sp, buf) \ + (KMEM_SLAB_OFFSET(sp, buf) < (sp)->slab_cache->cache_slabsize) + +#define KMEM_BUFTAG_ALLOC 0xa110c8edUL +#define KMEM_BUFTAG_FREE 0xf4eef4eeUL + +/* slab_later_count thresholds */ +#define KMEM_DISBELIEF 3 + +/* slab_flags */ +#define KMEM_SLAB_NOMOVE 0x1 +#define KMEM_SLAB_MOVE_PENDING 0x2 + +typedef struct kmem_slab { + struct kmem_cache *slab_cache; /* controlling cache */ + void *slab_base; /* base of allocated memory */ + avl_node_t slab_link; /* slab linkage */ + struct kmem_bufctl *slab_head; /* first free buffer */ + long slab_refcnt; /* outstanding allocations */ + long slab_chunks; /* chunks (bufs) in this slab */ + uint32_t slab_stuck_offset; /* unmoved buffer offset */ + uint16_t slab_later_count; /* cf KMEM_CBRC_LATER */ + uint16_t slab_flags; /* bits to mark the slab */ + hrtime_t slab_create_time; /* when was slab created? */ +} kmem_slab_t; + +#define KMEM_HASH_INITIAL 64 + +#define KMEM_HASH(cp, buf) \ + ((cp)->cache_hash_table + \ + (((uintptr_t)(buf) >> (cp)->cache_hash_shift) & (cp)->cache_hash_mask)) + +#define KMEM_CACHE_NAMELEN 31 + +typedef struct kmem_magazine { + void *mag_next; + void *mag_round[1]; /* one or more rounds */ +} kmem_magazine_t; + +/* + * The magazine types for fast per-cpu allocation + */ +typedef struct kmem_magtype { + short mt_magsize; /* magazine size (number of rounds) */ + int mt_align; /* magazine alignment */ + size_t mt_minbuf; /* all smaller buffers qualify */ + size_t mt_maxbuf; /* no larger buffers qualify */ + kmem_cache_t *mt_cache; /* magazine cache */ +} kmem_magtype_t; + +#define KMEM_CPU_CACHE_SIZE 128 /* must be power of 2 */ +#define KMEM_CPU_PAD (KMEM_CPU_CACHE_SIZE - sizeof (kmutex_t) - \ + 2 * sizeof (uint64_t) - 2 * sizeof (void *) - sizeof (int) - \ + 5 * sizeof (short)) +#define KMEM_CACHE_SIZE(ncpus) \ + __builtin_offsetof(kmem_cache_t, cache_cpu[ncpus]) + + /* Offset from kmem_cache->cache_cpu for per cpu caches */ +#define KMEM_CPU_CACHE_OFFSET(cpuid) \ + __builtin_offsetof(kmem_cache_t, cache_cpu[cpuid]) - \ + __builtin_offsetof(kmem_cache_t, cache_cpu) + +/* + * Per CPU cache data + */ +typedef struct kmem_cpu_cache { + kmutex_t cc_lock; /* protects this cpu's local cache */ + uint64_t cc_alloc; /* allocations from this cpu */ + uint64_t cc_free; /* frees to this cpu */ + kmem_magazine_t *cc_loaded; /* the currently loaded magazine */ + kmem_magazine_t *cc_ploaded; /* the previously loaded magazine */ + int cc_flags; /* CPU-local copy of cache_flags */ + short cc_rounds; /* number of objects in loaded mag */ + short cc_prounds; /* number of objects in previous mag */ + short cc_magsize; /* number of rounds in a full mag */ + short cc_dump_rounds; /* dump time copy of cc_rounds */ + short cc_dump_prounds; /* dump time copy of cc_prounds */ + char cc_pad[KMEM_CPU_PAD]; /* for nice alignment */ +} kmem_cpu_cache_t; + +/* + * The magazine lists used in the depot. 
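For illustration of the offsetof-based sizing macros above (the arena handle, VM_SLEEP flag and example_* name are assumptions, not taken from the port): a single allocation sized by KMEM_CACHE_SIZE() covers the cache header plus one trailing kmem_cpu_cache_t per cpu, and KMEM_CPU_CACHE_OFFSET() then locates each cpu's slot without any further allocation.

static kmem_cache_t *
example_alloc_cache_struct(vmem_t *arena, int ncpus)
{
    /* header plus ncpus trailing kmem_cpu_cache_t entries */
    size_t sz = KMEM_CACHE_SIZE(ncpus);
    kmem_cache_t *cp = vmem_alloc(arena, sz, VM_SLEEP);

    /* cpu (ncpus - 1)'s cache sits at a fixed offset from cache_cpu */
    kmem_cpu_cache_t *last = (kmem_cpu_cache_t *)
        ((char *)&cp->cache_cpu + KMEM_CPU_CACHE_OFFSET(ncpus - 1));

    /* 'last' is the same address as &cp->cache_cpu[ncpus - 1] */
    (void) last;
    return (cp);
}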
+ */ +typedef struct kmem_maglist { + kmem_magazine_t *ml_list; /* magazine list */ + long ml_total; /* number of magazines */ + long ml_min; /* min since last update */ + long ml_reaplimit; /* max reapable magazines */ + uint64_t ml_alloc; /* allocations from this list */ +} kmem_maglist_t; + +typedef struct kmem_defrag { + /* + * Statistics + */ + uint64_t kmd_callbacks; /* move callbacks */ + uint64_t kmd_yes; /* KMEM_CBRC_YES responses */ + uint64_t kmd_no; /* NO responses */ + uint64_t kmd_later; /* LATER responses */ + uint64_t kmd_dont_need; /* DONT_NEED responses */ + uint64_t kmd_dont_know; /* DONT_KNOW responses */ + uint64_t kmd_slabs_freed; /* slabs freed by moves */ + uint64_t kmd_defrags; /* kmem_cache_defrag() */ + uint64_t kmd_scans; /* kmem_cache_scan() */ + + /* + * Consolidator fields + */ + avl_tree_t kmd_moves_pending; /* buffer moves pending */ + list_t kmd_deadlist; /* deferred slab frees */ + size_t kmd_deadcount; /* # of slabs in kmd_deadlist */ + uint8_t kmd_reclaim_numer; /* slab usage threshold */ + uint8_t kmd_pad1; /* compiler padding */ + uint16_t kmd_consolidate; /* triggers consolidator */ + uint32_t kmd_pad2; /* compiler padding */ + size_t kmd_slabs_sought; /* reclaimable slabs sought */ + size_t kmd_slabs_found; /* reclaimable slabs found */ + size_t kmd_tries; /* nth scan interval counter */ + /* + * Fields used to ASSERT that the client does not kmem_cache_free() + * objects passed to the move callback. + */ + void *kmd_from_buf; /* object to move */ + void *kmd_to_buf; /* move destination */ + kthread_t *kmd_thread; /* thread calling move */ +} kmem_defrag_t; + +/* + * Cache callback function types + */ +typedef int (*constructor_fn_t)(void*, void*, int); +typedef void (*destructor_fn_t)(void*, void*); +typedef void (*reclaim_fn_t)(void*); + +/* + * Cache + */ +struct kmem_cache { + +/* + * Statistics + */ + uint64_t cache_slab_create; /* slab creates */ + uint64_t cache_slab_destroy; /* slab destroys */ + uint64_t cache_slab_alloc; /* slab layer allocations */ + uint64_t cache_slab_free; /* slab layer frees */ + uint64_t cache_alloc_fail; /* total failed allocations */ + uint64_t cache_buftotal; /* total buffers */ + uint64_t cache_bufmax; /* max buffers ever */ + uint64_t cache_bufslab; /* buffers free in slab layer */ + uint64_t cache_reap; /* cache reaps */ + uint64_t cache_rescale; /* hash table rescales */ + uint64_t cache_lookup_depth; /* hash lookup depth */ + uint64_t cache_depot_contention; /* mutex contention count */ + uint64_t cache_depot_contention_prev; /* previous snapshot */ + uint64_t cache_alloc_count; /* Number of allocations in cache */ + /* successful calls with KM_NO_VBA flag set */ + uint64_t no_vba_success; + uint64_t no_vba_fail; + /* number of times we set arc growth suppression time */ + uint64_t arc_no_grow_set; + /* number of times spl_zio_is_suppressed returned true for this cache */ + uint64_t arc_no_grow; + + /* + * Cache properties + */ + char cache_name[KMEM_CACHE_NAMELEN + 1]; + size_t cache_bufsize; /* object size */ + size_t cache_align; /* object alignment */ + int (*cache_constructor)(void *, void *, int); + void (*cache_destructor)(void *, void *); + void (*cache_reclaim)(void *); + kmem_cbrc_t (*cache_move)(void *, void *, size_t, void *); + void *cache_private; /* opaque arg to callbacks */ + vmem_t *cache_arena; /* vmem source for slabs */ + int cache_cflags; /* cache creation flags */ + int cache_flags; /* various cache state info */ + uint32_t cache_mtbf; /* induced alloc failure rate */ + uint32_t 
cache_pad1; /* compiler padding */ + kstat_t *cache_kstat; /* exported statistics */ + list_node_t cache_link; /* cache linkage */ + + /* + * Slab layer + */ + kmutex_t cache_lock; /* protects slab layer */ + + size_t cache_chunksize; /* buf + alignment [+ debug] */ + size_t cache_slabsize; /* size of a slab */ + size_t cache_maxchunks; /* max buffers per slab */ + size_t cache_bufctl; /* buf-to-bufctl distance */ + size_t cache_buftag; /* buf-to-buftag distance */ + size_t cache_verify; /* bytes to verify */ + size_t cache_contents; /* bytes of saved content */ + size_t cache_color; /* next slab color */ + size_t cache_mincolor; /* maximum slab color */ + size_t cache_maxcolor; /* maximum slab color */ + size_t cache_hash_shift; /* get to interesting bits */ + size_t cache_hash_mask; /* hash table mask */ + list_t cache_complete_slabs; /* completely allocated slabs */ + size_t cache_complete_slab_count; + avl_tree_t cache_partial_slabs; /* partial slab freelist */ + size_t cache_partial_binshift; /* for AVL sort bins */ + kmem_cache_t *cache_bufctl_cache; /* source of bufctls */ + kmem_bufctl_t **cache_hash_table; /* hash table base */ + kmem_defrag_t *cache_defrag; /* slab consolidator fields */ + + /* + * Depot layer + */ + kmutex_t cache_depot_lock; /* protects depot */ + kmem_magtype_t *cache_magtype; /* magazine type */ + kmem_maglist_t cache_full; /* full magazines */ + kmem_maglist_t cache_empty; /* empty magazines */ + void *cache_dumpfreelist; /* heap during crash dump */ + void *cache_dumplog; /* log entry during dump */ + + /* + * Per CPU structures + */ + // XNU adjust to suit __builtin_offsetof + kmem_cpu_cache_t cache_cpu[1]; /* per-cpu data */ + +}; + +typedef struct kmem_cpu_log_header { + kmutex_t clh_lock; + char *clh_current; + size_t clh_avail; + int clh_chunk; + int clh_hits; +#if defined(SPL_DEBUG_MUTEX) + char clh_pad[128 - sizeof (kmutex_t) - sizeof (char *) - + sizeof (size_t) - 2 * sizeof (int)]; +#else + char clh_pad[64 - sizeof (kmutex_t) - sizeof (char *) - + sizeof (size_t) - 2 * sizeof (int)]; +#endif +} kmem_cpu_log_header_t; + +typedef struct kmem_log_header { + kmutex_t lh_lock; + char *lh_base; + int *lh_free; + size_t lh_chunksize; + int lh_nchunks; + int lh_head; + int lh_tail; + int lh_hits; + kmem_cpu_log_header_t lh_cpu[1]; /* ncpus actually allocated */ +} kmem_log_header_t; + +/* kmem_move kmm_flags */ +#define KMM_DESPERATE 0x1 +#define KMM_NOTIFY 0x2 +#define KMM_DEBUG 0x4 + +typedef struct kmem_move { + kmem_slab_t *kmm_from_slab; + void *kmm_from_buf; + void *kmm_to_buf; + avl_node_t kmm_entry; + int kmm_flags; +} kmem_move_t; + +/* + * In order to consolidate partial slabs, it must be possible for the cache to + * have partial slabs. + */ +#define KMEM_IS_MOVABLE(cp) \ + (((cp)->cache_chunksize * 2) <= (cp)->cache_slabsize) + +#endif diff --git a/include/os/macos/spl/sys/kstat.h b/include/os/macos/spl/sys/kstat.h new file mode 100644 index 0000000000..4463770e7a --- /dev/null +++ b/include/os/macos/spl/sys/kstat.h @@ -0,0 +1,188 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SPL_KSTAT_H +#define _SPL_KSTAT_H + +#include +#include +#include +#include +#include + +#define KSTAT_STRLEN 63 + +/* + * For reference valid classes are: + * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc + */ + +#define KSTAT_TYPE_RAW 0 /* can be anything; ks_ndata >= 1 */ +#define KSTAT_TYPE_NAMED 1 /* name/value pair; ks_ndata >= 1 */ +#define KSTAT_TYPE_INTR 2 /* interrupt stats; ks_ndata == 1 */ +#define KSTAT_TYPE_IO 3 /* I/O stats; ks_ndata == 1 */ +#define KSTAT_TYPE_TIMER 4 /* event timer; ks_ndata >= 1 */ +#define KSTAT_TYPE_TXG 5 /* txg sync; ks_ndata >= 1 */ +#define KSTAT_NUM_TYPES 6 + +#define KSTAT_DATA_CHAR 0 +#define KSTAT_DATA_INT32 1 +#define KSTAT_DATA_UINT32 2 +#define KSTAT_DATA_INT64 3 +#define KSTAT_DATA_UINT64 4 +#define KSTAT_DATA_LONG 5 +#define KSTAT_DATA_ULONG 6 +#define KSTAT_DATA_STRING 7 +#define KSTAT_NUM_DATAS 8 + +#define KSTAT_INTR_HARD 0 +#define KSTAT_INTR_SOFT 1 +#define KSTAT_INTR_WATCHDOG 2 +#define KSTAT_INTR_SPURIOUS 3 +#define KSTAT_INTR_MULTSVC 4 +#define KSTAT_NUM_INTRS 5 + +#define KSTAT_FLAG_VIRTUAL 0x01 +#define KSTAT_FLAG_VAR_SIZE 0x02 +#define KSTAT_FLAG_WRITABLE 0x04 +#define KSTAT_FLAG_PERSISTENT 0x08 +#define KSTAT_FLAG_DORMANT 0x10 +#define KSTAT_FLAG_UNSUPPORTED (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_WRITABLE | \ + KSTAT_FLAG_PERSISTENT | KSTAT_FLAG_DORMANT) +#define KSTAT_FLAG_INVALID 0x20 + +#define KS_MAGIC 0x9d9d9d9d + +/* Dynamic updates */ +#define KSTAT_READ 0 +#define KSTAT_WRITE 1 + +struct kstat_s; + +typedef int kid_t; /* unique kstat id */ +typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */ + +typedef struct kstat_s { + int ks_magic; /* magic value */ + kid_t ks_kid; /* unique kstat ID */ + hrtime_t ks_crtime; /* creation time */ + hrtime_t ks_snaptime; /* last access time */ + char ks_module[KSTAT_STRLEN+1]; /* provider module name */ + int ks_instance; /* provider module instance */ + char ks_name[KSTAT_STRLEN+1]; /* kstat name */ + char ks_class[KSTAT_STRLEN+1]; /* kstat class */ + uchar_t ks_type; /* kstat data type */ + uchar_t ks_flags; /* kstat flags */ + void *ks_data; /* kstat type-specific data */ + uint_t ks_ndata; /* # of type-specific data records */ + size_t ks_data_size; /* size of kstat data section */ + struct proc_dir_entry *ks_proc; /* proc linkage */ + kstat_update_t *ks_update; /* dynamic updates */ + void *ks_private; /* private data */ + kmutex_t *ks_lock; /* kstat data lock */ +} kstat_t; + +typedef struct kstat_named_s { + char name[KSTAT_STRLEN]; /* name of counter */ + uchar_t data_type; /* data type */ + union { + char c[16]; /* 128-bit int */ + int32_t i32; /* 32-bit signed int */ + uint32_t ui32; /* 32-bit unsigned int */ + int64_t i64; /* 64-bit signed int */ + uint64_t ui64; /* 64-bit unsigned int */ + long l; /* native signed long */ + ulong_t ul; /* native unsigned long */ + struct { + union { + char *ptr; /* NULL-term string */ + char __pad[8]; /* 64-bit padding */ + } addr; + uint32_t len; /* # bytes for strlen + '\0' */ + } string; + } value; +} 
kstat_named_t; + +#define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr) +#define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len) + +typedef struct kstat_intr { + uint_t intrs[KSTAT_NUM_INTRS]; +} kstat_intr_t; + +typedef struct kstat_io { + u_longlong_t nread; /* number of bytes read */ + u_longlong_t nwritten; /* number of bytes written */ + uint_t reads; /* number of read operations */ + uint_t writes; /* number of write operations */ + hrtime_t wtime; /* cumulative wait (pre-service) time */ + hrtime_t wlentime; /* cumulative wait len * time product */ + hrtime_t wlastupdate; /* last time wait queue changed */ + hrtime_t rtime; /* cumulative run (service) time */ + hrtime_t rlentime; /* cumulative run length*time product */ + hrtime_t rlastupdate; /* last time run queue changed */ + uint_t wcnt; /* count of elements in wait state */ + uint_t rcnt; /* count of elements in run state */ +} kstat_io_t; + +typedef struct kstat_timer { + char name[KSTAT_STRLEN+1]; /* event name */ + u_longlong_t num_events; /* number of events */ + hrtime_t elapsed_time; /* cumulative elapsed time */ + hrtime_t min_time; /* shortest event duration */ + hrtime_t max_time; /* longest event duration */ + hrtime_t start_time; /* previous event start time */ + hrtime_t stop_time; /* previous event stop time */ +} kstat_timer_t; + +void spl_kstat_init(void); +void spl_kstat_fini(void); + +extern void __kstat_set_raw_ops(kstat_t *ksp, + int (*headers)(char *buf, size_t size), + int (*data)(char *buf, size_t size, void *data), + void* (*addr)(kstat_t *ksp, loff_t index)); + +extern kstat_t *__kstat_create(char *ks_module, int ks_instance, + char *ks_name, char *ks_class, + uchar_t ks_type, ulong_t ks_ndata, + uchar_t ks_flags); +extern void __kstat_install(kstat_t *ksp); +extern void __kstat_delete(kstat_t *ksp); + +#define kstat_create(m, i, n, c, t, s, f) \ + __kstat_create(m, i, n, c, t, s, f) +#define kstat_install(k) __kstat_install(k) +#define kstat_delete(k) __kstat_delete(k) + +extern void kstat_waitq_enter(kstat_io_t *); +extern void kstat_waitq_exit(kstat_io_t *); +extern void kstat_runq_enter(kstat_io_t *); +extern void kstat_runq_exit(kstat_io_t *); +extern void kstat_named_init(kstat_named_t *, const char *, uchar_t); + +#define kstat_set_raw_ops(k, h, d, a) __kstat_set_raw_ops(k, h, d, a) + +#endif /* _SPL_KSTAT_H */ diff --git a/include/os/macos/spl/sys/list.h b/include/os/macos/spl/sys/list.h new file mode 100644 index 0000000000..c9a72a53a6 --- /dev/null +++ b/include/os/macos/spl/sys/list.h @@ -0,0 +1,145 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
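A typical consumer of the kstat interfaces declared above would look roughly like the following; the module/name/class strings and all example_* identifiers are invented for illustration, and the matching teardown would call kstat_delete() from the fini path.

typedef struct example_stats {
    kstat_named_t hits;
    kstat_named_t misses;
} example_stats_t;

static example_stats_t example_stats;
static kstat_t *example_ksp;

static void
example_kstat_init(void)
{
    kstat_named_init(&example_stats.hits, "hits", KSTAT_DATA_UINT64);
    kstat_named_init(&example_stats.misses, "misses", KSTAT_DATA_UINT64);

    example_ksp = kstat_create("example", 0, "stats", "misc",
        KSTAT_TYPE_NAMED, sizeof (example_stats) / sizeof (kstat_named_t),
        KSTAT_FLAG_VIRTUAL);
    if (example_ksp != NULL) {
        /* KSTAT_FLAG_VIRTUAL: the caller owns the data buffer */
        example_ksp->ks_data = &example_stats;
        kstat_install(example_ksp);
    }
}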
+ */ + + +#ifndef _SPL_LIST_H +#define _SPL_LIST_H + +#include +#include + +/* + * NOTE: I have implemented the Solaris list API in terms of the native + * linux API. This has certain advantages in terms of leveraging the linux + * list debugging infrastructure, but it also means that the internals of a + * list differ slightly than on Solaris. This is not a problem as long as + * all callers stick to the published API. The two major differences are: + * + * 1) A list_node_t is mapped to a linux list_head struct which changes + * the name of the list_next/list_prev pointers to next/prev respectively. + * + * 2) A list_node_t which is not attached to a list on Solaris is denoted + * by having its list_next/list_prev pointers set to NULL. Under linux + * the next/prev pointers are set to LIST_POISON1 and LIST_POISON2 + * respectively. At this moment this only impacts the implementation + * of the list_link_init() and list_link_active() functions. + */ + +typedef struct list_node { + struct list_node *list_next; + struct list_node *list_prev; +} list_node_t; + + + +typedef struct list { + size_t list_size; + size_t list_offset; + list_node_t list_head; +} list_t; + +void list_create(list_t *, size_t, size_t); +void list_destroy(list_t *); + +void list_insert_after(list_t *, void *, void *); +void list_insert_before(list_t *, void *, void *); +void list_insert_head(list_t *, void *); +void list_insert_tail(list_t *, void *); +void list_remove(list_t *, void *); +void list_move_tail(list_t *, list_t *); + +void *list_head(list_t *); +void *list_tail(list_t *); +void *list_next(list_t *, void *); +void *list_prev(list_t *, void *); + +int list_link_active(list_node_t *); +int list_is_empty(list_t *); + +#define LIST_POISON1 NULL +#define LIST_POISON2 NULL + +#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) +#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) +#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head) + + +static inline void +list_link_init(list_node_t *node) +{ + node->list_next = LIST_POISON1; + node->list_prev = LIST_POISON2; +} + +static inline void +__list_del(list_node_t *prev, list_node_t *next) +{ + next->list_prev = prev; + prev->list_next = next; +} + +static inline void list_del(list_node_t *entry) +{ + __list_del(entry->list_prev, entry->list_next); + entry->list_next = LIST_POISON1; + entry->list_prev = LIST_POISON2; +} + +static inline void * +list_remove_head(list_t *list) +{ + list_node_t *head = list->list_head.list_next; + if (head == &list->list_head) + return (NULL); + + list_del(head); + return (list_object(list, head)); +} + +static inline void * +list_remove_tail(list_t *list) +{ + list_node_t *tail = list->list_head.list_prev; + if (tail == &list->list_head) + return (NULL); + + list_del(tail); + return (list_object(list, tail)); +} + +static inline void +list_link_replace(list_node_t *old_node, list_node_t *new_node) +{ + ASSERT(list_link_active(old_node)); + ASSERT(!list_link_active(new_node)); + + new_node->list_next = old_node->list_next; + new_node->list_prev = old_node->list_prev; + old_node->list_prev->list_next = new_node; + old_node->list_next->list_prev = new_node; + list_link_init(old_node); +} + +#endif /* SPL_LIST_H */ diff --git a/include/os/macos/spl/sys/mod_os.h b/include/os/macos/spl/sys/mod_os.h new file mode 100644 index 0000000000..f9c1d80ee1 --- /dev/null +++ b/include/os/macos/spl/sys/mod_os.h @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are 
subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SPL_MOD_H +#define _SPL_MOD_H + +#define MODULE_INIT(s) +#define MODULE_AUTHOR(s) +#define MODULE_LICENSE(s) +#define MODULE_VERSION(s) +#define ZFS_MODULE_DESCRIPTION(s) +#define ZFS_MODULE_AUTHOR(s) +#define ZFS_MODULE_LICENSE(s) +#define ZFS_MODULE_VERSION(s) + +#define ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, \ + getfunc, perm, desc) + +#define __init __attribute__((unused)) +#define __exit __attribute__((unused)) + +/* + * The init/fini functions need to be called, but they are all static + */ +#define module_init(fn) \ + int wrap_ ## fn(void) \ + { \ + return (fn()); \ + } + +#define module_exit(fn) \ + void wrap_ ## fn(void) \ + { \ + fn(); \ + } + +#define ZFS_MODULE_PARAM_ARGS void + +#define ZFS_MODULE_PARAM(A, B, C, D, E, F) +#define module_param_call(a, b, c, d, e) +#define module_param_named(a, b, c, d) + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +kern_return_t spl_start(kmod_info_t *ki, void *d); +kern_return_t spl_stop(kmod_info_t *ki, void *d); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SPL_MOD_H */ diff --git a/include/os/macos/spl/sys/mutex.h b/include/os/macos/spl/sys/mutex.h new file mode 100644 index 0000000000..11a4a89da6 --- /dev/null +++ b/include/os/macos/spl/sys/mutex.h @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * OSX mutex functions + * + * Jorgen Lundman + * + */ + +#ifndef OSX_MUTEX_H +#define OSX_MUTEX_H + +#include + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + MUTEX_ADAPTIVE = 0, /* spin if owner is running, otherwise block */ + MUTEX_SPIN = 1, /* block interrupts and spin */ + MUTEX_DRIVER = 4, /* driver (DDI) mutex */ + MUTEX_DEFAULT = 6 /* kernel default mutex */ +} kmutex_type_t; + +#define MUTEX_NOLOCKDEP 0 + +/* + * Alas lck_mtx_t; is opaque and not available at compile time, and we + * really want to embed them. 
Luckily, mutex size has not changed in + * many versions of OSX. We should possibly to a startup check of + * the size though. + */ +typedef struct { + uint32_t opaque[4]; +} wrapper_mutex_t; + +/* + * To enable watchdog to keep an eye on mutex being held for too long + * define this debug variable. + */ + +#define SPL_DEBUG_MUTEX + +#ifdef SPL_DEBUG_MUTEX +#define SPL_MUTEX_WATCHDOG_SLEEP 10 /* How long to sleep between checking */ +#define SPL_MUTEX_WATCHDOG_TIMEOUT 60 /* When is a mutex held too long? */ +#endif + +/* + * Solaris kmutex defined. + * + * and is embedded into ZFS structures (see dbuf) so we need to match the + * size carefully. It appears to be 32 bytes. Or rather, it needs to be + * aligned. + */ + +typedef struct kmutex { + void *m_owner; + wrapper_mutex_t m_lock; + +#ifdef SPL_DEBUG_MUTEX + void *leak; + uint64_t m_initialised; +#define MUTEX_INIT 0x123456789abcdef0ULL +#define MUTEX_DESTROYED 0xaabbccddaabbccddULL +#endif + +} kmutex_t; + +#include + +#define MUTEX_HELD(x) (mutex_owned(x)) +#define MUTEX_NOT_HELD(x) (!mutex_owned(x)) + +/* + * On OS X, CoreStorage provides these symbols, so we have to redefine them, + * preferably without having to modify SPL users. + */ +#ifdef SPL_DEBUG_MUTEX + +#define mutex_init(A, B, C, D) \ + spl_mutex_init(A, B, C, D, __FILE__, __FUNCTION__, __LINE__) +void spl_mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, + void *ibc, const char *f, const char *fn, int l); + +#else + +#define mutex_init spl_mutex_init +void spl_mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc); + +#endif + +#ifdef SPL_DEBUG_MUTEX +#define mutex_enter(X) spl_mutex_enter((X), __FILE__, __LINE__) +void spl_mutex_enter(kmutex_t *mp, char *file, int line); +#else +#define mutex_enter spl_mutex_enter +void spl_mutex_enter(kmutex_t *mp); +#endif + +#define mutex_enter_nested(A, B) mutex_enter(A) + +#define mutex_destroy spl_mutex_destroy +#define mutex_exit spl_mutex_exit +#define mutex_tryenter spl_mutex_tryenter +#define mutex_owned spl_mutex_owned +#define mutex_owner spl_mutex_owner + +void spl_mutex_destroy(kmutex_t *mp); +void spl_mutex_exit(kmutex_t *mp); +int spl_mutex_tryenter(kmutex_t *mp); +int spl_mutex_owned(kmutex_t *mp); + +struct thread *spl_mutex_owner(kmutex_t *mp); + +int spl_mutex_subsystem_init(void); +void spl_mutex_subsystem_fini(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/os/macos/spl/sys/param.h b/include/os/macos/spl/sys/param.h new file mode 100644 index 0000000000..2819402be9 --- /dev/null +++ b/include/os/macos/spl/sys/param.h @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_PARAM_H +#define _SPL_PARAM_H + +#include_next +#include + +/* Pages to bytes and back */ +#define ptob(pages) (pages << PAGE_SHIFT) +#define btop(bytes) (bytes >> PAGE_SHIFT) + +#define MAXUID UINT32_MAX + +#endif /* SPL_PARAM_H */ diff --git a/include/os/macos/spl/sys/policy.h b/include/os/macos/spl/sys/policy.h new file mode 100644 index 0000000000..b8953209db --- /dev/null +++ b/include/os/macos/spl/sys/policy.h @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_POLICY_H +#define _SPL_POLICY_H + +#ifdef _KERNEL + +#include +#include + +struct vattr; + +int secpolicy_fs_unmount(cred_t *, struct mount *); +int secpolicy_nfs(const cred_t *); +int secpolicy_sys_config(const cred_t *, boolean_t); +int secpolicy_zfs(const cred_t *); +int secpolicy_zinject(const cred_t *); + +/* + * This function to be called from xxfs_setattr(). + * Must be called with the node's attributes read-write locked. + * + * cred_t * - acting credentials + * struct vnode * - vnode we're operating on + * struct vattr *va - new attributes, va_mask may be + * changed on return from a call + * struct vattr *oldva - old attributes, need include owner + * and mode only + * int flags - setattr flags + * int iaccess(void *node, int mode, cred_t *cr) + * - non-locking internal access function + * mode be checked + * w/ VREAD|VWRITE|VEXEC, not fs + * internal mode encoding. 
+ * + * void *node - internal node (inode, tmpnode) to + * pass as arg to iaccess + */ +int secpolicy_vnode_setattr(cred_t *, struct vnode *, vattr_t *, + const vattr_t *, int, int (void *, int, cred_t *), void *); + +int secpolicy_vnode_stky_modify(const cred_t *); +int secpolicy_setid_setsticky_clear(struct vnode *vp, vattr_t *vap, + const vattr_t *ovap, cred_t *cr); + +int secpolicy_vnode_remove(struct vnode *, const cred_t *); +int secpolicy_vnode_create_gid(const cred_t *); +int secpolicy_vnode_setids_setgids(struct vnode *, const cred_t *, gid_t); +int secpolicy_vnode_setdac(struct vnode *, const cred_t *, uid_t); +int secpolicy_vnode_chown(struct vnode *, const cred_t *, uid_t); +int secpolicy_vnode_setid_retain(const cred_t *, boolean_t); +int secpolicy_xvattr(vattr_t *, uid_t, const cred_t *, mode_t); +int secpolicy_setid_clear(vattr_t *, const cred_t *); +int secpolicy_basic_link(const cred_t *); +int secpolicy_fs_mount_clearopts(const cred_t *, struct mount *); +int secpolicy_fs_mount(const cred_t *, struct vnode *, struct mount *); + +#endif /* _KERNEL */ + +#endif /* SPL_POLICY_H */ diff --git a/include/os/macos/spl/sys/priv.h b/include/os/macos/spl/sys/priv.h new file mode 100644 index 0000000000..a8da8f101e --- /dev/null +++ b/include/os/macos/spl/sys/priv.h @@ -0,0 +1,531 @@ +/* + * Copyright (c) 2006 nCircle Network Security, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson for the TrustedBSD + * Project under contract to nCircle Network Security, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR, NCIRCLE NETWORK SECURITY, + * INC., OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Privilege checking interface for BSD kernel. + */ +#ifndef _SPL_PRIV_H +#define _SPL_PRIV_H + +/* + * Privilege list, sorted loosely by kernel subsystem. + * + * Think carefully before adding or reusing one of these privileges -- are + * there existing instances referring to the same privilege? Third party + * vendors may request the assignment of privileges to be used in loadable + * modules. Particular numeric privilege assignments are part of the + * loadable kernel module ABI, and should not be changed across minor + * releases. 
+ * + * When adding a new privilege, remember to determine if it's appropriate for + * use in jail, and update the privilege switch in kern_jail.c as necessary. + */ + +/* + * Track beginning of privilege list. + */ +#define _PRIV_LOWEST 1 + +/* + * The remaining privileges typically correspond to one or a small + * number of specific privilege checks, and have (relatively) precise + * meanings. They are loosely sorted into a set of base system + * privileges, such as the ability to reboot, and then loosely by + * subsystem, indicated by a subsystem name. + */ +#define _PRIV_ROOT 1 /* Removed. */ +#define PRIV_ACCT 2 /* Manage process accounting. */ +#define PRIV_MAXFILES 3 /* Exceed system open files limit. */ +#define PRIV_MAXPROC 4 /* Exceed system processes limit. */ +#define PRIV_KTRACE 5 /* Set/clear KTRFAC_ROOT on ktrace. */ +#define PRIV_SETDUMPER 6 /* Configure dump device. */ +#define PRIV_REBOOT 8 /* Can reboot system. */ +#define PRIV_SWAPON 9 /* Can swapon(). */ +#define PRIV_SWAPOFF 10 /* Can swapoff(). */ +#define PRIV_MSGBUF 11 /* Can read kernel message buffer. */ +#define PRIV_IO 12 /* Can perform low-level I/O. */ +#define PRIV_KEYBOARD 13 /* Reprogram keyboard. */ +#define PRIV_DRIVER 14 /* Low-level driver privilege. */ +#define PRIV_ADJTIME 15 /* Set time adjustment. */ +#define PRIV_NTP_ADJTIME 16 /* Set NTP time adjustment. */ +#define PRIV_CLOCK_SETTIME 17 /* Can call clock_settime. */ +#define PRIV_SETTIMEOFDAY 18 /* Can call settimeofday. */ +#define _PRIV_SETHOSTID 19 /* Removed. */ +#define _PRIV_SETDOMAINNAME 20 /* Removed. */ + +/* + * Audit subsystem privileges. + */ +#define PRIV_AUDIT_CONTROL 40 /* Can configure audit. */ +#define PRIV_AUDIT_FAILSTOP 41 /* Can run during audit fail stop. */ +#define PRIV_AUDIT_GETAUDIT 42 /* Can get proc audit properties. */ +#define PRIV_AUDIT_SETAUDIT 43 /* Can set proc audit properties. */ +#define PRIV_AUDIT_SUBMIT 44 /* Can submit an audit record. */ + +/* + * Credential management privileges. + */ +#define PRIV_CRED_SETUID 50 /* setuid. */ +#define PRIV_CRED_SETEUID 51 /* seteuid to !ruid and !svuid. */ +#define PRIV_CRED_SETGID 52 /* setgid. */ +#define PRIV_CRED_SETEGID 53 /* setgid to !rgid and !svgid. */ +#define PRIV_CRED_SETGROUPS 54 /* Set process additional groups. */ +#define PRIV_CRED_SETREUID 55 /* setreuid. */ +#define PRIV_CRED_SETREGID 56 /* setregid. */ +#define PRIV_CRED_SETRESUID 57 /* setresuid. */ +#define PRIV_CRED_SETRESGID 58 /* setresgid. */ +#define PRIV_SEEOTHERGIDS 59 /* Exempt bsd.seeothergids. */ +#define PRIV_SEEOTHERUIDS 60 /* Exempt bsd.seeotheruids. */ + +/* + * Debugging privileges. + */ +#define PRIV_DEBUG_DIFFCRED 80 /* Exempt debugging other users. */ +#define PRIV_DEBUG_SUGID 81 /* Exempt debugging setuid proc. */ +#define PRIV_DEBUG_UNPRIV 82 /* Exempt unprivileged debug limit. */ +#define PRIV_DEBUG_DENIED 83 /* Exempt P2_NOTRACE. */ + +/* + * Dtrace privileges. + */ +#define PRIV_DTRACE_KERNEL 90 /* Allow use of DTrace on the kernel. */ +#define PRIV_DTRACE_PROC 91 /* Allow attaching DTrace to process. */ +#define PRIV_DTRACE_USER 92 /* Process may submit DTrace events. */ + +/* + * Firmware privilegs. + */ +#define PRIV_FIRMWARE_LOAD 100 /* Can load firmware. */ + +/* + * Jail privileges. + */ +#define PRIV_JAIL_ATTACH 110 /* Attach to a jail. */ +#define PRIV_JAIL_SET 111 /* Set jail parameters. */ +#define PRIV_JAIL_REMOVE 112 /* Remove a jail. */ + +/* + * Kernel environment priveleges. + */ +#define PRIV_KENV_SET 120 /* Set kernel env. variables. 
*/ +#define PRIV_KENV_UNSET 121 /* Unset kernel env. variables. */ + +/* + * Loadable kernel module privileges. + */ +#define PRIV_KLD_LOAD 130 /* Load a kernel module. */ +#define PRIV_KLD_UNLOAD 131 /* Unload a kernel module. */ + +/* + * Privileges associated with the MAC Framework and specific MAC policy + * modules. + */ +#define PRIV_MAC_PARTITION 140 /* Privilege in mac_partition policy. */ +#define PRIV_MAC_PRIVS 141 /* Privilege in the mac_privs policy. */ + +/* + * Process-related privileges. + */ +#define PRIV_PROC_LIMIT 160 /* Exceed user process limit. */ +#define PRIV_PROC_SETLOGIN 161 /* Can call setlogin. */ +#define PRIV_PROC_SETRLIMIT 162 /* Can raise resources limits. */ +#define PRIV_PROC_SETLOGINCLASS 163 /* Can call setloginclass(2). */ + +/* + * System V IPC privileges. + */ +#define PRIV_IPC_READ 170 /* Can override IPC read perm. */ +#define PRIV_IPC_WRITE 171 /* Can override IPC write perm. */ +#define PRIV_IPC_ADMIN 172 /* Can override IPC owner-only perm. */ +#define PRIV_IPC_MSGSIZE 173 /* Exempt IPC message queue limit. */ + +/* + * POSIX message queue privileges. + */ +#define PRIV_MQ_ADMIN 180 /* Can override msgq owner-only perm. */ + +/* + * Performance monitoring counter privileges. + */ +#define PRIV_PMC_MANAGE 190 /* Can administer PMC. */ +#define PRIV_PMC_SYSTEM 191 /* Can allocate a system-wide PMC. */ + +/* + * Scheduling privileges. + */ +#define PRIV_SCHED_DIFFCRED 200 /* Exempt scheduling other users. */ +#define PRIV_SCHED_SETPRIORITY 201 /* Can set lower nice value for proc. */ +#define PRIV_SCHED_RTPRIO 202 /* Can set real time scheduling. */ +#define PRIV_SCHED_SETPOLICY 203 /* Can set scheduler policy. */ +#define PRIV_SCHED_SET 204 /* Can set thread scheduler. */ +#define PRIV_SCHED_SETPARAM 205 /* Can set thread scheduler params. */ +#define PRIV_SCHED_CPUSET 206 /* Can manipulate cpusets. */ +#define PRIV_SCHED_CPUSET_INTR 207 /* Can adjust IRQ to CPU binding. */ + +/* + * POSIX semaphore privileges. + */ +#define PRIV_SEM_WRITE 220 /* Can override sem write perm. */ + +/* + * Signal privileges. + */ +#define PRIV_SIGNAL_DIFFCRED 230 /* Exempt signalling other users. */ +#define PRIV_SIGNAL_SUGID 231 /* Non-conserv signal setuid proc. */ + +/* + * Sysctl privileges. + */ +#define PRIV_SYSCTL_DEBUG 240 /* Can invoke sysctl.debug. */ +#define PRIV_SYSCTL_WRITE 241 /* Can write sysctls. */ +#define PRIV_SYSCTL_WRITEJAIL 242 /* Can write sysctls, jail permitted. */ + +/* + * TTY privileges. + */ +#define PRIV_TTY_CONSOLE 250 /* Set console to tty. */ +#define PRIV_TTY_DRAINWAIT 251 /* Set tty drain wait time. */ +#define PRIV_TTY_DTRWAIT 252 /* Set DTR wait on tty. */ +#define PRIV_TTY_EXCLUSIVE 253 /* Override tty exclusive flag. */ +#define _PRIV_TTY_PRISON 254 /* Removed. */ +#define PRIV_TTY_STI 255 /* Simulate input on another tty. */ +#define PRIV_TTY_SETA 256 /* Set tty termios structure. */ + +/* + * UFS-specific privileges. + */ +#define PRIV_UFS_EXTATTRCTL 270 /* Can configure EAs on UFS1. */ +#define PRIV_UFS_QUOTAOFF 271 /* quotaoff(). */ +#define PRIV_UFS_QUOTAON 272 /* quotaon(). */ +#define PRIV_UFS_SETUSE 273 /* setuse(). */ + +/* + * ZFS-specific privileges. + */ +#define PRIV_ZFS_POOL_CONFIG 280 /* Can configure ZFS pools. */ + +/* Can inject faults in the ZFS fault injection framework. */ +#define PRIV_ZFS_INJECT 281 + +/* Can attach/detach ZFS file systems to/from jails. */ +#define PRIV_ZFS_JAIL 282 + +/* + * NFS-specific privileges. + */ +#define PRIV_NFS_DAEMON 290 /* Can become the NFS daemon. 
*/ +#define PRIV_NFS_LOCKD 291 /* Can become NFS lock daemon. */ + +/* + * VFS privileges. + */ +#define PRIV_VFS_READ 310 /* Override vnode DAC read perm. */ +#define PRIV_VFS_WRITE 311 /* Override vnode DAC write perm. */ +#define PRIV_VFS_ADMIN 312 /* Override vnode DAC admin perm. */ +#define PRIV_VFS_EXEC 313 /* Override vnode DAC exec perm. */ +#define PRIV_VFS_LOOKUP 314 /* Override vnode DAC lookup perm. */ +#define PRIV_VFS_BLOCKRESERVE 315 /* Can use free block reserve. */ +#define PRIV_VFS_CHFLAGS_DEV 316 /* Can chflags() a device node. */ +#define PRIV_VFS_CHOWN 317 /* Can set user; group to non-member. */ +#define PRIV_VFS_CHROOT 318 /* chroot(). */ +#define PRIV_VFS_RETAINSUGID 319 /* Can retain sugid bits on change. */ +#define PRIV_VFS_EXCEEDQUOTA 320 /* Exempt from quota restrictions. */ +#define PRIV_VFS_EXTATTR_SYSTEM 321 /* Operate on system EA namespace. */ +#define PRIV_VFS_FCHROOT 322 /* fchroot(). */ +#define PRIV_VFS_FHOPEN 323 /* Can fhopen(). */ +#define PRIV_VFS_FHSTAT 324 /* Can fhstat(). */ +#define PRIV_VFS_FHSTATFS 325 /* Can fhstatfs(). */ +#define PRIV_VFS_GENERATION 326 /* stat() returns generation number. */ +#define PRIV_VFS_GETFH 327 /* Can retrieve file handles. */ +#define PRIV_VFS_GETQUOTA 328 /* getquota(). */ +#define PRIV_VFS_LINK 329 /* bsd.hardlink_check_uid */ +#define PRIV_VFS_MKNOD_BAD 330 /* Can mknod() to mark bad inodes. */ +#define PRIV_VFS_MKNOD_DEV 331 /* Can mknod() to create dev nodes. */ +#define PRIV_VFS_MKNOD_WHT 332 /* Can mknod() to create whiteout. */ +#define PRIV_VFS_MOUNT 333 /* Can mount(). */ +#define PRIV_VFS_MOUNT_OWNER 334 /* Can manage other users' fsystems. */ +#define PRIV_VFS_MOUNT_EXPORTED 335 /* Can set MNT_EXPORTED on mount. */ +#define PRIV_VFS_MOUNT_PERM 336 /* Override dev node perms at mount. */ +#define PRIV_VFS_MOUNT_SUIDDIR 337 /* Can set MNT_SUIDDIR on mount. */ +#define PRIV_VFS_MOUNT_NONUSER 338 /* Can perform a non-user mount. */ +#define PRIV_VFS_SETGID 339 /* Can setgid if not in group. */ +#define PRIV_VFS_SETQUOTA 340 /* setquota(). */ +#define PRIV_VFS_STICKYFILE 341 /* Can set sticky bit on file. */ +#define PRIV_VFS_SYSFLAGS 342 /* Can modify system flags. */ +#define PRIV_VFS_UNMOUNT 343 /* Can unmount(). */ +#define PRIV_VFS_STAT 344 /* Override vnode MAC stat perm. */ + +/* + * Virtual memory privileges. + */ +#define PRIV_VM_MADV_PROTECT 360 /* Can set MADV_PROTECT. */ +#define PRIV_VM_MLOCK 361 /* Can mlock(), mlockall(). */ +#define PRIV_VM_MUNLOCK 362 /* Can munlock(), munlockall(). */ +/* Can override the global swap reservation limits. */ +#define PRIV_VM_SWAP_NOQUOTA 363 +/* Can override the per-uid swap reservation limits. */ +#define PRIV_VM_SWAP_NORLIMIT 364 + +/* + * Device file system privileges. + */ +#define PRIV_DEVFS_RULE 370 /* Can manage devfs rules. */ +#define PRIV_DEVFS_SYMLINK 371 /* Can create symlinks in devfs. */ + +/* + * Random number generator privileges. + */ +#define PRIV_RANDOM_RESEED 380 /* Closing /dev/random reseeds. */ + +/* + * Network stack privileges. + */ +#define PRIV_NET_BRIDGE 390 /* Administer bridge. */ +#define PRIV_NET_GRE 391 /* Administer GRE. */ +#define _PRIV_NET_PPP 392 /* Removed. */ +#define _PRIV_NET_SLIP 393 /* Removed. */ +#define PRIV_NET_BPF 394 /* Monitor BPF. */ +#define PRIV_NET_RAW 395 /* Open raw socket. */ +#define PRIV_NET_ROUTE 396 /* Administer routing. */ +#define PRIV_NET_TAP 397 /* Can open tap device. */ +#define PRIV_NET_SETIFMTU 398 /* Set interface MTU. */ +#define PRIV_NET_SETIFFLAGS 399 /* Set interface flags. 
*/ +#define PRIV_NET_SETIFCAP 400 /* Set interface capabilities. */ +#define PRIV_NET_SETIFNAME 401 /* Set interface name. */ +#define PRIV_NET_SETIFMETRIC 402 /* Set interface metrics. */ +#define PRIV_NET_SETIFPHYS 403 /* Set interface physical layer prop. */ +#define PRIV_NET_SETIFMAC 404 /* Set interface MAC label. */ +#define PRIV_NET_ADDMULTI 405 /* Add multicast addr. to ifnet. */ +#define PRIV_NET_DELMULTI 406 /* Delete multicast addr. from ifnet. */ +#define PRIV_NET_HWIOCTL 407 /* Issue hardware ioctl on ifnet. */ +#define PRIV_NET_SETLLADDR 408 /* Set interface link-level address. */ +#define PRIV_NET_ADDIFGROUP 409 /* Add new interface group. */ +#define PRIV_NET_DELIFGROUP 410 /* Delete interface group. */ +#define PRIV_NET_IFCREATE 411 /* Create cloned interface. */ +#define PRIV_NET_IFDESTROY 412 /* Destroy cloned interface. */ +#define PRIV_NET_ADDIFADDR 413 /* Add protocol addr to interface. */ +#define PRIV_NET_DELIFADDR 414 /* Delete protocol addr on interface. */ +#define PRIV_NET_LAGG 415 /* Administer lagg interface. */ +#define PRIV_NET_GIF 416 /* Administer gif interface. */ +#define PRIV_NET_SETIFVNET 417 /* Move interface to vnet. */ +#define PRIV_NET_SETIFDESCR 418 /* Set interface description. */ +#define PRIV_NET_SETIFFIB 419 /* Set interface fib. */ +#define PRIV_NET_VXLAN 420 /* Administer vxlan. */ + +/* + * 802.11-related privileges. + */ +#define PRIV_NET80211_GETKEY 440 /* Query 802.11 keys. */ +#define PRIV_NET80211_MANAGE 441 /* Administer 802.11. */ + +/* + * Placeholder for AppleTalk privileges, not supported anymore. + */ +#define _PRIV_NETATALK_RESERVEDPORT 450 /* Bind low port number. */ + +/* + * ATM privileges. + */ +#define PRIV_NETATM_CFG 460 +#define PRIV_NETATM_ADD 461 +#define PRIV_NETATM_DEL 462 +#define PRIV_NETATM_SET 463 + +/* + * Bluetooth privileges. + */ +#define PRIV_NETBLUETOOTH_RAW 470 /* Open raw bluetooth socket. */ + +/* + * Netgraph and netgraph module privileges. + */ +#define PRIV_NETGRAPH_CONTROL 480 /* Open netgraph control socket. */ +#define PRIV_NETGRAPH_TTY 481 /* Configure tty for netgraph. */ + +/* + * IPv4 and IPv6 privileges. + */ +#define PRIV_NETINET_RESERVEDPORT 490 /* Bind low port number. */ +#define PRIV_NETINET_IPFW 491 /* Administer IPFW firewall. */ +#define PRIV_NETINET_DIVERT 492 /* Open IP divert socket. */ +#define PRIV_NETINET_PF 493 /* Administer pf firewall. */ +#define PRIV_NETINET_DUMMYNET 494 /* Administer DUMMYNET. */ +#define PRIV_NETINET_CARP 495 /* Administer CARP. */ +#define PRIV_NETINET_MROUTE 496 /* Administer multicast routing. */ +#define PRIV_NETINET_RAW 497 /* Open netinet raw socket. */ +#define PRIV_NETINET_GETCRED 498 /* Query netinet pcb credentials. */ +#define PRIV_NETINET_ADDRCTRL6 499 /* Administer IPv6 address scopes. */ +#define PRIV_NETINET_ND6 500 /* Administer IPv6 neighbor disc. */ +#define PRIV_NETINET_SCOPE6 501 /* Administer IPv6 address scopes. */ +#define PRIV_NETINET_ALIFETIME6 502 /* Administer IPv6 address lifetimes. */ +#define PRIV_NETINET_IPSEC 503 /* Administer IPSEC. */ +#define PRIV_NETINET_REUSEPORT 504 /* Allow [rapid] port/address reuse. */ +#define PRIV_NETINET_SETHDROPTS 505 /* Set certain IPv4/6 header options. */ +#define PRIV_NETINET_BINDANY 506 /* Allow bind to any address. */ +#define PRIV_NETINET_HASHKEY 507 /* Get and set hash keys for IPv4/6. */ + +/* + * Placeholders for IPX/SPX privileges, not supported any more. + */ +#define _PRIV_NETIPX_RESERVEDPORT 520 /* Bind low port number. */ +#define _PRIV_NETIPX_RAW 521 /* Open netipx raw socket. 
*/ + +/* + * NCP privileges. + */ +#define PRIV_NETNCP 530 /* Use another user's connection. */ + +/* + * SMB privileges. + */ +#define PRIV_NETSMB 540 /* Use another user's connection. */ + +/* + * VM86 privileges. + */ +#define PRIV_VM86_INTCALL 550 /* Allow invoking vm86 int handlers. */ + +/* + * Set of reserved privilege values, which will be allocated to code as + * needed, in order to avoid renumbering later privileges due to insertion. + */ +#define _PRIV_RESERVED0 560 +#define _PRIV_RESERVED1 561 +#define _PRIV_RESERVED2 562 +#define _PRIV_RESERVED3 563 +#define _PRIV_RESERVED4 564 +#define _PRIV_RESERVED5 565 +#define _PRIV_RESERVED6 566 +#define _PRIV_RESERVED7 567 +#define _PRIV_RESERVED8 568 +#define _PRIV_RESERVED9 569 +#define _PRIV_RESERVED10 570 +#define _PRIV_RESERVED11 571 +#define _PRIV_RESERVED12 572 +#define _PRIV_RESERVED13 573 +#define _PRIV_RESERVED14 574 +#define _PRIV_RESERVED15 575 + +/* + * Define a set of valid privilege numbers that can be used by loadable + * modules that don't yet have privilege reservations. Ideally, these should + * not be used, since their meaning is opaque to any policies that are aware + * of specific privileges, such as jail, and as such may be arbitrarily + * denied. + */ +#define PRIV_MODULE0 600 +#define PRIV_MODULE1 601 +#define PRIV_MODULE2 602 +#define PRIV_MODULE3 603 +#define PRIV_MODULE4 604 +#define PRIV_MODULE5 605 +#define PRIV_MODULE6 606 +#define PRIV_MODULE7 607 +#define PRIV_MODULE8 608 +#define PRIV_MODULE9 609 +#define PRIV_MODULE10 610 +#define PRIV_MODULE11 611 +#define PRIV_MODULE12 612 +#define PRIV_MODULE13 613 +#define PRIV_MODULE14 614 +#define PRIV_MODULE15 615 + +/* + * DDB(4) privileges. + */ +#define PRIV_DDB_CAPTURE 620 /* Allow reading of DDB capture log. */ + +/* + * Arla/nnpfs privileges. + */ +#define PRIV_NNPFS_DEBUG 630 /* Perforn ARLA_VIOC_NNPFSDEBUG. */ + +/* + * cpuctl(4) privileges. + */ +#define PRIV_CPUCTL_WRMSR 640 /* Write model-specific register. */ +#define PRIV_CPUCTL_UPDATE 641 /* Update cpu microcode. */ + +/* + * Capi4BSD privileges. + */ +#define PRIV_C4B_RESET_CTLR 650 /* Load firmware, reset controller. */ +#define PRIV_C4B_TRACE 651 /* Unrestricted CAPI message tracing. */ + +/* + * OpenAFS privileges. + */ +#define PRIV_AFS_ADMIN 660 /* Can change AFS client settings. */ +#define PRIV_AFS_DAEMON 661 /* Can become the AFS daemon. */ + +/* + * Resource Limits privileges. + */ +#define PRIV_RCTL_GET_RACCT 670 +#define PRIV_RCTL_GET_RULES 671 +#define PRIV_RCTL_GET_LIMITS 672 +#define PRIV_RCTL_ADD_RULE 673 +#define PRIV_RCTL_REMOVE_RULE 674 + +/* + * mem(4) privileges. + */ +#define PRIV_KMEM_READ 680 /* Open mem/kmem for reading. */ +#define PRIV_KMEM_WRITE 681 /* Open mem/kmem for writing. */ + +/* + * Track end of privilege list. + */ +#define _PRIV_HIGHEST 682 + +/* + * Validate that a named privilege is known by the privilege system. Invalid + * privileges presented to the privilege system by a priv_check interface + * will result in a panic. This is only approximate due to sparse allocation + * of the privilege space. + */ +#define PRIV_VALID(x) ((x) > _PRIV_LOWEST && (x) < _PRIV_HIGHEST) + +#ifdef _KERNEL +/* + * Privilege check interfaces, modeled after historic suser() interfaces, but + * with the addition of a specific privilege name. No flags are currently + * defined for the API. Historically, flags specified using the real uid + * instead of the effective uid, and whether or not the check should be + * allowed in jail. 
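As an illustration of how these checks are consumed (a sketch only; the port's real secpolicy_* implementations live elsewhere and may differ), a policy wrapper built on the interface declared just below might look like this:

static int
example_secpolicy_zfs(struct ucred *cred)
{
    /* a non-zero return means the privilege was denied */
    return (priv_check_cred(cred, PRIV_ZFS_POOL_CONFIG, 0));
}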
+ */ +struct thread; +struct ucred; +int priv_check(struct thread *td, int priv); +int priv_check_cred(struct ucred *cred, int priv, int flags); +#endif + +#endif /* _SPL_PRIV_H */ diff --git a/include/os/macos/spl/sys/proc.h b/include/os/macos/spl/sys/proc.h new file mode 100644 index 0000000000..132964d9c1 --- /dev/null +++ b/include/os/macos/spl/sys/proc.h @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + +#ifndef _SPL_PROC_H +#define _SPL_PROC_H + +#include +#include +#include_next +#include +#include + +#define proc_t struct proc + +extern proc_t p0; /* process 0 */ + +static inline boolean_t +zfs_proc_is_caller(proc_t *p) +{ + return (p == curproc); +} + +#endif /* SPL_PROC_H */ diff --git a/include/os/macos/spl/sys/processor.h b/include/os/macos/spl/sys/processor.h new file mode 100644 index 0000000000..d077817e94 --- /dev/null +++ b/include/os/macos/spl/sys/processor.h @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_PROCESSOR_H +#define _SPL_PROCESSOR_H + +#include + +extern uint32_t getcpuid(void); + +typedef int processorid_t; + +#endif /* _SPL_PROCESSOR_H */ diff --git a/include/os/macos/spl/sys/procfs_list.h b/include/os/macos/spl/sys/procfs_list.h new file mode 100644 index 0000000000..b90d44da2b --- /dev/null +++ b/include/os/macos/spl/sys/procfs_list.h @@ -0,0 +1,64 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_PROCFS_LIST_H +#define _SPL_PROCFS_LIST_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct seq_file { + void *dummy; /* warning: empty struct has size 0 in C, size 1 in C++ */ +}; + +void seq_printf(struct seq_file *m, const char *fmt, ...); + +typedef struct procfs_list { + void *pl_private; + kmutex_t pl_lock; + list_t pl_list; + uint64_t pl_next_id; + size_t pl_node_offset; +} procfs_list_t; + +typedef struct procfs_list_node { + list_node_t pln_link; + uint64_t pln_id; +} procfs_list_node_t; + +void procfs_list_install(const char *module, + const char *name, + mode_t mode, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off); +void procfs_list_uninstall(procfs_list_t *procfs_list); +void procfs_list_destroy(procfs_list_t *procfs_list); +void procfs_list_add(procfs_list_t *procfs_list, void *p); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/os/macos/spl/sys/random.h b/include/os/macos/spl/sys/random.h new file mode 100644 index 0000000000..c69184cc84 --- /dev/null +++ b/include/os/macos/spl/sys/random.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_RANDOM_H +#define _SPL_RANDOM_H + +#include_next + + +static inline int +random_get_bytes(uint8_t *ptr, size_t len) +{ + read_random(ptr, len); + return (0); +} + +static inline int +random_get_pseudo_bytes(uint8_t *ptr, size_t len) +{ + read_random(ptr, len); + return (0); +} + +#endif /* _SPL_RANDOM_H */ diff --git a/include/os/macos/spl/sys/rwlock.h b/include/os/macos/spl/sys/rwlock.h new file mode 100644 index 0000000000..aa69db9bd3 --- /dev/null +++ b/include/os/macos/spl/sys/rwlock.h @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
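For context, a hypothetical consumer of the procfs_list interface declared in procfs_list.h above might register a list as follows; every example_* name is invented, error handling is omitted, and the entry type simply embeds a procfs_list_node_t so the show callback can render one entry per call.

typedef struct example_entry {
    procfs_list_node_t  ee_node;    /* linkage used by procfs_list */
    uint64_t            ee_value;
} example_entry_t;

static int
example_show(struct seq_file *f, void *p)
{
    example_entry_t *e = p;

    seq_printf(f, "%llu\n", (u_longlong_t)e->ee_value);
    return (0);
}

static int
example_show_header(struct seq_file *f)
{
    seq_printf(f, "value\n");
    return (0);
}

static int
example_clear(procfs_list_t *pl)
{
    return (0);     /* nothing cached to discard in this sketch */
}

static procfs_list_t example_list;

static void
example_register(void)
{
    procfs_list_install("zfs", "example", 0644, &example_list,
        example_show, example_show_header, example_clear,
        offsetof(example_entry_t, ee_node));
}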
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SPL_RWLOCK_H +#define _SPL_RWLOCK_H + +#include +#include + +typedef enum { + RW_DRIVER = 2, + RW_DEFAULT = 4 +} krw_type_t; + +typedef enum { + RW_NONE = 0, + RW_WRITER = 1, + RW_READER = 2 +} krw_t; + +#define RW_NOLOCKDEP 0 + +struct krwlock { + uint32_t rw_lock[4]; /* opaque lck_rw_t data */ + void *rw_owner; /* writer (exclusive) lock only */ + int rw_readers; /* reader lock only */ + int rw_pad; /* */ +#ifdef SPL_DEBUG_RWLOCK + void *leak; +#endif +}; +typedef struct krwlock krwlock_t; + +#define RW_WRITE_HELD(x) (rw_write_held((x))) +#define RW_LOCK_HELD(x) (rw_lock_held((x))) + +#ifdef SPL_DEBUG_RWLOCK +#define rw_init(A, B, C, D) \ + rw_initx(A, B, C, D, __FILE__, __FUNCTION__, __LINE__) +extern void rw_initx(krwlock_t *, char *, krw_type_t, void *, + const char *, const char *, int); +#else +extern void rw_init(krwlock_t *, char *, krw_type_t, void *); +#endif +extern void rw_destroy(krwlock_t *); +extern void rw_enter(krwlock_t *, krw_t); +extern int rw_tryenter(krwlock_t *, krw_t); +extern void rw_exit(krwlock_t *); +extern void rw_downgrade(krwlock_t *); +extern int rw_tryupgrade(krwlock_t *); +extern int rw_write_held(krwlock_t *); +extern int rw_lock_held(krwlock_t *); +extern int rw_isinit(krwlock_t *); + +int spl_rwlock_init(void); +void spl_rwlock_fini(void); + +#endif /* _SPL_RWLOCK_H */ diff --git a/include/os/macos/spl/sys/seg_kmem.h b/include/os/macos/spl/sys/seg_kmem.h new file mode 100644 index 0000000000..8ef014c129 --- /dev/null +++ b/include/os/macos/spl/sys/seg_kmem.h @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
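As declared above, the krwlock_t wrapper keeps the illumos calling convention rather than the raw XNU lck_rw_t one. A minimal usage sketch, with hypothetical my_ names and ASSERT taken from the SPL debug headers:

static krwlock_t my_lock;
static uint64_t my_counter;

static void
my_setup(void)
{
	rw_init(&my_lock, NULL, RW_DEFAULT, NULL);
}

static uint64_t
my_read(void)
{
	uint64_t v;

	rw_enter(&my_lock, RW_READER);	/* shared */
	v = my_counter;
	rw_exit(&my_lock);
	return (v);
}

static void
my_bump(void)
{
	rw_enter(&my_lock, RW_WRITER);	/* exclusive */
	ASSERT(RW_WRITE_HELD(&my_lock));
	my_counter++;
	rw_exit(&my_lock);
}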
+ */ + +#ifndef _VM_SEG_KMEM_H +#define _VM_SEG_KMEM_H + + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* + * VM - Kernel Segment Driver + */ + +#if defined(_KERNEL) + +extern uint64_t segkmem_total_allocated; + +/* qcaching for zio arenas and abd arena */ +extern vmem_t *zio_arena_parent; +/* arena for zio caches for file blocks */ +extern vmem_t *zio_arena; +/* arena for zio caches for (zfs) metadata blocks */ +extern vmem_t *zio_metadata_arena; + +/* + * segkmem page vnodes + */ +#define kvp (kvps[KV_KVP]) +#define zvp (kvps[KV_ZVP]) +#if defined(__sparc) +#define mpvp (kvps[KV_MPVP]) +#define promvp (kvps[KV_PROMVP]) +#endif /* __sparc */ + +void *segkmem_alloc(vmem_t *, size_t, int); +extern void segkmem_free(vmem_t *, void *, size_t); +extern void kernelheap_init(void); +extern void kernelheap_fini(void); + +extern void *segkmem_zio_alloc(vmem_t *, size_t, int); +extern void segkmem_zio_free(vmem_t *, void *, size_t); +extern void segkmem_zio_init(void); +extern void segkmem_zio_fini(void); + +/* + * Flags for segkmem_xalloc(). + * + * SEGKMEM_SHARELOCKED requests pages which are locked SE_SHARED to be + * returned rather than unlocked which is now the default. Note that + * memory returned by SEGKMEM_SHARELOCKED cannot be freed by segkmem_free(). + * This is a hack for seg_dev that should be cleaned up in the future. + */ +#define SEGKMEM_SHARELOCKED 0x20000 + +#define SEGKMEM_USE_LARGEPAGES (segkmem_lpsize > PAGESIZE) + +#define IS_KMEM_VA_LARGEPAGE(vaddr) \ + (((vaddr) >= heap_lp_base) && ((vaddr) < heap_lp_end)) + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _VM_SEG_KMEM_H */ diff --git a/include/os/macos/spl/sys/sha2.h b/include/os/macos/spl/sys/sha2.h new file mode 100644 index 0000000000..9039835f18 --- /dev/null +++ b/include/os/macos/spl/sys/sha2.h @@ -0,0 +1,155 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* Copyright 2013 Saso Kiselkov. All rights reserved. 
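A sketch of how the zio arenas above might be used, assuming the vmem shim provides the usual illumos VM_SLEEP flag; the helper names are illustrative only:

static void *
my_meta_alloc(size_t size)
{
	/* carve a buffer out of the metadata qcache arena */
	return (segkmem_zio_alloc(zio_metadata_arena, size, VM_SLEEP));
}

static void
my_meta_free(void *buf, size_t size)
{
	segkmem_zio_free(zio_metadata_arena, buf, size);
}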
*/ + +#ifndef _SYS_SHA2_H +#define _SYS_SHA2_H + +#ifdef _KERNEL +#include /* for uint_* */ +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SHA2_HMAC_MIN_KEY_LEN 1 /* SHA2-HMAC min key length in bytes */ +#define SHA2_HMAC_MAX_KEY_LEN INT_MAX /* SHA2-HMAC max key length in bytes */ + +#define SHA256_DIGEST_LENGTH 32 /* SHA256 digest length in bytes */ +#define SHA384_DIGEST_LENGTH 48 /* SHA384 digest length in bytes */ +#define SHA512_DIGEST_LENGTH 64 /* SHA512 digest length in bytes */ + +/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */ +#define SHA512_224_DIGEST_LENGTH 28 /* SHA512/224 digest length */ +#define SHA512_256_DIGEST_LENGTH 32 /* SHA512/256 digest length */ + +#define SHA256_HMAC_BLOCK_SIZE 64 /* SHA256-HMAC block size */ +#define SHA512_HMAC_BLOCK_SIZE 128 /* SHA512-HMAC block size */ + +#define SHA256 0 +#define SHA256_HMAC 1 +#define SHA256_HMAC_GEN 2 +#define SHA384 3 +#define SHA384_HMAC 4 +#define SHA384_HMAC_GEN 5 +#define SHA512 6 +#define SHA512_HMAC 7 +#define SHA512_HMAC_GEN 8 +#define SHA512_224 9 +#define SHA512_256 10 + +/* + * SHA2 context. + * The contents of this structure are a private interface between the + * Init/Update/Final calls of the functions defined below. + * Callers must never attempt to read or write any of the fields + * in this structure directly. + */ +typedef struct { + uint32_t algotype; /* Algorithm Type */ + + /* state (ABCDEFGH) */ + union { + uint32_t s32[8]; /* for SHA256 */ + uint64_t s64[8]; /* for SHA384/512 */ + } state; + /* number of bits */ + union { + uint32_t c32[2]; /* for SHA256 , modulo 2^64 */ + uint64_t c64[2]; /* for SHA384/512, modulo 2^128 */ + } count; + union { + uint8_t buf8[128]; /* undigested input */ + uint32_t buf32[32]; /* realigned input */ + uint64_t buf64[16]; /* realigned input */ + } buf_un; +} SHA2_CTX; + +typedef SHA2_CTX SHA256_CTX; +typedef SHA2_CTX SHA384_CTX; +typedef SHA2_CTX SHA512_CTX; + +extern void SHA2Init(uint64_t mech, SHA2_CTX *); + +extern void SHA2Update(SHA2_CTX *, const void *, size_t); + +extern void SHA2Final(void *, SHA2_CTX *); + +extern void SHA256Init(SHA256_CTX *); + +extern void SHA256Update(SHA256_CTX *, const void *, size_t); + +extern void SHA256Final(void *, SHA256_CTX *); + +extern void SHA384Init(SHA384_CTX *); + +extern void SHA384Update(SHA384_CTX *, const void *, size_t); + +extern void SHA384Final(void *, SHA384_CTX *); + +extern void SHA512Init(SHA512_CTX *); + +extern void SHA512Update(SHA512_CTX *, const void *, size_t); + +extern void SHA512Final(void *, SHA512_CTX *); + +#ifdef _SHA2_IMPL +/* + * The following types/functions are all private to the implementation + * of the SHA2 functions and must not be used by consumers of the interface + */ + +/* + * List of support mechanisms in this module. 
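The context above is only ever driven through the generic Init/Update/Final calls; a one-shot SHA-256 helper might look like this sketch (my_sha256 is a hypothetical name, SHA256 is the mechanism constant defined above):

static void
my_sha256(const void *buf, size_t len, uint8_t digest[SHA256_DIGEST_LENGTH])
{
	SHA2_CTX ctx;

	SHA2Init(SHA256, &ctx);
	SHA2Update(&ctx, buf, len);
	SHA2Final(digest, &ctx);
}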
+ * + * It is important to note that in the module, division or modulus calculations + * are used on the enumerated type to determine which mechanism is being used; + * therefore, changing the order or additional mechanisms should be done + * carefully + */ +typedef enum sha2_mech_type { + SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */ + SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */ + SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */ + SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */ + SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */ + SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */ + SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */ + SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */ + SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */ + SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */ + SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */ +} sha2_mech_type_t; + +#endif /* _SHA2_IMPL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SHA2_H */ diff --git a/include/os/macos/spl/sys/sid.h b/include/os/macos/spl/sys/sid.h new file mode 100644 index 0000000000..75b6847096 --- /dev/null +++ b/include/os/macos/spl/sys/sid.h @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#ifndef _SPL_SID_H +#define _SPL_SID_H + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct ksiddomain { + char *kd_name; +} ksiddomain_t; + +typedef enum ksid_index { + KSID_USER, + KSID_GROUP, + KSID_OWNER, + KSID_COUNT +} ksid_index_t; + +typedef int ksid_t; + +/* Should be in kidmap.h */ +typedef int32_t idmap_stat; + +static inline ksiddomain_t * +ksid_lookupdomain(const char *dom) +{ + ksiddomain_t *kd; + int len = strlen(dom); + + kd = (ksiddomain_t *)kmem_zalloc(sizeof (ksiddomain_t), KM_SLEEP); + kd->kd_name = (char *)kmem_zalloc(len + 1, KM_SLEEP); + memcpy(kd->kd_name, dom, len); + + return (kd); +} + +static inline void +ksiddomain_rele(ksiddomain_t *ksid) +{ + kmem_free(ksid->kd_name, strlen(ksid->kd_name) + 1); + kmem_free(ksid, sizeof (ksiddomain_t)); +} + +#define UID_NOBODY 65534 +#define GID_NOBODY 65534 + +static __inline uint_t +ksid_getid(ksid_t *ks) +{ + panic("%s has been unexpectedly called", __func__); + return (0); +} + +static __inline const char * +ksid_getdomain(ksid_t *ks) +{ + panic("%s has been unexpectedly called", __func__); + return (0); +} + +static __inline uint_t +ksid_getrid(ksid_t *ks) +{ + panic("%s has been unexpectedly called", __func__); + return (0); +} + +#define kidmap_getsidbyuid(zone, uid, sid_prefix, rid) (1) +#define kidmap_getsidbygid(zone, gid, sid_prefix, rid) (1) + +#ifdef __cplusplus +} +#endif + +#endif /* _SPL_SID_H */ diff --git a/include/os/macos/spl/sys/signal.h b/include/os/macos/spl/sys/signal.h new file mode 100644 index 0000000000..f4bf1845e9 --- /dev/null +++ b/include/os/macos/spl/sys/signal.h @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
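Callers are expected to pair the lookup and release helpers above; a minimal sketch with a hypothetical domain string:

static void
my_domain_example(void)
{
	ksiddomain_t *kd;

	kd = ksid_lookupdomain("BUILTIN");
	/* kd->kd_name holds a private copy of the string until released */
	ksiddomain_rele(kd);
}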
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2020 Jorgen Lundman + * + */ + +#ifndef _SPL_SYS_SIGNAL_H +#define _SPL_SYS_SIGNAL_H + +#include +#include_next +#include + +#define FORREAL 0 /* Usual side-effects */ +#define JUSTLOOKING 1 /* Don't stop the process */ + +struct proc; + +extern int thread_issignal(struct proc *, thread_t, sigset_t); + +#define THREADMASK (sigmask(SIGILL)|sigmask(SIGTRAP)|\ + sigmask(SIGIOT)|sigmask(SIGEMT)|\ + sigmask(SIGFPE)|sigmask(SIGBUS)|\ + sigmask(SIGSEGV)|sigmask(SIGSYS)|\ + sigmask(SIGPIPE)|sigmask(SIGKILL)|\ + sigmask(SIGTERM)|sigmask(SIGINT)) + +static __inline__ int +issig(int why) +{ + return (thread_issignal(current_proc(), current_thread(), + THREADMASK)); +} + +/* Always called with curthread */ +#define signal_pending(p) issig(0) + +#endif /* SPL_SYS_SIGNAL_H */ diff --git a/include/os/macos/spl/sys/simd.h b/include/os/macos/spl/sys/simd.h new file mode 100644 index 0000000000..37c06fc78a --- /dev/null +++ b/include/os/macos/spl/sys/simd.h @@ -0,0 +1,712 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (C) 2016 Gvozden Neskovic . + */ + +/* + * USER API: + * + * Kernel fpu methods: + * kfpu_begin() + * kfpu_end() + * + * SIMD support: + * + * Following functions should be called to determine whether CPU feature + * is supported. All functions are usable in kernel and user space. + * If a SIMD algorithm is using more than one instruction set + * all relevant feature test functions should be called. + * + * Supported features: + * zfs_sse_available() + * zfs_sse2_available() + * zfs_sse3_available() + * zfs_ssse3_available() + * zfs_sse4_1_available() + * zfs_sse4_2_available() + * + * zfs_avx_available() + * zfs_avx2_available() + * + * zfs_bmi1_available() + * zfs_bmi2_available() + * + * zfs_avx512f_available() + * zfs_avx512cd_available() + * zfs_avx512er_available() + * zfs_avx512pf_available() + * zfs_avx512bw_available() + * zfs_avx512dq_available() + * zfs_avx512vl_available() + * zfs_avx512ifma_available() + * zfs_avx512vbmi_available() + * + * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers + * also add zfs_avx512vl_available() to feature check. 
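The call pattern the comment above describes is: test every instruction set the routine needs, then bracket the SIMD body with kfpu_begin()/kfpu_end(). A hedged sketch, where my_fletcher_avx2 and both branch bodies are placeholders:

static void
my_fletcher_avx2(const uint8_t *src, size_t len)
{
	(void) src;
	(void) len;

	if (zfs_avx2_available()) {
		kfpu_begin();	/* defined as a no-op below on this platform */
		/* ... AVX2 implementation ... */
		kfpu_end();
	} else {
		/* ... scalar fallback ... */
	}
}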
+ */ + +#ifndef _SIMD_X86_H +#define _SIMD_X86_H + +#include + +/* only for __x86 */ +#if defined(__x86) + +#include + +#if defined(_KERNEL) +#include +#include + +#ifdef __APPLE__ +// XNU fpu.h +static inline uint64_t +xgetbv(uint32_t c) +{ + uint32_t mask_hi, mask_lo; + __asm__ __volatile__("xgetbv" : "=a"(mask_lo), "=d"(mask_hi) : "c" (c)); + return (((uint64_t)mask_hi<<32) + (uint64_t)mask_lo); +} + +#endif + +extern uint64_t spl_cpuid_features(void); +extern uint64_t spl_cpuid_leaf7_features(void); + +#define ZFS_ASM_BUG() { ASSERT(0); } break + +#define kfpu_allowed() 1 + +#endif + +#define kfpu_init() (0) +#define kfpu_fini() do {} while (0) + +#define kfpu_begin() ((void)0) +#define kfpu_end() ((void)0) + +/* + * CPUID feature tests for user-space. Linux kernel provides an interface for + * CPU feature testing. + */ +#if !defined(_KERNEL) + +#include + +#define ZFS_ASM_BUG() { assert(0); } break + +/* + * x86 registers used implicitly by CPUID + */ +typedef enum cpuid_regs { + EAX = 0, + EBX, + ECX, + EDX, + CPUID_REG_CNT = 4 +} cpuid_regs_t; + +/* + * List of instruction sets identified by CPUID + */ +typedef enum cpuid_inst_sets { + SSE = 0, + SSE2, + SSE3, + SSSE3, + SSE4_1, + SSE4_2, + OSXSAVE, + AVX, + AVX2, + BMI1, + BMI2, + AVX512F, + AVX512CD, + AVX512DQ, + AVX512BW, + AVX512IFMA, + AVX512VBMI, + AVX512PF, + AVX512ER, + AVX512VL, + AES, + PCLMULQDQ +} cpuid_inst_sets_t; + +/* + * Instruction set descriptor. + */ +typedef struct cpuid_feature_desc { + uint32_t leaf; /* CPUID leaf */ + uint32_t subleaf; /* CPUID sub-leaf */ + uint32_t flag; /* bit mask of the feature */ + cpuid_regs_t reg; /* which CPUID return register to test */ +} cpuid_feature_desc_t; + +#define _AVX512F_BIT (1U << 16) +#define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28)) +#define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17)) +#define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30)) +#define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21)) +#define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */ +#define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26)) +#define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27)) +#define _AVX512VL_BIT (1U << 31) /* if used also check other levels */ +#define _AES_BIT (1U << 25) +#define _PCLMULQDQ_BIT (1U << 1) + +/* + * Descriptions of supported instruction sets + */ +static const cpuid_feature_desc_t spl_cpuid_features[] = { + [SSE] = {1U, 0U, 1U << 25, EDX }, + [SSE2] = {1U, 0U, 1U << 26, EDX }, + [SSE3] = {1U, 0U, 1U << 0, ECX }, + [SSSE3] = {1U, 0U, 1U << 9, ECX }, + [SSE4_1] = {1U, 0U, 1U << 19, ECX }, + [SSE4_2] = {1U, 0U, 1U << 20, ECX }, + [OSXSAVE] = {1U, 0U, 1U << 27, ECX }, + [AVX] = {1U, 0U, 1U << 28, ECX }, + [AVX2] = {7U, 0U, 1U << 5, EBX }, + [BMI1] = {7U, 0U, 1U << 3, EBX }, + [BMI2] = {7U, 0U, 1U << 8, EBX }, + [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX }, + [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX }, + [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX }, + [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX }, + [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX }, + [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX }, + [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX }, + [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX }, + [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX }, + [AES] = {1U, 0U, _AES_BIT, ECX }, + [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, +}; + +/* + * Check if OS supports AVX and AVX2 by checking XCR0 + * Only call this function if CPUID indicates that AVX feature is + * supported by the CPU, otherwise it might be an illegal instruction. 
+ */ +static inline uint64_t +xgetbv(uint32_t index) +{ + uint32_t eax, edx; + /* xgetbv - instruction byte code */ + __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index)); + + return ((((uint64_t)edx)<<32) | (uint64_t)eax); +} + +/* + * Check if CPU supports a feature + */ +static inline boolean_t +__cpuid_check_feature(const cpuid_feature_desc_t *desc) +{ + uint32_t r[CPUID_REG_CNT]; + + if (__get_cpuid_max(0, NULL) >= desc->leaf) { + /* + * __cpuid_count is needed to properly check + * for AVX2. It is a macro, so return parameters + * are passed by value. + */ + __cpuid_count(desc->leaf, desc->subleaf, + r[EAX], r[EBX], r[ECX], r[EDX]); + return ((r[desc->reg] & desc->flag) == desc->flag); + } + return (B_FALSE); +} + +#define CPUID_FEATURE_CHECK(name, id) \ +static inline boolean_t \ +__cpuid_has_ ## name(void) \ +{ \ + return (__cpuid_check_feature(&spl_cpuid_features[id])); \ +} + +/* + * Define functions for user-space CPUID features testing + */ +CPUID_FEATURE_CHECK(sse, SSE); +CPUID_FEATURE_CHECK(sse2, SSE2); +CPUID_FEATURE_CHECK(sse3, SSE3); +CPUID_FEATURE_CHECK(ssse3, SSSE3); +CPUID_FEATURE_CHECK(sse4_1, SSE4_1); +CPUID_FEATURE_CHECK(sse4_2, SSE4_2); +CPUID_FEATURE_CHECK(avx, AVX); +CPUID_FEATURE_CHECK(avx2, AVX2); +CPUID_FEATURE_CHECK(osxsave, OSXSAVE); +CPUID_FEATURE_CHECK(bmi1, BMI1); +CPUID_FEATURE_CHECK(bmi2, BMI2); +CPUID_FEATURE_CHECK(avx512f, AVX512F); +CPUID_FEATURE_CHECK(avx512cd, AVX512CD); +CPUID_FEATURE_CHECK(avx512dq, AVX512DQ); +CPUID_FEATURE_CHECK(avx512bw, AVX512BW); +CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA); +CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI); +CPUID_FEATURE_CHECK(avx512pf, AVX512PF); +CPUID_FEATURE_CHECK(avx512er, AVX512ER); +CPUID_FEATURE_CHECK(avx512vl, AVX512VL); +CPUID_FEATURE_CHECK(aes, AES); +CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); + +#endif /* !defined(_KERNEL) */ + + +/* + * Detect register set support + */ +static inline boolean_t +__simd_state_enabled(const uint64_t state) +{ + boolean_t has_osxsave; + uint64_t xcr0; + +#if defined(_KERNEL) + has_osxsave = !!(spl_cpuid_features() & CPUID_FEATURE_OSXSAVE); +#elif !defined(_KERNEL) + has_osxsave = __cpuid_has_osxsave(); +#endif + if (!has_osxsave) + return (B_FALSE); + + xcr0 = xgetbv(0); + return ((xcr0 & state) == state); +} + +#define _XSTATE_SSE_AVX (0x2 | 0x4) +#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX) + +#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX) +#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512) + + +/* + * Check if SSE instruction set is available + */ +static inline boolean_t +zfs_sse_available(void) +{ +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_SSE)); +#elif !defined(_KERNEL) + return (__cpuid_has_sse()); +#endif +} + +/* + * Check if SSE2 instruction set is available + */ +static inline boolean_t +zfs_sse2_available(void) +{ +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_SSE2)); +#elif !defined(_KERNEL) + return (__cpuid_has_sse2()); +#endif +} + +/* + * Check if SSE3 instruction set is available + */ +static inline boolean_t +zfs_sse3_available(void) +{ +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_SSE3)); +#elif !defined(_KERNEL) + return (__cpuid_has_sse3()); +#endif +} + +/* + * Check if SSSE3 instruction set is available + */ +static inline boolean_t +zfs_ssse3_available(void) +{ +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_SSSE3)); +#elif !defined(_KERNEL) + return 
(__cpuid_has_ssse3()); +#endif +} + +/* + * Check if SSE4.1 instruction set is available + */ +static inline boolean_t +zfs_sse4_1_available(void) +{ +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_SSE4_1)); +#elif !defined(_KERNEL) + return (__cpuid_has_sse4_1()); +#endif +} + +/* + * Check if SSE4.2 instruction set is available + */ +static inline boolean_t +zfs_sse4_2_available(void) +{ +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_SSE4_2)); +#elif !defined(_KERNEL) + return (__cpuid_has_sse4_2()); +#endif +} + +/* + * Check if AVX instruction set is available + */ +static inline boolean_t +zfs_avx_available(void) +{ + boolean_t has_avx; +#if defined(_KERNEL) + return (!!(spl_cpuid_features() & CPUID_FEATURE_AVX1_0)); +#elif !defined(_KERNEL) + has_avx = __cpuid_has_avx(); +#endif + + return (has_avx && __ymm_enabled()); +} + +/* + * Check if AVX2 instruction set is available + */ +static inline boolean_t +zfs_avx2_available(void) +{ + boolean_t has_avx2; +#if defined(_KERNEL) +#if defined(HAVE_AVX2) + has_avx2 = (!!(spl_cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_AVX2)); +#else + has_avx2 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx2 = __cpuid_has_avx2(); +#endif + + return (has_avx2 && __ymm_enabled()); +} + +/* + * Check if BMI1 instruction set is available + */ +static inline boolean_t +zfs_bmi1_available(void) +{ +#if defined(_KERNEL) +#if defined(CPUID_LEAF7_FEATURE_BMI1) + return (!!(spl_cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_BMI1)); +#else + return (B_FALSE); +#endif +#elif !defined(_KERNEL) + return (__cpuid_has_bmi1()); +#endif +} + +/* + * Check if BMI2 instruction set is available + */ +static inline boolean_t +zfs_bmi2_available(void) +{ +#if defined(_KERNEL) +#if defined(CPUID_LEAF7_FEATURE_BMI2) + return (!!(spl_cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_BMI2)); +#else + return (B_FALSE); +#endif +#elif !defined(_KERNEL) + return (__cpuid_has_bmi2()); +#endif +} + +/* + * Check if AES instruction set is available + */ +static inline boolean_t +zfs_aes_available(void) +{ +#if defined(_KERNEL) +#if defined(HAVE_AES) + return (!!(spl_cpuid_features() & CPUID_FEATURE_AES)); +#else + return (B_FALSE); +#endif +#elif !defined(_KERNEL) + return (__cpuid_has_aes()); +#endif +} + +/* + * Check if PCLMULQDQ instruction set is available + */ +static inline boolean_t +zfs_pclmulqdq_available(void) +{ +#if defined(_KERNEL) +#if defined(HAVE_PCLMULQDQ) + return (!!(spl_cpuid_features() & CPUID_FEATURE_PCLMULQDQ)); +#else + return (B_FALSE); +#endif +#elif !defined(_KERNEL) + return (__cpuid_has_pclmulqdq()); +#endif +} + +/* + * AVX-512 family of instruction sets: + * + * AVX512F Foundation + * AVX512CD Conflict Detection Instructions + * AVX512ER Exponential and Reciprocal Instructions + * AVX512PF Prefetch Instructions + * + * AVX512BW Byte and Word Instructions + * AVX512DQ Double-word and Quadword Instructions + * AVX512VL Vector Length Extensions + * + * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4) + * AVX512VBMI Vector Byte Manipulation Instructions + */ + + +/* Check if AVX512F instruction set is available */ +static inline boolean_t +zfs_avx512f_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512F) && defined(CPUID_LEAF7_FEATURE_AVX512F) + return (!!(spl_cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_AVX512F)); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512f(); +#endif + + return (has_avx512 
&& __zmm_enabled()); +} + +/* Check if AVX512CD instruction set is available */ +static inline boolean_t +zfs_avx512cd_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512F) && defined(HAVE_AVX512CD) && \ + defined(CPUID_LEAF7_FEATURE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512CD) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512CD)) == + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512CD); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512cd(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512ER instruction set is available */ +static inline boolean_t +zfs_avx512er_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512F) && defined(HAVE_AVX512ER) && \ + defined(CPUID_LEAF7_FEATURE_AVX512ER) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512ER)) == + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512ER); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512er(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512PF instruction set is available */ +static inline boolean_t +zfs_avx512pf_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512PF) && defined(HAVE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512PF) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512PF)) == + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512PF); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512pf(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512BW instruction set is available */ +static inline boolean_t +zfs_avx512bw_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512BW) && defined(HAVE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512BW) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512BW)) == + (CPUID_LEAF7_FEATURE_AVX512F | CPUID_LEAF7_FEATURE_AVX512BW); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512bw(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512DQ instruction set is available */ +static inline boolean_t +zfs_avx512dq_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512DQ) && defined(HAVE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512DQ) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512DQ)) == + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512DQ); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512dq(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512VL instruction set is available */ +static inline boolean_t +zfs_avx512vl_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512VL) && defined(HAVE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512VL) + has_avx512 = (spl_cpuid_leaf7_features() & + 
(CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512VL)) == + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512VL); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512vl(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512IFMA instruction set is available */ +static inline boolean_t +zfs_avx512ifma_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512IFMA) && defined(HAVE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512IFMA) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512IFMA)) == + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512IFMA); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512ifma(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +/* Check if AVX512VBMI instruction set is available */ +static inline boolean_t +zfs_avx512vbmi_available(void) +{ + boolean_t has_avx512 = B_FALSE; + +#if defined(_KERNEL) +#if defined(HAVE_AVX512VBMI) && defined(HAVE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512F) && \ + defined(CPUID_LEAF7_FEATURE_AVX512VBMI) + has_avx512 = (spl_cpuid_leaf7_features() & + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512VBMI)) == + (CPUID_LEAF7_FEATURE_AVX512F|CPUID_LEAF7_FEATURE_AVX512VBMI); +#else + has_avx512 = B_FALSE; +#endif +#elif !defined(_KERNEL) + has_avx512 = __cpuid_has_avx512f() && + __cpuid_has_avx512vbmi(); +#endif + + return (has_avx512 && __zmm_enabled()); +} + +#endif /* defined(__x86) */ + +#endif /* _SIMD_X86_H */ diff --git a/include/os/macos/spl/sys/strings.h b/include/os/macos/spl/sys/strings.h new file mode 100644 index 0000000000..af643259c8 --- /dev/null +++ b/include/os/macos/spl/sys/strings.h @@ -0,0 +1,26 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_STRINGS_H +#define _SPL_STRINGS_H + + +#endif diff --git a/include/os/macos/spl/sys/stropts.h b/include/os/macos/spl/sys/stropts.h new file mode 100644 index 0000000000..bdce60f327 --- /dev/null +++ b/include/os/macos/spl/sys/stropts.h @@ -0,0 +1,247 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + + +#ifndef _SPL_STROPTS_H +#define _SPL_STROPTS_H + +#define LOCORE +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define isprint(c) ((c) >= ' ' && (c) <= '~') + +/* + * Find highest one bit set. + * Returns bit number + 1 of highest bit that is set, otherwise returns 0. + * High order bit is 31 (or 63 in _LP64 kernel). + */ +static inline int +highbit64(unsigned long long i) +{ + register int h = 1; + if (i == 0) + return (0); + if (i & 0xffffffff00000000ull) { + h += 32; i >>= 32; + } + if (i & 0xffff0000) { + h += 16; i >>= 16; + } + if (i & 0xff00) { + h += 8; i >>= 8; + } + if (i & 0xf0) { + h += 4; i >>= 4; + } + if (i & 0xc) { + h += 2; i >>= 2; + } + if (i & 0x2) { + h += 1; + } + return (h); +} + +static inline int +highbit(unsigned long long i) +{ + register int h = 1; + if (i == 0) + return (0); + if (i & 0xffffffff00000000ull) { + h += 32; i >>= 32; + } + if (i & 0xffff0000) { + h += 16; i >>= 16; + } + if (i & 0xff00) { + h += 8; i >>= 8; + } + if (i & 0xf0) { + h += 4; i >>= 4; + } + if (i & 0xc) { + h += 2; i >>= 2; + } + if (i & 0x2) { + h += 1; + } + return (h); +} + +/* + * Find lowest one bit set. + * Returns bit number + 1 of lowest bit that is set, otherwise returns 0. + * Low order bit is 0. + */ +static inline int +lowbit(unsigned long long i) +{ + register int h = 1; + + if (i == 0) + return (0); + + if (!(i & 0xffffffff)) { + h += 32; i >>= 32; + } + if (!(i & 0xffff)) { + h += 16; i >>= 16; + } + if (!(i & 0xff)) { + h += 8; i >>= 8; + } + if (!(i & 0xf)) { + h += 4; i >>= 4; + } + if (!(i & 0x3)) { + h += 2; i >>= 2; + } + if (!(i & 0x1)) { + h += 1; + } + return (h); +} + +static inline int +isdigit(char c) +{ + return (c >= ' ' && c <= '9'); +} + + +static inline char * +strpbrk(const char *s, const char *b) +{ + const char *p; + do { + for (p = b; *p != '\0' && *p != *s; ++p) + ; + if (*p != '\0') + return ((char *)s); + } while (*s++); + return (NULL); +} + + +static inline char * +strrchr(const char *p, int ch) +{ + union { + const char *cp; + char *p; + } u; + char *save; + + u.cp = p; + for (save = NULL; /* empty */; ++u.p) { + if (*u.p == ch) + save = u.p; + if (*u.p == '\0') + return (save); + } + /* NOTREACHED */ +} + +static inline int +is_ascii_str(const char *str) +{ + unsigned char ch; + + while ((ch = (unsigned char)*str++) != '\0') { + if (ch >= 0x80) + return (0); + } + return (1); +} + + +static inline void * +kmemchr(const void *s, int c, size_t n) +{ + if (n != 0) { + const unsigned char *p = (const unsigned char *)s; + do { + if (*p++ == (unsigned char)c) + return ((void *)(uintptr_t)(p - 1)); + } while (--n != 0); + } + return (NULL); +} + +#ifndef memchr +#define memchr kmemchr +#endif + +#define IDX(c) ((unsigned char)(c) / LONG_BIT) +#define BIT(c) ((unsigned long)1 << ((unsigned char)(c) % LONG_BIT)) + +static inline size_t +strcspn(const char *__restrict s, const char *__restrict charset) +{ + /* + * NB: idx and bit are temporaries whose use causes gcc 3.4.2 to + * generate better code. Without them, gcc gets a little confused. 
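Because both scans return "bit number plus one" (with 0 reserved for no bits set), a few concrete values follow directly from the definitions above:

/*
 *   highbit64(0)      == 0
 *   highbit64(1)      == 1	(only bit 0 is set)
 *   highbit64(0x8000) == 16	(bit 15 is the highest set bit)
 *   lowbit(0x8000)    == 16	(bit 15 is also the lowest set bit)
 *   lowbit(0x18)      == 4	(lowest set bit is bit 3)
 */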
+ */ + const char *s1; + unsigned long bit; + unsigned long tbl[(UCHAR_MAX + 1) / LONG_BIT]; + int idx; + + if (*s == '\0') + return (0); + + tbl[0] = 1; + tbl[3] = tbl[2] = tbl[1] = 0; + + for (; *charset != '\0'; charset++) { + idx = IDX(*charset); + bit = BIT(*charset); + tbl[idx] |= bit; + } + + for (s1 = s; ; s1++) { + idx = IDX(*s1); + bit = BIT(*s1); + if ((tbl[idx] & bit) != 0) + break; + } + return (s1 - s); +} + +#ifdef __cplusplus +} +#endif + +#endif /* SPL_STROPTS_H */ diff --git a/include/os/macos/spl/sys/sunddi.h b/include/os/macos/spl/sys/sunddi.h new file mode 100644 index 0000000000..ff299eeaf9 --- /dev/null +++ b/include/os/macos/spl/sys/sunddi.h @@ -0,0 +1,203 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Garrett D'Amore . All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + + + +#ifndef _SPL_SUNDDI_H +#define _SPL_SUNDDI_H + +#include +#include +#include +#include +#include +#include +#include + +typedef int ddi_devid_t; + +#define DDI_DEV_T_NONE ((dev_t)-1) +#define DDI_DEV_T_ANY ((dev_t)-2) +#define DI_MAJOR_T_UNKNOWN ((major_t)0) + +#define DDI_PROP_DONTPASS 0x0001 +#define DDI_PROP_CANSLEEP 0x0002 + +#define DDI_SUCCESS 0 +#define DDI_FAILURE -1 + +#define ddi_prop_lookup_string(x1, x2, x3, x4, x5) (*x5 = NULL) +#define ddi_prop_free(x) (void)0 +#define ddi_root_node() (void)0 + +#define isalnum(ch) (isalpha(ch) || isdigit(ch)) +#define isalpha(ch) (isupper(ch) || islower(ch)) +#define isdigit(ch) ((ch) >= '0' && (ch) <= '9') +#define islower(ch) ((ch) >= 'a' && (ch) <= 'z') +#define isspace(ch) (((ch) == ' ') || ((ch) == '\r') || ((ch) == '\n') || \ + ((ch) == '\t') || ((ch) == '\f')) +#define isupper(ch) ((ch) >= 'A' && (ch) <= 'Z') +#define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \ + ((ch) >= 'A' && (ch) <= 'F')) +#define tolower(C) (((C) >= 'A' && (C) <= 'Z') ? (C) - 'A' + 'a' : (C)) +#define toupper(C) (((C) >= 'a' && (C) <= 'z') ? 
(C) - 'a' + 'A': (C)) +#define isgraph(C) ((C) >= 0x21 && (C) <= 0x7E) +#define ispunct(C) (((C) >= 0x21 && (C) <= 0x2F) || \ + ((C) >= 0x3A && (C) <= 0x40) || \ + ((C) >= 0x5B && (C) <= 0x60) || \ + ((C) >= 0x7B && (C) <= 0x7E)) + +// Define proper Solaris API calls, and clean ZFS up to use +int ddi_copyin(const void *from, void *to, size_t len, int flags); +int ddi_copyout(const void *from, void *to, size_t len, int flags); +int ddi_copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done); + +static inline int +ddi_strtol(const char *str, char **nptr, int base, long *result) +{ + *result = strtol(str, nptr, base); + if (*result == 0) + return (EINVAL); + else if (*result == LONG_MIN || *result == LONG_MAX) + return (ERANGE); + return (0); +} + +static inline int +ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result) +{ + *result = strtoul(str, nptr, base); + if (*result == 0) + return (EINVAL); + else if (*result == ULONG_MAX) + return (ERANGE); + return (0); +} + +static inline int +ddi_strtoull(const char *str, char **nptr, int base, + unsigned long long *result) +{ + *result = (unsigned long long)strtouq(str, nptr, base); + if (*result == 0) + return (EINVAL); + else if (*result == ULLONG_MAX) + return (ERANGE); + return (0); +} + +static inline int +ddi_strtoll(const char *str, char **nptr, int base, long long *result) +{ + *result = (unsigned long long)strtoq(str, nptr, base); + if (*result == 0) + return (EINVAL); + else if (*result == ULLONG_MAX) + return (ERANGE); + return (0); +} + +#ifndef OTYPCNT +#define OTYPCNT 5 +#define OTYP_BLK 0 +#define OTYP_MNT 1 +#define OTYP_CHR 2 +#define OTYP_SWP 3 +#define OTYP_LYR 4 +#endif + +#define P2END(x, align) (-(~(x) & -(align))) + +#define ddi_name_to_major(name) devsw_name2blk(name, NULL, 0) + +struct dev_info { + dev_t dev; // Major / Minor + void *devc; + void *devb; +}; +typedef struct dev_info dev_info_t; + + +int ddi_strtoul(const char *, char **, int, unsigned long *); +int ddi_strtol(const char *, char **, int, long *); +int ddi_soft_state_init(void **, size_t, size_t); +int ddi_soft_state_zalloc(void *, int); +void *ddi_get_soft_state(void *, int); +void ddi_soft_state_free(void *, int); +void ddi_soft_state_fini(void **); +int ddi_create_minor_node(dev_info_t *, char *, int, + minor_t, char *, int); +void ddi_remove_minor_node(dev_info_t *, char *); + +int ddi_driver_major(dev_info_t *); + +typedef void *ldi_ident_t; + +#define DDI_SUCCESS 0 +#define DDI_FAILURE -1 + +#define DDI_PSEUDO "" + +#define ddi_prop_update_int64(a, b, c, d) DDI_SUCCESS +#define ddi_prop_update_string(a, b, c, d) DDI_SUCCESS + +#define bioerror(bp, er) (buf_seterror((bp), (er))) +#define biodone(bp) buf_biodone(bp) + +#define ddi_ffs ffs +static inline long ddi_fls(long mask) { \ + /* Algorithm courtesy of Steve Chessin. */ \ + while (mask) { \ + long nx; \ + if ((nx = (mask & (mask - 1))) == 0) \ + break; \ + mask = nx; \ + } \ + return (ffs(mask)); \ +} + +#define getminor(X) minor((X)) + + + +/* + * This data structure is entirely private to the soft state allocator. 
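The soft-state declarations above are intended to mirror the illumos DDI lifecycle. A sketch, assuming ddi_soft_state_zalloc() reports DDI_SUCCESS on success as on illumos; all my_ names are hypothetical:

typedef struct my_state {
	int	ms_open;
} my_state_t;

static void *my_statep;

static void
my_attach(void)
{
	(void) ddi_soft_state_init(&my_statep, sizeof (my_state_t), 0);
	if (ddi_soft_state_zalloc(my_statep, 0 /* instance */) == DDI_SUCCESS) {
		my_state_t *ms = ddi_get_soft_state(my_statep, 0);
		ms->ms_open = 1;
	}
}

static void
my_detach(void)
{
	ddi_soft_state_free(my_statep, 0);
	ddi_soft_state_fini(&my_statep);
}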
+ */ +struct i_ddi_soft_state { + void **array; /* the array of pointers */ + kmutex_t lock; /* serialize access to this struct */ + size_t size; /* how many bytes per state struct */ + size_t n_items; /* how many structs herein */ + struct i_ddi_soft_state *next; /* 'dirty' elements */ +}; + +#define MIN_N_ITEMS 8 /* 8 void *'s == 32 bytes */ + +extern int strspn(const char *string, register char *charset); + + +#endif /* SPL_SUNDDI_H */ diff --git a/include/os/macos/spl/sys/sysmacros.h b/include/os/macos/spl/sys/sysmacros.h new file mode 100644 index 0000000000..8858f8ae4c --- /dev/null +++ b/include/os/macos/spl/sys/sysmacros.h @@ -0,0 +1,265 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_SYSMACROS_H +#define _SPL_SYSMACROS_H + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _KERNEL +#define _KERNEL __KERNEL__ +#endif + +#define FALSE 0 +#define TRUE 1 + +#if 0 +#define INT8_MAX (127) +#define INT8_MIN (-128) +#define UINT8_MAX (255) +#define UINT8_MIN (0) + +#define INT16_MAX (32767) +#define INT16_MIN (-32768) +#define UINT16_MAX (65535) +#define UINT16_MIN (0) + +#define INT32_MAX INT_MAX +#define INT32_MIN INT_MIN +#define UINT32_MAX UINT_MAX +#define UINT32_MIN UINT_MIN + +#define INT64_MAX LLONG_MAX +#define INT64_MIN LLONG_MIN +#define UINT64_MAX ULLONG_MAX +#define UINT64_MIN ULLONG_MIN + +#define NBBY 8 +#define MAXBSIZE 8192 +#endif + +#define MAXMSGLEN 256 +#define MAXNAMELEN 256 +#define MAXPATHLEN PATH_MAX +#define MAXOFFSET_T LLONG_MAX +#define DEV_BSIZE 512 +#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ + +#define proc_pageout NULL +#define curproc (struct proc *)current_proc() + +extern int cpu_number(void); +#define CPU_SEQID (cpu_number()) +#define is_system_labeled() 0 + +extern unsigned int max_ncpus; +#define boot_ncpus max_ncpus + +#ifndef RLIM64_INFINITY +#define RLIM64_INFINITY (~0ULL) +#endif + +/* + * 0..MAX_PRIO-1: Process priority + * 0..MAX_RT_PRIO-1: RT priority tasks + * MAX_RT_PRIO..MAX_PRIO-1: SCHED_NORMAL tasks + * + * Treat shim tasks as SCHED_NORMAL tasks + */ + +/* + * In OSX, the kernel thread priorities start at 81 and goes to + * 95 MAXPRI_KERNEL. BASEPRI_REALTIME starts from 96. Since + * swap priority is at 92. Most ZFS priorities should probably + * stay below this, but kmem_reap needs to be higher. 
+ */ +#define minclsyspri 81 /* BASEPRI_KERNEL */ +#define defclsyspri 81 /* BASEPRI_KERNEL */ +#define maxclsyspri 95 + + +#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) +#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) + +/* + * Missing macros + */ +#define PAGESIZE PAGE_SIZE + +/* from Solaris sys/byteorder.h */ +#define BSWAP_8(x) ((x) & 0xff) +#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) +#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) +#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) + + +/* Dtrace probes do not exist in the linux kernel */ +#ifdef DTRACE_PROBE +#undef DTRACE_PROBE +#endif /* DTRACE_PROBE */ +#define DTRACE_PROBE(a) ((void)0) + +#ifdef DTRACE_PROBE1 +#undef DTRACE_PROBE1 +#endif /* DTRACE_PROBE1 */ +#define DTRACE_PROBE1(a, b, c) ((void)0) + +#ifdef DTRACE_PROBE2 +#undef DTRACE_PROBE2 +#endif /* DTRACE_PROBE2 */ +#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) + +#ifdef DTRACE_PROBE3 +#undef DTRACE_PROBE3 +#endif /* DTRACE_PROBE3 */ +#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) + +#ifdef DTRACE_PROBE4 +#undef DTRACE_PROBE4 +#endif /* DTRACE_PROBE4 */ +#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) + +/* Missing globals */ +extern char spl_version[32]; +extern unsigned long spl_hostid; +extern char hw_serial[11]; + +/* Missing misc functions */ +extern uint32_t zone_get_hostid(void *zone); +extern void spl_setup(void); +extern void spl_cleanup(void); + +#define makedevice(maj, min) makedev(maj, min) + +/* common macros */ +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) ((a) < (b) ? (b) : (a)) +#endif +#ifndef ABS +#define ABS(a) ((a) < 0 ? -(a) : (a)) +#endif +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0])) +#endif + +/* + * Compatibility macros/typedefs needed for Solaris -> Linux port + */ +#define P2ALIGN(x, align) ((x) & -(align)) +#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) +#define P2PHASE(x, align) ((x) & ((align) - 1)) +#define P2NPHASE(x, align) (-(x) & ((align) - 1)) +#define ISP2(x) (((x) & ((x) - 1)) == 0) +#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) +#define P2BOUNDARY(off, len, align) \ + (((off) ^ ((off) + (len) - 1)) > (align) - 1) + +/* + * Typed version of the P2* macros. These macros should be used to ensure + * that the result is correctly calculated based on the data type of (x), + * which is passed in as the last argument, regardless of the data + * type of the alignment. 
For example, if (x) is of type uint64_t, + * and we want to round it up to a page boundary using "PAGESIZE" as + * the alignment, we can do either + * + * P2ROUNDUP(x, (uint64_t)PAGESIZE) + * or + * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t) + */ +#define P2ALIGN_TYPED(x, align, type) \ + ((type)(x) & -(type)(align)) +#define P2PHASE_TYPED(x, align, type) \ + ((type)(x) & ((type)(align) - 1)) +#define P2NPHASE_TYPED(x, align, type) \ + (-(type)(x) & ((type)(align) - 1)) +#define P2ROUNDUP_TYPED(x, align, type) \ + (-(-(type)(x) & -(type)(align))) +#define P2END_TYPED(x, align, type) \ + (-(~(type)(x) & -(type)(align))) +#define P2PHASEUP_TYPED(x, align, phase, type) \ + ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align))) +#define P2CROSS_TYPED(x, y, align, type) \ + (((type)(x) ^ (type)(y)) > (type)(align) - 1) +#define P2SAMEHIGHBIT_TYPED(x, y, type) \ + (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y))) + +/* + * P2* Macros from Illumos + */ + +/* + * return x rounded up to the next phase (offset) within align. + * phase should be < align. + * eg, P2PHASEUP(0x1234, 0x100, 0x10) == 0x1310 (0x13*align + phase) + * eg, P2PHASEUP(0x5600, 0x100, 0x10) == 0x5610 (0x56*align + phase) + */ +#define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) + +/* + * Return TRUE if they have the same highest bit set. + * eg, P2SAMEHIGHBIT(0x1234, 0x1001) == TRUE (the high bit is 0x1000) + * eg, P2SAMEHIGHBIT(0x1234, 0x3010) == FALSE (high bit of 0x3010 is 0x2000) + */ +#define P2SAMEHIGHBIT(x, y) (((x) ^ (y)) < ((x) & (y))) + +/* + * End Illumos copy-fest + */ + +/* avoid any possibility of clashing with version */ +#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) +/* + * Use the correct builtin mechanism. The Traditional macro is + * not safe on this platform. + */ +#define offsetof(s, m) __builtin_offsetof(s, m) +#endif + +#define SET_ERROR(X) (X) + +#ifdef __cplusplus +} +#endif + +#endif /* _SPL_SYSMACROS_H */ diff --git a/include/os/macos/spl/sys/systeminfo.h b/include/os/macos/spl/sys/systeminfo.h new file mode 100644 index 0000000000..d1c15744ec --- /dev/null +++ b/include/os/macos/spl/sys/systeminfo.h @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
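A few worked values for the base alignment macros (align must be a power of two); these follow mechanically from the definitions above:

/*
 *   P2ALIGN(0x1234, 0x100)   == 0x1200	(round down)
 *   P2ROUNDUP(0x1234, 0x100) == 0x1300	(round up)
 *   P2PHASE(0x1234, 0x100)   == 0x34	(offset within the alignment)
 *   ISP2(0x1000) == 1, ISP2(0x1200) == 0
 */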
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_SYSTEMINFO_H +#define _SPL_SYSTEMINFO_H + +#define HW_INVALID_HOSTID 0xFFFFFFFF /* an invalid hostid */ +#define HW_HOSTID_LEN 11 /* minimum buffer size needed */ + /* to hold a decimal or hex */ + /* hostid string */ + +const char *spl_panicstr(void); +int spl_system_inshutdown(void); + + +#endif /* SPL_SYSTEMINFO_H */ diff --git a/include/os/macos/spl/sys/systm.h b/include/os/macos/spl/sys/systm.h new file mode 100644 index 0000000000..54b75e29b5 --- /dev/null +++ b/include/os/macos/spl/sys/systm.h @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_SYSTM_H +#define _SPL_SYSTM_H + +#include_next +#include + +typedef uintptr_t pc_t; + +#endif /* SPL_SYSTM_H */ diff --git a/include/os/macos/spl/sys/taskq.h b/include/os/macos/spl/sys/taskq.h new file mode 100644 index 0000000000..9dd6776108 --- /dev/null +++ b/include/os/macos/spl/sys/taskq.h @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +/* + * Copyright (C) 2015 Jorgen Lundman + */ + +#ifndef _SYS_TASKQ_H +#define _SYS_TASKQ_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define TASKQ_NAMELEN 31 + +typedef struct taskq taskq_t; +typedef uintptr_t taskqid_t; +typedef void (task_func_t)(void *); + +struct proc; +struct taskq_ent; + +/* New ZFS expects to find taskq_ent_t as well */ +#include + +/* + * Public flags for taskq_create(): bit range 0-15 + */ +#define TASKQ_PREPOPULATE 0x0001 /* Prepopulate with threads and data */ +#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ +#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ +#define TASKQ_THREADS_CPU_PCT 0x0008 /* number of threads as % of ncpu */ +#define TASKQ_DC_BATCH 0x0010 /* Taskq uses SDC in batch mode */ + +/* + * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as + * KM_SLEEP/KM_NOSLEEP. + */ +#define TQ_SLEEP 0x00 /* Can block for memory */ +#define TQ_NOSLEEP 0x01 /* cannot block for memory; may fail */ +#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ +#define TQ_NOALLOC 0x04 /* cannot allocate memory; may fail */ +#define TQ_FRONT 0x08 /* Put task at the front of the queue */ + +#define TASKQID_INVALID ((taskqid_t)0) + +#ifdef _KERNEL + +extern taskq_t *system_taskq; +/* Global dynamic task queue for long delay */ +extern taskq_t *system_delay_taskq; + +extern int spl_taskq_init(void); +extern void spl_taskq_fini(void); +extern void taskq_mp_init(void); + +extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); +extern taskq_t *taskq_create_instance(const char *, int, int, pri_t, int, + int, uint_t); +extern taskq_t *taskq_create_proc(const char *, int, pri_t, int, int, + proc_t *, uint_t); +extern taskq_t *taskq_create_sysdc(const char *, int, int, int, + proc_t *, uint_t, uint_t); +extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); +extern void nulltask(void *); +extern void taskq_destroy(taskq_t *); +extern void taskq_wait(taskq_t *); +#define HAVE_TASKQ_WAIT_ID +extern void taskq_wait_id(taskq_t *, taskqid_t); +extern void taskq_suspend(taskq_t *); +extern int taskq_suspended(taskq_t *); +extern void taskq_resume(taskq_t *); +extern int taskq_member(taskq_t *, kthread_t *); +extern boolean_t taskq_empty(taskq_t *tq); +extern int taskq_cancel_id(taskq_t *, taskqid_t); +extern taskq_t *taskq_of_curthread(void); +extern int taskq_empty_ent(struct taskq_ent *); + +#define taskq_wait_outstanding(T, D) taskq_wait((T)) + +extern void system_taskq_init(void); +extern void system_taskq_fini(void); + +#endif /* _KERNEL */ + +extern int EMPTY_TASKQ(taskq_t *tq); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TASKQ_H */ diff --git a/include/os/macos/spl/sys/taskq_impl.h b/include/os/macos/spl/sys/taskq_impl.h new file mode 100644 index 0000000000..60e86b3673 --- /dev/null +++ b/include/os/macos/spl/sys/taskq_impl.h @@ -0,0 +1,181 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
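A sketch of the usual create/dispatch/wait/destroy flow against the interface declared above; the thread count, alloc limits, and my_ names are illustrative:

static void
my_task(void *arg)
{
	/* ... do one unit of work ... */
}

static void
my_run_tasks(void)
{
	taskq_t *tq;
	taskqid_t id;

	tq = taskq_create("my_taskq", 4, minclsyspri, 4, 16,
	    TASKQ_PREPOPULATE);

	id = taskq_dispatch(tq, my_task, NULL, TQ_SLEEP);
	if (id == TASKQID_INVALID)
		my_task(NULL);		/* dispatch failed; run inline */

	taskq_wait(tq);
	taskq_destroy(tq);
}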
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright (C) 2015 Jorgen Lundman + */ + + +#ifndef _SYS_TASKQ_IMPL_H +#define _SYS_TASKQ_IMPL_H + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct taskq_bucket taskq_bucket_t; + +typedef struct taskq_ent { + struct taskq_ent *tqent_next; + struct taskq_ent *tqent_prev; + task_func_t *tqent_func; + void *tqent_arg; + union { + taskq_bucket_t *tqent_bucket; + uintptr_t tqent_flags; + } tqent_un; + kthread_t *tqent_thread; + kcondvar_t tqent_cv; +#ifdef __APPLE__ + /* Used to simulate TS_STOPPED */ + kmutex_t tqent_thread_lock; + kcondvar_t tqent_thread_cv; +#endif +} taskq_ent_t; + +#define TQENT_FLAG_PREALLOC 0x1 + +/* + * Taskq Statistics fields are not protected by any locks. + */ +typedef struct tqstat { + uint_t tqs_hits; + uint_t tqs_misses; + uint_t tqs_overflow; /* no threads to allocate */ + uint_t tqs_tcreates; /* threads created */ + uint_t tqs_tdeaths; /* threads died */ + uint_t tqs_maxthreads; /* max # of alive threads */ + uint_t tqs_nomem; /* # of times there were no memory */ + uint_t tqs_disptcreates; +} tqstat_t; + +/* + * Per-CPU hash bucket manages taskq_bent_t structures using freelist. + */ +struct taskq_bucket { + kmutex_t tqbucket_lock; + taskq_t *tqbucket_taskq; /* Enclosing taskq */ + taskq_ent_t tqbucket_freelist; + uint_t tqbucket_nalloc; /* # of allocated entries */ + uint_t tqbucket_nfree; /* # of free entries */ + kcondvar_t tqbucket_cv; + ushort_t tqbucket_flags; + hrtime_t tqbucket_totaltime; + tqstat_t tqbucket_stat; +}; + +/* + * Bucket flags. + */ +#define TQBUCKET_CLOSE 0x01 +#define TQBUCKET_SUSPEND 0x02 + +#define TASKQ_INTERFACE_FLAGS 0x0000ffff /* defined in */ + +/* + * taskq implementation flags: bit range 16-31 + */ +#define TASKQ_CHANGING 0x00010000 /* nthreads != target */ +#define TASKQ_SUSPENDED 0x00020000 /* taskq is suspended */ +#define TASKQ_NOINSTANCE 0x00040000 /* no instance number */ +#define TASKQ_THREAD_CREATED 0x00080000 /* a thread has been created */ +#define TASKQ_DUTY_CYCLE 0x00100000 /* using the SDC class */ + +struct taskq { + char tq_name[TASKQ_NAMELEN + 1]; + kmutex_t tq_lock; + krwlock_t tq_threadlock; + kcondvar_t tq_dispatch_cv; + kcondvar_t tq_wait_cv; + kcondvar_t tq_exit_cv; + pri_t tq_pri; /* Scheduling priority */ + uint_t tq_flags; + int tq_active; + int tq_nthreads; + int tq_nthreads_target; + int tq_nthreads_max; + int tq_threads_ncpus_pct; + int tq_nalloc; + int tq_minalloc; + int tq_maxalloc; + kcondvar_t tq_maxalloc_cv; + int tq_maxalloc_wait; + taskq_ent_t *tq_freelist; + taskq_ent_t tq_task; + int tq_maxsize; + taskq_bucket_t *tq_buckets; /* Per-cpu array of buckets */ + int tq_instance; + uint_t tq_nbuckets; /* # of buckets (2^n) */ + union { + kthread_t *_tq_thread; + kthread_t **_tq_threadlist; + } tq_thr; + + list_node_t tq_cpupct_link; /* linkage for taskq_cpupct_list */ + proc_t *tq_proc; /* process for taskq threads */ + int tq_cpupart; /* cpupart id bound to */ + uint_t tq_DC; /* duty cycle for SDC */ + + /* + * Statistics. 
+ */ + kstat_t *tq_kstat; /* Exported statistics */ + hrtime_t tq_totaltime; /* Time spent processing tasks */ + uint64_t tq_tasks; /* Total # of tasks posted */ + uint64_t tq_executed; /* Total # of tasks executed */ + int tq_maxtasks; /* Max number of tasks in the queue */ + int tq_tcreates; + int tq_tdeaths; +}; + +/* Special form of taskq dispatch that uses preallocated entries. */ +void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t, taskq_ent_t *); + + +#define tq_thread tq_thr._tq_thread +#define tq_threadlist tq_thr._tq_threadlist + +/* The MAX guarantees we have at least one thread */ +#define TASKQ_THREADS_PCT(ncpus, pct) MAX(((ncpus) * (pct)) / 100, 1) + +/* Extra ZOL / Apple */ +extern void taskq_init_ent(taskq_ent_t *t); +extern taskqid_t taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, + uint_t flags, clock_t expire_time); + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TASKQ_IMPL_H */ diff --git a/include/os/macos/spl/sys/thread.h b/include/os/macos/spl/sys/thread.h new file mode 100644 index 0000000000..b07dca09df --- /dev/null +++ b/include/os/macos/spl/sys/thread.h @@ -0,0 +1,113 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_THREAD_H +#define _SPL_THREAD_H + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * OsX thread type is + * typedef struct thread *thread_t; + * + * Map that to the ZFS thread type: kthread_t + */ +#define kthread thread +#define kthread_t struct kthread + +/* + * Thread interfaces + */ +#define TP_MAGIC 0x53535353 + +#define TS_FREE 0x00 /* Thread at loose ends */ +#define TS_SLEEP 0x01 /* Awaiting an event */ +#define TS_RUN 0x02 /* Runnable, but not yet on a processor */ +#define TS_ONPROC 0x04 /* Thread is being run on a processor */ +#define TS_ZOMB 0x08 /* Thread has died but hasn't been reaped */ +#define TS_STOPPED 0x10 /* Stopped, initial state */ +#define TS_WAIT 0x20 /* Waiting to become runnable */ + + +typedef void (*thread_func_t)(void *); + + +#define curthread ((kthread_t *)current_thread()) /* current thread pointer */ +#define curproj (ttoproj(curthread)) /* current project pointer */ + +#define thread_join(t) VERIFY(0) + +// Drop the p0 argument, not used. 
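To make that argument mapping concrete: illumos-style callers pass a proc_t pointer as the sixth argument, and the thread_create() macro above drops it textually, so it never reaches spl_thread_create(). A small sketch of the usual common-code call site follows; my_thread_func, p0, and minclsyspri are the customary illumos-side names and are assumptions of the sketch.

    /*
     * Argument F (&p0 below) is discarded by the macro expansion, so no
     * per-process thread ownership is needed on XNU.
     */
    static void
    my_thread_func(void *arg)
    {
        /* ... thread body ... */
        thread_exit();              /* maps to spl_thread_exit() */
    }

    static void
    start_example_thread(void *arg)
    {
        (void) thread_create(NULL, 0, my_thread_func, arg, 0, &p0,
            TS_RUN, minclsyspri);
    }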
+ +#ifdef SPL_DEBUG_THREAD + +#define thread_create(A, B, C, D, E, F, G, H) \ + spl_thread_create(A, B, C, D, E, G, __FILE__, __LINE__, H) +extern kthread_t *spl_thread_create(caddr_t stk, size_t stksize, + void (*proc)(void *), void *arg, size_t len, /* proc_t *pp, */ int state, + char *, int, pri_t pri); + +#else + +#define thread_create(A, B, C, D, E, F, G, H) \ + spl_thread_create(A, B, C, D, E, G, H) +extern kthread_t *spl_thread_create(caddr_t stk, size_t stksize, + void (*proc)(void *), void *arg, size_t len, /* proc_t *pp, */ int state, + pri_t pri); + +#endif + +#define thread_exit spl_thread_exit +extern void spl_thread_exit(void); + +extern kthread_t *spl_current_thread(void); + +#define delay osx_delay +extern void osx_delay(int); + +#define KPREEMPT_SYNC 0 +static inline void kpreempt(int flags) +{ + (void) thread_block(THREAD_CONTINUE_NULL); +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* _SPL_THREAD_H */ diff --git a/include/os/macos/spl/sys/time.h b/include/os/macos/spl/sys/time.h new file mode 100644 index 0000000000..c3a33f7e06 --- /dev/null +++ b/include/os/macos/spl/sys/time.h @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_TIME_H +#define _SPL_TIME_H + +#include +#include_next +#include +#include + +#if defined(CONFIG_64BIT) +#define TIME_MAX INT64_MAX +#define TIME_MIN INT64_MIN +#else +#define TIME_MAX INT32_MAX +#define TIME_MIN INT32_MIN +#endif + +#define SEC 1 +#define MILLISEC 1000 +#define MICROSEC 1000000 +#define NANOSEC 1000000000 + +#define NSEC2SEC(n) ((n) / (NANOSEC / SEC)) +#define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC)) + +/* Already defined in include/linux/time.h */ +#undef CLOCK_THREAD_CPUTIME_ID +#undef CLOCK_REALTIME +#undef CLOCK_MONOTONIC +#undef CLOCK_PROCESS_CPUTIME_ID + +typedef enum clock_type { + __CLOCK_REALTIME0 = 0, /* obsolete; same as CLOCK_REALTIME */ + CLOCK_VIRTUAL = 1, /* thread's user-level CPU clock */ + CLOCK_THREAD_CPUTIME_ID = 2, /* thread's user+system CPU clock */ + CLOCK_REALTIME = 3, /* wall clock */ + CLOCK_MONOTONIC = 4, /* high resolution monotonic clock */ + CLOCK_PROCESS_CPUTIME_ID = 5, /* process's user+system CPU clock */ + CLOCK_HIGHRES = CLOCK_MONOTONIC, /* alternate name */ + CLOCK_PROF = CLOCK_THREAD_CPUTIME_ID, /* alternate name */ +} clock_type_t; + +#define TIMESPEC_OVERFLOW(ts) \ + ((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX) + +typedef long long hrtime_t; + +extern hrtime_t gethrtime(void); +extern void gethrestime(struct timespec *); +extern time_t gethrestime_sec(void); +extern void hrt2ts(hrtime_t hrt, struct timespec *tsp); + +#define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC)) 
+#define USEC2NSEC(u) ((hrtime_t)(u) * (NANOSEC / MICROSEC)) +#define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC)) +#define SEC_TO_TICK(sec) ((sec) * hz) +#define NSEC_TO_TICK(nsec) ((nsec) / (NANOSEC / hz)) + +#endif /* _SPL_TIME_H */ diff --git a/include/os/macos/spl/sys/timer.h b/include/os/macos/spl/sys/timer.h new file mode 100644 index 0000000000..ccdf64f5df --- /dev/null +++ b/include/os/macos/spl/sys/timer.h @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_TIMER_H +#define _SPL_TIMER_H + +#include + +/* Open Solaris lbolt is in hz */ +static inline uint64_t +zfs_lbolt(void) +{ + struct timeval tv; + uint64_t lbolt_hz; + microuptime(&tv); + lbolt_hz = ((uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10000; + return (lbolt_hz); +} + + +#define lbolt zfs_lbolt() +#define lbolt64 zfs_lbolt() + +#define ddi_get_lbolt() (zfs_lbolt()) +#define ddi_get_lbolt64() (zfs_lbolt()) + +#define typecheck(type, x) \ + ( \ + { type __dummy; \ + typeof(x) __dummy2; \ + (void) (&__dummy == &__dummy2); \ + 1; \ + }) + + + +#define ddi_time_before(a, b) (typecheck(clock_t, a) && \ + typecheck(clock_t, b) && \ + ((a) - (b) < 0)) +#define ddi_time_after(a, b) ddi_time_before(b, a) + +#define ddi_time_before64(a, b) (typecheck(int64_t, a) && \ + typecheck(int64_t, b) && \ + ((a) - (b) < 0)) +#define ddi_time_after64(a, b) ddi_time_before64(b, a) + + + +extern void delay(clock_t ticks); + +#define usleep_range(wakeup, whocares) \ + do { \ + hrtime_t delta = wakeup - gethrtime(); \ + if (delta > 0) { \ + struct timespec ts; \ + ts.tv_sec = delta / NANOSEC; \ + ts.tv_nsec = delta % NANOSEC; \ + (void) msleep(NULL, NULL, PWAIT, "usleep_range", &ts); \ + } \ + } while (0) + + +#endif /* _SPL_TIMER_H */ diff --git a/include/os/macos/spl/sys/trace.h b/include/os/macos/spl/sys/trace.h new file mode 100644 index 0000000000..7b72d3a98d --- /dev/null +++ b/include/os/macos/spl/sys/trace.h @@ -0,0 +1,26 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
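The tick helpers in the timer header above are easiest to see with a concrete deadline check. Below is a minimal sketch of a wrap-safe timeout loop; condition_met() and the 5-second timeout are illustrative, and hz is assumed to be the kernel tick rate exported elsewhere in the SPL.

    extern boolean_t condition_met(void);   /* illustrative predicate */

    static int
    wait_for_condition(void)
    {
        /* deadline roughly 5 seconds from now, measured in ticks */
        clock_t deadline = (clock_t)ddi_get_lbolt() + SEC_TO_TICK(5);

        while (!condition_met()) {
            /* ddi_time_after() stays correct even if the tick counter wraps */
            if (ddi_time_after((clock_t)ddi_get_lbolt(), deadline))
                return (ETIMEDOUT);
            delay(1);               /* sleep one tick between polls */
        }
        return (0);
    }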
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_TRACE_H +#define _SPL_TRACE_H + + +#endif diff --git a/include/os/macos/spl/sys/tsd.h b/include/os/macos/spl/sys/tsd.h new file mode 100644 index 0000000000..cfc48000a5 --- /dev/null +++ b/include/os/macos/spl/sys/tsd.h @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + + +#ifndef _SPL_TSD_H +#define _SPL_TSD_H + +#include + +#define TSD_HASH_TABLE_BITS_DEFAULT 9 +#define TSD_KEYS_MAX 32768 +#define DTOR_PID (PID_MAX_LIMIT+1) +#define PID_KEY (TSD_KEYS_MAX+1) + +typedef void (*dtor_func_t)(void *); + +extern int tsd_set(uint_t, void *); +extern void *tsd_get(uint_t); +extern void *tsd_get_by_thread(uint_t, thread_t); +extern void tsd_create(uint_t *, dtor_func_t); +extern void tsd_destroy(uint_t *); +extern void tsd_exit(void); + +uint64_t spl_tsd_size(void); +void tsd_thread_exit(void); +int spl_tsd_init(void); +void spl_tsd_fini(void); + +#endif /* _SPL_TSD_H */ diff --git a/include/os/macos/spl/sys/types.h b/include/os/macos/spl/sys/types.h new file mode 100644 index 0000000000..38faab8367 --- /dev/null +++ b/include/os/macos/spl/sys/types.h @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_TYPES_H +#define _SPL_TYPES_H + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#include_next +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Avoid kcdata.h header error */ +extern unsigned long strnlen(const char *, unsigned long); + +#ifdef __cplusplus +} +#endif + +#include + +#include + +#ifndef ULLONG_MAX +#define ULLONG_MAX (~0ULL) +#endif + +#ifndef LLONG_MAX +#define LLONG_MAX ((long long)(~0ULL>>1)) +#endif + +enum { B_FALSE = 0, B_TRUE = 1 }; +typedef short pri_t; +typedef unsigned long ulong_t; +typedef unsigned long long u_longlong_t; +typedef unsigned long long rlim64_t; +typedef unsigned long long loff_t; +typedef long long longlong_t; +typedef unsigned char uchar_t; +typedef unsigned int uint_t; +typedef unsigned short ushort_t; +typedef void *spinlock_t; +typedef long long offset_t; +typedef struct timespec timestruc_t; /* definition per SVr4 */ +typedef struct timespec timespec_t; +typedef ulong_t pgcnt_t; +typedef unsigned int umode_t; +#define NODEV32 (dev32_t)(-1) +typedef uint32_t dev32_t; +typedef uint_t minor_t; +typedef short index_t; + +#include +#define FCREAT O_CREAT +#define FTRUNC O_TRUNC +#define FEXCL O_EXCL +#define FNOCTTY O_NOCTTY +#define FNOFOLLOW O_NOFOLLOW + +#ifdef __APPLE__ +#define FSYNC O_SYNC /* file (data+inode) integrity while writing */ +#define FDSYNC O_DSYNC /* file data only integrity while writing */ +#define FOFFMAX 0x0000 /* not used */ +#define FRSYNC 0x0000 /* not used */ +#else +#define FRSYNC 0x8000 /* sync read operations at same level of */ + /* integrity as specified for writes by */ + /* FSYNC and FDSYNC flags */ +#define FOFFMAX 0x2000 /* large file */ +#endif + +#define EXPORT_SYMBOL(X) +#define module_param(X, Y, Z) +#define MODULE_PARM_DESC(X, Y) + +#ifdef __GNUC__ +#define member_type(type, member) __typeof__(((type *)0)->member) +#else +#define member_type(type, member) void +#endif + +#define container_of(ptr, type, member) ((type *) \ + ((char *)(member_type(type, member) *) \ + { ptr } - offsetof(type, member))) + +typedef struct timespec inode_timespec_t; + +#endif /* _SPL_TYPES_H */ diff --git a/include/os/macos/spl/sys/types32.h b/include/os/macos/spl/sys/types32.h new file mode 100644 index 0000000000..799a956b5e --- /dev/null +++ b/include/os/macos/spl/sys/types32.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
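The container_of() definition above recovers the address of an enclosing structure from a pointer to one of its embedded members, without relying on GNU statement expressions. A brief self-contained sketch (the struct names are illustrative):

    struct example_inner {
        int                  dummy;
    };

    struct example_outer {
        uint64_t             id;
        struct example_inner inner;     /* embedded member */
    };

    static struct example_outer *
    outer_from_inner(struct example_inner *ip)
    {
        /* steps back by offsetof(struct example_outer, inner) */
        return (container_of(ip, struct example_outer, inner));
    }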
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SPL_TYPES32_H +#define _SPL_TYPES32_H + +typedef uint32_t caddr32_t; +typedef int32_t daddr32_t; +typedef int32_t time32_t; +typedef uint32_t size32_t; + +#endif /* SPL_TYPE32_H */ diff --git a/include/os/macos/spl/sys/uio.h b/include/os/macos/spl/sys/uio.h new file mode 100644 index 0000000000..2327de209d --- /dev/null +++ b/include/os/macos/spl/sys/uio.h @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. 
+ */ + + +#ifndef _SPL_UIO_H +#define _SPL_UIO_H + + +// OSX defines "uio_t" as "struct uio *" +// ZFS defines "uio_t" as "struct uio" +#undef uio_t +#include_next +#define uio_t struct uio + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct iovec iovec_t; + +typedef enum uio_seg uio_seg_t; +typedef enum uio_rw uio_rw_t; + +typedef struct aio_req { + uio_t *aio_uio; + void *aio_private; +} aio_req_t; + +typedef enum xuio_type { + UIOTYPE_ASYNCIO, + UIOTYPE_ZEROCOPY, +} xuio_type_t; + + +#define UIOA_IOV_MAX 16 + +typedef struct uioa_page_s { + int uioa_pfncnt; + void **uioa_ppp; + caddr_t uioa_base; + size_t uioa_len; +} uioa_page_t; + +typedef struct xuio { + uio_t *xu_uio; + enum xuio_type xu_type; + union { + struct { + uint32_t xu_a_state; + ssize_t xu_a_mbytes; + uioa_page_t *xu_a_lcur; + void **xu_a_lppp; + void *xu_a_hwst[4]; + uioa_page_t xu_a_locked[UIOA_IOV_MAX]; + } xu_aio; + + struct { + int xu_zc_rw; + void *xu_zc_priv; + } xu_zc; + } xu_ext; +} xuio_t; + +#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv +#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw + +#define uio_segflg(U) \ + (uio_isuserspace((struct uio *)(U))?UIO_USERSPACE:UIO_SYSSPACE) +#define uio_advance(U, N) uio_update((struct uio *)(U), (N)) + +static inline uint64_t +uio_iovlen(const struct uio *u, unsigned int i) +{ + user_size_t iov_len; + uio_getiov((struct uio *)u, i, NULL, &iov_len); + return (iov_len); +} + +static inline void * +uio_iovbase(const struct uio *u, unsigned int i) +{ + user_addr_t iov_base; + uio_getiov((struct uio *)u, i, &iov_base, NULL); + return ((void *)iov_base); +} + +static inline void +uio_iov_at_index(uio_t *uio, unsigned int idx, void **base, uint64_t *len) +{ + (void) uio_getiov(uio, idx, (user_addr_t *)base, len); +} + +static inline long long +uio_index_at_offset(struct uio *uio, long long off, unsigned int *vec_idx) +{ + uint64_t len; + *vec_idx = 0; + while (*vec_idx < uio_iovcnt(uio) && off >= + (len = uio_iovlen(uio, *vec_idx))) { + off -= len; + (*vec_idx)++; + } + return (off); +} + +/* + * same as uiomove() but doesn't modify uio structure. + * return in cbytes how many bytes were copied. + */ +static inline int +uiocopy(const char *p, size_t n, enum uio_rw rw, struct uio *uio, + size_t *cbytes) +{ + int result; + struct uio *nuio = uio_duplicate(uio); + unsigned long long x = uio_resid(uio); + if (!nuio) + return (ENOMEM); + uio_setrw(nuio, rw); + result = uiomove(p, n, nuio); + *cbytes = x-uio_resid(nuio); + uio_free(nuio); + return (result); +} + + +// Apple's uiomove puts the uio_rw in uio_create +#define uiomove(A, B, C, D) uiomove((A), (B), (D)) +#define uioskip(A, B) uio_update((A), (B)) + +extern int uio_prefaultpages(ssize_t, uio_t *); + +#ifdef __cplusplus +} +#endif +#endif /* SPL_UIO_H */ diff --git a/include/os/macos/spl/sys/utsname.h b/include/os/macos/spl/sys/utsname.h new file mode 100644 index 0000000000..b6bcab77bb --- /dev/null +++ b/include/os/macos/spl/sys/utsname.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
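As noted in its comment, uiocopy() above duplicates the XNU uio with uio_duplicate() and advances only the duplicate, so the caller's offset and residual are untouched. A short usage sketch, with the buffer size chosen purely for illustration:

    static int
    peek_write_data(struct uio *uio)
    {
        char buf[128];
        size_t copied;
        int error;

        /* rw = UIO_WRITE: data flows from the uio into buf */
        error = uiocopy(buf, sizeof (buf), UIO_WRITE, uio, &copied);
        if (error == 0) {
            /* uio_resid(uio) is unchanged; copied <= sizeof (buf) */
        }
        return (error);
    }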
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#ifndef _SPL_UTSNAME_H +#define _SPL_UTSNAME_H + +#define _SYS_NMLN 257 +struct opensolaris_utsname { + char sysname[_SYS_NMLN]; + char nodename[_SYS_NMLN]; + char release[_SYS_NMLN]; + char version[_SYS_NMLN]; + char machine[_SYS_NMLN]; +}; + +typedef struct opensolaris_utsname utsname_t; + +extern utsname_t *utsname(void); + +#endif /* SPL_UTSNAME_H */ diff --git a/include/os/macos/spl/sys/varargs.h b/include/os/macos/spl/sys/varargs.h new file mode 100644 index 0000000000..b7371a1f2a --- /dev/null +++ b/include/os/macos/spl/sys/varargs.h @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ +#ifndef _SPL_VARARGS_H +#define _SPL_VARARGS_H + +#define __va_list va_list + +#endif /* SPL_VARARGS_H */ diff --git a/include/os/macos/spl/sys/vfs.h b/include/os/macos/spl/sys/vfs.h new file mode 100644 index 0000000000..aa78dc4347 --- /dev/null +++ b/include/os/macos/spl/sys/vfs.h @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. 
+ */ + +#ifndef _SPL_ZFS_H +#define _SPL_ZFS_H + +#include +#include + +#define MAXFIDSZ 64 + +typedef struct mount vfs_t; + +#define vn_vfswlock(vp) (0) +#define vn_vfsunlock(vp) +#define VFS_HOLD(vfsp) +#define VFS_RELE(vfsp) + + + +/* + * File identifier. Should be unique per filesystem on a single + * machine. This is typically called by a stateless file server + * in order to generate "file handles". + * + * Do not change the definition of struct fid ... fid_t without + * letting the CacheFS group know about it! They will have to do at + * least two things, in the same change that changes this structure: + * 1. change CFSVERSION in usr/src/uts/common/sys/fs/cachefs_fs.h + * 2. put the old version # in the canupgrade array + * in cachfs_upgrade() in usr/src/cmd/fs.d/cachefs/fsck/fsck.c + * This is necessary because CacheFS stores FIDs on disk. + * + * Many underlying file systems cast a struct fid into other + * file system dependent structures which may require 4 byte alignment. + * Because a fid starts with a short it may not be 4 byte aligned, the + * fid_pad will force the alignment. + */ +#define MAXFIDSZ 64 +#define OLD_MAXFIDSZ 16 + +typedef struct fid { + union { + long fid_pad; + struct { + ushort_t len; /* length of data in bytes */ + char data[MAXFIDSZ]; /* data (variable len) */ + } _fid; + } un; +} fid_t; + + +extern void (*mountroot_post_hook)(void); + +#endif /* SPL_ZFS_H */ diff --git a/include/os/macos/spl/sys/vmem.h b/include/os/macos/spl/sys/vmem.h new file mode 100644 index 0000000000..6ff8a6e146 --- /dev/null +++ b/include/os/macos/spl/sys/vmem.h @@ -0,0 +1,174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#ifndef _SYS_VMEM_H +#define _SYS_VMEM_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +#define KMEM_QUANTUM (PAGESIZE) + + + /* + * Per-allocation flags + */ +#define VM_SLEEP 0x00000000 /* same as KM_SLEEP */ +#define VM_NOSLEEP 0x00000001 /* same as KM_NOSLEEP */ +#define VM_PANIC 0x00000002 /* same as KM_PANIC */ +#define VM_PUSHPAGE 0x00000004 /* same as KM_PUSHPAGE */ +#define VM_NORMALPRI 0x00000008 /* same as KM_NORMALPRI */ +#define VM_NODEBUG 0x00000010 /* matches KM_NODE~BUG, */ + /* not implemented on OSX */ +#define VM_NO_VBA 0x00000020 /* OSX: do not descend to the bucket layer */ +#define VM_KMFLAGS 0x000000ff /* flags that must match KM_* flags */ + +#define VM_BESTFIT 0x00000100 +#define VM_FIRSTFIT 0x00000200 +#define VM_NEXTFIT 0x00000400 + +/* + * The following flags are restricted for use only within the kernel. + * VM_MEMLOAD is for use by the HAT to avoid infinite recursion. 
+ * VM_NORELOC is used by the kernel when static VA->PA mappings are required. + */ +#define VM_MEMLOAD 0x00000800 +#define VM_NORELOC 0x00001000 + +/* + * VM_ABORT requests that vmem_alloc() *ignore* the VM_SLEEP/VM_NOSLEEP flags + * and forgo reaping if the allocation or attempted import, fails. This + * flag is a segkmem-specific flag, and should not be used by anyone else. + */ +#define VM_ABORT 0x00002000 + +/* + * VM_ENDALLOC requests that large addresses be preferred in allocations. + * Has no effect if VM_NEXTFIT is active. + */ +#define VM_ENDALLOC 0x00004000 + +#define VM_FLAGS 0x0000FFFF + +/* + * Arena creation flags + */ +#define VMC_POPULATOR 0x00010000 +#define VMC_NO_QCACHE 0x00020000 /* cannot use quantum caches */ +#define VMC_IDENTIFIER 0x00040000 /* not backed by memory */ +// VMC_XALLOC 0x00080000 below +// VMC_XALIGN 0x00100000 below +#define VMC_DUMPSAFE 0x00200000 /* can use alternate dump memory */ +// KMC_IDENTIFIER == 0x00400000 +// KMC_PREFILL == 0x00800000 +#define VMC_TIMEFREE 0x01000000 /* keep span creation time, */ + /* newest spans to front */ +#define VMC_OLDFIRST 0x02000000 /* must accompany VMC_TIMEFREE, */ + /* oldest spans to front */ + +/* + * internal use only; the import function uses the vmem_ximport_t interface + * and may increase the request size if it so desires. + * VMC_XALIGN, for use with vmem_xcreate, specifies that + * the address returned by the import function will be + * aligned according to the alignment argument. + */ +#define VMC_XALLOC 0x00080000 +#define VMC_XALIGN 0x00100000 +#define VMC_FLAGS 0xFFFF0000 + +/* + * Public segment types + */ +#define VMEM_ALLOC 0x01 +#define VMEM_FREE 0x02 + +/* + * Implementation-private segment types + */ +#define VMEM_SPAN 0x10 +#define VMEM_ROTOR 0x20 +#define VMEM_WALKER 0x40 + +/* + * VMEM_REENTRANT indicates to vmem_walk() that the callback routine may + * call back into the arena being walked, so vmem_walk() must drop the + * arena lock before each callback. The caveat is that since the arena + * isn't locked, its state can change. Therefore it is up to the callback + * routine to handle cases where the segment isn't of the expected type. + * For example, we use this to walk heap_arena when generating a crash dump; + * see segkmem_dump() for sample usage. + */ +#define VMEM_REENTRANT 0x80000000 + +struct vmem; + +typedef struct vmem vmem_t; +typedef void *(vmem_alloc_t)(vmem_t *, size_t, int); +typedef void (vmem_free_t)(vmem_t *, void *, size_t); + +/* + * Alternate import style; the requested size is passed in a pointer, + * which can be increased by the import function if desired. 
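For orientation, here is a minimal sketch of how the arena interface declared just below is typically used: create a sub-arena that imports spans from a parent, carve allocations out of it with VM_SLEEP, and free them back. The parent arena and arena name are assumptions of the sketch, not defined by this header.

    static vmem_t *
    create_example_arena(vmem_t *parent)    /* 'parent' supplied by the caller */
    {
        return (vmem_create("example_arena",
            NULL, 0,                /* no initial span; import on demand */
            KMEM_QUANTUM,           /* allocation quantum */
            vmem_alloc, vmem_free,  /* import from / release to parent */
            parent,                 /* source arena */
            0,                      /* qcache_max: no quantum caching */
            VM_SLEEP));
    }

    static void
    use_example_arena(vmem_t *arena)
    {
        void *buf = vmem_alloc(arena, 4096, VM_SLEEP);
        /* ... use buf ... */
        vmem_free(arena, buf, 4096);
    }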
+ */ +typedef void *(vmem_ximport_t)(vmem_t *, size_t *, size_t, int); + +#ifdef _KERNEL +extern vmem_t *vmem_init(const char *, void *, size_t, size_t, + vmem_alloc_t *, vmem_free_t *); +extern void vmem_fini(vmem_t *); +extern void vmem_update(void *); +extern int vmem_is_populator(void); +extern size_t vmem_seg_size; +#endif + +extern vmem_t *vmem_create(const char *, void *, size_t, size_t, + vmem_alloc_t *, vmem_free_t *, vmem_t *, size_t, int); +extern vmem_t *vmem_xcreate(const char *, void *, size_t, size_t, + vmem_ximport_t *, vmem_free_t *, vmem_t *, size_t, int); +extern void vmem_destroy(vmem_t *); +extern void *vmem_alloc(vmem_t *, size_t, int); +extern void *vmem_xalloc(vmem_t *, size_t, size_t, size_t, size_t, + void *, void *, int); +extern void vmem_free(vmem_t *, void *, size_t); +extern void vmem_xfree(vmem_t *, void *, size_t); +extern void *vmem_add(vmem_t *, void *, size_t, int); +extern int vmem_contains(vmem_t *, void *, size_t); +extern void vmem_walk(vmem_t *, int, void (*)(void *, void *, size_t), void *); +extern size_t vmem_size(vmem_t *, int); +extern size_t vmem_size_locked(vmem_t *, int); +extern size_t vmem_size_semi_atomic(vmem_t *, int); +extern void vmem_qcache_reap(vmem_t *vmp); +extern int64_t vmem_buckets_size(int); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VMEM_H */ diff --git a/include/os/macos/spl/sys/vmem_impl.h b/include/os/macos/spl/sys/vmem_impl.h new file mode 100644 index 0000000000..233a6b33ae --- /dev/null +++ b/include/os/macos/spl/sys/vmem_impl.h @@ -0,0 +1,155 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1999-2001, 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_VMEM_IMPL_H +#define _SYS_VMEM_IMPL_H + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct vmem_seg vmem_seg_t; + +#define VMEM_STACK_DEPTH 20 + +struct vmem_seg { + /* + * The first four fields must match vmem_freelist_t exactly. + */ + uintptr_t vs_start; /* start of segment (inclusive) */ + uintptr_t vs_end; /* end of segment (exclusive) */ + vmem_seg_t *vs_knext; /* next of kin (alloc, free, span) */ + vmem_seg_t *vs_kprev; /* prev of kin */ + + vmem_seg_t *vs_anext; /* next in arena */ + vmem_seg_t *vs_aprev; /* prev in arena */ + uint8_t vs_type; /* alloc, free, span */ + uint8_t vs_import; /* non-zero if segment was imported */ + uint8_t vs_depth; /* stack depth if KMF_AUDIT active */ + /* + * if VM_FREESORT is set on the arena, then + * this field is set at span creation time. + */ + hrtime_t vs_span_createtime; + /* + * The following fields are present only when KMF_AUDIT is set. 
+ */ + kthread_t *vs_thread; + hrtime_t vs_timestamp; + pc_t vs_stack[VMEM_STACK_DEPTH]; +}; + +typedef struct vmem_freelist { + uintptr_t vs_start; /* always zero */ + uintptr_t vs_end; /* segment size */ + vmem_seg_t *vs_knext; /* next of kin */ + vmem_seg_t *vs_kprev; /* prev of kin */ +} vmem_freelist_t; + +#define VS_SIZE(vsp) ((vsp)->vs_end - (vsp)->vs_start) + +/* + * Segment hashing + */ +#define VMEM_HASH_INDEX(a, s, q, m) \ + ((((a) + ((a) >> (s)) + ((a) >> ((s) << 1))) >> (q)) & (m)) + +#define VMEM_HASH(vmp, addr) \ + (&(vmp)->vm_hash_table[VMEM_HASH_INDEX(addr, \ + (vmp)->vm_hash_shift, (vmp)->vm_qshift, (vmp)->vm_hash_mask)]) + +#define VMEM_QCACHE_SLABSIZE(max) \ + MAX(1 << highbit(3 * (max)), 64) + +#define VMEM_NAMELEN 30 +#define VMEM_HASH_INITIAL 16 +#define VMEM_NQCACHE_MAX 16 +#define VMEM_FREELISTS (sizeof (void *) * 8) + +typedef struct vmem_kstat { + kstat_named_t vk_mem_inuse; /* memory in use */ + kstat_named_t vk_mem_import; /* memory imported */ + kstat_named_t vk_mem_total; /* total memory in arena */ + kstat_named_t vk_source_id; /* vmem id of vmem source */ + kstat_named_t vk_alloc; /* number of allocations */ + kstat_named_t vk_free; /* number of frees */ + kstat_named_t vk_wait; /* number of allocations that waited */ + kstat_named_t vk_fail; /* number of allocations that failed */ + kstat_named_t vk_lookup; /* hash lookup count */ + kstat_named_t vk_search; /* freelist search count */ + kstat_named_t vk_populate_fail; /* populates that failed */ + kstat_named_t vk_contains; /* vmem_contains() calls */ + kstat_named_t vk_contains_search; /* vmem_contains() search cnt */ + kstat_named_t vk_parent_alloc; /* called the source allocator */ + kstat_named_t vk_parent_free; /* called the source free function */ + kstat_named_t vk_threads_waiting; /* threads in cv_wait in vmem */ + /* allocator function */ + kstat_named_t vk_excess; /* count of retained excess imports */ +} vmem_kstat_t; + +struct vmem { + char vm_name[VMEM_NAMELEN]; /* arena name */ + kcondvar_t vm_cv; /* cv for blocking allocations */ + kmutex_t vm_lock; /* arena lock */ + uint32_t vm_id; /* vmem id */ + hrtime_t vm_createtime; + uint32_t vm_mtbf; /* induced alloc failure rate */ + int vm_cflags; /* arena creation flags */ + int vm_qshift; /* log2(vm_quantum) */ + size_t vm_quantum; /* vmem quantum */ + size_t vm_qcache_max; /* maximum size to front by kmem */ + size_t vm_min_import; /* smallest amount to import */ + void *(*vm_source_alloc)(vmem_t *, size_t, int); + void (*vm_source_free)(vmem_t *, void *, size_t); + vmem_t *vm_source; /* vmem source for imported memory */ + vmem_t *vm_next; /* next in vmem_list */ + kstat_t *vm_ksp; /* kstat */ + ssize_t vm_nsegfree; /* number of free vmem_seg_t's */ + vmem_seg_t *vm_segfree; /* free vmem_seg_t list */ + vmem_seg_t **vm_hash_table; /* allocated-segment hash table */ + size_t vm_hash_mask; /* hash_size - 1 */ + size_t vm_hash_shift; /* log2(vm_hash_mask + 1) */ + ulong_t vm_freemap; /* bitmap of non-empty freelists */ + vmem_seg_t vm_seg0; /* anchor segment */ + vmem_seg_t vm_rotor; /* rotor for VM_NEXTFIT allocations */ + vmem_seg_t *vm_hash0[VMEM_HASH_INITIAL]; /* initial hash table */ + void *vm_qcache[VMEM_NQCACHE_MAX]; /* quantum caches */ + vmem_freelist_t vm_freelist[VMEM_FREELISTS + 1]; /* power-of-2 flists */ + vmem_kstat_t vm_kstat; /* kstat data */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VMEM_IMPL_H */ diff --git a/include/os/macos/spl/sys/vmsystm.h b/include/os/macos/spl/sys/vmsystm.h new file mode 100644 index 
0000000000..421e26364c --- /dev/null +++ b/include/os/macos/spl/sys/vmsystm.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_VMSYSTM_H +#define _SPL_VMSYSTM_H + +#include + +#define xcopyout copyout + +#endif /* SPL_VMSYSTM_H */ diff --git a/include/os/macos/spl/sys/vnode.h b/include/os/macos/spl/sys/vnode.h new file mode 100644 index 0000000000..37b7e41e75 --- /dev/null +++ b/include/os/macos/spl/sys/vnode.h @@ -0,0 +1,258 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_VNODE_H +#define _SPL_VNODE_H + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +// Be aware that Apple defines "typedef struct vnode *vnode_t" and +// ZFS uses "typedef struct vnode vnode_t". 
+#undef uio_t +#undef vnode_t +#include_next +#define vnode_t struct vnode +#define uio_t struct uio + + +struct caller_context; +typedef struct caller_context caller_context_t; +typedef int vcexcl_t; + +enum vcexcl { NONEXCL, EXCL }; + +#define B_INVAL 0x01 +#define B_TRUNC 0x02 + +#define CREATE_XATTR_DIR 0x04 /* Create extended attr dir */ + +#define IS_DEVVP(vp) \ + (vnode_ischr(vp) || vnode_isblk(vp) || vnode_isfifo(vp)) + +enum rm { RMFILE, RMDIRECTORY }; /* rm or rmdir (remove) */ +enum create { CRCREAT, CRMKNOD, CRMKDIR }; /* reason for create */ + +#define va_mask va_active +#define va_nodeid va_fileid +#define va_nblocks va_filerev + +/* + * vnode attr translations + */ +#define ATTR_TYPE VNODE_ATTR_va_type +#define ATTR_MODE VNODE_ATTR_va_mode +#define ATTR_ACL VNODE_ATTR_va_acl +#define ATTR_UID VNODE_ATTR_va_uid +#define ATTR_GID VNODE_ATTR_va_gid +#define ATTR_ATIME VNODE_ATTR_va_access_time +#define ATTR_MTIME VNODE_ATTR_va_modify_time +#define ATTR_CTIME VNODE_ATTR_va_change_time +#define ATTR_CRTIME VNODE_ATTR_va_create_time +#define ATTR_SIZE VNODE_ATTR_va_data_size +#define ATTR_NOSET 0 +/* + * OSX uses separate vnop getxattr and setxattr to deal with XATTRs, so + * we never get vop&XVATTR set from VFS. All internal checks for it in + * ZFS is not required. + */ +#define ATTR_XVATTR 0 +#define AT_XVATTR ATTR_XVATTR + +#define va_size va_data_size +#define va_atime va_access_time +#define va_mtime va_modify_time +#define va_ctime va_change_time +#define va_crtime va_create_time +#define va_bytes va_data_size + +typedef struct vnode_attr vattr; +typedef struct vnode_attr vattr_t; + +/* vsa_mask values */ +#define VSA_ACL 0x0001 +#define VSA_ACLCNT 0x0002 +#define VSA_DFACL 0x0004 +#define VSA_DFACLCNT 0x0008 +#define VSA_ACE 0x0010 +#define VSA_ACECNT 0x0020 +#define VSA_ACE_ALLTYPES 0x0040 +#define VSA_ACE_ACLFLAGS 0x0080 /* get/set ACE ACL flags */ + + +extern struct vnode *vn_alloc(int flag); + +extern int vn_open(char *pnamep, enum uio_seg seg, int filemode, + int createmode, struct vnode **vpp, enum create crwhy, mode_t umask); +extern int vn_openat(char *pnamep, enum uio_seg seg, int filemode, + int createmode, struct vnode **vpp, enum create crwhy, + mode_t umask, struct vnode *startvp); + +#define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0) +#define vn_free(vp) do { } while (0) +#define vn_pages_remove(vp, fl, op) do { } while (0) + +/* XNU is a vn_rdwr, so we work around it to match arguments */ +/* This should be deprecated, if not now, soon. */ +extern int zfs_vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, + ssize_t len, offset_t offset, enum uio_seg seg, int ioflag, + rlim64_t ulimit, cred_t *cr, ssize_t *residp); + +#define vn_rdwr(rw, vp, b, l, o, s, flg, li, cr, resid) \ + zfs_vn_rdwr((rw), (vp), (b), (l), (o), (s), (flg), (li), (cr), (resid)) + +/* Other vn_rdwr for zfs_file_t ops */ +struct spl_fileproc; +extern int spl_vn_rdwr(enum uio_rw rw, struct spl_fileproc *, caddr_t base, + ssize_t len, offset_t offset, enum uio_seg seg, int ioflag, + rlim64_t ulimit, cred_t *cr, ssize_t *residp); + +extern int vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag); +extern int vn_rename(char *from, char *to, enum uio_seg seg); + +#define LK_RETRY 0 +#define LK_SHARED 0 +#define VN_UNLOCK(vp) +static inline int +vn_lock(struct vnode *vp, int fl) +{ + return (0); +} + +/* + * XNU reserves fileID 1-15, so we remap them high. + * 2 is root-of-the-mount. + * If ID is same as root, return 2. 
Otherwise, if it is 0-15, return + * adjusted, otherwise, return as-is. + * See hfs_format.h: kHFSRootFolderID, kHFSExtentsFileID, ... + */ +#define INO_ROOT 2ULL +#define INO_RESERVED 16ULL /* [0-15] reserved. */ +#define INO_ISRESERVED(ID) ((ID) < (INO_RESERVED)) +/* 0xFFFFFFFFFFFFFFF0 */ +#define INO_MAP ((uint64_t)-INO_RESERVED) /* -16, -15, .., -1 */ + +#define INO_ZFSTOXNU(ID, ROOT) \ + ((ID) == (ROOT)?INO_ROOT:(INO_ISRESERVED(ID)?INO_MAP+(ID):(ID))) + +/* + * This macro relies on *unsigned*. + * If asking for 2, return rootID. If in special range, adjust to + * normal, otherwise, return as-is. + */ +#define INO_XNUTOZFS(ID, ROOT) \ + ((ID) == INO_ROOT)?(ROOT): \ + (INO_ISRESERVED((ID)-INO_MAP))?((ID)-INO_MAP):(ID) + +#define VN_HOLD(vp) vnode_getwithref(vp) +#define VN_RELE(vp) vnode_put(vp) + +void spl_rele_async(void *arg); +void vn_rele_async(struct vnode *vp, void *taskq); + +extern int vnode_iocount(struct vnode *); + +#define VN_RELE_ASYNC(vp, tq) vn_rele_async((vp), (tq)) + +#define vn_exists(vp) +#define vn_is_readonly(vp) vnode_vfsisrdonly(vp) + +#define vnode_pager_setsize(vp, sz) ubc_setsize((vp), (sz)) + +#define VATTR_NULL(v) do { } while (0) + +extern int VOP_CLOSE(struct vnode *vp, int flag, int count, + offset_t off, void *cr, void *); +extern int VOP_FSYNC(struct vnode *vp, int flags, void* unused, void *); +extern int VOP_SPACE(struct vnode *vp, int cmd, struct flock *fl, + int flags, offset_t off, cred_t *cr, void *ctx); + +extern int VOP_GETATTR(struct vnode *vp, vattr_t *vap, int flags, + void *x3, void *x4); + +#define VOP_UNLOCK(vp, fl) do { } while (0) + +void vfs_mountedfrom(struct mount *vfsp, char *osname); + +#define build_path(A, B, C, D, E, F) spl_build_path(A, B, C, D, E, F) +extern int spl_build_path(struct vnode *vp, char *buff, int buflen, + int *outlen, int flags, vfs_context_t ctx); + +extern struct vnode *rootdir; + +static inline int chklock(struct vnode *vp, int iomode, + unsigned long long offset, ssize_t len, int fmode, void *ct) +{ + return (0); +} + +#define vn_ismntpt(vp) (vnode_mountedhere(vp) != NULL) + +extern errno_t VOP_LOOKUP (struct vnode *, struct vnode **, + struct componentname *, vfs_context_t); +extern errno_t VOP_MKDIR (struct vnode *, struct vnode **, + struct componentname *, struct vnode_attr *, + vfs_context_t); +extern errno_t VOP_REMOVE (struct vnode *, struct vnode *, + struct componentname *, int, vfs_context_t); +extern errno_t VOP_SYMLINK (struct vnode *, struct vnode **, + struct componentname *, struct vnode_attr *, + char *, vfs_context_t); + +void spl_vnode_fini(void); +int spl_vnode_init(void); + + +extern int spl_vfs_root(mount_t mount, struct vnode **vp); +#define VFS_ROOT(V, L, VP) spl_vfs_root((V), (VP)) + +extern void cache_purgevfs(mount_t mp); + +vfs_context_t vfs_context_kernel(void); +vfs_context_t spl_vfs_context_kernel(void); +extern int spl_vnode_notify(struct vnode *vp, uint32_t type, + struct vnode_attr *vap); +extern int spl_vfs_get_notify_attributes(struct vnode_attr *vap); +extern void spl_hijack_mountroot(void *func); +extern void spl_setrootvnode(struct vnode *vp); + +struct vnode *getrootdir(void); +void spl_vfs_start(void); + +#endif /* SPL_VNODE_H */ diff --git a/include/os/macos/spl/sys/zmod.h b/include/os/macos/spl/sys/zmod.h new file mode 100644 index 0000000000..6965c91f3d --- /dev/null +++ b/include/os/macos/spl/sys/zmod.h @@ -0,0 +1,122 @@ +/* + * zlib.h -- interface of the 'zlib' general purpose compression library + * version 1.2.5, April 19th, 2010 + * + * Copyright (C) 1995-2010 
Jean-loup Gailly and Mark Adler + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + * + * Jean-loup Gailly + * Mark Adler + */ + +#ifndef _SPL_ZMOD_H +#define _SPL_ZMOD_H + + +#include +#include +#include + +struct _zmemheader { + uint64_t length; + char data[0]; +}; + +static inline void * +zfs_zalloc(void* opaque, uInt items, uInt size) +{ + struct _zmemheader *hdr; + size_t alloc_size = (items * size) + sizeof (uint64_t); + hdr = kmem_zalloc(alloc_size, KM_SLEEP); + hdr->length = alloc_size; + return (&hdr->data); +} + +static inline void +zfs_zfree(void *opaque, void *addr) +{ + struct _zmemheader *hdr; + hdr = addr; + hdr--; + kmem_free(hdr, hdr->length); +} + +/* + * Uncompress the buffer 'src' into the buffer 'dst'. The caller must store + * the expected decompressed data size externally so it can be passed in. + * The resulting decompressed size is then returned through dstlen. This + * function return Z_OK on success, or another error code on failure. + */ +static inline int + z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + z_stream zs; + int err; + + bzero(&zs, sizeof (zs)); + zs.next_in = (uchar_t *)src; + zs.avail_in = srclen; + zs.next_out = dst; + zs.avail_out = *dstlen; + zs.zalloc = zfs_zalloc; + zs.zfree = zfs_zfree; + if ((err = inflateInit(&zs)) != Z_OK) + return (err); + if ((err = inflate(&zs, Z_FINISH)) != Z_STREAM_END) { + (void) inflateEnd(&zs); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + *dstlen = zs.total_out; + return (inflateEnd(&zs)); +} + +static inline int +z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, + int level) +{ + z_stream zs; + int err; + bzero(&zs, sizeof (zs)); + zs.next_in = (uchar_t *)src; + zs.avail_in = srclen; + zs.next_out = dst; + zs.avail_out = *dstlen; + zs.zalloc = zfs_zalloc; + zs.zfree = zfs_zfree; + if ((err = deflateInit(&zs, level)) != Z_OK) + return (err); + if ((err = deflate(&zs, Z_FINISH)) != Z_STREAM_END) { + (void) deflateEnd(&zs); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + *dstlen = zs.total_out; + return (deflateEnd(&zs)); +} + +static inline int +z_compress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + return (z_compress_level(dst, dstlen, src, srclen, + Z_DEFAULT_COMPRESSION)); +} + + +int spl_zlib_init(void); +void spl_zlib_fini(void); + +#endif /* SPL_ZMOD_H */ diff --git a/include/os/macos/spl/sys/zone.h b/include/os/macos/spl/sys/zone.h new file mode 100644 index 0000000000..fdd4c2754f --- /dev/null +++ b/include/os/macos/spl/sys/zone.h @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
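As the comment above stresses, z_uncompress() relies on the caller remembering the expected decompressed size out-of-band. A round-trip sketch using the wrappers above; the buffer sizes are illustrative and the sketch assumes srclen fits in the restore buffer.

    static int
    compress_roundtrip(const void *src, size_t srclen)
    {
        char packed[8192], restored[4096];
        size_t packedlen = sizeof (packed);
        size_t restoredlen = srclen;        /* caller-remembered original size */
        int err;

        err = z_compress(packed, &packedlen, src, srclen);
        if (err != Z_OK)
            return (err);

        err = z_uncompress(restored, &restoredlen, packed, packedlen);
        if (err != Z_OK)
            return (err);

        /* restoredlen now holds the actual decompressed byte count */
        return (0);
    }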
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#ifndef _SPL_ZONE_H +#define _SPL_ZONE_H + +#include + +#define GLOBAL_ZONEID 0 + +#define zone_dataset_visible(x, y) (1) +#define INGLOBALZONE(z) (1) +#define crgetzoneid(x) (GLOBAL_ZONEID) + +#endif /* SPL_ZONE_H */ diff --git a/include/os/macos/zfs/Makefile.am b/include/os/macos/zfs/Makefile.am new file mode 100644 index 0000000000..081839c48c --- /dev/null +++ b/include/os/macos/zfs/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = sys diff --git a/include/os/macos/zfs/sys/Makefile.am b/include/os/macos/zfs/sys/Makefile.am new file mode 100644 index 0000000000..9779de08e0 --- /dev/null +++ b/include/os/macos/zfs/sys/Makefile.am @@ -0,0 +1,10 @@ +KERNEL_H = \ + $(top_srcdir)/include/os/macos/zfs/sys/kstat_osx.h \ + $(top_srcdir)/include/os/macos/spl/sys/ldi_buf.h \ + $(top_srcdir)/include/os/macos/spl/sys/ldi_impl_osx.h \ + $(top_srcdir)/include/os/macos/spl/sys/ldi_osx.h \ + $(top_srcdir)/include/os/macos/spl/sys/trace_zfs.h \ + $(top_srcdir)/include/os/macos/spl/sys/vdev_disk_os.h \ + $(top_srcdir)/include/os/macos/spl/sys/zfs_ioctl_compat.h \ + $(top_srcdir)/include/os/macos/spl/sys/zfs_vfsops.h \ + $(top_srcdir)/include/os/macos/spl/sys/zfs_znode_impl.h diff --git a/include/os/macos/zfs/sys/ZFSDataset.h b/include/os/macos/zfs/sys/ZFSDataset.h new file mode 100644 index 0000000000..06fa8fdfb0 --- /dev/null +++ b/include/os/macos/zfs/sys/ZFSDataset.h @@ -0,0 +1,141 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2016, Evan Susarret. All rights reserved. 
+ */ + +#ifndef ZFSDATASET_H_INCLUDED +#define ZFSDATASET_H_INCLUDED + +#ifdef __cplusplus + +#include +#include + +#ifdef super +#undef super +#endif +#define super IOMedia + +// #define kZFSContentHint "6A898CC3-1DD2-11B2-99A6-080020736631" +#define kZFSContentHint "ZFS_Dataset" + +#define kZFSIOMediaPrefix "ZFS " +#define kZFSIOMediaSuffix " Media" +#define kZFSDatasetNameKey "ZFS Dataset" +#define kZFSDatasetClassKey "ZFSDataset" + +class ZFSDataset : public IOMedia +{ + OSDeclareDefaultStructors(ZFSDataset) +public: +#if 0 + /* XXX Only for debug tracing */ + virtual bool open(IOService *client, + IOOptionBits options, IOStorageAccess access = 0); + virtual bool isOpen(const IOService *forClient = 0) const; + virtual void close(IOService *client, + IOOptionBits options); + + virtual bool handleOpen(IOService *client, + IOOptionBits options, void *access); + virtual bool handleIsOpen(const IOService *client) const; + virtual void handleClose(IOService *client, + IOOptionBits options); + + virtual bool attach(IOService *provider); + virtual void detach(IOService *provider); + + virtual bool start(IOService *provider); + virtual void stop(IOService *provider); +#endif + + virtual bool init(UInt64 base, UInt64 size, + UInt64 preferredBlockSize, + IOMediaAttributeMask attributes, + bool isWhole, bool isWritable, + const char *contentHint = 0, + OSDictionary *properties = 0); + virtual void free(); + + static ZFSDataset * withDatasetNameAndSize(const char *name, + uint64_t size); + + virtual void read(IOService *client, + UInt64 byteStart, IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion); + virtual void write(IOService *client, + UInt64 byteStart, IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion); + +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + virtual IOReturn synchronize(IOService *client, + UInt64 byteStart, UInt64 byteCount, + IOStorageSynchronizeOptions options = 0); +#else + virtual IOReturn synchronizeCache(IOService *client); +#endif + + virtual IOReturn unmap(IOService *client, + IOStorageExtent *extents, UInt32 extentsCount, +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + IOStorageUnmapOptions options = 0); +#else + UInt32 options = 0); +#endif + + virtual bool lockPhysicalExtents(IOService *client); + virtual IOStorage *copyPhysicalExtent(IOService *client, + UInt64 *byteStart, UInt64 *byteCount); + virtual void unlockPhysicalExtents(IOService *client); + +#if defined(MAC_OS_X_VERSION_10_10) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10) + virtual IOReturn setPriority(IOService *client, + IOStorageExtent *extents, UInt32 extentsCount, + IOStoragePriority priority); +#endif + + virtual UInt64 getPreferredBlockSize() const; + virtual UInt64 getSize() const; + virtual UInt64 getBase() const; + + virtual bool isEjectable() const; + virtual bool isFormatted() const; + virtual bool isWhole() const; + virtual bool isWritable() const; + + virtual const char *getContent() const; + virtual const char *getContentHint() const; + virtual IOMediaAttributeMask getAttributes() const; + +protected: +private: + bool setDatasetName(const char *); +}; + +#endif /* __cplusplus */ + +#endif /* ZFSDATASET_H_INCLUDED */ diff --git a/include/os/macos/zfs/sys/ZFSDatasetProxy.h b/include/os/macos/zfs/sys/ZFSDatasetProxy.h new file mode 100644 index 0000000000..e220cdcf9a --- 
/dev/null +++ b/include/os/macos/zfs/sys/ZFSDatasetProxy.h @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2016, Evan Susarret. All rights reserved. + */ + +#ifndef ZFSDATASETPROXY_H_INCLUDED +#define ZFSDATASETPROXY_H_INCLUDED + +#include + +class ZFSDatasetProxy : public IOBlockStorageDevice +{ + OSDeclareDefaultStructors(ZFSDatasetProxy); +public: + + virtual void free(void); + virtual bool init(OSDictionary *properties); + virtual bool start(IOService *provider); + + /* IOBlockStorageDevice */ + virtual IOReturn doSynchronizeCache(void); + virtual IOReturn doAsyncReadWrite(IOMemoryDescriptor *, + UInt64, UInt64, IOStorageAttributes *, + IOStorageCompletion *); + virtual UInt32 doGetFormatCapacities(UInt64 *, + UInt32) const; + virtual IOReturn doFormatMedia(UInt64 byteCapacity); + virtual IOReturn doEjectMedia(); + virtual char *getVendorString(); + virtual char *getProductString(); + virtual char *getRevisionString(); + virtual char *getAdditionalDeviceInfoString(); + virtual IOReturn reportWriteProtection(bool *); + virtual IOReturn reportRemovability(bool *); + virtual IOReturn reportMediaState(bool *, bool *); + virtual IOReturn reportBlockSize(UInt64 *); + virtual IOReturn reportEjectability(bool *); + virtual IOReturn reportMaxValidBlock(UInt64 *); + + virtual IOReturn setWriteCacheState(bool enabled); + virtual IOReturn getWriteCacheState(bool *enabled); +#if 0 + virtual void read(IOService *client, UInt64 byteStart, + IOMemoryDescriptor *buffer, IOStorageAttributes *attr, + IOStorageCompletion *completion); + virtual void write(IOService *client, UInt64 byteStart, + IOMemoryDescriptor *buffer, IOStorageAttributes *attr, + IOStorageCompletion *completion); +#endif + +protected: +private: + /* These are declared class static to share across instances */ + const char *vendorString; + const char *revisionString; + const char *infoString; + /* These are per-instance */ + const char *productString; + uint64_t _pool_bcount; + bool isReadOnly; +}; + +#endif /* ZFSDATASETPROXY_H_INCLUDED */ diff --git a/include/os/macos/zfs/sys/ZFSDatasetScheme.h b/include/os/macos/zfs/sys/ZFSDatasetScheme.h new file mode 100644 index 0000000000..eaa8bb368d --- /dev/null +++ b/include/os/macos/zfs/sys/ZFSDatasetScheme.h @@ -0,0 +1,126 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2016, Evan Susarret. All rights reserved. + */ + +#ifndef ZFSDATASETSCHEME_H_INCLUDED +#define ZFSDATASETSCHEME_H_INCLUDED + +#define kZFSDatasetSchemeClass "ZFSDatasetScheme" + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +int zfs_osx_proxy_get_osname(const char *bsdname, + char *osname, int len); +int zfs_osx_proxy_get_bsdname(const char *osname, + char *bsdname, int len); + + +void zfs_osx_proxy_remove(const char *osname); +int zfs_osx_proxy_create(const char *osname); + +#ifdef __cplusplus +} /* extern "C" */ + +/* Not C external */ +ZFSDataset * zfs_osx_proxy_get(const char *osname); + +class ZFSDatasetScheme : public IOPartitionScheme +{ + OSDeclareDefaultStructors(ZFSDatasetScheme); +public: + + virtual void free(void); + virtual bool init(OSDictionary *properties); + virtual bool start(IOService *provider); + virtual IOService *probe(IOService *provider, SInt32 *score); + + bool addDataset(const char *osname); + bool removeDataset(const char *osname, bool force); + + /* Compatibility shims */ + virtual void read(IOService *client, + UInt64 byteStart, + IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion); + + virtual void write(IOService *client, + UInt64 byteStart, + IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion); + +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + virtual IOReturn synchronize(IOService *client, + UInt64 byteStart, + UInt64 byteCount, + IOStorageSynchronizeOptions options = 0); +#else + virtual IOReturn synchronizeCache(IOService *client); +#endif + + virtual IOReturn unmap(IOService *client, + IOStorageExtent *extents, + UInt32 extentsCount, +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + IOStorageUnmapOptions options = 0); +#else + UInt32 options = 0); +#endif + + virtual bool lockPhysicalExtents(IOService *client); + + virtual IOStorage *copyPhysicalExtent(IOService *client, + UInt64 * byteStart, + UInt64 * byteCount); + + virtual void unlockPhysicalExtents(IOService *client); + +#if defined(MAC_OS_X_VERSION_10_10) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10) + virtual IOReturn setPriority(IOService *client, + IOStorageExtent *extents, + UInt32 extentsCount, + IOStoragePriority priority); +#endif + +protected: +private: + OSSet *_datasets; + OSOrderedSet *_holes; + uint64_t _max_id; + + uint32_t getNextPartitionID(); + void returnPartitionID(uint32_t part_id); +}; + +#endif /* __cplusplus */ +#endif /* ZFSDATASETSCHEME_H_INCLUDED */ diff --git a/include/os/macos/zfs/sys/ZFSPool.h b/include/os/macos/zfs/sys/ZFSPool.h new file mode 100644 index 0000000000..56a190a0c6 --- /dev/null +++ b/include/os/macos/zfs/sys/ZFSPool.h @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2016, Evan Susarret. All rights reserved. + */ + +#ifndef ZFSPOOL_H_INCLUDED +#define ZFSPOOL_H_INCLUDED + +#ifdef __cplusplus +#include + +#pragma mark - ZFSPool + +#define kZFSPoolNameKey "ZFS Pool Name" +#define kZFSPoolSizeKey "ZFS Pool Size" +#define kZFSPoolGUIDKey "ZFS Pool GUID" +#define kZFSPoolReadOnlyKey "ZFS Pool Read-Only" + +typedef struct spa spa_t; + +class ZFSPool : public IOService { + OSDeclareDefaultStructors(ZFSPool); + +protected: +#if 0 + /* XXX Only for debug tracing */ + virtual bool open(IOService *client, + IOOptionBits options, void *arg = 0); + virtual bool isOpen(const IOService *forClient = 0) const; + virtual void close(IOService *client, + IOOptionBits options); +#endif + + bool setPoolName(const char *name); + + virtual bool handleOpen(IOService *client, + IOOptionBits options, void *arg); + virtual bool handleIsOpen(const IOService *client) const; + virtual void handleClose(IOService *client, + IOOptionBits options); + + virtual bool init(OSDictionary *properties, spa_t *spa); + virtual void free(); + +#if 0 + /* IOBlockStorageDevice */ + virtual IOReturn doSynchronizeCache(void); + virtual IOReturn doAsyncReadWrite(IOMemoryDescriptor *, + UInt64, UInt64, IOStorageAttributes *, + IOStorageCompletion *); + virtual UInt32 doGetFormatCapacities(UInt64 *, + UInt32) const; + virtual IOReturn doFormatMedia(UInt64 byteCapacity); + virtual IOReturn doEjectMedia(); + virtual char *getVendorString(); + virtual char *getProductString(); + virtual char *getRevisionString(); + virtual char *getAdditionalDeviceInfoString(); + virtual IOReturn reportWriteProtection(bool *); + virtual IOReturn reportRemovability(bool *); + virtual IOReturn reportMediaState(bool *, bool *); + virtual IOReturn reportBlockSize(UInt64 *); + virtual IOReturn reportEjectability(bool *); + virtual IOReturn reportMaxValidBlock(UInt64 *); + +public: + virtual void read(IOService *client, UInt64 byteStart, + IOMemoryDescriptor *buffer, IOStorageAttributes *attr, + IOStorageCompletion *completion); + virtual void write(IOService *client, UInt64 byteStart, + IOMemoryDescriptor *buffer, IOStorageAttributes *attr, + IOStorageCompletion *completion); +#endif +public: + static ZFSPool * withProviderAndPool(IOService *, spa_t *); + +private: + OSSet *_openClients; + spa_t *_spa; + +#if 0 + /* These are declared class static to share across instances */ + static const char *vendorString; + static const char *revisionString; + static const char *infoString; + /* These are per-instance */ + const char *productString; + bool isReadOnly; +#endif +}; + +/* C++ wrapper, C uses opaque pointer reference */ +typedef struct spa_iokit { + ZFSPool *proxy; +} spa_iokit_t; + +extern "C" { +#endif /* __cplusplus */ + +/* C functions */ +void spa_iokit_pool_proxy_destroy(spa_t *spa); +int spa_iokit_pool_proxy_create(spa_t *spa); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* 
ZFSPOOL_H_INCLUDED */ diff --git a/include/os/macos/zfs/sys/finderinfo.h b/include/os/macos/zfs/sys/finderinfo.h new file mode 100644 index 0000000000..ee3b48017b --- /dev/null +++ b/include/os/macos/zfs/sys/finderinfo.h @@ -0,0 +1,36 @@ +#ifndef FINDERINFO_H +#define FINDERINFO_H + + +struct FndrExtendedDirInfo { + u_int32_t document_id; + u_int32_t date_added; + u_int16_t extended_flags; + u_int16_t reserved3; + u_int32_t write_gen_counter; +} __attribute__((aligned(2), packed)); + +struct FndrExtendedFileInfo { + u_int32_t document_id; + u_int32_t date_added; + u_int16_t extended_flags; + u_int16_t reserved2; + u_int32_t write_gen_counter; +} __attribute__((aligned(2), packed)); + +/* Finder information */ +struct FndrFileInfo { + u_int32_t fdType; + u_int32_t fdCreator; + u_int16_t fdFlags; + struct { + int16_t v; + int16_t h; + } fdLocation; + int16_t opaque; +} __attribute__((aligned(2), packed)); +typedef struct FndrFileInfo FndrFileInfo; + + + +#endif diff --git a/include/os/macos/zfs/sys/hfs_internal.h b/include/os/macos/zfs/sys/hfs_internal.h new file mode 100644 index 0000000000..db8c76f7a3 --- /dev/null +++ b/include/os/macos/zfs/sys/hfs_internal.h @@ -0,0 +1,183 @@ + +#ifndef HFS_INTERNAL_H +#define HFS_INTERNAL_H + +// BGH - Definitions of HFS vnops that we will need to emulate +// including supporting structures. + +struct hfs_journal_info { + off_t jstart; + off_t jsize; +}; + +struct user32_access_t { + uid_t uid; + short flags; + short num_groups; + int num_files; + user32_addr_t file_ids; + user32_addr_t groups; + user32_addr_t access; +}; + +struct user64_access_t { + uid_t uid; + short flags; + short num_groups; + int num_files; + user64_addr_t file_ids; + user64_addr_t groups; + user64_addr_t access; +}; + +struct user32_ext_access_t { + uint32_t flags; + uint32_t num_files; + uint32_t map_size; + user32_addr_t file_ids; + user32_addr_t bitmap; + user32_addr_t access; + uint32_t num_parents; + user32_addr_t parents; +}; + +struct user64_ext_access_t { + uint32_t flags; + uint32_t num_files; + uint32_t map_size; + user64_addr_t file_ids; + user64_addr_t bitmap; + user64_addr_t access; + uint32_t num_parents; + user64_addr_t parents; +}; + +/* + * HFS specific fcntl()'s + */ +#define HFS_BULKACCESS (FCNTL_FS_SPECIFIC_BASE + 0x00001) +#define HFS_GET_MOUNT_TIME (FCNTL_FS_SPECIFIC_BASE + 0x00002) +#define HFS_GET_LAST_MTIME (FCNTL_FS_SPECIFIC_BASE + 0x00003) +#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004) +#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005) + +/* HFS FS CONTROL COMMANDS */ + +#define HFSIOC_RESIZE_PROGRESS _IOR('h', 1, u_int32_t) +#define HFS_RESIZE_PROGRESS IOCBASECMD(HFSIOC_RESIZE_PROGRESS) + +#define HFSIOC_RESIZE_VOLUME _IOW('h', 2, u_int64_t) +#define HFS_RESIZE_VOLUME IOCBASECMD(HFSIOC_RESIZE_VOLUME) + +#define HFSIOC_CHANGE_NEXT_ALLOCATION _IOWR('h', 3, u_int32_t) +#define HFS_CHANGE_NEXT_ALLOCATION IOCBASECMD(HFSIOC_CHANGE_NEXT_ALLOCATION) +/* + * Magic value for next allocation to use with fcntl to set next allocation + * to zero and never update it again on new block allocation. 
+ */ +#define HFS_NO_UPDATE_NEXT_ALLOCATION 0xffffFFFF + +#define HFSIOC_GETCREATETIME _IOR('h', 4, time_t) +#define HFS_GETCREATETIME IOCBASECMD(HFSIOC_GETCREATETIME) + +#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo) +#define HFS_SETBACKINGSTOREINFO IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO) + +#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) +#define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) + +#define HFSIOC_BULKACCESS _IOW('h', 9, struct user32_access_t) +#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS) + +#define HFSIOC_SETACLSTATE _IOW('h', 10, int32_t) +#define HFS_SETACLSTATE IOCBASECMD(HFSIOC_SETACLSTATE) + +#define HFSIOC_PREV_LINK _IOWR('h', 11, u_int32_t) +#define HFS_PREV_LINK IOCBASECMD(HFSIOC_PREV_LINK) + +#define HFSIOC_NEXT_LINK _IOWR('h', 12, u_int32_t) +#define HFS_NEXT_LINK IOCBASECMD(HFSIOC_NEXT_LINK) + +#define HFSIOC_GETPATH _IOWR('h', 13, pathname_t) +#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH) +#define HFS_GETPATH_VOLUME_RELATIVE 0x1 + +/* This define is deemed secret by Apple */ +#define BUILDPATH_VOLUME_RELATIVE 0x8 + +/* Enable/disable extent-based extended attributes */ +#define HFSIOC_SET_XATTREXTENTS_STATE _IOW('h', 14, u_int32_t) +#define HFS_SET_XATTREXTENTS_STATE IOCBASECMD(HFSIOC_SET_XATTREXTENTS_STATE) + +#define HFSIOC_EXT_BULKACCESS _IOW('h', 15, struct user32_ext_access_t) +#define HFS_EXT_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_EXT_BULKACCESS) + +#define HFSIOC_MARK_BOOT_CORRUPT _IO('h', 16) +#define HFS_MARK_BOOT_CORRUPT IOCBASECMD(HFSIOC_MARK_BOOT_CORRUPT) + +#define HFSIOC_GET_JOURNAL_INFO _IOR('h', 17, struct hfs_journal_info) +#define HFS_FSCTL_GET_JOURNAL_INFO IOCBASECMD(HFSIOC_GET_JOURNAL_INFO) + +#define HFSIOC_SET_VERY_LOW_DISK _IOW('h', 20, u_int32_t) +#define HFS_FSCTL_SET_VERY_LOW_DISK IOCBASECMD(HFSIOC_SET_VERY_LOW_DISK) + +#define HFSIOC_SET_LOW_DISK _IOW('h', 21, u_int32_t) +#define HFS_FSCTL_SET_LOW_DISK IOCBASECMD(HFSIOC_SET_LOW_DISK) + +#define HFSIOC_SET_DESIRED_DISK _IOW('h', 22, u_int32_t) +#define HFS_FSCTL_SET_DESIRED_DISK IOCBASECMD(HFSIOC_SET_DESIRED_DISK) + +#define HFSIOC_SET_ALWAYS_ZEROFILL _IOW('h', 23, int32_t) +#define HFS_SET_ALWAYS_ZEROFILL IOCBASECMD(HFSIOC_SET_ALWAYS_ZEROFILL) + +#define HFSIOC_VOLUME_STATUS _IOR('h', 24, u_int32_t) +#define HFS_VOLUME_STATUS IOCBASECMD(HFSIOC_VOLUME_STATUS) + +/* Disable metadata zone for given volume */ +#define HFSIOC_DISABLE_METAZONE _IO('h', 25) +#define HFS_DISABLE_METAZONE IOCBASECMD(HFSIOC_DISABLE_METAZONE) + +/* Change the next CNID value */ +#define HFSIOC_CHANGE_NEXTCNID _IOWR('h', 26, u_int32_t) +#define HFS_CHANGE_NEXTCNID IOCBASECMD(HFSIOC_CHANGE_NEXTCNID) + +/* Get the low disk space values */ +#define HFSIOC_GET_VERY_LOW_DISK _IOR('h', 27, u_int32_t) +#define HFS_FSCTL_GET_VERY_LOW_DISK IOCBASECMD(HFSIOC_GET_VERY_LOW_DISK) + +#define HFSIOC_GET_LOW_DISK _IOR('h', 28, u_int32_t) +#define HFS_FSCTL_GET_LOW_DISK IOCBASECMD(HFSIOC_GET_LOW_DISK) + +#define HFSIOC_GET_DESIRED_DISK _IOR('h', 29, u_int32_t) +#define HFS_FSCTL_GET_DESIRED_DISK IOCBASECMD(HFSIOC_GET_DESIRED_DISK) + +/* + * revisiond only uses this when something transforms in a way + * the kernel can't track such as "foo.rtf" -> "foo.rtfd" + */ +#define HFSIOC_TRANSFER_DOCUMENT_ID _IOW('h', 32, u_int32_t) +#define HFS_TRANSFER_DOCUMENT_ID IOCBASECMD(HFSIOC_TRANSFER_DOCUMENT_ID) + + +/* fcntl.h */ +#define F_MAKECOMPRESSED 80 + +/* Get file system information for the given volume */ +// #define HFSIOC_GET_FSINFO _IOWR('h', 45, hfs_fsinfo) +// #define 
HFS_GET_FSINFO IOCBASECMD(HFSIOC_GET_FSINFO) + +/* Re-pin hotfile data; argument controls what state gets repinned */ +#define HFSIOC_REPIN_HOTFILE_STATE _IOWR('h', 46, u_int32_t) +#define HFS_REPIN_HOTFILE_STATE IOCBASECMD(HFSIOC_REPIN_HOTFILE_STATE) + +/* Mark a directory or file as worth caching on any underlying "fast" device */ +#define HFSIOC_SET_HOTFILE_STATE _IOWR('h', 47, u_int32_t) +#define HFS_SET_HOTFILE_STATE IOCBASECMD(HFSIOC_SET_HOTFILE_STATE) + +#define APFSIOC_SET_NEAR_LOW_DISK _IOW('J', 17, u_int32_t) +#define APFSIOC_GET_NEAR_LOW_DISK _IOR('J', 18, u_int32_t) + + +// END of definitions + +#endif diff --git a/include/os/macos/zfs/sys/kstat_osx.h b/include/os/macos/zfs/sys/kstat_osx.h new file mode 100644 index 0000000000..1787ad87d5 --- /dev/null +++ b/include/os/macos/zfs/sys/kstat_osx.h @@ -0,0 +1,370 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2014, 2016 Jorgen Lundman + */ + +#ifndef KSTAT_OSX_INCLUDED +#define KSTAT_OSX_INCLUDED + +typedef struct osx_kstat { + kstat_named_t spa_version; + kstat_named_t zpl_version; + + kstat_named_t darwin_active_vnodes; + kstat_named_t darwin_debug; + kstat_named_t darwin_reclaim_nodes; + kstat_named_t darwin_ignore_negatives; + kstat_named_t darwin_ignore_positives; + kstat_named_t darwin_create_negatives; + kstat_named_t darwin_force_formd_normalized; + kstat_named_t darwin_skip_unlinked_drain; + kstat_named_t darwin_use_system_sync; + + kstat_named_t arc_zfs_arc_max; + kstat_named_t arc_zfs_arc_min; + kstat_named_t arc_zfs_arc_meta_limit; + kstat_named_t arc_zfs_arc_meta_min; + kstat_named_t arc_zfs_arc_grow_retry; + kstat_named_t arc_zfs_arc_shrink_shift; + kstat_named_t arc_zfs_arc_p_min_shift; + kstat_named_t arc_zfs_arc_average_blocksize; + + kstat_named_t l2arc_write_max; + kstat_named_t l2arc_write_boost; + kstat_named_t l2arc_headroom; + kstat_named_t l2arc_headroom_boost; + kstat_named_t l2arc_feed_secs; + kstat_named_t l2arc_feed_min_ms; + + kstat_named_t zfs_vdev_max_active; + kstat_named_t zfs_vdev_sync_read_min_active; + kstat_named_t zfs_vdev_sync_read_max_active; + kstat_named_t zfs_vdev_sync_write_min_active; + kstat_named_t zfs_vdev_sync_write_max_active; + kstat_named_t zfs_vdev_async_read_min_active; + kstat_named_t zfs_vdev_async_read_max_active; + kstat_named_t zfs_vdev_async_write_min_active; + kstat_named_t zfs_vdev_async_write_max_active; + kstat_named_t zfs_vdev_scrub_min_active; + kstat_named_t zfs_vdev_scrub_max_active; + kstat_named_t zfs_vdev_async_write_active_min_dirty_percent; + kstat_named_t zfs_vdev_async_write_active_max_dirty_percent; + kstat_named_t zfs_vdev_aggregation_limit; + kstat_named_t zfs_vdev_read_gap_limit; + kstat_named_t zfs_vdev_write_gap_limit; + + kstat_named_t 
arc_lotsfree_percent; + kstat_named_t zfs_dirty_data_max; + kstat_named_t zfs_delay_max_ns; + kstat_named_t zfs_delay_min_dirty_percent; + kstat_named_t zfs_delay_scale; + kstat_named_t spa_asize_inflation; + kstat_named_t zfs_prefetch_disable; + kstat_named_t zfetch_max_streams; + kstat_named_t zfetch_min_sec_reap; + kstat_named_t zfetch_array_rd_sz; + kstat_named_t zfs_default_bs; + kstat_named_t zfs_default_ibs; + kstat_named_t metaslab_aliquot; + kstat_named_t spa_max_replication_override; + kstat_named_t spa_mode_global; + kstat_named_t zfs_flags; + kstat_named_t zfs_txg_timeout; + kstat_named_t zfs_vdev_cache_max; + kstat_named_t zfs_vdev_cache_size; + kstat_named_t zfs_vdev_cache_bshift; + kstat_named_t vdev_mirror_shift; + kstat_named_t zfs_scrub_limit; + kstat_named_t zfs_no_scrub_io; + kstat_named_t zfs_no_scrub_prefetch; + kstat_named_t fzap_default_block_shift; + kstat_named_t zfs_immediate_write_sz; + kstat_named_t zfs_read_chunk_size; + kstat_named_t zfs_nocacheflush; + kstat_named_t zil_replay_disable; + kstat_named_t metaslab_df_alloc_threshold; + kstat_named_t metaslab_df_free_pct; + kstat_named_t zio_injection_enabled; + kstat_named_t zvol_immediate_write_sz; + + kstat_named_t l2arc_noprefetch; + kstat_named_t l2arc_feed_again; + kstat_named_t l2arc_norw; + + kstat_named_t zfs_recover; + + kstat_named_t zfs_free_bpobj_enabled; + + kstat_named_t zfs_send_corrupt_data; + kstat_named_t zfs_send_queue_length; + kstat_named_t zfs_recv_queue_length; + + kstat_named_t zvol_inhibit_dev; + kstat_named_t zfs_send_set_freerecords_bit; + + kstat_named_t zfs_write_implies_delete_child; + kstat_named_t zfs_send_holes_without_birth_time; + + kstat_named_t dbuf_cache_max_bytes; + + kstat_named_t zfs_vdev_queue_depth_pct; + kstat_named_t zio_dva_throttle_enabled; + + kstat_named_t zfs_lua_max_instrlimit; + kstat_named_t zfs_lua_max_memlimit; + + kstat_named_t zfs_trim_extent_bytes_max; + kstat_named_t zfs_trim_extent_bytes_min; + kstat_named_t zfs_trim_metaslab_skip; + kstat_named_t zfs_trim_txg_batch; + kstat_named_t zfs_trim_queue_limit; + + kstat_named_t zfs_send_unmodified_spill_blocks; + kstat_named_t zfs_special_class_metadata_reserve_pct; + + kstat_named_t zfs_vdev_raidz_impl; + kstat_named_t icp_gcm_impl; + kstat_named_t icp_aes_impl; + kstat_named_t zfs_fletcher_4_impl; + + kstat_named_t zfs_expire_snapshot; + kstat_named_t zfs_admin_snapshot; + kstat_named_t zfs_auto_snapshot; + + kstat_named_t zfs_spa_discard_memory_limit; + kstat_named_t zfs_async_block_max_blocks; + kstat_named_t zfs_initialize_chunk_size; + kstat_named_t zfs_scan_suspend_progress; + kstat_named_t zfs_removal_suspend_progress; + kstat_named_t zfs_livelist_max_entries; + + kstat_named_t zfs_allow_redacted_dataset_mount; + kstat_named_t zfs_checksum_events_per_second; + kstat_named_t zfs_commit_timeout_pct; + kstat_named_t zfs_compressed_arc_enabled; + kstat_named_t zfs_condense_indirect_commit_entry_delay_ms; + kstat_named_t zfs_condense_min_mapping_bytes; + kstat_named_t zfs_deadman_checktime_ms; + kstat_named_t zfs_deadman_failmode; + kstat_named_t zfs_deadman_synctime_ms; + kstat_named_t zfs_deadman_ziotime_ms; + kstat_named_t zfs_disable_ivset_guid_check; + kstat_named_t zfs_initialize_value; + kstat_named_t zfs_keep_log_spacemaps_at_export; + kstat_named_t l2arc_rebuild_blocks_min_l2size; + kstat_named_t l2arc_rebuild_enabled; + kstat_named_t l2arc_trim_ahead; + kstat_named_t zfs_livelist_condense_new_alloc; + kstat_named_t zfs_livelist_condense_sync_cancel; + kstat_named_t 
zfs_livelist_condense_sync_pause; + kstat_named_t zfs_livelist_condense_zthr_cancel; + kstat_named_t zfs_livelist_condense_zthr_pause; + kstat_named_t zfs_livelist_min_percent_shared; + kstat_named_t zfs_max_dataset_nesting; + kstat_named_t zfs_max_missing_tvds; + kstat_named_t metaslab_debug_load; + kstat_named_t metaslab_force_ganging; + kstat_named_t zfs_multihost_fail_intervals; + kstat_named_t zfs_multihost_import_intervals; + kstat_named_t zfs_multihost_interval; + kstat_named_t zfs_override_estimate_recordsize; + kstat_named_t zfs_remove_max_segment; + kstat_named_t zfs_resilver_min_time_ms; + kstat_named_t zfs_scan_legacy; + kstat_named_t zfs_scan_vdev_limit; + kstat_named_t zfs_slow_io_events_per_second; + kstat_named_t spa_load_verify_data; + kstat_named_t spa_load_verify_metadata; + kstat_named_t zfs_unlink_suspend_progress; + kstat_named_t zfs_vdev_min_ms_count; + kstat_named_t vdev_validate_skip; + kstat_named_t zfs_zevent_len_max; + kstat_named_t zio_slow_io_ms; +} osx_kstat_t; + +extern unsigned int debug_vnop_osx_printf; +extern unsigned int zfs_vnop_ignore_negatives; +extern unsigned int zfs_vnop_ignore_positives; +extern unsigned int zfs_vnop_create_negatives; +extern unsigned int zfs_vnop_skip_unlinked_drain; +extern uint64_t zfs_vfs_sync_paranoia; +extern uint64_t vnop_num_vnodes; +extern uint64_t vnop_num_reclaims; + +extern uint64_t zfs_arc_max; +extern uint64_t zfs_arc_min; +extern unsigned long zfs_arc_meta_limit; +extern uint64_t zfs_arc_meta_min; +extern int zfs_arc_grow_retry; +extern int zfs_arc_shrink_shift; +extern int zfs_arc_p_min_shift; +extern int zfs_arc_average_blocksize; + +extern uint64_t l2arc_write_max; +extern uint64_t l2arc_write_boost; +extern uint64_t l2arc_headroom; +extern uint64_t l2arc_headroom_boost; +extern uint64_t l2arc_feed_secs; +extern uint64_t l2arc_feed_min_ms; + +extern uint32_t zfs_vdev_max_active; +extern uint32_t zfs_vdev_sync_read_min_active; +extern uint32_t zfs_vdev_sync_read_max_active; +extern uint32_t zfs_vdev_sync_write_min_active; +extern uint32_t zfs_vdev_sync_write_max_active; +extern uint32_t zfs_vdev_async_read_min_active; +extern uint32_t zfs_vdev_async_read_max_active; +extern uint32_t zfs_vdev_async_write_min_active; +extern uint32_t zfs_vdev_async_write_max_active; +extern uint32_t zfs_vdev_scrub_min_active; +extern uint32_t zfs_vdev_scrub_max_active; +extern int zfs_vdev_async_write_active_min_dirty_percent; +extern int zfs_vdev_async_write_active_max_dirty_percent; +extern int zfs_vdev_aggregation_limit; +extern int zfs_vdev_read_gap_limit; +extern int zfs_vdev_write_gap_limit; + +extern uint_t arc_reduce_dnlc_percent; +extern int arc_lotsfree_percent; +extern hrtime_t zfs_delay_max_ns; +extern int spa_asize_inflation; +extern unsigned int zfetch_max_streams; +extern unsigned int zfetch_min_sec_reap; +extern int zfs_default_bs; +extern int zfs_default_ibs; +extern uint64_t metaslab_aliquot; +extern int zfs_vdev_cache_max; +extern int spa_max_replication_override; +extern int zfs_no_scrub_io; +extern int zfs_no_scrub_prefetch; +extern ssize_t zfs_immediate_write_sz; +extern offset_t zfs_read_chunk_size; +extern uint64_t metaslab_df_alloc_threshold; +extern int metaslab_df_free_pct; +extern ssize_t zvol_immediate_write_sz; + +extern boolean_t l2arc_noprefetch; +extern boolean_t l2arc_feed_again; +extern boolean_t l2arc_norw; + +extern int zfs_top_maxinflight; +extern int zfs_resilver_delay; +extern int zfs_scrub_delay; +extern int zfs_scan_idle; + +extern int64_t zfs_free_bpobj_enabled; + +extern int 
zfs_send_corrupt_data; +extern int zfs_send_queue_length; +extern int zfs_recv_queue_length; + +extern uint64_t zvol_inhibit_dev; +extern uint64_t zfs_send_set_freerecords_bit; + +extern uint64_t zfs_write_implies_delete_child; +extern uint64_t send_holes_without_birth_time; +extern uint64_t zfs_send_holes_without_birth_time; + +extern uint64_t dbuf_cache_max_bytes; + +extern int zfs_vdev_queue_depth_pct; +extern boolean_t zio_dva_throttle_enabled; + +extern uint64_t zfs_lua_max_instrlimit; +extern uint64_t zfs_lua_max_memlimit; + + +extern uint64_t zfs_trim_extent_bytes_max; +extern uint64_t zfs_trim_extent_bytes_min; +extern unsigned int zfs_trim_metaslab_skip; +extern uint64_t zfs_trim_txg_batch; +extern uint64_t zfs_trim_queue_limit; + +extern uint64_t zfs_send_unmodified_spill_blocks; +extern uint64_t zfs_special_class_metadata_reserve_pct; + +extern int zfs_vnop_force_formd_normalized_output; + +extern int zfs_arc_min_prefetch_ms; +extern int zfs_arc_min_prescient_prefetch_ms; + +extern int zfs_expire_snapshot; +extern int zfs_admin_snapshot; +extern int zfs_auto_snapshot; + +extern unsigned long zfs_spa_discard_memory_limit; +extern unsigned long zfs_async_block_max_blocks; +extern unsigned long zfs_initialize_chunk_size; +extern int zfs_scan_suspend_progress; +extern int zfs_removal_suspend_progress; +extern unsigned long zfs_livelist_max_entries; + +extern int zfs_allow_redacted_dataset_mount; +extern unsigned int zfs_checksum_events_per_second; +extern int zfs_commit_timeout_pct; +extern int zfs_compressed_arc_enabled; +extern int zfs_condense_indirect_commit_entry_delay_ms; +extern unsigned long zfs_condense_min_mapping_bytes; +extern unsigned long zfs_deadman_checktime_ms; +extern char *zfs_deadman_failmode; +extern unsigned long zfs_deadman_synctime_ms; +extern unsigned long zfs_deadman_ziotime_ms; +extern int zfs_disable_ivset_guid_check; +extern unsigned long zfs_initialize_value; +extern int zfs_keep_log_spacemaps_at_export; +extern unsigned long l2arc_rebuild_blocks_min_l2size; +extern int l2arc_rebuild_enabled; +extern unsigned long l2arc_trim_ahead; +extern int zfs_livelist_condense_new_alloc; +extern int zfs_livelist_condense_sync_cancel; +extern int zfs_livelist_condense_sync_pause; +extern int zfs_livelist_condense_zthr_cancel; +extern int zfs_livelist_condense_zthr_pause; +extern int zfs_livelist_min_percent_shared; +extern int zfs_max_dataset_nesting; +extern unsigned long zfs_max_missing_tvds; +extern int metaslab_debug_load; +extern unsigned long metaslab_force_ganging; +extern unsigned int zfs_multihost_fail_intervals; +extern unsigned int zfs_multihost_import_intervals; +extern unsigned long zfs_multihost_interval; +extern int zfs_override_estimate_recordsize; +extern int zfs_remove_max_segment; +extern int zfs_resilver_min_time_ms; +extern int zfs_scan_legacy; +extern unsigned long zfs_scan_vdev_limit; +extern unsigned int zfs_slow_io_events_per_second; +extern int spa_load_verify_data; +extern int spa_load_verify_metadata; +extern int zfs_unlink_suspend_progress; +extern int zfs_vdev_min_ms_count; +extern int vdev_validate_skip; +extern int zfs_zevent_len_max; +extern int zio_slow_io_ms; + +int kstat_osx_init(void); +void kstat_osx_fini(void); + +int arc_kstat_update(kstat_t *ksp, int rw); +int arc_kstat_update_osx(kstat_t *ksp, int rw); + +#endif diff --git a/include/os/macos/zfs/sys/ldi_buf.h b/include/os/macos/zfs/sys/ldi_buf.h new file mode 100644 index 0000000000..9b69b0610a --- /dev/null +++ b/include/os/macos/zfs/sys/ldi_buf.h @@ -0,0 +1,77 @@ +/* + * 
CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + * + * OS X implementation of ldi_ named functions for ZFS written by + * Evan Susarret in 2015. + */ + +#ifndef _SYS_LDI_BUF_H +#define _SYS_LDI_BUF_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * Buffer context for LDI strategy + */ +typedef struct ldi_buf { + /* For client use */ + int (*b_iodone)(struct ldi_buf *); /* Callback */ + union { + void *b_addr; /* Passed buffer address */ + } b_un; /* Union to match illumos */ + uint64_t b_bcount; /* Size of IO */ + uint64_t b_bufsize; /* Size of buffer */ + uint64_t b_lblkno; /* logical block number */ + uint64_t b_resid; /* Remaining IO size */ + int b_flags; /* Read or write, options */ + int b_error; /* IO error code */ + uint64_t pad; /* Pad to 64 bytes */ +} ldi_buf_t; /* XXX Currently 64b */ + +ldi_buf_t *ldi_getrbuf(int); +void ldi_freerbuf(ldi_buf_t *); +void ldi_bioinit(ldi_buf_t *); + +/* Define macros to get and release a buffer */ +#define getrbuf(flags) ldi_getrbuf(flags) +#define freerbuf(lbp) ldi_freerbuf(lbp) +#define bioinit(lbp) ldi_bioinit(lbp) +#define geterror(lbp) (lbp->b_error) +#define biowait(lbp) (0) + +#define lbtodb(bytes) \ + (bytes >> DEV_BSHIFT) +#define dbtolb(blkno) \ + (blkno << DEV_BSHIFT) +#define ldbtob(blkno) dbtolb(blkno) + +/* Redefine B_BUSY */ +#define B_BUSY B_PHYS + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _SYS_LDI_BUF_H */ diff --git a/include/os/macos/zfs/sys/ldi_impl_osx.h b/include/os/macos/zfs/sys/ldi_impl_osx.h new file mode 100644 index 0000000000..68c8d121ab --- /dev/null +++ b/include/os/macos/zfs/sys/ldi_impl_osx.h @@ -0,0 +1,226 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ +/* + * Portions of this document are copyright Oracle and Joyent. + * OS X implementation of ldi_ named functions for ZFS written by + * Evan Susarret in 2015. + */ + +#ifndef _SYS_LDI_IMPL_OSX_H +#define _SYS_LDI_IMPL_OSX_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * OS X + */ +#define LDI_TYPE_INVALID 0x0 /* uninitialized */ +#define LDI_TYPE_IOKIT 0x1 /* IOMedia device */ +#define LDI_TYPE_VNODE 0x2 /* vnode (bdev) device */ + +/* + * OS X + */ +#define LDI_STATUS_OFFLINE 0x0 /* device offline (dead-end) */ +#define LDI_STATUS_CLOSED 0x1 /* just initialized or closed */ +#define LDI_STATUS_CLOSING 0x2 /* close in-progress */ +#define LDI_STATUS_OPENING 0x3 /* open in-progress */ +#define LDI_STATUS_ONLINE 0x4 /* device is open and active */ +typedef uint_t ldi_status_t; + +/* + * LDI hash definitions + */ +#define LH_HASH_SZ 32 /* number of hash lists */ + +/* + * Flag for LDI handle's lh_flags field + */ +#define LH_FLAGS_NOTIFY 0x0001 /* invoked in context of a notify */ + + +/* + * LDI handle (OS X) + */ +typedef struct _handle_iokit *handle_iokit_t; +typedef struct _handle_vnode *handle_vnode_t; +typedef struct _handle_notifier *handle_notifier_t; + +struct ldi_handle { + /* protected by ldi_handle_hash_lock */ + list_node_t lh_node; /* list membership */ + uint_t lh_ref; /* active references */ + uint_t lh_flags; /* for notify event */ + + /* protected by handle lh_lock */ + kmutex_t lh_lock; /* internal lock */ + kcondvar_t lh_cv; /* for concurrent open */ + ldi_status_t lh_status; /* Closed, Offline, Online */ + uint_t lh_openref; /* open client count */ + + /* unique/static fields in the handle */ + union ldi_handle_tsd { + handle_iokit_t iokit_tsd; + handle_vnode_t vnode_tsd; + } lh_tsd; /* union */ + handle_notifier_t lh_notifier; /* pointer */ + uint_t lh_type; /* IOKit or vnode */ + uint_t lh_fmode; /* FREAD | FWRITE */ + dev_t lh_dev; /* device number */ + uint_t pad; /* pad to 96 bytes */ +}; /* XXX Currently 96b */ + +/* Shared functions */ +struct ldi_handle *handle_alloc_common(uint_t, dev_t, int); +struct ldi_handle *handle_find(dev_t, int, boolean_t); +struct ldi_handle *handle_add(struct ldi_handle *); +int handle_status_change(struct ldi_handle *, int); +void handle_hold(struct ldi_handle *); +void handle_release(struct ldi_handle *); +ldi_status_t handle_open_start(struct ldi_handle *); +void handle_open_done(struct ldi_handle *, ldi_status_t); + +/* Handle IOKit functions */ +void handle_free_iokit(struct ldi_handle *); +struct ldi_handle *handle_alloc_iokit(dev_t, int); +int handle_register_notifier(struct ldi_handle *); +int handle_close_iokit(struct ldi_handle *); +int handle_free_ioservice(struct ldi_handle *); +int handle_alloc_ioservice(struct ldi_handle *); +int handle_remove_notifier(struct ldi_handle *); +int handle_set_wce_iokit(struct ldi_handle *, int *); +int handle_get_size_iokit(struct ldi_handle *, uint64_t *); +int handle_get_dev_path_iokit(struct ldi_handle *lh, + char *path, int len); +int handle_get_media_info_iokit(struct ldi_handle *, + struct dk_minfo *); +int handle_get_media_info_ext_iokit(struct ldi_handle *, + struct dk_minfo_ext *); +int handle_check_media_iokit(struct ldi_handle *, int *); +int handle_is_solidstate_iokit(struct ldi_handle *, int *); +int handle_sync_iokit(struct ldi_handle *); +int 
buf_strategy_iokit(ldi_buf_t *, struct ldi_handle *); +int ldi_open_media_by_dev(dev_t, int, ldi_handle_t *); +int ldi_open_media_by_path(char *, int, ldi_handle_t *); +int handle_get_bootinfo_iokit(struct ldi_handle *, + struct io_bootinfo *); +int handle_features_iokit(struct ldi_handle *, + uint32_t *); +int handle_unmap_iokit(struct ldi_handle *, + dkioc_free_list_ext_t *); + +/* Handle vnode functions */ +dev_t dev_from_path(char *); +void handle_free_vnode(struct ldi_handle *); +struct ldi_handle *handle_alloc_vnode(dev_t, int); +int handle_close_vnode(struct ldi_handle *); +int handle_get_size_vnode(struct ldi_handle *, uint64_t *); +int handle_get_dev_path_vnode(struct ldi_handle *lh, + char *path, int len); +int handle_get_media_info_vnode(struct ldi_handle *, + struct dk_minfo *); +int handle_get_media_info_ext_vnode(struct ldi_handle *, + struct dk_minfo_ext *); +int handle_check_media_vnode(struct ldi_handle *, int *); +int handle_is_solidstate_vnode(struct ldi_handle *, int *); +int handle_sync_vnode(struct ldi_handle *); +int buf_strategy_vnode(ldi_buf_t *, struct ldi_handle *); +int ldi_open_vnode_by_path(char *, dev_t, int, ldi_handle_t *); +int handle_get_bootinfo_vnode(struct ldi_handle *, + struct io_bootinfo *); +int handle_features_vnode(struct ldi_handle *, + uint32_t *); +int handle_unmap_vnode(struct ldi_handle *, + dkioc_free_list_ext_t *); + +/* + * LDI event information + */ +typedef struct ldi_ev_callback_impl { + struct ldi_handle *lec_lhp; +#ifdef illumos + dev_info_t *lec_dip; +#endif + dev_t lec_dev; + int lec_spec; + int (*lec_notify)(ldi_handle_t, ldi_ev_cookie_t, void *, void *); + void (*lec_finalize)(ldi_handle_t, ldi_ev_cookie_t, int, + void *, void *); + void *lec_arg; + void *lec_cookie; + void *lec_id; + list_node_t lec_list; +} ldi_ev_callback_impl_t; /* XXX Currently 72b */ + +/* + * Members of "struct ldi_ev_callback_list" are protected by their le_lock + * member. The struct is currently only used once, as a file-level global, + * and the locking protocol is currently implemented in ldi_ev_lock() and + * ldi_ev_unlock(). + * + * When delivering events to subscribers, ldi_invoke_notify() and + * ldi_invoke_finalize() will walk the list of callbacks: le_head. It is + * possible that an invoked callback function will need to unregister an + * arbitrary number of callbacks from this list. + * + * To enable ldi_ev_remove_callbacks() to remove elements from the list + * without breaking the walk-in-progress, we store the next element in the + * walk direction on the struct as le_walker_next and le_walker_prev. 
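 + * + * A rough illustration only (not in the original source; it assumes the + * usual SPL list_next()/list_remove() calls and hypothetical locals: 'cbl' + * pointing at this struct and 'lecp' being the entry to drop): a callback + * that unregisters an entry mid-walk would first advance the saved walker + * past it before unlinking it, e.g. + * + *	if (lecp == cbl->le_walker_next) + *		cbl->le_walker_next = list_next(&cbl->le_head, lecp); + *	list_remove(&cbl->le_head, lecp);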
+ */ +struct ldi_ev_callback_list { + kmutex_t le_lock; + kcondvar_t le_cv; + uint64_t le_busy; + void *le_thread; + list_t le_head; + ldi_ev_callback_impl_t *le_walker_next; + ldi_ev_callback_impl_t *le_walker_prev; +}; /* XXX Currently 96b, but only used once */ + +int ldi_invoke_notify(dev_info_t *, dev_t, int, char *, void *); +void ldi_invoke_finalize(dev_info_t *, dev_t, int, char *, int, void *); +int e_ddi_offline_notify(dev_info_t *); +void e_ddi_offline_finalize(dev_info_t *, int); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _SYS_LDI_IMPL_OSX_H */ diff --git a/include/os/macos/zfs/sys/ldi_osx.h b/include/os/macos/zfs/sys/ldi_osx.h new file mode 100644 index 0000000000..2d78017c42 --- /dev/null +++ b/include/os/macos/zfs/sys/ldi_osx.h @@ -0,0 +1,153 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ +/* + * Portions of this document are copyright Oracle and Joyent. + * OS X implementation of ldi_ named functions for ZFS written by + * Evan Susarret in 2015. + */ + +#ifndef _SYS_LDI_OSX_H +#define _SYS_LDI_OSX_H + +#include + +/* + * OS X - The initialization/destructor functions are available + * for zfs-osx.cpp to call during zfs_init/zfs_fini. + */ +#ifdef __cplusplus +extern "C" { + +int ldi_init(void *); /* passes IOService provider */ +void ldi_fini(); /* teardown */ +#endif /* __cplusplus */ + +/* + * Opaque layered driver data structures. 
+ * vdev_disk and other C callers may use these LDI interfaces + * ldi_ident_t is already defined as typedef void* by spl sunddi.h + */ +typedef struct __ldi_handle *ldi_handle_t; +typedef struct __ldi_callback_id *ldi_callback_id_t; +typedef struct __ldi_ev_cookie *ldi_ev_cookie_t; + +/* + * LDI event interface related + */ +#define LDI_EV_SUCCESS 0 +#define LDI_EV_FAILURE (-1) +#define LDI_EV_NONE (-2) /* no matching callbacks registered */ +#define LDI_EV_OFFLINE "LDI:EVENT:OFFLINE" +#define LDI_EV_DEGRADE "LDI:EVENT:DEGRADE" +#define LDI_EV_DEVICE_REMOVE "LDI:EVENT:DEVICE_REMOVE" + +#define LDI_EV_CB_VERS_1 1 +#define LDI_EV_CB_VERS LDI_EV_CB_VERS_1 + +typedef struct ldi_ev_callback { + uint_t cb_vers; + int (*cb_notify)(ldi_handle_t, ldi_ev_cookie_t, void *, void *); + void (*cb_finalize)(ldi_handle_t, ldi_ev_cookie_t, int, + void *, void *); +} ldi_ev_callback_t; + +/* Structs passed to media_get_info */ +struct dk_minfo { + uint32_t dki_capacity; /* Logical block count */ + uint32_t dki_lbsize; /* Logical block size */ +}; /* (8b) */ + +struct dk_minfo_ext { + uint64_t dki_capacity; /* Logical block count */ + uint32_t dki_lbsize; /* Logical block size */ + uint32_t dki_pbsize; /* Physical block size */ +}; /* (16b) */ + +struct io_bootinfo { + char dev_path[MAXPATHLEN]; /* IODeviceTree path */ + uint64_t dev_size; /* IOMedia device size */ +}; + +/* + * XXX This struct is defined in spl but was unused until now. + * There is a reference in zvol.c zvol_ioctl, commented out. + */ +#if 0 +struct dk_callback { + void (*dkc_callback)(void *dkc_cookie, int error); + void *dkc_cookie; + int dkc_flag; +}; /* XXX Currently 20b */ +#endif + +/* XXX Already defined in spl dkio.h (used elsewhere) */ +#if 0 +#define DKIOCFLUSHWRITECACHE (DKIOC | 34) +#endif + +#define FLUSH_VOLATILE 0x1 +#define DKIOCGMEDIAINFOEXT (DKIOC | 48) + +/* XXX Created this additional ioctl */ +#define DKIOCGETBOOTINFO (DKIOC | 99) + +/* + * LDI Handle manipulation functions + */ +int ldi_open_by_dev(dev_t, int, int, cred_t *, + ldi_handle_t *, __unused ldi_ident_t); +int ldi_open_by_name(char *, int, cred_t *, + ldi_handle_t *, __unused ldi_ident_t); + +int ldi_close(ldi_handle_t, int, cred_t *); + +int ldi_sync(ldi_handle_t); +int ldi_get_size(ldi_handle_t, uint64_t *); +int ldi_ioctl(ldi_handle_t, int, intptr_t, int, cred_t *, int *); +int ldi_strategy(ldi_handle_t, ldi_buf_t *); + +/* + * LDI events related declarations + */ +extern int ldi_ev_get_cookie(ldi_handle_t, char *, ldi_ev_cookie_t *); +extern char *ldi_ev_get_type(ldi_ev_cookie_t); +extern int ldi_ev_register_callbacks(ldi_handle_t, ldi_ev_cookie_t, + ldi_ev_callback_t *, void *, ldi_callback_id_t *); +extern int ldi_ev_remove_callbacks(ldi_callback_id_t); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _SYS_LDI_OSX_H */ diff --git a/include/os/macos/zfs/sys/trace_zfs.h b/include/os/macos/zfs/sys/trace_zfs.h new file mode 100644 index 0000000000..f32ba529ec --- /dev/null +++ b/include/os/macos/zfs/sys/trace_zfs.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM zfs + +#if !defined(_TRACE_ZFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ZFS_H + +#include +#include + +/* + * The sys/trace_dbgmsg.h header defines tracepoint events for + * dprintf(), dbgmsg(), and SET_ERROR(). + */ +#define _SYS_TRACE_DBGMSG_INDIRECT +#include +#undef _SYS_TRACE_DBGMSG_INDIRECT + +/* + * Redefine the DTRACE_PROBE* functions to use Linux tracepoints + */ +#undef DTRACE_PROBE1 +#define DTRACE_PROBE1(name, t1, arg1) \ + trace_zfs_##name((arg1)) + +#undef DTRACE_PROBE2 +#define DTRACE_PROBE2(name, t1, arg1, t2, arg2) \ + trace_zfs_##name((arg1), (arg2)) + +#undef DTRACE_PROBE3 +#define DTRACE_PROBE3(name, t1, arg1, t2, arg2, t3, arg3) \ + trace_zfs_##name((arg1), (arg2), (arg3)) + +#undef DTRACE_PROBE4 +#define DTRACE_PROBE4(name, t1, arg1, t2, arg2, t3, arg3, t4, arg4) \ + trace_zfs_##name((arg1), (arg2), (arg3), (arg4)) + +#endif /* _TRACE_ZFS_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH sys +#define TRACE_INCLUDE_FILE trace +#include + +#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */ diff --git a/include/os/macos/zfs/sys/vdev_disk_os.h b/include/os/macos/zfs/sys/vdev_disk_os.h new file mode 100644 index 0000000000..79b68c7ee6 --- /dev/null +++ b/include/os/macos/zfs/sys/vdev_disk_os.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _ZFS_VDEV_DISK_OS_H +#define _ZFS_VDEV_DISK_OS_H + +#include + +typedef struct vdev_disk { + ldi_handle_t vd_lh; + list_t vd_ldi_cbs; + boolean_t vd_ldi_offline; +} vdev_disk_t; + +/* + * The vdev_buf_t is used to translate between zio_t and buf_t, and back again. + */ +typedef struct vdev_buf { + ldi_buf_t vb_buf; /* buffer that describes the io */ + zio_t *vb_io; /* pointer back to the original zio_t */ +} vdev_buf_t; + + +extern int vdev_disk_ldi_physio(ldi_handle_t, caddr_t, size_t, uint64_t, int); + +#endif diff --git a/include/os/macos/zfs/sys/zfs_boot.h b/include/os/macos/zfs/sys/zfs_boot.h new file mode 100644 index 0000000000..cad5c0bdfd --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_boot.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2016, Evan Susarret. All rights reserved. + */ + +#ifndef ZFS_BOOT_H_INCLUDED +#define ZFS_BOOT_H_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Link data vdevs to virtual devices */ +int zfs_boot_update_bootinfo(spa_t *spa); + +int zfs_attach_devicedisk(zfsvfs_t *zfsvfs); +int zfs_detach_devicedisk(zfsvfs_t *zfsvfs); +int zfs_devdisk_get_path(void *, char *, int); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + + + +#ifdef __cplusplus +#include +bool zfs_boot_init(IOService *); +void zfs_boot_fini(); +#endif /* __cplusplus */ + + +#endif /* ZFS_BOOT_H_INCLUDED */ diff --git a/include/os/macos/zfs/sys/zfs_context_os.h b/include/os/macos/zfs/sys/zfs_context_os.h new file mode 100644 index 0000000000..097152f26e --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_context_os.h @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SPL_ZFS_CONTEXT_OS_H +#define _SPL_ZFS_CONTEXT_OS_H + +#include +#include +#include + +#define MSEC_TO_TICK(msec) ((msec) / (MILLISEC / hz)) + +#define KMALLOC_MAX_SIZE MAXPHYS + +#define MNTTYPE_ZFS_SUBTYPE ('Z'<<24|'F'<<16|'S'<<8) + +#ifndef MAX_UPL_TRANSFER +#define MAX_UPL_TRANSFER 256 +#endif + +#define flock64_t struct flock + +struct spa_iokit; +typedef struct spa_iokit spa_iokit_t; + +#define noinline __attribute__((noinline)) + +/* really? 
*/ +#define kpreempt_disable() ((void)0) +#define kpreempt_enable() ((void)0) +#define cond_resched() (void)thread_block(THREAD_CONTINUE_NULL); +#define schedule() (void)thread_block(THREAD_CONTINUE_NULL); + +#define current curthread + +extern boolean_t ml_set_interrupts_enabled(boolean_t); + +/* Make sure kmem and vmem are already included */ +#include +#include + +/* Since Linux code uses vmem_free() and we already have one: */ +#define vmem_free(A, B) zfs_kmem_free((A), (B)) +#define vmem_alloc(A, B) zfs_kmem_alloc((A), (B)) +#define vmem_zalloc(A, B) zfs_kmem_zalloc((A), (B)) + +typedef int fstrans_cookie_t; +#define spl_fstrans_mark() (0) +#define spl_fstrans_unmark(x) (x = 0) + +#ifdef _KERNEL + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +struct hlist_head { + struct hlist_node *first; +}; + +typedef struct { + volatile int counter; +} atomic_t; + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define barrier() __asm__ __volatile__("": : :"memory") +#define smp_rmb() barrier() + +#define READ_ONCE(x) ( \ +{ \ + __typeof(x) __var = ( \ + { \ + barrier(); \ + ACCESS_ONCE(x); \ + }); \ + barrier(); \ + __var; \ + }) + +#define WRITE_ONCE(x, v) do { \ + barrier(); \ + ACCESS_ONCE(x) = (v); \ + barrier(); \ + } while (0) + +/* BEGIN CSTYLED */ +#define hlist_for_each(p, head) \ + for (p = (head)->first; p; p = (p)->next) + +#define hlist_entry(ptr, type, field) container_of(ptr, type, field) +/* END CSTYLED */ + +static inline void +hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + n->next = h->first; + if (h->first != NULL) + h->first->pprev = &n->next; + WRITE_ONCE(h->first, n); + n->pprev = &h->first; +} + +static inline void +hlist_del(struct hlist_node *n) +{ + WRITE_ONCE(*(n->pprev), n->next); + if (n->next != NULL) + n->next->pprev = n->pprev; +} + + +#define HLIST_HEAD_INIT { } +#define HLIST_HEAD(name) struct hlist_head name = HLIST_HEAD_INIT +#define INIT_HLIST_HEAD(head) (head)->first = NULL + +/* BEGIN CSTYLED */ +#define INIT_HLIST_NODE(node) \ + do { \ + (node)->next = NULL; \ + (node)->pprev = NULL; \ + } while (0) + +/* END CSTYLED */ + +static inline int +atomic_read(const atomic_t *v) +{ + return (READ_ONCE(v->counter)); +} + +static inline int +atomic_inc(atomic_t *v) +{ + return (__sync_fetch_and_add(&v->counter, 1) + 1); +} + +static inline int +atomic_dec(atomic_t *v) +{ + return (__sync_fetch_and_add(&v->counter, -1) - 1); +} + +extern void kx_qsort(void *array, size_t nm, size_t member_size, + int (*cmpf)(const void *, const void *)); +#define qsort kx_qsort + +#define strstr kmem_strstr + +void spa_create_os(void *spa); +void spa_export_os(void *spa); +void spa_activate_os(void *spa); +void spa_deactivate_os(void *spa); + +#endif // _KERNEL + +#endif diff --git a/include/os/macos/zfs/sys/zfs_ctldir.h b/include/os/macos/zfs/sys/zfs_ctldir.h new file mode 100644 index 0000000000..4cacf1aefe --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_ctldir.h @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (C) 2011 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * LLNL-CODE-403049. + * Rewritten for Linux by: + * Rohan Puri + * Brian Behlendorf + */ + +#ifndef _ZFS_CTLDIR_H +#define _ZFS_CTLDIR_H + +#include +#include +#include +#include + +#define ZFS_CTLDIR_NAME ".zfs" +#define ZFS_SNAPDIR_NAME "snapshot" +#define ZFS_SHAREDIR_NAME "shares" + +#define zfs_has_ctldir(zdp) \ + ((zdp)->z_id == ZTOZSB(zdp)->z_root && \ + (ZTOZSB(zdp)->z_ctldir != NULL)) +#define zfs_show_ctldir(zdp) \ + (zfs_has_ctldir(zdp) && \ + (ZTOZSB(zdp)->z_show_ctldir)) + +struct path; + +extern int zfs_expire_snapshot; + +/* zfsctl generic functions */ +extern int zfsctl_create(zfsvfs_t *); +extern void zfsctl_destroy(zfsvfs_t *); +extern struct vnode *zfsctl_root(znode_t *); +extern void zfsctl_init(void); +extern void zfsctl_fini(void); +extern boolean_t zfsctl_is_node(struct vnode *ip); +extern boolean_t zfsctl_is_snapdir(struct vnode *ip); +extern int zfsctl_fid(struct vnode *ip, fid_t *fidp); + +/* zfsctl '.zfs' functions */ +extern int zfsctl_root_lookup(struct vnode *dip, char *name, + struct vnode **ipp, int flags, cred_t *cr, int *direntflags, + struct componentname *realpnp); + +/* zfsctl '.zfs/snapshot' functions */ +extern int zfsctl_snapdir_lookup(struct vnode *dip, char *name, + struct vnode **ipp, int flags, cred_t *cr, int *direntflags, + struct componentname *realpnp); +extern int zfsctl_snapdir_rename(struct vnode *sdip, char *sname, + struct vnode *tdip, char *tname, cred_t *cr, int flags); +extern int zfsctl_snapdir_remove(struct vnode *dip, char *name, cred_t *cr, + int flags); +extern int zfsctl_snapdir_mkdir(struct vnode *dip, char *dirname, vattr_t *vap, + struct vnode **ipp, cred_t *cr, int flags); +extern int zfsctl_snapshot_mount(struct vnode *, int flags); +extern int zfsctl_snapshot_unmount(const char *, int flags); +extern int zfsctl_snapshot_unmount_node(struct vnode *, const char *, + int flags); +extern int zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, + int delay); +extern int zfsctl_snapdir_vget(struct mount *sb, uint64_t objsetid, + int gen, struct vnode **ipp); + +/* zfsctl '.zfs/shares' functions */ +extern int zfsctl_shares_lookup(struct vnode *dip, char *name, + struct vnode **ipp, int flags, cred_t *cr, int *direntflags, + struct componentname *realpnp); + +extern int zfsctl_vnop_lookup(struct vnop_lookup_args *); +extern int zfsctl_vnop_getattr(struct vnop_getattr_args *); +extern int zfsctl_vnop_readdir(struct vnop_readdir_args *); +extern int zfsctl_vnop_mkdir(struct vnop_mkdir_args *); +extern int zfsctl_vnop_rmdir(struct vnop_rmdir_args *); +extern int zfsctl_vnop_access(struct vnop_access_args *); +extern int zfsctl_vnop_open(struct vnop_open_args *); +extern int zfsctl_vnop_close(struct vnop_close_args *); +extern int zfsctl_vnop_inactive(struct vnop_inactive_args *); +extern int zfsctl_vnop_reclaim(struct vnop_reclaim_args *); + +extern void zfs_ereport_snapshot_post(const char *subclass, spa_t *spa, + const char 
*name); + +extern void zfsctl_mount_signal(char *, boolean_t); + + +/* + * These vnode numbers are reserved for the .zfs control directory. + * It is important that they be no larger than 48 bits because only + * 6 bytes are reserved in the NFS file handle for the object number. + * However, they should be as large as possible to avoid conflicts + * with the objects which are assigned monotonically by the dmu. + */ +#define ZFSCTL_INO_ROOT 0x0000FFFFFFFFFFFFULL +#define ZFSCTL_INO_SHARES 0x0000FFFFFFFFFFFEULL +#define ZFSCTL_INO_SNAPDIR 0x0000FFFFFFFFFFFDULL +#define ZFSCTL_INO_SNAPDIRS 0x0000FFFFFFFFFFFCULL + +#define ZFSCTL_EXPIRE_SNAPSHOT 300 + +#endif /* _ZFS_CTLDIR_H */ diff --git a/include/os/macos/zfs/sys/zfs_dir.h b/include/os/macos/zfs/sys/zfs_dir.h new file mode 100644 index 0000000000..cfee82308a --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_dir.h @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_FS_ZFS_DIR_H +#define _SYS_FS_ZFS_DIR_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* zfs_dirent_lock() flags */ +#define ZNEW 0x0001 /* entry should not exist */ +#define ZEXISTS 0x0002 /* entry should exist */ +#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */ +#define ZXATTR 0x0008 /* we want the xattr dir */ +#define ZRENAMING 0x0010 /* znode is being renamed */ +#define ZCILOOK 0x0020 /* case-insensitive lookup requested */ +#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */ +#define ZHAVELOCK 0x0080 /* z_name_lock is already held */ + +/* mknode flags */ +#define IS_ROOT_NODE 0x01 /* create a root node */ +#define IS_XATTR 0x02 /* create an extended attribute node */ +#define IS_REPLAY 0x04 /* we are replaying intent log */ + +extern int zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, + znode_t **zpp, int flag, int *direntflags, + struct componentname *realpnp); + +extern void zfs_dirent_unlock(zfs_dirlock_t *); +extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); +extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, + boolean_t *); + +extern int zfs_dirlook(znode_t *, char *name, znode_t **, int, + int *deflg, struct componentname *rpnp); + +extern void zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, + uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids); + +extern void zfs_rmnode(znode_t *); +extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old); +extern boolean_t zfs_dirempty(znode_t *); +extern void zfs_unlinked_add(znode_t *, dmu_tx_t *); +extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs); +extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs); +extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr); + +extern int zfs_get_xattrdir(znode_t *, znode_t **, cred_t *, int); +extern int zfs_make_xattrdir(znode_t *, vattr_t *, znode_t **, cred_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_DIR_H */ diff --git a/include/os/macos/zfs/sys/zfs_ioctl_compat.h b/include/os/macos/zfs/sys/zfs_ioctl_compat.h new file mode 100644 index 0000000000..15f12b34fe --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_ioctl_compat.h @@ -0,0 +1,213 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2013 Jorgen Lundan . All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_ZFS_IOCTL_COMPAT_H +#define _SYS_ZFS_IOCTL_COMPAT_H + +#include +#include +#include +#include +#include +#include + +#ifdef _KERNEL +#include +#endif /* _KERNEL */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Backwards ioctl compatibility + */ + +/* ioctl versions for vfs.zfs.version.ioctl */ +#define ZFS_IOCVER_UNDEF -1 +#define ZFS_IOCVER_NONE 0 +#define ZFS_IOCVER_1_9_4 1 +#define ZFS_IOCVER_ZOF 15 + +/* compatibility conversion flag */ +#define ZFS_CMD_COMPAT_NONE 0 +#define ZFS_CMD_COMPAT_V15 1 +#define ZFS_CMD_COMPAT_V28 2 + +#define ZFS_IOC_COMPAT_PASS 254 +#define ZFS_IOC_COMPAT_FAIL 255 + +#define ZFS_IOCREQ(ioreq) ((ioreq) & 0xff) + +typedef struct zfs_iocparm { + uint32_t zfs_ioctl_version; + uint64_t zfs_cmd; + uint64_t zfs_cmd_size; + + /* + * ioctl() return codes can not be used to communicate - + * as XNU will skip copyout() if there is an error, so it + * is passed along in this wrapping structure. + */ + int zfs_ioc_error; /* ioctl error value */ +} zfs_iocparm_t; + +typedef struct zfs_cmd_1_9_4 +{ + char zc_name[MAXPATHLEN]; /* name of pool or dataset */ + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ + int zc_pad2; + + /* + * The following members are for legacy ioctls which haven't been + * converted to the new method. + */ + uint64_t zc_history; /* really (char *) */ + char zc_value[MAXPATHLEN * 2]; + char zc_string[MAXNAMELEN]; + uint64_t zc_guid; + uint64_t zc_nvlist_conf; /* really (char *) */ + uint64_t zc_nvlist_conf_size; + uint64_t zc_cookie; + uint64_t zc_objset_type; + uint64_t zc_perm_action; + uint64_t zc_history_len; + uint64_t zc_history_offset; + uint64_t zc_obj; + uint64_t zc_iflags; /* internal to zfs(7fs) */ + zfs_share_t zc_share; + dmu_objset_stats_t zc_objset_stats; + struct drr_begin zc_begin_record; + zinject_record_t zc_inject_record; + uint32_t zc_defer_destroy; + uint32_t zc_flags; + uint64_t zc_action_handle; + int zc_cleanup_fd; + uint8_t zc_simple; + uint8_t zc_pad3[3]; + boolean_t zc_resumable; + uint32_t zc_pad4; + uint64_t zc_sendobj; + uint64_t zc_fromobj; + uint64_t zc_createtxg; + zfs_stat_t zc_stat; + int zc_ioc_error; /* ioctl error value */ + uint64_t zc_dev; /* OSX doesn't have ddi_driver_major */ +} zfs_cmd_1_9_4_t; + +// Figure this out +unsigned static long zfs_ioctl_1_9_4[] = +{ + // ZFS_IOC_POOL_CREATE = _IOWR('Z', 0, struct zfs_cmd), + + 0, /* 0 ZFS_IOC_POOL_CREATE */ + 1, /* 1 ZFS_IOC_POOL_DESTROY */ + 2, /* 2 ZFS_IOC_POOL_IMPORT */ + 3, /* 3 ZFS_IOC_POOL_EXPORT */ + 4, /* 4 ZFS_IOC_POOL_CONFIGS */ + 5, /* 5 ZFS_IOC_POOL_STATS */ + 6, /* 6 ZFS_IOC_POOL_TRYIMPORT */ + 7, /* 7 ZFS_IOC_POOL_SCRUB */ + 8, /* 8 ZFS_IOC_POOL_FREEZE */ + 9, /* 9 ZFS_IOC_POOL_UPGRADE */ + 10, /* 10 ZFS_IOC_POOL_GET_HISTORY */ + 11, /* 11 ZFS_IOC_VDEV_ADD */ + 12, /* 12 ZFS_IOC_VDEV_REMOVE */ + 13, /* 13 ZFS_IOC_VDEV_SET_STATE */ + 14, /* 14 ZFS_IOC_VDEV_ATTACH */ + 15, /* 15 ZFS_IOC_VDEV_DETACH */ + 16, /* 16 ZFS_IOC_VDEV_SETPATH */ + 18, /* 17 ZFS_IOC_OBJSET_STATS */ + 19, /* 18 ZFS_IOC_OBJSET_ZPLPROPS */ + 20, /* 19 ZFS_IOC_DATASET_LIST_NEXT */ + 21, /* 20 ZFS_IOC_SNAPSHOT_LIST_NEXT */ + 22, /* 21 ZFS_IOC_SET_PROP */ + ZFS_IOC_COMPAT_PASS, /* 22 ZFS_IOC_CREATE_MINOR */ + ZFS_IOC_COMPAT_PASS, /* 23 ZFS_IOC_REMOVE_MINOR */ + 23, /* 24 ZFS_IOC_CREATE */ + 24, /* 25 ZFS_IOC_DESTROY */ + 25, /* 26 ZFS_IOC_ROLLBACK */ + 26, /* 27 ZFS_IOC_RENAME */ + 27, 
/* 28 ZFS_IOC_RECV */ + 28, /* 29 ZFS_IOC_SEND */ + 29, /* 30 ZFS_IOC_INJECT_FAULT */ + 30, /* 31 ZFS_IOC_CLEAR_FAULT */ + 31, /* 32 ZFS_IOC_INJECT_LIST_NEXT */ + 32, /* 33 ZFS_IOC_ERROR_LOG */ + 33, /* 34 ZFS_IOC_CLEAR */ + 34, /* 35 ZFS_IOC_PROMOTE */ + 35, /* 36 ZFS_IOC_DESTROY_SNAPS */ + 36, /* 37 ZFS_IOC_SNAPSHOT */ + 37, /* 38 ZFS_IOC_DSOBJ_TO_DSNAME */ + 38, /* 39 ZFS_IOC_OBJ_TO_PATH */ + 39, /* 40 ZFS_IOC_POOL_SET_PROPS */ + 40, /* 41 ZFS_IOC_POOL_GET_PROPS */ + 41, /* 42 ZFS_IOC_SET_FSACL */ + 42, /* 43 ZFS_IOC_GET_FSACL */ + ZFS_IOC_COMPAT_PASS, /* 44 ZFS_IOC_ISCSI_PERM_CHECK */ + 43, /* 45 ZFS_IOC_SHARE */ + 44, /* 46 ZFS_IOC_IHNERIT_PROP */ + 58, /* 47 ZFS_IOC_JAIL */ + 59, /* 48 ZFS_IOC_UNJAIL */ + 45, /* 49 ZFS_IOC_SMB_ACL */ + 46, /* 50 ZFS_IOC_USERSPACE_ONE */ + 47, /* 51 ZFS_IOC_USERSPACE_MANY */ + 48, /* 52 ZFS_IOC_USERSPACE_UPGRADE */ + 17, /* 53 ZFS_IOC_SETFRU */ +}; + +#ifdef _KERNEL +int zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int); +void zfs_ioctl_compat_post(zfs_cmd_t *, const int, const int); +nvlist_t *zfs_ioctl_compat_innvl(zfs_cmd_t *, nvlist_t *, const int, + const int); +nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int, + const int); +#endif /* _KERNEL */ +void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int); +void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int); + +int wrap_avl_init(void); +int wrap_unicode_init(void); +int wrap_nvpair_init(void); +int wrap_zcommon_init(void); +int wrap_icp_init(void); +int wrap_lua_init(void); +void wrap_avl_fini(void); +void wrap_unicode_fini(void); +void wrap_nvpair_fini(void); +void wrap_zcommon_fini(void); +void wrap_icp_fini(void); +void wrap_lua_fini(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_IOCTL_COMPAT_H */ diff --git a/include/os/macos/zfs/sys/zfs_mount.h b/include/os/macos/zfs/sys/zfs_mount.h new file mode 100644 index 0000000000..b69229c929 --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_mount.h @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SYS_ZFS_MOUNT_H_ +#define _SYS_ZFS_MOUNT_H_ + +struct zfs_mount_args { + const char *fspec; + int mflag; + char *optptr; + int optlen; + int struct_size; +}; + +/* + * Flag bits passed to mount(2). 
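+ * For example (an illustrative combination, not taken from this change), a
+ * read-only, setuid-disabled mount corresponds to (MS_RDONLY | MS_NOSUID);
+ * MS_MASK below lists the bits accepted from mount(2).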
+ */ +#define MS_RDONLY 0x0001 /* Read-only */ +#define MS_FSS 0x0002 /* Old (4-argument) mount (compatibility) */ +#define MS_DATA 0x0004 /* 6-argument mount */ +#define MS_NOSUID 0x0010 /* Setuid programs disallowed */ +#define MS_REMOUNT 0x0020 /* Remount */ +#define MS_NOTRUNC 0x0040 /* Return ENAMETOOLONG for long filenames */ +#define MS_OVERLAY 0x0080 /* Allow overlay mounts */ +#define MS_OPTIONSTR 0x0100 /* Data is an in/out option string */ +#define MS_GLOBAL 0x0200 /* Clustering: Mount into global name space */ +#define MS_FORCE 0x0400 /* Forced unmount */ +#define MS_NOMNTTAB 0x0800 /* Don't show mount in mnttab */ +/* + * Additional flag bits that domount() is prepared to interpret, but that + * can't be passed through mount(2). + */ +#define MS_SYSSPACE 0x0008 /* Mounta already in kernel space */ +#define MS_NOSPLICE 0x1000 /* Don't splice fs instance into name space */ +#define MS_NOCHECK 0x2000 /* Clustering: suppress mount busy checks */ +/* + * Mask to sift out flag bits allowable from mount(2). + */ +#define MS_MASK \ + (MS_RDONLY|MS_FSS|MS_DATA|MS_NOSUID|MS_REMOUNT|MS_NOTRUNC|MS_OVERLAY|\ + MS_OPTIONSTR|MS_GLOBAL|MS_NOMNTTAB) + +/* + * Mask to sift out flag bits allowable from umount2(2). + */ + +#define MS_UMOUNT_MASK (MS_FORCE) + +/* + * Maximum option string length accepted or returned by mount(2). + */ +#define MAX_MNTOPT_STR 1024 /* max length of mount options string */ + + +#endif /* _SYS_ZFS_MOUNT_H_ */ diff --git a/include/os/macos/zfs/sys/zfs_vfsops.h b/include/os/macos/zfs/sys/zfs_vfsops.h new file mode 100644 index 0000000000..0e36b2fad4 --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_vfsops.h @@ -0,0 +1,291 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FS_ZFS_VFSOPS_H +#define _SYS_FS_ZFS_VFSOPS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct zfs_sb; +struct znode; + +#ifdef __APPLE__ +#define APPLE_SA_RECOVER +/* #define WITH_SEARCHFS */ +/* #define WITH_READDIRATTR */ +#define HAVE_NAMED_STREAMS 1 +#define HAVE_PAGEOUT_V2 1 +#define HIDE_TRIVIAL_ACL 1 +#endif + +/* + * Status of the zfs_unlinked_drain thread. 
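+ * A rough reading of the states below: ZFS_DRAIN_SHUTDOWN means no drain
+ * task is running, ZFS_DRAIN_RUNNING means the async unlinked-drain task is
+ * active, and ZFS_DRAIN_SHUTDOWN_REQ means a shutdown has been requested
+ * (coordinated via z_drain_lock, z_drain_cv and z_drain_state below).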
+ */ +typedef enum drain_state { + ZFS_DRAIN_SHUTDOWN = 0, + ZFS_DRAIN_RUNNING, + ZFS_DRAIN_SHUTDOWN_REQ +} drain_state_t; + + +typedef struct zfsvfs zfsvfs_t; + +struct zfsvfs { + vfs_t *z_vfs; /* generic fs struct */ + zfsvfs_t *z_parent; /* parent fs */ + objset_t *z_os; /* objset reference */ + uint64_t z_root; /* id of root znode */ + uint64_t z_unlinkedobj; /* id of unlinked zapobj */ + uint64_t z_max_blksz; /* maximum block size for files */ + uint64_t z_fuid_obj; /* fuid table object number */ + uint64_t z_fuid_size; /* fuid table size */ + avl_tree_t z_fuid_idx; /* fuid tree keyed by index */ + avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */ + krwlock_t z_fuid_lock; /* fuid lock */ + boolean_t z_fuid_loaded; /* fuid tables are loaded */ + boolean_t z_fuid_dirty; /* need to sync fuid table ? */ + struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */ + uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */ + zilog_t *z_log; /* intent log pointer */ + uint_t z_acl_mode; /* acl chmod/mode behavior */ + uint_t z_acl_inherit; /* acl inheritance behavior */ + zfs_case_t z_case; /* case-sense */ + boolean_t z_utf8; /* utf8-only */ + int z_norm; /* normalization flags */ + boolean_t z_atime; /* enable atimes mount option */ + boolean_t z_unmounted; /* unmounted */ + rrmlock_t z_teardown_lock; + krwlock_t z_teardown_inactive_lock; + list_t z_all_znodes; /* all vnodes in the fs */ + kmutex_t z_znodes_lock; /* lock for z_all_znodes */ + struct vnode *z_ctldir; /* .zfs directory pointer */ + uint64_t z_ctldir_startid; /* Start of snapdir range */ + boolean_t z_show_ctldir; /* expose .zfs in the root dir */ + boolean_t z_issnap; /* true if this is a snapshot */ + boolean_t z_vscan; /* virus scan on/off */ + boolean_t z_use_fuids; /* version allows fuids */ + boolean_t z_replay; /* set during ZIL replay */ + boolean_t z_use_sa; /* version allow system attributes */ + boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */ + uint64_t z_version; + uint64_t z_shares_dir; /* hidden shares dir */ + kmutex_t z_lock; + + /* for controlling async zfs_unlinked_drain */ + kmutex_t z_drain_lock; + kcondvar_t z_drain_cv; + drain_state_t z_drain_state; + + uint64_t z_userquota_obj; + uint64_t z_groupquota_obj; + uint64_t z_userobjquota_obj; + uint64_t z_groupobjquota_obj; + uint64_t z_projectquota_obj; + uint64_t z_projectobjquota_obj; + +#ifdef __APPLE__ + dev_t z_rdev; /* proxy device for mount */ + boolean_t z_rdonly; /* is mount read-only? 
*/ + time_t z_mount_time; /* mount timestamp (for Spotlight) */ + time_t z_last_unmount_time; /* unmount timestamp (for Spotlight) */ + boolean_t z_xattr; /* enable atimes mount option */ + + avl_tree_t z_hardlinks; /* linkid hash avl tree for vget */ + avl_tree_t z_hardlinks_linkid; /* sorted on linkid */ + krwlock_t z_hardlinks_lock; /* lock to access z_hardlinks */ + + uint64_t z_notification_conditions; /* HFSIOC_VOLUME_STATUS */ + uint64_t z_freespace_notify_warninglimit; + uint64_t z_freespace_notify_dangerlimit; + uint64_t z_freespace_notify_desiredlevel; + + void *z_devdisk; /* Hold fake disk if prop devdisk is on */ + + uint64_t z_findernotify_space; + +#endif + uint64_t z_replay_eof; /* New end of file - replay only */ + sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ + + uint64_t z_hold_size; /* znode hold array size */ + avl_tree_t *z_hold_trees; /* znode hold trees */ + kmutex_t *z_hold_locks; /* znode hold locks */ + taskqid_t z_drain_task; /* task id for the unlink drain task */ +}; +#define ZFS_OBJ_MTX_SZ 64 + +#ifdef __APPLE__ +struct hardlinks_struct { + avl_node_t hl_node; + avl_node_t hl_node_linkid; + uint64_t hl_parent; // parentid of entry + uint64_t hl_fileid; // the fileid (z_id) for vget + uint32_t hl_linkid; // the linkid, persistent over renames + char hl_name[PATH_MAX]; // cached name for vget +}; +typedef struct hardlinks_struct hardlinks_t; + +int zfs_vfs_uuid_unparse(uuid_t uuid, char *dst); +int zfs_vfs_uuid_gen(const char *osname, uuid_t uuid); +#endif + + +#define ZFS_SUPER_MAGIC 0x2fc12fc1 + +#define ZSB_XATTR 0x0001 /* Enable user xattrs */ + +/* + * Normal filesystems (those not under .zfs/snapshot) have a total + * file ID size limited to 12 bytes (including the length field) due to + * NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical + * reasons, this same limit is being imposed by the Solaris NFSv3 implementation + * (although the NFSv3 protocol actually permits a maximum of 64 bytes). It + * is not possible to expand beyond 12 bytes without abandoning support + * of NFSv2. + * + * For normal filesystems, we partition up the available space as follows: + * 2 bytes fid length (required) + * 6 bytes object number (48 bits) + * 4 bytes generation number (32 bits) + * + * We reserve only 48 bits for the object number, as this is the limit + * currently defined and imposed by the DMU. + */ +typedef struct zfid_short { + uint16_t zf_len; + uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */ + uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */ +} zfid_short_t; + +/* + * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes + * (including the length field). This makes files under .zfs/snapshot + * accessible by NFSv3 and NFSv4, but not NFSv2. + * + * For files under .zfs/snapshot, we partition up the available space + * as follows: + * 2 bytes fid length (required) + * 6 bytes object number (48 bits) + * 4 bytes generation number (32 bits) + * 6 bytes objset id (48 bits) + * 4 bytes currently just zero (32 bits) + * + * We reserve only 48 bits for the object number and objset id, as these are + * the limits currently defined and imposed by the DMU. 
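+ * For illustration (hypothetical value only): the bytes are packed
+ * least-significant first, i.e. zf_object[i] = (uint8_t)(object >> (8 * i)),
+ * so an object number of 0xABCDEF123456 would be stored as
+ * { 0x56, 0x34, 0x12, 0xEF, 0xCD, 0xAB }.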
+ */ +typedef struct zfid_long { + zfid_short_t z_fid; + uint8_t zf_setid[6]; /* obj[i] = obj >> (8 * i) */ + uint8_t zf_setgen[4]; /* gen[i] = gen >> (8 * i) */ +} zfid_long_t; + +#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t)) +#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t)) + +extern uint_t zfs_fsyncer_key; + +extern int zfs_suspend_fs(zfsvfs_t *zfsvfs); +extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds); +extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + const char *domain, uint64_t rid, uint64_t *valuep); +extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + uint64_t *cookiep, void *vbuf, uint64_t *bufsizep); +extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, + const char *domain, uint64_t rid, uint64_t quota); +extern boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, struct znode *, + boolean_t isgroup); +extern boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, + uint64_t fuid); +extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers); +extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os); + +extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, + uint64_t *value); + +extern int zfs_sb_create(const char *name, zfsvfs_t **zfsvfsp); +extern int zfs_sb_setup(zfsvfs_t *zfsvfs, boolean_t mounting); +extern void zfs_sb_free(zfsvfs_t *zfsvfs); +extern int zfs_check_global_label(const char *dsname, const char *hexsl); +extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs); + + + + +extern int zfs_vfs_init(struct vfsconf *vfsp); +extern int zfs_vfs_start(struct mount *mp, int flags, vfs_context_t context); +extern int zfs_vfs_mount(struct mount *mp, vnode_t *devvp, + user_addr_t data, vfs_context_t context); +extern int zfs_vfs_unmount(struct mount *mp, int mntflags, + vfs_context_t context); +extern int zfs_vfs_root(struct mount *mp, vnode_t **vpp, + vfs_context_t context); +extern int zfs_vfs_vget(struct mount *mp, ino64_t ino, vnode_t **vpp, + vfs_context_t context); +extern int zfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, + vfs_context_t context); +extern int zfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, + vfs_context_t context); +extern int zfs_vfs_sync(struct mount *mp, int waitfor, vfs_context_t context); +extern int zfs_vfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, + vnode_t **vpp, vfs_context_t context); +extern int zfs_vfs_vptofh(vnode_t *vp, int *fhlenp, unsigned char *fhp, + vfs_context_t context); +extern int zfs_vfs_sysctl(int *name, uint_t namelen, user_addr_t oldp, + size_t *oldlenp, user_addr_t newp, size_t newlen, vfs_context_t context); +extern int zfs_vfs_quotactl(struct mount *mp, int cmds, uid_t uid, + caddr_t datap, vfs_context_t context); +extern int zfs_vfs_mountroot(struct mount *mp, struct vnode *vp, + vfs_context_t context); + +extern void zfs_init(void); +extern void zfs_fini(void); + +extern int zfs_vnode_lock(vnode_t *vp, int flags); +extern void zfs_freevfs(struct mount *vfsp); + +extern int zfsvfs_create(const char *name, boolean_t rd, zfsvfs_t **zfvp); +extern void zfsvfs_free(zfsvfs_t *zfsvfs); + +extern int zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, + uint64_t *val, char *setpoint); + +extern int zfs_end_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_VFSOPS_H */ diff --git a/include/os/macos/zfs/sys/zfs_vnops.h b/include/os/macos/zfs/sys/zfs_vnops.h new file mode 100644 index 
0000000000..6d7df203ff --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_vnops.h @@ -0,0 +1,255 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _SYS_FS_ZFS_VNOPS_H +#define _SYS_FS_ZFS_VNOPS_H + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Spotlight specific fcntl()'s + */ + +// Older defines +#define SPOTLIGHT_GET_MOUNT_TIME (FCNTL_FS_SPECIFIC_BASE + 0x00002) +#define SPOTLIGHT_GET_UNMOUNT_TIME (FCNTL_FS_SPECIFIC_BASE + 0x00003) + +// Newer defines, will these need a OSX version test to compile on older? +#define SPOTLIGHT_IOC_GET_MOUNT_TIME _IOR('h', 18, u_int32_t) +#define SPOTLIGHT_FSCTL_GET_MOUNT_TIME \ + IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME) +#define SPOTLIGHT_IOC_GET_LAST_MTIME _IOR('h', 19, u_int32_t) +#define SPOTLIGHT_FSCTL_GET_LAST_MTIME \ + IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME) + +/* + * Account for user timespec structure differences + */ +#ifdef ZFS_LEOPARD_ONLY +typedef struct timespec timespec_user32_t; +typedef struct user_timespec timespec_user64_t; +#else +typedef struct user32_timespec timespec_user32_t; +typedef struct user64_timespec timespec_user64_t; +#endif + +#define UNKNOWNUID ((uid_t)99) +#define UNKNOWNGID ((gid_t)99) + +#define DTTOVT(dtype) (iftovt_tab[(dtype)]) +#define kTextEncodingMacUnicode 0x7e +#define ZAP_AVENAMELEN (ZAP_MAXNAMELEN / 4) + +/* Finder information */ +struct finderinfo { + u_int32_t fi_type; /* files only */ + u_int32_t fi_creator; /* files only */ + u_int16_t fi_flags; + struct { + int16_t v; + int16_t h; + } fi_location; + int8_t fi_opaque[18]; +} __attribute__((aligned(2), packed)); +typedef struct finderinfo finderinfo_t; + +enum { + /* Finder Flags */ + kHasBeenInited = 0x0100, + kHasCustomIcon = 0x0400, + kIsStationery = 0x0800, + kNameLocked = 0x1000, + kHasBundle = 0x2000, + kIsInvisible = 0x4000, + kIsAlias = 0x8000 +}; + +/* Attribute packing information */ +typedef struct attrinfo { + struct attrlist *ai_attrlist; + void **ai_attrbufpp; + void **ai_varbufpp; + void *ai_varbufend; + vfs_context_t ai_context; +} attrinfo_t; + +/* + * Attributes that we can get for free from the zap (ie without a znode) + */ +#define ZFS_DIR_ENT_ATTRS ( \ + ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | \ + ATTR_CMN_OBJTYPE | ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | \ + ATTR_CMN_OBJPERMANENTID | ATTR_CMN_SCRIPT | \ + ATTR_CMN_FILEID) + +/* + * Attributes that we support + */ +#define ZFS_ATTR_BIT_MAP_COUNT 5 + +#define ZFS_ATTR_CMN_VALID ( \ + ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | \ + ATTR_CMN_OBJTYPE | ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | \ + ATTR_CMN_OBJPERMANENTID | ATTR_CMN_PAROBJID | \ + 
ATTR_CMN_SCRIPT | ATTR_CMN_CRTIME | ATTR_CMN_MODTIME | \ + ATTR_CMN_CHGTIME | ATTR_CMN_ACCTIME | \ + ATTR_CMN_BKUPTIME | ATTR_CMN_FNDRINFO | \ + ATTR_CMN_OWNERID | ATTR_CMN_GRPID | \ + ATTR_CMN_ACCESSMASK | ATTR_CMN_FLAGS | \ + ATTR_CMN_USERACCESS | ATTR_CMN_FILEID | \ + ATTR_CMN_PARENTID) + +#define ZFS_ATTR_DIR_VALID ( \ + ATTR_DIR_LINKCOUNT | ATTR_DIR_ENTRYCOUNT | \ + ATTR_DIR_MOUNTSTATUS) + +#define ZFS_ATTR_FILE_VALID ( \ + ATTR_FILE_LINKCOUNT |ATTR_FILE_TOTALSIZE | \ + ATTR_FILE_ALLOCSIZE | ATTR_FILE_IOBLOCKSIZE | \ + ATTR_FILE_DEVTYPE | ATTR_FILE_DATALENGTH | \ + ATTR_FILE_DATAALLOCSIZE | ATTR_FILE_RSRCLENGTH | \ + ATTR_FILE_RSRCALLOCSIZE) + +extern int zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags); +extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, + znode_t **zpp, cred_t *cr, int flags, vsecattr_t *vsecp); +extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, + cred_t *cr, int flags); +extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr); +extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, + char *tnm, cred_t *cr, int flags); +extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, + char *link, znode_t **zpp, cred_t *cr, int flags); +extern int zfs_link(znode_t *tdzp, znode_t *sp, + char *name, cred_t *cr, int flags); +extern int zfs_space(znode_t *zp, int cmd, struct flock *bfp, int flag, + offset_t offset, cred_t *cr); +extern int zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, + int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp); +extern int zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, + cred_t *cr); +extern int zfs_write_simple(znode_t *zp, const void *data, size_t len, + loff_t pos, size_t *resid); + +extern int zfs_open(struct vnode *ip, int mode, int flag, cred_t *cr); +extern int zfs_close(struct vnode *ip, int flag, cred_t *cr); +extern int zfs_read(struct vnode *ip, uio_t *uio, int ioflag, cred_t *cr); +extern int zfs_write(struct vnode *ip, uio_t *uio, int ioflag, cred_t *cr); +extern int zfs_lookup(znode_t *dzp, char *nm, znode_t **zpp, + int flags, cred_t *cr, int *direntflags, struct componentname *realpnp); +extern int zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, + cred_t *cred, int *rvalp, caller_context_t *ct); +extern int zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, + int flags, int *a_numdirent); +extern int zfs_fsync(znode_t *zp, int syncflag, cred_t *cr); +extern int zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, + cred_t *cr, caller_context_t *ct); +extern int zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr); + +extern int zfs_access(struct vnode *ip, int mode, int flag, cred_t *cr); +extern void zfs_inactive(vnode_t *vp); + +/* zfs_vops_osx.c calls */ +extern int zfs_znode_getvnode(znode_t *zp, zfsvfs_t *zfsvfs); + +extern void getnewvnode_reserve(int num); +extern void getnewvnode_drop_reserve(void); +extern int zfs_vfsops_init(void); +extern int zfs_vfsops_fini(void); +extern int zfs_znode_asyncgetvnode(znode_t *zp, zfsvfs_t *zfsvfs); +extern void zfs_znode_asyncput(znode_t *zp); +extern int zfs_znode_asyncwait(znode_t *zp); + +/* zfs_vnops_osx_lib calls */ +extern int zfs_ioflags(int ap_ioflag); +extern int zfs_getattr_znode_unlocked(struct vnode *vp, vattr_t *vap); +extern int ace_trivial_common(void *acep, int aclcnt, + uint64_t (*walk)(void *, uint64_t, int aclcnt, + uint16_t *, uint16_t *, uint32_t *)); +extern void acl_trivial_access_masks(mode_t mode, boolean_t isdir, + trivial_acl_t *masks); +extern 
int zpl_obtain_xattr(struct znode *, const char *name, mode_t mode, + cred_t *cr, struct vnode **vpp, int flag); + +extern void commonattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp, + const char *name, ino64_t objnum, enum vtype vtype, + boolean_t user64); +extern void dirattrpack(attrinfo_t *aip, znode_t *zp); +extern void fileattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp); +extern void nameattrpack(attrinfo_t *aip, const char *name, int namelen); +extern int getpackedsize(struct attrlist *alp, boolean_t user64); +extern void getfinderinfo(znode_t *zp, cred_t *cr, finderinfo_t *fip); +extern uint32_t getuseraccess(znode_t *zp, vfs_context_t ctx); +extern void finderinfo_update(uint8_t *finderinfo, znode_t *zp); +extern int zpl_xattr_set_sa(struct vnode *vp, const char *name, + const void *value, size_t size, int flags, cred_t *cr); +extern int zpl_xattr_get_sa(struct vnode *vp, const char *name, void *value, + size_t size); +extern void zfs_zrele_async(znode_t *zp); + +/* + * OSX ACL Helper funcions + * + * OSX uses 'guids' for the 'who' part of ACLs, and uses a 'well known' + * binary sequence to signify the special rules of "owner", "group" and + * "everybody". We translate between this "well-known" guid and ZFS' + * flags ACE_OWNER, ACE_GROUP and ACE_EVERYBODY. + * + */ +#define KAUTH_WKG_NOT 0 /* not a well-known GUID */ +#define KAUTH_WKG_OWNER 1 +#define KAUTH_WKG_GROUP 2 +#define KAUTH_WKG_NOBODY 3 +#define KAUTH_WKG_EVERYBODY 4 + +extern int kauth_wellknown_guid(guid_t *guid); +extern void aces_from_acl(ace_t *aces, int *nentries, struct kauth_acl *k_acl, + int *seen_type); +extern void nfsacl_set_wellknown(int wkg, guid_t *guid); +extern int zfs_addacl_trivial(znode_t *zp, ace_t *aces, int *nentries, + int seen_type); + +extern struct vnodeopv_desc zfs_dvnodeop_opv_desc; +extern struct vnodeopv_desc zfs_fvnodeop_opv_desc; +extern struct vnodeopv_desc zfs_symvnodeop_opv_desc; +extern struct vnodeopv_desc zfs_xdvnodeop_opv_desc; +extern struct vnodeopv_desc zfs_evnodeop_opv_desc; +extern struct vnodeopv_desc zfs_fifonodeop_opv_desc; +extern struct vnodeopv_desc zfs_ctldir_opv_desc; +extern int (**zfs_ctldirops)(void *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_ZFS_VNOPS_H */ diff --git a/include/os/macos/zfs/sys/zfs_znode_impl.h b/include/os/macos/zfs/sys/zfs_znode_impl.h new file mode 100644 index 0000000000..fa53ac8d6e --- /dev/null +++ b/include/os/macos/zfs/sys/zfs_znode_impl.h @@ -0,0 +1,230 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 
+ * Copyright (c) 2014 Integros [integros.com] + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + */ + +#ifndef _MACOS_ZFS_SYS_ZNODE_IMPL_H +#define _MACOS_ZFS_SYS_ZNODE_IMPL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZFS_UIMMUTABLE 0x0000001000000000ull // OSX +#define ZFS_UAPPENDONLY 0x0000004000000000ull // OSX + +// #define ZFS_IMMUTABLE (ZFS_UIMMUTABLE | ZFS_SIMMUTABLE) +// #define ZFS_APPENDONLY (ZFS_UAPPENDONLY | ZFS_SAPPENDONLY) + +#define ZFS_TRACKED 0x0010000000000000ull +#define ZFS_COMPRESSED 0x0020000000000000ull + +#define ZFS_SIMMUTABLE 0x0040000000000000ull +#define ZFS_SAPPENDONLY 0x0080000000000000ull + +#define SA_ZPL_ADDTIME(z) z->z_attr_table[ZPL_ADDTIME] +#define SA_ZPL_DOCUMENTID(z) z->z_attr_table[ZPL_DOCUMENTID] + +#define ZGET_FLAG_UNLINKED (1<<0) /* Also lookup unlinked */ +#define ZGET_FLAG_ASYNC (1<<3) /* taskq the vnode_create call */ + +extern int zfs_zget_ext(zfsvfs_t *zfsvfs, uint64_t obj_num, + struct znode **zpp, int flags); + + +/* + * Directory entry locks control access to directory entries. + * They are used to protect creates, deletes, and renames. + * Each directory znode has a mutex and a list of locked names. + */ +#define ZNODE_OS_FIELDS \ + struct zfsvfs *z_zfsvfs; \ + struct vnode *z_vnode; \ + uint64_t z_uid; \ + uint64_t z_gid; \ + uint64_t z_gen; \ + uint64_t z_atime[2]; \ + uint64_t z_links; \ + uint32_t z_vid; \ + uint32_t z_document_id; \ + uint64_t z_finder_parentid; \ + boolean_t z_finder_hardlink; \ + uint64_t z_write_gencount; \ + char z_name_cache[MAXPATHLEN]; \ + boolean_t z_skip_truncate_undo_decmpfs; \ + taskq_ent_t z_attach_taskq; \ + kcondvar_t z_attach_cv; \ + kmutex_t z_attach_lock; \ + hrtime_t z_snap_mount_time; \ + krwlock_t z_map_lock; + +#define ZFS_LINK_MAX UINT64_MAX + +/* + * ZFS minor numbers can refer to either a control device instance or + * a zvol. Depending on the value of zss_type, zss_data points to either + * a zvol_state_t or a zfs_onexit_t. + */ +enum zfs_soft_state_type { + ZSST_ZVOL, + ZSST_CTLDEV +}; + +typedef struct zfs_soft_state { + enum zfs_soft_state_type zss_type; + void *zss_data; +} zfs_soft_state_t; + +extern minor_t zfsdev_minor_alloc(void); + +/* + * Convert between znode pointers and vnode pointers + */ +#define ZTOV(ZP) ((ZP)->z_vnode) +#define ZTOI(ZP) ((ZP)->z_vnode) +#define VTOZ(VP) ((znode_t *)vnode_fsnode((VP))) +#define ITOZ(VP) ((znode_t *)vnode_fsnode((VP))) + +#define VTOM(VP) ((mount_t *)vnode_mount((VP))) + +/* These are not used so far, VN_HOLD returncode must be checked. */ +#define zhold(zp) VN_HOLD(ZTOV(zp)) +#define zrele(zp) VN_RELE(ZTOV(zp)) + +#define ZTOZSB(zp) ((zp)->z_zfsvfs) +#define ITOZSB(vp) ((zfsvfs_t *)vfs_fsprivate(vnode_mount(vp))) +#define ZTOTYPE(zp) (vnode_vtype(ZTOV(zp))) +#define ZTOGID(zp) ((zp)->z_gid) +#define ZTOUID(zp) ((zp)->z_uid) +#define ZTONLNK(zp) ((zp)->z_links) +#define Z_ISBLK(type) ((type) == VBLK) +#define Z_ISCHR(type) ((type) == VCHR) +#define Z_ISLNK(type) ((type) == VLNK) + +/* Called on entry to each ZFS inode and vfs operation. 
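+ * Illustrative flow for a vnode operation: ZFS_ENTER(zfsvfs) takes the
+ * teardown lock and returns EIO if the filesystem is already unmounted,
+ * ZFS_VERIFY_ZP(zp) then checks that the znode is still valid, and
+ * ZFS_EXIT(zfsvfs) must be called before the operation returns.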
*/ +#define ZFS_ENTER_IFERROR(zfsvfs) \ + rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \ + if ((zfsvfs)->z_unmounted) + +#define ZFS_ENTER_ERROR(zfsvfs, error) \ + do { \ + rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \ + if ((zfsvfs)->z_unmounted) { \ + ZFS_EXIT(zfsvfs); \ + return (error); \ + } \ + } while (0) + +#define ZFS_ENTER(zfsvfs) ZFS_ENTER_ERROR(zfsvfs, EIO) +#define ZPL_ENTER(zfsvfs) ZFS_ENTER_ERROR(zfsvfs, EIO) + +/* Must be called before exiting the operation. */ +#define ZFS_EXIT(zfsvfs) \ + do { \ + rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG); \ + } while (0) +#define ZPL_EXIT(zfsvfs) ZFS_EXIT(zfsvfs) + +/* Verifies the znode is valid. */ +#define ZFS_VERIFY_ZP_ERROR(zp, error) \ + do { \ + if ((zp)->z_sa_hdl == NULL) { \ + ZFS_EXIT(ZTOZSB(zp)); \ + return (error); \ + } \ + } while (0) + +#define ZFS_VERIFY_ZP(zp) ZFS_VERIFY_ZP_ERROR(zp, EIO) +#define ZPL_VERIFY_ZP(zp) ZFS_VERIFY_ZP_ERROR(zp, EIO) + +/* + * Macros for dealing with dmu_buf_hold + */ +#define ZFS_OBJ_MTX_SZ 64 +#define ZFS_OBJ_MTX_MAX (1024 * 1024) +#define ZFS_OBJ_HASH(zfsvfs, obj) ((obj) & ((zfsvfs->z_hold_size) - 1)) + +extern unsigned int zfs_object_mutex_size; + +/* Encode ZFS stored time values from a struct timespec */ +#define ZFS_TIME_ENCODE(tp, stmp) \ + { \ + (stmp)[0] = (uint64_t)(tp)->tv_sec; \ + (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ + } + +/* Decode ZFS stored time values to a struct timespec */ +#define ZFS_TIME_DECODE(tp, stmp) \ + { \ + (tp)->tv_sec = (time_t)(stmp)[0]; \ + (tp)->tv_nsec = (long)(stmp)[1]; \ +} + +#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \ + if ((zfsvfs)->z_atime && !vfs_isrdonly(zfsvfs->z_vfs)) \ + zfs_tstamp_update_setup_ext(zp, ACCESSED, NULL, NULL, B_FALSE); + +extern void zfs_tstamp_update_setup_ext(struct znode *, + uint_t, uint64_t [2], uint64_t [2], boolean_t); +extern void zfs_tstamp_update_setup(struct znode *, + uint_t, uint64_t [2], uint64_t [2]); +extern void zfs_znode_free(struct znode *); + +extern zil_get_data_t zfs_get_data; +extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; +extern int zfsfstype; + +extern int zfs_znode_parent_and_name(struct znode *zp, struct znode **dzpp, + char *buf); +extern uint32_t zfs_getbsdflags(struct znode *zp); +extern void zfs_setattr_generate_id(struct znode *, uint64_t, char *name); + +extern int zfs_setattr_set_documentid(struct znode *zp, + boolean_t update_flags); + +/* Legacy macOS uses fnv_32a hash for hostid. */ +#define FNV1_32A_INIT ((uint32_t)0x811c9dc5) +uint32_t fnv_32a_str(const char *str, uint32_t hval); + +void zfs_setbsdflags(struct znode *, uint32_t bsdflags); +uint32_t zfs_getbsdflags(struct znode *zp); + +#ifdef __cplusplus +} +#endif + +#endif /* _MACOS_SYS_FS_ZFS_ZNODE_H */ diff --git a/include/os/macos/zfs/sys/zpl.h b/include/os/macos/zfs/sys/zpl.h new file mode 100644 index 0000000000..5d391c6a96 --- /dev/null +++ b/include/os/macos/zfs/sys/zpl.h @@ -0,0 +1,27 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SYS_ZPL_H +#define _SYS_ZPL_H + + + + +#endif // _SYS_ZPL_H diff --git a/include/os/macos/zfs/sys/zvolIO.h b/include/os/macos/zfs/sys/zvolIO.h new file mode 100644 index 0000000000..927840aa7b --- /dev/null +++ b/include/os/macos/zfs/sys/zvolIO.h @@ -0,0 +1,142 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013, 2016 Jorgen Lundman + */ + +#ifndef ZVOLIO_H_INCLUDED +#define ZVOLIO_H_INCLUDED + +/* Linux polutes 'current' */ +#undef current + +#ifdef __cplusplus +#include + +extern "C" { +#endif /* __cplusplus */ + +#include +#include + +struct iomem { + IOMemoryDescriptor *buf; +}; + +uint64_t zvolIO_kit_read(struct iomem *iomem, uint64_t offset, + char *address, uint64_t len); +uint64_t zvolIO_kit_write(struct iomem *iomem, uint64_t offset, + char *address, uint64_t len); + +#ifdef __cplusplus +} /* extern "C" */ + +class net_lundman_zfs_zvol : public IOService +{ + OSDeclareDefaultStructors(net_lundman_zfs_zvol) + +private: + +public: + virtual bool init(OSDictionary* dictionary = NULL); + virtual void free(void); + virtual IOService* probe(IOService* provider, SInt32* score); + virtual bool start(IOService* provider); + virtual void stop(IOService* provider); + + virtual bool handleOpen(IOService *client, + IOOptionBits options, void *arg); + virtual bool handleIsOpen(const IOService *client) const; + virtual void handleClose(IOService *client, + IOOptionBits options); + virtual bool isOpen(const IOService *forClient = 0) const; + +private: + OSSet *_openClients; +}; + +#include + +class net_lundman_zfs_zvol_device : public IOBlockStorageDevice +{ + OSDeclareDefaultStructors(net_lundman_zfs_zvol_device) + +private: + // IOService *m_provider; + zvol_state_t *zv; + +public: + virtual bool init(zvol_state_t *c_zv, + OSDictionary* properties = 0); + + virtual bool attach(IOService* provider); + virtual void detach(IOService* provider); + virtual IOReturn doEjectMedia(void); + virtual IOReturn doFormatMedia(UInt64 byteCapacity); + virtual UInt32 doGetFormatCapacities(UInt64 * capacities, + UInt32 capacitiesMaxCount) const; + + virtual IOReturn doLockUnlockMedia(bool doLock); + virtual IOReturn doSynchronizeCache(void); + virtual char *getVendorString(void); + virtual char *getProductString(void); + virtual char *getRevisionString(void); + virtual char *getAdditionalDeviceInfoString(void); + virtual IOReturn reportBlockSize(UInt64 *blockSize); + virtual IOReturn reportEjectability(bool *isEjectable); + virtual IOReturn reportLockability(bool 
*isLockable); + virtual IOReturn reportMaxValidBlock(UInt64 *maxBlock); + virtual IOReturn reportMediaState(bool *mediaPresent, + bool *changedState); + + virtual IOReturn reportPollRequirements(bool *pollRequired, + bool *pollIsExpensive); + + virtual IOReturn reportRemovability(bool *isRemovable); + virtual IOReturn reportWriteProtection(bool *isWriteProtected); + virtual IOReturn getWriteCacheState(bool *enabled); + virtual IOReturn setWriteCacheState(bool enabled); + virtual IOReturn doAsyncReadWrite(IOMemoryDescriptor *buffer, + UInt64 block, UInt64 nblks, + IOStorageAttributes *attributes, + IOStorageCompletion *completion); + + virtual IOReturn doDiscard(UInt64 block, UInt64 nblks); + virtual IOReturn doUnmap(IOBlockStorageDeviceExtent *extents, + UInt32 extentsCount, UInt32 options); + + virtual bool handleOpen(IOService *client, + IOOptionBits options, void *access); + + virtual void handleClose(IOService *client, + IOOptionBits options); + + virtual int getBSDName(void); + virtual int renameDevice(void); + virtual int offlineDevice(void); + virtual int onlineDevice(void); + virtual int refreshDevice(void); + + virtual void clearState(void); +}; +#endif /* __cplusplus */ + +#endif /* ZVOLIO_H_INCLUDED */ diff --git a/include/os/macos/zfs/sys/zvol_os.h b/include/os/macos/zfs/sys/zvol_os.h new file mode 100644 index 0000000000..a5e5d86aa8 --- /dev/null +++ b/include/os/macos/zfs/sys/zvol_os.h @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _SYS_ZVOL_OS_h +#define _SYS_ZVOL_OS_h + +#ifdef __cplusplus +extern "C" { +#endif + +/* struct wrapper for IOKit class */ +typedef struct zvol_iokit zvol_iokit_t; +typedef struct zvol_state zvol_state_t; +struct iomem; + +struct zvol_state_os { + dev_t zvo_dev; /* device id */ + + zvol_iokit_t *zvo_iokitdev; /* IOKit device */ + uint64_t zvo_openflags; /* Remember flags used at open */ + char zvo_bsdname[MAXPATHLEN]; /* /dev/diskX */ +}; + +extern int zvol_os_ioctl(dev_t, unsigned long, caddr_t, + int isblk, cred_t *, int *rvalp); +extern int zvol_os_open_zv(zvol_state_t *, int, int, struct proc *p); +extern int zvol_os_open(dev_t dev, int flag, int otyp, struct proc *p); +extern int zvol_os_close_zv(zvol_state_t *, int, int, struct proc *p); +extern int zvol_os_close(dev_t dev, int flag, int otyp, struct proc *p); +extern int zvol_os_read(dev_t dev, struct uio *uio, int p); +extern int zvol_os_write(dev_t dev, struct uio *uio, int p); + +extern int zvol_os_read_zv(zvol_state_t *zv, uint64_t position, + uint64_t count, struct iomem *iomem); +extern int zvol_os_write_zv(zvol_state_t *zv, uint64_t position, + uint64_t count, struct iomem *iomem); +extern int zvol_os_unmap(zvol_state_t *zv, uint64_t off, uint64_t bytes); + +extern void zvol_os_strategy(struct buf *bp); +extern int zvol_os_get_volume_blocksize(dev_t dev); + +extern void zvol_os_lock_zv(zvol_state_t *zv); +extern void zvol_os_unlock_zv(zvol_state_t *zv); + +extern void *zvolRemoveDevice(zvol_iokit_t *iokitdev); +extern int zvolRemoveDeviceTerminate(void *iokitdev); +extern int zvolCreateNewDevice(zvol_state_t *zv); +extern int zvolRegisterDevice(zvol_state_t *zv); + +extern int zvolRenameDevice(zvol_state_t *zv); +extern int zvolSetVolsize(zvol_state_t *zv); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h index 7ed2470268..7a80733882 100644 --- a/include/sys/abd_impl.h +++ b/include/sys/abd_impl.h @@ -59,7 +59,7 @@ struct abd { union { struct abd_scatter { uint_t abd_offset; -#if defined(_KERNEL) && ( defined(__FreeBSD__) || defined(__APPLE__) ) +#if defined(_KERNEL) && (defined(__FreeBSD__) || defined(__APPLE__)) uint_t abd_chunk_size; void *abd_chunks[]; #else @@ -134,9 +134,9 @@ void abd_iter_unmap(struct abd_iter *); #if defined(__FreeBSD__) #define abd_enter_critical(flags) critical_enter() #define abd_exit_critical(flags) critical_exit() -#elif defined (__APPLE__) -#define abd_enter_critical(flags) (flags) = ml_set_interrupts_enabled(FALSE) -#define abd_exit_critical(flags) ml_set_interrupts_enabled((flags)) +#elif defined(__APPLE__) +#define abd_enter_critical(flags) (flags) = ml_set_interrupts_enabled(FALSE) +#define abd_exit_critical(flags) ml_set_interrupts_enabled((flags)) #else #define abd_enter_critical(flags) local_irq_save(flags) #define abd_exit_critical(flags) local_irq_restore(flags) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index a528340f9f..85d0f3b95e 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1238,7 +1238,7 @@ typedef enum zfs_ioc { /* * Core features - 81/128 numbers reserved. 
*/ -#if defined (__FreeBSD__) || defined(__APPLE__) +#if defined(__FreeBSD__) || defined(__APPLE__) ZFS_IOC_FIRST = 0, #else ZFS_IOC_FIRST = ('Z' << 8), diff --git a/include/sys/mntent.h b/include/sys/mntent.h index cb463ce292..be62c53976 100644 --- a/include/sys/mntent.h +++ b/include/sys/mntent.h @@ -84,8 +84,8 @@ #define MNTOPT_NOSETUID "nosetuid" /* Set uid not allowed */ #define MNTOPT_BROWSE "browse" /* browsable autofs mount */ #define MNTOPT_NOBROWSE "nobrowse" /* non-browsable autofs mount */ -#define MNTOPT_OWNERS "owners" /* VFS will not ignore ownership information on filesystem objects */ -#define MNTOPT_NOOWNERS "noowners" /* VFS will ignore ownership information on filesystem objects */ +#define MNTOPT_OWNERS "owners" /* use ownership */ +#define MNTOPT_NOOWNERS "noowners" /* ignore ownership */ #else #error "unknown OS" #endif diff --git a/include/sys/sysevent/dev.h b/include/sys/sysevent/dev.h index 1255a176d3..a41f0c0fe8 100644 --- a/include/sys/sysevent/dev.h +++ b/include/sys/sysevent/dev.h @@ -239,7 +239,7 @@ extern "C" { #define DEV_INSTANCE "instance" #define DEV_PROP_PREFIX "prop-" -#if defined (__linux__) || defined (__APPLE__) +#if defined(__linux__) || defined(__APPLE__) #define DEV_IDENTIFIER "devid" #define DEV_PATH "path" #define DEV_IS_PART "is_slice" diff --git a/include/sys/zfs_sa.h b/include/sys/zfs_sa.h index dfac8ce4b6..b1517b9d3f 100644 --- a/include/sys/zfs_sa.h +++ b/include/sys/zfs_sa.h @@ -76,10 +76,11 @@ typedef enum zpl_attr { ZPL_DXATTR, ZPL_PROJID, - /* Apple defines a ADDEDTIME, which is the time the entry was placed in - * the containing directory. Ie, CRTIME and updated when moved into - * a different directory. This can be retrieved with getxattr "FinderInfo" - * or the getattrlist() syscall. + /* + * Apple defines a ADDEDTIME, which is the time the entry was placed + * in the containing directory. Ie, CRTIME and updated when moved + * into a different directory. This can be retrieved with getxattr + * "FinderInfo" or the getattrlist() syscall. 
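As an aside on the comment above: the added time can be fetched from userland with getattrlist() and the standard ATTR_CMN_ADDEDTIME attribute. A minimal sketch, assuming macOS userland; the helper name and reply-buffer layout are illustrative, not part of this change:

/*
 * Hedged sketch: read a file's "date added" via getattrlist(), one of the
 * retrieval paths mentioned in the comment above.
 */
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>
#include <sys/attr.h>

struct addedtime_reply {
	uint32_t	length;		/* filled in by getattrlist() */
	struct timespec	added;		/* ATTR_CMN_ADDEDTIME payload */
} __attribute__((packed));

static int
get_added_time(const char *path, struct timespec *out)
{
	struct attrlist al;
	struct addedtime_reply reply;

	memset(&al, 0, sizeof (al));
	al.bitmapcount = ATTR_BIT_MAP_COUNT;
	al.commonattr = ATTR_CMN_ADDEDTIME;

	if (getattrlist(path, &al, &reply, sizeof (reply), 0) != 0)
		return (-1);
	*out = reply.added;
	return (0);
}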
*/ ZPL_ADDTIME, ZPL_DOCUMENTID, diff --git a/lib/libefi/rdwr_efi_macos.c b/lib/libefi/rdwr_efi_macos.c index 9a1a64ebe4..c2ef128a8f 100644 --- a/lib/libefi/rdwr_efi_macos.c +++ b/lib/libefi/rdwr_efi_macos.c @@ -248,8 +248,8 @@ efi_get_info(int fd, struct dk_cinfo *dki_info) dki_info->dki_partition = 0; } strlcpy(dki_info->dki_dname, - &pathbuf[5], - sizeof(dki_info->dki_dname)); + &pathbuf[5], + sizeof (dki_info->dki_dname)); } /* @@ -1663,7 +1663,7 @@ isDeviceMatchForKeyAndSubstr(char *device, CFStringRef key, CFStringRef substr, if ((error = setupDADiskSession(&ds, device)) == 0) { CFDictionaryRef descDict = NULL; - if((descDict = DADiskCopyDescription(ds.disk)) != NULL) { + if ((descDict = DADiskCopyDescription(ds.disk)) != NULL) { *isMatch = CFDictionaryValueIfPresentMatchesSubstring(descDict, key, substr); @@ -1709,5 +1709,5 @@ osx_device_isvirtual(char *device) isCoreStorageLV, isVirtualInterface); - return (isCoreStorageLV || isVirtualInterface); + return (isCoreStorageLV /* || isVirtualInterface*/); } diff --git a/lib/libspl/include/os/Makefile.am b/lib/libspl/include/os/Makefile.am index 7b362e02ad..22495a05b7 100644 --- a/lib/libspl/include/os/Makefile.am +++ b/lib/libspl/include/os/Makefile.am @@ -5,3 +5,7 @@ endif if BUILD_LINUX SUBDIRS = linux endif + +if BUILD_MACOS +SUBDIRS = macos +endif diff --git a/lib/libspl/include/os/macos/Makefile.am b/lib/libspl/include/os/macos/Makefile.am new file mode 100644 index 0000000000..1d3c559bed --- /dev/null +++ b/lib/libspl/include/os/macos/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = mach rpc sys diff --git a/lib/libspl/include/os/macos/dirent.h b/lib/libspl/include/os/macos/dirent.h new file mode 100644 index 0000000000..b7ffe3d89c --- /dev/null +++ b/lib/libspl/include/os/macos/dirent.h @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _LIBSPL_DIRENT_H +#define _LIBSPL_DIRENT_H + +#include_next + + +/* Handle Linux use of 64 names */ + +#define readdir64 readdir +#define dirent64 dirent + +#endif diff --git a/lib/libspl/include/os/macos/ia32/Makefile.am b/lib/libspl/include/os/macos/ia32/Makefile.am new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/libspl/include/os/macos/ia32/sys/Makefile.am b/lib/libspl/include/os/macos/ia32/sys/Makefile.am new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/libspl/include/os/macos/ia32/sys/asm_linkage.h b/lib/libspl/include/os/macos/ia32/sys/asm_linkage.h new file mode 100644 index 0000000000..0009705ad6 --- /dev/null +++ b/lib/libspl/include/os/macos/ia32/sys/asm_linkage.h @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _IA32_SYS_ASM_LINKAGE_H +#define _IA32_SYS_ASM_LINKAGE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _ASM /* The remainder of this file is only for assembly files */ + +/* + * make annoying differences in assembler syntax go away + */ + +/* + * D16 and A16 are used to insert instructions prefixes; the + * macros help the assembler code be slightly more portable. + */ +#if !defined(__GNUC_AS__) +/* + * /usr/ccs/bin/as prefixes are parsed as separate instructions + */ +#define D16 data16; +#define A16 addr16; + +/* + * (There are some weird constructs in constant expressions) + */ +#define _CONST(const) [const] +#define _BITNOT(const) -1!_CONST(const) +#define _MUL(a, b) _CONST(a \* b) + +#else +/* + * Why not use the 'data16' and 'addr16' prefixes .. well, the + * assembler doesn't quite believe in real mode, and thus argues with + * us about what we're trying to do. + */ +#define D16 .byte 0x66; +#define A16 .byte 0x67; + +#define _CONST(const) (const) +#define _BITNOT(const) ~_CONST(const) +#define _MUL(a, b) _CONST(a * b) + +#endif + +/* + * C pointers are different sizes between i386 and amd64. + * These constants can be used to compute offsets into pointer arrays. + */ +#if defined(__amd64) +#define CLONGSHIFT 3 +#define CLONGSIZE 8 +#define CLONGMASK 7 +#elif defined(__i386) +#define CLONGSHIFT 2 +#define CLONGSIZE 4 +#define CLONGMASK 3 +#endif + +/* + * Since we know we're either ILP32 or LP64 .. 
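With the dirent.h wrapper above mapping the Linux-only readdir64()/dirent64 names onto the native macOS calls, shared code written against the 64-bit names compiles unchanged. A minimal sketch, assuming the libspl include path is in effect; the function is illustrative:

/*
 * Hedged sketch: Linux-style directory walk that builds on macOS thanks to
 * the readdir64/dirent64 shims above.
 */
#include <stdio.h>
#include <dirent.h>

static void
list_dir(const char *path)
{
	DIR *dirp = opendir(path);
	struct dirent64 *dp;

	if (dirp == NULL)
		return;
	while ((dp = readdir64(dirp)) != NULL)
		printf("%s\n", dp->d_name);
	(void) closedir(dirp);
}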
+ */ +#define CPTRSHIFT CLONGSHIFT +#define CPTRSIZE CLONGSIZE +#define CPTRMASK CLONGMASK + +#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) +#error "inconsistent shift constants" +#endif + +#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) +#error "inconsistent mask constants" +#endif + +#define ASM_ENTRY_ALIGN 4, 0x90 + +/* + * SSE register alignment and save areas + */ + +#define XMM_SIZE 16 +#define XMM_ALIGN 16 +#define XMM_ALIGN_LOG 4, 0x90 + +#if defined(__amd64) + +#define SAVE_XMM_PROLOG(sreg, nreg) \ + subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \ + movq %rsp, sreg + +#define RSTOR_XMM_EPILOG(sreg, nreg) \ + addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp + +#elif defined(__i386) + +#define SAVE_XMM_PROLOG(sreg, nreg) \ + subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \ + movl %esp, sreg; \ + addl $XMM_ALIGN, sreg; \ + andl $_BITNOT(XMM_ALIGN-1), sreg + +#define RSTOR_XMM_EPILOG(sreg, nreg) \ + addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; + +#endif /* __i386 */ + +/* + * profiling causes definitions of the MCOUNT and RTMCOUNT + * particular to the type + */ +#ifdef GPROF + +#define MCOUNT(x) \ + pushl %ebp; \ + movl %esp, %ebp; \ + call _mcount; \ + popl %ebp + +#endif /* GPROF */ + +#ifdef PROF + +#define MCOUNT(x) \ +/* CSTYLED */ \ + .lcomm .L_/**/x/**/1, 4, 4; \ + pushl %ebp; \ + movl %esp, %ebp; \ +/* CSTYLED */ \ + movl $.L_/**/x/**/1, %edx; \ + call _mcount; \ + popl %ebp + +#endif /* PROF */ + +/* + * if we are not profiling, MCOUNT should be defined to nothing + */ +#if !defined(PROF) && !defined(GPROF) +#define MCOUNT(x) +#endif /* !defined(PROF) && !defined(GPROF) */ + +#define RTMCOUNT(x) MCOUNT(x) + +/* + * Macro to define weak symbol aliases. These are similar to the ANSI-C + * #pragma weak name = _name + * except a compiler can determine type. The assembler must be told. Hence, + * the second parameter must be the type of the symbol (i.e.: function,...) + */ +#define ANSI_PRAGMA_WEAK(sym, stype) \ + .weak sym; \ +/* CSTYLED */ \ +sym = _/**/sym + +/* + * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in: + * #pragma weak sym1 = sym2 + */ +#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \ + .weak sym1; \ +sym1 = sym2 + +/* + * ENTRY provides the standard procedure entry code and an easy way to + * insert the calls to mcount for profiling. ENTRY_NP is identical, but + * never calls mcount. + */ +#define ENTRY(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl _##x; \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: MCOUNT(x) + +#define ENTRY_NP(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: + +#define RTENTRY(x) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: RTMCOUNT(x) + +/* + * ENTRY2 is identical to ENTRY but provides two labels for the entry point. + */ +#define ENTRY2(x, y) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x, y; \ +/* CSTYLED */ \ +x:; \ +y: MCOUNT(x) + +#define ENTRY_NP2(x, y) \ + .text; \ + .align ASM_ENTRY_ALIGN; \ + .globl x, y; \ +/* CSTYLED */ \ +x:; \ +y: + + +/* + * ALTENTRY provides for additional entry points. + */ +#define ALTENTRY(x) \ + .globl _##x; \ + .globl x; \ +_##x:; \ +x: + +/* + * DGDEF and DGDEF2 provide global data declarations. + * + * DGDEF provides a word aligned word of storage. + * + * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This + * implies this macro is best used for byte arrays. + * + * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. 
+ */ +#define DGDEF2(name, sz) \ + .data; \ + .globl name; \ +name: + +#define DGDEF3(name, sz, algn) \ + .data; \ + .align algn; \ + .globl name; \ +name: + +#define DGDEF(name) DGDEF3(name, 4, 4) + +/* + * SET_SIZE trails a function and set the size for the ELF symbol table. + */ +#define SET_SIZE(x) + +/* + * NWORD provides native word value. + */ +#if defined(__amd64) + +/*CSTYLED*/ +#define NWORD quad + +#elif defined(__i386) + +#define NWORD long + +#endif /* __i386 */ + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/lib/libspl/include/os/macos/mach/Makefile.am b/lib/libspl/include/os/macos/mach/Makefile.am new file mode 100644 index 0000000000..89b0459882 --- /dev/null +++ b/lib/libspl/include/os/macos/mach/Makefile.am @@ -0,0 +1,3 @@ +libspldir = $(includedir)/libspl/sys +libspl_HEADERS = \ + $(top_srcdir)/lib/libspl/include/os/macos/mach/boolean.h diff --git a/lib/libspl/include/os/macos/mach/boolean.h b/lib/libspl/include/os/macos/mach/boolean.h new file mode 100644 index 0000000000..47c93a3151 --- /dev/null +++ b/lib/libspl/include/os/macos/mach/boolean.h @@ -0,0 +1,26 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* Deal with XNU's own boolean_t version */ + +#define boolean_t xnu_boolean_t +#include_next +#undef boolean_t diff --git a/lib/libspl/include/os/macos/mntent.h b/lib/libspl/include/os/macos/mntent.h new file mode 100644 index 0000000000..8183dda00b --- /dev/null +++ b/lib/libspl/include/os/macos/mntent.h @@ -0,0 +1,144 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ * + * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T + * All Rights Reserved + */ + +#ifndef _SYS_MNTENT_H +#define _SYS_MNTENT_H + + +#ifdef __cplusplus +extern "C" { +#endif + +#define MNTTAB "/etc/mnttab" +#define VFSTAB "/etc/vfstab" +#define MNTMAXSTR 128 + +#define MNTTYPE_ZFS "zfs" /* ZFS file system */ +#define MNTTYPE_UFS "ufs" /* Unix file system */ +#define MNTTYPE_NFS "nfs" /* NFS file system */ +#define MNTTYPE_NFS3 "nfs3" /* NFS Version 3 file system */ +#define MNTTYPE_NFS4 "nfs4" /* NFS Version 4 file system */ +#define MNTTYPE_CACHEFS "cachefs" /* Cache File System */ +#define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */ +#define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */ +#define MNTTYPE_LOFS "lofs" /* Loop back file system */ +#define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */ +#define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */ +#define MNTTYPE_SWAP "swap" /* Swap file system */ +#define MNTTYPE_TMPFS "tmpfs" /* Tmp volatile file system */ +#define MNTTYPE_AUTOFS "autofs" /* Automounter ``file'' system */ +#define MNTTYPE_MNTFS "mntfs" /* In-kernel mnttab */ +#define MNTTYPE_DEV "dev" /* /dev file system */ +#define MNTTYPE_CTFS "ctfs" /* Contract file system */ +#define MNTTYPE_OBJFS "objfs" /* Kernel object file system */ +#define MNTTYPE_SHAREFS "sharefs" /* Kernel sharetab file system */ + + +#define MNTOPT_RO "ro" /* Read only */ +#define MNTOPT_RW "rw" /* Read/write */ +#define MNTOPT_RQ "rq" /* Read/write with quotas */ +#define MNTOPT_QUOTA "quota" /* Check quotas */ +#define MNTOPT_NOQUOTA "noquota" /* Don't check quotas */ +#define MNTOPT_ONERROR "onerror" /* action to taken on error */ +#define MNTOPT_SOFT "soft" /* Soft mount */ +#define MNTOPT_SEMISOFT "semisoft" /* partial soft, uncommited interface */ +#define MNTOPT_HARD "hard" /* Hard mount */ +#define MNTOPT_SUID "suid" /* Both setuid and devices allowed */ +#define MNTOPT_NOSUID "nosuid" /* Neither setuid nor devices allowed */ +#define MNTOPT_DEVICES "devices" /* Device-special allowed */ +#define MNTOPT_NODEVICES "nodevices" /* Device-special disallowed */ +#define MNTOPT_SETUID "setuid" /* Set uid allowed */ +#define MNTOPT_NOSETUID "nosetuid" /* Set uid not allowed */ +#define MNTOPT_GRPID "grpid" /* SysV-compatible gid on create */ +#define MNTOPT_REMOUNT "remount" /* Change mount options */ +#define MNTOPT_NOSUB "nosub" /* Disallow mounts on subdirs */ +#define MNTOPT_MULTI "multi" /* Do multi-component lookup */ +#define MNTOPT_INTR "intr" /* Allow NFS ops to be interrupted */ +#define MNTOPT_NOINTR "nointr" /* Don't allow interrupted ops */ +#define MNTOPT_PORT "port" /* NFS server IP port number */ +#define MNTOPT_SECURE "secure" /* Secure (AUTH_DES) mounting */ +#define MNTOPT_RSIZE "rsize" /* Max NFS read size (bytes) */ +#define MNTOPT_WSIZE "wsize" /* Max NFS write size (bytes) */ +#define MNTOPT_TIMEO "timeo" /* NFS timeout (1/10 sec) */ +#define MNTOPT_RETRANS "retrans" /* Max retransmissions (soft mnts) */ +#define MNTOPT_ACTIMEO "actimeo" /* Attr cache timeout (sec) */ +#define MNTOPT_ACREGMIN "acregmin" /* Min attr cache timeout (files) */ +#define MNTOPT_ACREGMAX "acregmax" /* Max attr cache timeout (files) */ +#define MNTOPT_ACDIRMIN "acdirmin" /* Min attr cache timeout (dirs) */ +#define MNTOPT_ACDIRMAX "acdirmax" /* Max attr cache timeout (dirs) */ +#define MNTOPT_NOAC "noac" /* Don't cache attributes at all */ +#define MNTOPT_NOCTO "nocto" /* No close-to-open consistency */ +#define MNTOPT_BG "bg" /* Do mount retries 
in background */ +#define MNTOPT_FG "fg" /* Do mount retries in foreground */ +#define MNTOPT_RETRY "retry" /* Number of mount retries */ +#define MNTOPT_DEV "dev" /* Device id of mounted fs */ +#define MNTOPT_POSIX "posix" /* Get static pathconf for mount */ +#define MNTOPT_MAP "map" /* Automount map */ +#define MNTOPT_DIRECT "direct" /* Automount direct map mount */ +#define MNTOPT_INDIRECT "indirect" /* Automount indirect map mount */ +#define MNTOPT_LLOCK "llock" /* Local locking (no lock manager) */ +#define MNTOPT_IGNORE "ignore" /* Ignore this entry */ +#define MNTOPT_VERS "vers" /* protocol version number indicator */ +#define MNTOPT_PROTO "proto" /* protocol network_id indicator */ +#define MNTOPT_SEC "sec" /* Security flavor indicator */ +#define MNTOPT_SYNCDIR "syncdir" /* Synchronous local directory ops */ +#define MNTOPT_NOSETSEC "nosec" /* Do no allow setting sec attrs */ +#define MNTOPT_NOPRINT "noprint" /* Do not print messages */ +#define MNTOPT_LARGEFILES "largefiles" /* allow large files */ +#define MNTOPT_NOLARGEFILES "nolargefiles" /* don't allow large files */ +#define MNTOPT_FORCEDIRECTIO "forcedirectio" /* Force DirectIO on all files */ +#define MNTOPT_NOFORCEDIRECTIO "noforcedirectio" /* No Force DirectIO */ +#define MNTOPT_DISABLEDIRECTIO "disabledirectio" /* Disable DirectIO ioctls */ +#define MNTOPT_PUBLIC "public" /* Use NFS public file handlee */ +#define MNTOPT_LOGGING "logging" /* enable logging */ +#define MNTOPT_NOLOGGING "nologging" /* disable logging */ +#define MNTOPT_ATIME "atime" /* update atime for files */ +#define MNTOPT_NOATIME "noatime" /* do not update atime for files */ +#define MNTOPT_GLOBAL "global" /* Cluster-wide global mount */ +#define MNTOPT_NOGLOBAL "noglobal" /* Mount local to single node */ +#define MNTOPT_DFRATIME "dfratime" /* Deferred access time updates */ +#define MNTOPT_NODFRATIME "nodfratime" /* No Deferred access time updates */ +#define MNTOPT_NBMAND "nbmand" /* allow non-blocking mandatory locks */ +#define MNTOPT_NONBMAND "nonbmand" /* deny non-blocking mandatory locks */ +#define MNTOPT_XATTR "xattr" /* enable extended attributes */ +#define MNTOPT_NOXATTR "noxattr" /* disable extended attributes */ +#define MNTOPT_EXEC "exec" /* enable executables */ +#define MNTOPT_NOEXEC "noexec" /* disable executables */ +#define MNTOPT_RESTRICT "restrict" /* restricted autofs mount */ +#define MNTOPT_BROWSE "browse" /* browsable autofs mount */ +#define MNTOPT_NOBROWSE "nobrowse" /* non-browsable autofs mount */ +/* VFS will not ignore ownership information on filesystem objects */ +#define MNTOPT_OWNERS "owners" +/* VFS will ignore ownership information on filesystem objects */ +#define MNTOPT_NOOWNERS "noowners" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MNTENT_H */ diff --git a/lib/libspl/include/os/macos/poll.h b/lib/libspl/include/os/macos/poll.h new file mode 100644 index 0000000000..2bb5203d00 --- /dev/null +++ b/lib/libspl/include/os/macos/poll.h @@ -0,0 +1,31 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LIBSPL_POLL_H +#define _LIBSPL_POLL_H + +#include_next + +#ifndef O_DIRECT +#define O_DIRECT 0 +#endif + +#endif diff --git a/lib/libspl/include/os/macos/rpc/Makefile.am b/lib/libspl/include/os/macos/rpc/Makefile.am new file mode 100644 index 0000000000..645ec772f9 --- /dev/null +++ b/lib/libspl/include/os/macos/rpc/Makefile.am @@ -0,0 +1,3 @@ +libspldir = $(includedir)/libspl/sys +libspl_HEADERS = \ + $(top_srcdir)/lib/libspl/include/os/macos/rpc/xdr.h diff --git a/lib/libspl/include/os/macos/rpc/xdr.h b/lib/libspl/include/os/macos/rpc/xdr.h new file mode 100644 index 0000000000..9fc13fefaf --- /dev/null +++ b/lib/libspl/include/os/macos/rpc/xdr.h @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T + * All Rights Reserved + * + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#ifndef LIBSPL_MACOS_RPC_XDR_H +#define LIBSPL_MACOS_RPC_XDR_H + +#include +#include_next + +#endif /* LIBSPL_MACOS_RPC_XDR_H */ diff --git a/lib/libspl/include/os/macos/stdlib.h b/lib/libspl/include/os/macos/stdlib.h new file mode 100644 index 0000000000..30a9d29f17 --- /dev/null +++ b/lib/libspl/include/os/macos/stdlib.h @@ -0,0 +1,28 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LIBSPL_STDLIB_H +#define _LIBSPL_STDLIB_H + +#include_next +#include + +#endif diff --git a/lib/libspl/include/os/macos/sys/Makefile.am b/lib/libspl/include/os/macos/sys/Makefile.am new file mode 100644 index 0000000000..fb063b875c --- /dev/null +++ b/lib/libspl/include/os/macos/sys/Makefile.am @@ -0,0 +1,17 @@ +libspldir = $(includedir)/libspl/sys +libspl_HEADERS = \ + $(top_srcdir)/lib/libspl/include/os/macos/mach/boolean.h \ + $(top_srcdir)/lib/libspl/include/os/macos/rpc/xdr.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/byteorder.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/errno.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/file.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/kernel_types.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/mnttab.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/mount.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/param.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/stat.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/sysmacros.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/uio.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/vfs.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/xattr.h \ + $(top_srcdir)/lib/libspl/include/os/macos/sys/zfs_context_os.h diff --git a/lib/libspl/include/os/macos/sys/byteorder.h b/lib/libspl/include/os/macos/sys/byteorder.h new file mode 100644 index 0000000000..dd578e4493 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/byteorder.h @@ -0,0 +1,279 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +#ifndef _SYS_BYTEORDER_H +#define _SYS_BYTEORDER_H + +#include +#include <_types.h> + +/* + * Define the order of 32-bit words in 64-bit words. + */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 + +/* + * Definitions for byte order, according to byte significance from low + * address to high. 
+ */ +#undef _LITTLE_ENDIAN +/* LSB first: i386, vax */ +#define _LITTLE_ENDIAN 1234 +/* LSB first in word, MSW first in long */ +#define _PDP_ENDIAN 3412 + +#define _BYTE_ORDER _LITTLE_ENDIAN + +/* + * Deprecated variants that don't have enough underscores to be useful in more + * strict namespaces. + */ +#if __BSD_VISIBLE +#define LITTLE_ENDIAN _LITTLE_ENDIAN +#define PDP_ENDIAN _PDP_ENDIAN +#define BYTE_ORDER _BYTE_ORDER +#endif + +#define __bswap16_gen(x) (__uint16_t)((x) << 8 | (x) >> 8) +#define __bswap32_gen(x) \ + (((__uint32_t)__bswap16((x) & 0xffff) << 16) | __bswap16((x) >> 16)) +#define __bswap64_gen(x) \ + (((__uint64_t)__bswap32((x) & 0xffffffff) << 32) | __bswap32((x) >> 32)) + +#ifdef __GNUCLIKE_BUILTIN_CONSTANT_P +#define __bswap16(x) \ + ((__uint16_t)(__builtin_constant_p(x) ? \ + __bswap16_gen((__uint16_t)(x)) : __bswap16_var(x))) +#define __bswap32(x) \ + (__builtin_constant_p(x) ? \ + __bswap32_gen((__uint32_t)(x)) : __bswap32_var(x)) +#define __bswap64(x) \ + (__builtin_constant_p(x) ? \ + __bswap64_gen((__uint64_t)(x)) : __bswap64_var(x)) +#else +/* XXX these are broken for use in static initializers. */ +#define __bswap16(x) __bswap16_var(x) +#define __bswap32(x) __bswap32_var(x) +#define __bswap64(x) __bswap64_var(x) +#endif + +/* These are defined as functions to avoid multiple evaluation of x. */ + +static __inline __uint16_t +__bswap16_var(__uint16_t _x) +{ + + return (__bswap16_gen(_x)); +} + +static __inline __uint32_t +__bswap32_var(__uint32_t _x) +{ + +#ifdef __GNUCLIKE_ASM + __asm("bswap %0" : "+r" (_x)); + return (_x); +#else + return (__bswap32_gen(_x)); +#endif +} +#define __htonl(x) __bswap32(x) +#define __htons(x) __bswap16(x) +#define __ntohl(x) __bswap32(x) +#define __ntohs(x) __bswap16(x) + +#include +#include + +#if defined(__GNUC__) && defined(_ASM_INLINES) && \ + (defined(__i386) || defined(__amd64)) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * macros for conversion between host and (internet) network byte order + */ + +#if defined(_BIG_ENDIAN) && !defined(ntohl) && !defined(__lint) +/* big-endian */ +#if defined(_BIG_ENDIAN) && (defined(__amd64__) || defined(__amd64)) +#error "incompatible ENDIAN / ARCH combination" +#endif +#define ntohl(x) (x) +#define ntohs(x) (x) +#define htonl(x) (x) +#define htons(x) (x) + +#elif !defined(ntohl) /* little-endian */ + +#ifndef _IN_PORT_T +#define _IN_PORT_T +typedef uint16_t in_port_t; +#endif + +#ifndef _IN_ADDR_T +#define _IN_ADDR_T +typedef uint32_t in_addr_t; +#endif + +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) +extern uint32_t htonl(uint32_t); +extern uint16_t htons(uint16_t); +extern uint32_t ntohl(uint32_t); +extern uint16_t ntohs(uint16_t); +#else +extern in_addr_t htonl(in_addr_t); +extern in_port_t htons(in_port_t); +extern in_addr_t ntohl(in_addr_t); +extern in_port_t ntohs(in_port_t); +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) */ +#endif + +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) + +/* + * Macros to reverse byte order + */ +#define BSWAP_8(x) ((x) & 0xff) +#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) +#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) +#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) + +#define BMASK_8(x) ((x) & 0xff) +#define BMASK_16(x) ((x) & 0xffff) +#define BMASK_32(x) ((x) & 0xffffffff) +#define BMASK_64(x) (x) + +/* + * Macros to convert from a specific byte order to/from native byte order + */ +#ifdef _BIG_ENDIAN +#define BE_8(x) 
BMASK_8(x) +#define BE_16(x) BMASK_16(x) +#define BE_32(x) BMASK_32(x) +#define BE_64(x) BMASK_64(x) +#define LE_8(x) BSWAP_8(x) +#define LE_16(x) BSWAP_16(x) +#define LE_32(x) BSWAP_32(x) +#define LE_64(x) BSWAP_64(x) +#else +#define LE_8(x) BMASK_8(x) +#define LE_16(x) BMASK_16(x) +#define LE_32(x) BMASK_32(x) +#define LE_64(x) BMASK_64(x) +#define BE_8(x) BSWAP_8(x) +#define BE_16(x) BSWAP_16(x) +#define BE_32(x) BSWAP_32(x) +#define BE_64(x) BSWAP_64(x) +#endif + +/* + * Macros to read unaligned values from a specific byte order to + * native byte order + */ + +#define BE_IN8(xa) \ + *((uint8_t *)(xa)) + +#define BE_IN16(xa) \ + (((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1)) + +#define BE_IN32(xa) \ + (((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2)) + +#define BE_IN64(xa) \ + (((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4)) + +#define LE_IN8(xa) \ + *((uint8_t *)(xa)) + +#define LE_IN16(xa) \ + (((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa)) + +#define LE_IN32(xa) \ + (((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa)) + +#define LE_IN64(xa) \ + (((uint64_t)LE_IN32((uint8_t *)(xa) + 4) << 32) | LE_IN32(xa)) + +/* + * Macros to write unaligned values from native byte order to a specific byte + * order. + */ + +#define BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv); + +#define BE_OUT16(xa, yv) \ + BE_OUT8((uint8_t *)(xa) + 1, yv); \ + BE_OUT8((uint8_t *)(xa), (yv) >> 8); + +#define BE_OUT32(xa, yv) \ + BE_OUT16((uint8_t *)(xa) + 2, yv); \ + BE_OUT16((uint8_t *)(xa), (yv) >> 16); + +#define BE_OUT64(xa, yv) \ + BE_OUT32((uint8_t *)(xa) + 4, yv); \ + BE_OUT32((uint8_t *)(xa), (yv) >> 32); + +#define LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv); + +#define LE_OUT16(xa, yv) \ + LE_OUT8((uint8_t *)(xa), yv); \ + LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8); + +#define LE_OUT32(xa, yv) \ + LE_OUT16((uint8_t *)(xa), yv); \ + LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16); + +#define LE_OUT64(xa, yv) \ + LE_OUT32((uint8_t *)(xa), yv); \ + LE_OUT32((uint8_t *)(xa) + 4, (yv) >> 32); + +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_BYTEORDER_H */ diff --git a/lib/libspl/include/os/macos/sys/errno.h b/lib/libspl/include/os/macos/sys/errno.h new file mode 100644 index 0000000000..af4846ebb9 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/errno.h @@ -0,0 +1,31 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
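A minimal illustration of the unaligned big-endian helpers defined above, assuming the libspl include path; the example value was checked by hand:

/*
 * Hedged sketch: read and write a 32-bit big-endian length prefix with the
 * BE_IN32/BE_OUT32 macros above.
 */
#include <stdint.h>
#include <sys/byteorder.h>

static uint32_t
read_be32(const uint8_t *buf)
{
	/* {0x00, 0x00, 0x01, 0x00} yields 256 */
	return (BE_IN32(buf));
}

static void
write_be32(uint8_t *buf, uint32_t value)
{
	BE_OUT32(buf, value);	/* stores the most significant byte first */
}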
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include_next + +#define EBADE EBADMACHO +#define ECKSUM EBADE +#define EFRAGS EIDRM +#define EREMOTEIO ENOLINK +#define ENOTACTIVE ENOPOLICY +#define ECHRNG EMULTIHOP + +#define ERESTART (-1) /* restart syscall */ diff --git a/lib/libspl/include/os/macos/sys/fcntl.h b/lib/libspl/include/os/macos/sys/fcntl.h new file mode 100644 index 0000000000..f0b03be99b --- /dev/null +++ b/lib/libspl/include/os/macos/sys/fcntl.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _LIBSPL_SYS_FCNTL_H +#define _LIBSPL_SYS_FCNTL_H + +#include_next + +#define O_LARGEFILE 0 +#define O_RSYNC 0 + +#ifndef O_DIRECT +#define O_DIRECT 0 +#endif + +#endif diff --git a/lib/libspl/include/os/macos/sys/file.h b/lib/libspl/include/os/macos/sys/file.h new file mode 100644 index 0000000000..94a33cbb33 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/file.h @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBSPL_SYS_FILE_H +#define _LIBSPL_SYS_FILE_H + +#include_next + +#define FCREAT O_CREAT +#define FTRUNC O_TRUNC +#define FOFFMAX 0 +#define FSYNC O_SYNC +#define FDSYNC O_DSYNC +#define FRSYNC O_RSYNC +#define FEXCL O_EXCL + +#define IO_DIRECT 0 + +#define FNODSYNC 0x10000 /* fsync pseudo flag */ +#define FNOFOLLOW 0x20000 /* don't follow symlinks */ +#define FIGNORECASE 0x80000 /* request case-insensitive lookups */ + +#endif diff --git a/lib/libspl/include/os/macos/sys/kernel_types.h b/lib/libspl/include/os/macos/sys/kernel_types.h new file mode 100644 index 0000000000..5796351a20 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/kernel_types.h @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef LIBSPL_SYS_KERNEL_TYPES_H +#define LIBSPL_SYS_KERNEL_TYPES_H + +/* + * Unfortunately, XNU defines uio_t, proc_t and vnode_t differently to + * ZFS, so we need to hack around it. + */ + +#undef vnode_t +#undef uio_t +#define proc_t kernel_proc_t +#include_next +#define vnode_t struct vnode +#define uio_t struct uio +#undef proc_t + + +/* Other missing Linux types */ +typedef off_t loff_t; + +#endif diff --git a/lib/libspl/include/os/macos/sys/mnttab.h b/lib/libspl/include/os/macos/sys/mnttab.h new file mode 100644 index 0000000000..9ba5d08b21 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/mnttab.h @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ +/* Copyright 2006 Ricardo Correia */ + +#ifndef _SYS_MNTTAB_H +#define _SYS_MNTTAB_H + +#include +#include +#include +#include + +#ifdef MNTTAB +#undef MNTTAB +#endif /* MNTTAB */ + +#include +#include +#define MNTTAB _PATH_DEVNULL +#define MS_NOMNTTAB 0x0 +#define MS_RDONLY 0x1 +#define umount2(p, f) unmount(p, f) +#define MNT_LINE_MAX 4096 + +#define MNT_TOOLONG 1 /* entry exceeds MNT_LINE_MAX */ +#define MNT_TOOMANY 2 /* too many fields in line */ +#define MNT_TOOFEW 3 /* too few fields in line */ + +struct mnttab { + char *mnt_special; + char *mnt_mountp; + char *mnt_fstype; + char *mnt_mntopts; + uint_t mnt_major; + uint_t mnt_minor; + uint32_t mnt_fssubtype; +}; + +#define extmnttab mnttab + +struct stat64; +struct statfs; + +extern DIR *fdopendir(int fd); +extern int openat64(int, const char *, int, ...); + +extern int getmntany(FILE *fd, struct mnttab *mgetp, struct mnttab *mrefp); +extern int getmntent(FILE *fp, struct mnttab *mp); +extern char *hasmntopt(struct mnttab *mnt, char *opt); +extern int getextmntent(const char *path, struct extmnttab *entry, + struct stat64 *statbuf); + +extern void statfs2mnttab(struct statfs *sfs, struct mnttab *mp); + +#ifndef AT_SYMLINK_NOFOLLOW +#define AT_SYMLINK_NOFOLLOW 0x100 +#endif + +extern int fstatat64(int, const char *, struct stat *, int); + +#endif diff --git a/lib/libspl/include/os/macos/sys/mount.h b/lib/libspl/include/os/macos/sys/mount.h new file mode 100644 index 0000000000..970d8affcc --- /dev/null +++ b/lib/libspl/include/os/macos/sys/mount.h @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#ifndef _LIBSPL_SYS_MOUNT_H +#define _LIBSPL_SYS_MOUNT_H + +#undef _SYS_MOUNT_H_ + +#include +#include +#include +#include +#include + +/* Unfortunately, XNU has a different meaning for "vnode_t". */ +#undef vnode_t +#include_next +#define vnode_t struct vnode + +/* + * Some old glibc headers don't define BLKGETSIZE64 + * and we don't want to require the kernel headers + */ +#if !defined(BLKGETSIZE64) +#define BLKGETSIZE64 _IOR(0x12, 114, size_t) +#endif + +/* + * Some old glibc headers don't correctly define MS_DIRSYNC and + * instead use the enum name S_WRITE. When using these older + * headers define MS_DIRSYNC to be S_WRITE. + */ +#if !defined(MS_DIRSYNC) +#define MS_DIRSYNC S_WRITE +#endif + +/* + * Some old glibc headers don't correctly define MS_POSIXACL and + * instead leave it undefined. When using these older headers define + * MS_POSIXACL to the reserved value of (1<<16). 
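The mnttab emulation declared above keeps the Solaris calling convention, so portable callers can iterate mounts the usual way. A minimal sketch, assuming the shimmed getmntent() returns 0 per entry and non-zero at the end, and that MNTTAB (here /dev/null) is only a token handle; the function is illustrative:

/*
 * Hedged sketch: list every ZFS mount carrying the "noowners" option using
 * the getmntent()/hasmntopt() shims declared above.
 */
#include <stdio.h>
#include <string.h>
#include <mntent.h>
#include <sys/mnttab.h>

static void
list_noowners_zfs(void)
{
	FILE *fp = fopen(MNTTAB, "r");
	struct mnttab entry;

	if (fp == NULL)
		return;
	while (getmntent(fp, &entry) == 0) {
		if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) == 0 &&
		    hasmntopt(&entry, MNTOPT_NOOWNERS) != NULL)
			printf("%s\n", entry.mnt_mountp);
	}
	(void) fclose(fp);
}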
+ */ +#if !defined(MS_POSIXACL) +#define MS_POSIXACL (1<<16) +#endif + +#define MS_NOSUID MNT_NOSUID +#define MS_NOEXEC MNT_NOEXEC +#define MS_NODEV 0 +#define S_WRITE 0 +#define MS_BIND 0 +#define MS_REMOUNT 0 +#define MS_SYNCHRONOUS MNT_SYNCHRONOUS + +#define MS_USERS (MS_NOEXEC|MS_NOSUID|MS_NODEV) +#define MS_OWNER (MS_NOSUID|MS_NODEV) +#define MS_GROUP (MS_NOSUID|MS_NODEV) +#define MS_COMMENT 0 + +/* + * Older glibc headers did not define all the available + * umount2(2) flags. Both MNT_FORCE and MNT_DETACH are supported in the + * kernel back to 2.4.11 so we define them correctly if they are missing. + */ +#ifdef MNT_FORCE +#define MS_FORCE MNT_FORCE +#else +#define MS_FORCE 0x00000001 +#endif /* MNT_FORCE */ + +#ifdef MNT_DETACH +#define MS_DETACH MNT_DETACH +#else +#define MS_DETACH 0x00000002 +#endif /* MNT_DETACH */ + +/* + * Overlay mount is default in Linux, but for solaris/zfs + * compatibility, MS_OVERLAY is defined to explicitly have the user + * provide a flag (-O) to mount over a non empty directory. + */ +#define MS_OVERLAY 0x00000004 + +/* + * MS_CRYPT indicates that encryption keys should be loaded if they are not + * already available. This is not defined in glibc, but it is never seen by + * the kernel so it will not cause any problems. + */ +#define MS_CRYPT 0x00000008 + +#endif /* _LIBSPL_SYS_MOUNT_H */ diff --git a/lib/libspl/include/os/macos/sys/param.h b/lib/libspl/include/os/macos/sys/param.h new file mode 100644 index 0000000000..a9815f10b5 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/param.h @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_PARAM_H +#define _LIBSPL_SYS_PARAM_H + +#include_next +#include + +/* + * File system parameters and macros. + * + * The file system is made out of blocks of at most MAXBSIZE units, + * with smaller units (fragments) only in the last direct block. + * MAXBSIZE primarily determines the size of buffers in the buffer + * pool. It may be made larger without any effect on existing + * file systems; however making it smaller may make some file + * systems unmountable. + * + * Note that the blocked devices are assumed to have DEV_BSIZE + * "sectors" and that fragments must be some multiple of this size. 
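Given the flag mapping above (MS_FORCE to MNT_FORCE) and the umount2() alias from the mnttab.h shim, a forced unmount can be spelled the same way as on the other platforms. A minimal sketch, assuming the libspl headers above are on the include path; the mountpoint is illustrative:

/*
 * Hedged sketch: forced unmount through the shims above; umount2() expands
 * to unmount(2) and MS_FORCE maps to MNT_FORCE on macOS.
 */
#include <sys/mnttab.h>
#include <sys/mount.h>

static int
force_unmount(const char *mountpoint)
{
	return (umount2(mountpoint, MS_FORCE));
}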
+ */ +#define MAXNAMELEN 256 + +#define UID_NOBODY 60001 /* user ID no body */ +#define GID_NOBODY UID_NOBODY +#define UID_NOACCESS 60002 /* user ID no access */ + +#define MAXUID UINT32_MAX /* max user id */ +#define MAXPROJID MAXUID /* max project id */ + +#ifndef PAGESIZE +#define PAGESIZE (sysconf(_SC_PAGESIZE)) +#endif /* PAGESIZE */ + +extern int execvpe(const char *name, char * const argv[], char * const envp[]); + +struct zfs_handle; + +#endif diff --git a/lib/libspl/include/os/macos/sys/stat.h b/lib/libspl/include/os/macos/sys/stat.h new file mode 100644 index 0000000000..1c7858194d --- /dev/null +++ b/lib/libspl/include/os/macos/sys/stat.h @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _LIBSPL_SYS_STAT_H +#define _LIBSPL_SYS_STAT_H + +#include_next + +#include +#include /* for BLKGETSIZE64 */ + +#define MAXOFFSET_T OFF_MAX + +#ifndef _KERNEL +#include +#endif + +static inline int +fstat_blk(int fd, struct stat *st) +{ + if (fstat(fd, st) == -1) + return (-1); + + /* In OS X we need to use ioctl to get the size of a block dev */ + if (st->st_mode & (S_IFBLK | S_IFCHR)) { + uint32_t blksize; + uint64_t blkcnt; + + if (ioctl(fd, DKIOCGETBLOCKSIZE, &blksize) < 0) { + return (-1); + } + if (ioctl(fd, DKIOCGETBLOCKCOUNT, &blkcnt) < 0) { + return (-1); + } + + st->st_size = (off_t)((uint64_t)blksize * blkcnt); + } + + return (0); +} + + +/* + * Deal with Linux use of 64 for everything. + * OsX has moved past it, dropped all 32 versions, and + * standard form is 64 bit. + */ + +#define stat64 stat +#define lstat64 lstat +#define fstat64 fstat +#define fstat64_blk fstat_blk +#define statfs64 statfs + +#endif /* _LIBSPL_SYS_STAT_H */ diff --git a/lib/libspl/include/os/macos/sys/sysmacros.h b/lib/libspl/include/os/macos/sys/sysmacros.h new file mode 100644 index 0000000000..7480eb85a5 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/sysmacros.h @@ -0,0 +1,105 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
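The fstat_blk() helper earlier in this hunk falls back to DKIOCGETBLOCKSIZE/DKIOCGETBLOCKCOUNT so that st_size is meaningful for block devices. A minimal usage sketch through the fstat64_blk alias, assuming the libspl include path; the device path passed in would be a placeholder such as /dev/diskN:

/*
 * Hedged sketch: size of a block device via the fstat64_blk alias above.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

static long long
device_size(const char *dev)
{
	struct stat st;
	int fd = open(dev, O_RDONLY);

	if (fd == -1)
		return (-1);
	if (fstat64_blk(fd, &st) != 0) {
		(void) close(fd);
		return (-1);
	}
	(void) close(fd);
	return ((long long)st.st_size);
}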
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBSPL_SYS_SYSMACROS_H +#define _LIBSPL_SYS_SYSMACROS_H + +/* common macros */ +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) ((a) < (b) ? (b) : (a)) +#endif +#ifndef ABS +#define ABS(a) ((a) < 0 ? -(a) : (a)) +#endif +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0])) +#endif +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +#define makedevice(maj, min) makedev(maj, min) +#define _sysconf(a) sysconf(a) + +/* + * Compatibility macros/typedefs needed for Solaris -> Linux port + */ +#define P2ALIGN(x, align) ((x) & -(align)) +#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) +#define P2ROUNDUP_TYPED(x, align, type) \ + (-(-(type)(x) & -(type)(align))) +#define P2BOUNDARY(off, len, align) \ + (((off) ^ ((off) + (len) - 1)) > (align) - 1) +#define P2PHASE(x, align) ((x) & ((align) - 1)) +#define P2NPHASE(x, align) (-(x) & ((align) - 1)) +#define P2NPHASE_TYPED(x, align, type) \ + (-(type)(x) & ((type)(align) - 1)) +#define ISP2(x) (((x) & ((x) - 1)) == 0) +#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) + +/* + * Typed version of the P2* macros. These macros should be used to ensure + * that the result is correctly calculated based on the data type of (x), + * which is passed in as the last argument, regardless of the data + * type of the alignment. For example, if (x) is of type uint64_t, + * and we want to round it up to a page boundary using "PAGESIZE" as + * the alignment, we can do either + * P2ROUNDUP(x, (uint64_t)PAGESIZE) + * or + * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t) + */ +#define P2ALIGN_TYPED(x, align, type) \ + ((type)(x) & -(type)(align)) +#define P2PHASE_TYPED(x, align, type) \ + ((type)(x) & ((type)(align) - 1)) +#define P2NPHASE_TYPED(x, align, type) \ + (-(type)(x) & ((type)(align) - 1)) +#define P2ROUNDUP_TYPED(x, align, type) \ + (-(-(type)(x) & -(type)(align))) +#define P2END_TYPED(x, align, type) \ + (-(~(type)(x) & -(type)(align))) +#define P2PHASEUP_TYPED(x, align, phase, type) \ + ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align))) +#define P2CROSS_TYPED(x, y, align, type) \ + (((type)(x) ^ (type)(y)) > (type)(align) - 1) +#define P2SAMEHIGHBIT_TYPED(x, y, type) \ + (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y))) + + +/* avoid any possibility of clashing with version */ +#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) +#endif + +#ifndef RLIM64_INFINITY +#define RLIM64_INFINITY (~0ULL) +#endif + +#endif /* _LIBSPL_SYS_SYSMACROS_H */ diff --git a/lib/libspl/include/os/macos/sys/uio.h b/lib/libspl/include/os/macos/sys/uio.h new file mode 100644 index 0000000000..f646b3e5d5 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/uio.h @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
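The P2* helpers above only behave for power-of-two alignments; a few worked values, checked by hand, make the semantics concrete:

/*
 * Hedged illustration of the power-of-two alignment macros above.
 */
#include <assert.h>
#include <stdint.h>
#include <sys/sysmacros.h>

static void
p2_examples(void)
{
	assert(ISP2(4096));
	assert(P2ALIGN(5000, 4096) == 4096);	/* round down to boundary */
	assert(P2ROUNDUP(5000, 4096) == 8192);	/* round up to boundary */
	assert(P2PHASE(5000, 4096) == 904);	/* offset within the block */
	assert(IS_P2ALIGNED(8192, 4096));
}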
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +#ifndef _LIBSPL_SYS_UIO_H +#define _LIBSPL_SYS_UIO_H + +#include +#include_next + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* + * I/O parameter information. A uio structure describes the I/O which + * is to be performed by an operation. Typically the data movement will + * be performed by a routine such as uiomove(), which updates the uio + * structure to reflect what was done. + */ + +typedef struct iovec iovec_t; + + +/* + * I/O direction. + */ +// typedef enum uio_rw { UIO_READ, UIO_WRITE } uio_rw_t; + +/* + * Segment flag values. + */ +typedef enum uio_seg { UIO_USERSPACE, UIO_SYSSPACE, UIO_USERISPACE } uio_seg_t; + + +struct uio { + struct iovec *uio_iov; /* pointer to array of iovecs */ + int uio_iovcnt; /* number of iovecs */ + off_t uio_offset; /* file offset */ + uio_seg_t uio_segflg; /* address space (kernel or user) */ + off_t uio_limit; /* u-limit (maximum byte offset) */ + ssize_t uio_resid; /* residual count */ + enum uio_rw uio_rw; + int uio_max_iovs; /* max iovecs this uio_t can hold */ + uint32_t uio_index; /* Current index */ +}; + + +uio_t *uio_create(int iovcount, off_t offset, int spacetype, int iodirection); +void uio_free(uio_t *uio); +int uio_addiov(uio_t *uio, user_addr_t baseaddr, user_size_t length); +int uio_isuserspace(uio_t *uio); +int uio_getiov(uio_t *uio, int index, user_addr_t *baseaddr, + user_size_t *length); +int uio_iovcnt(uio_t *uio); +off_t uio_offset(uio_t *uio); +void uio_update(uio_t *uio, user_size_t count); +uint64_t uio_resid(uio_t *uio); +user_addr_t uio_curriovbase(uio_t *uio); +user_size_t uio_curriovlen(uio_t *uio); +void uio_setoffset(uio_t *uio, off_t a_offset); +uio_t *uio_duplicate(uio_t *uio); +int uio_rw(uio_t *a_uio); +void uio_setrw(uio_t *a_uio, int a_value); + +int uiomove(void *, uint32_t, enum uio_rw, struct uio *); +int spllib_uiomove(const uint8_t *, uint32_t, struct uio *); +void uioskip(struct uio *, uint32_t); +int uiodup(struct uio *, struct uio *, iovec_t *, int); + +// xuio struct is not used in this platform, but we define it +// to allow compilation and easier patching +typedef enum xuio_type { + UIOTYPE_ASYNCIO, + UIOTYPE_ZEROCOPY, +} xuio_type_t; + + +#define UIOA_IOV_MAX 16 + +typedef struct uioa_page_s { + int uioa_pfncnt; + void **uioa_ppp; + caddr_t uioa_base; + size_t uioa_len; +} uioa_page_t; + +typedef struct xuio { + uio_t *xu_uio; + enum xuio_type xu_type; + union { + struct { + 
uint32_t xu_a_state; + ssize_t xu_a_mbytes; + uioa_page_t *xu_a_lcur; + void **xu_a_lppp; + void *xu_a_hwst[4]; + uioa_page_t xu_a_locked[UIOA_IOV_MAX]; + } xu_aio; + struct { + int xu_zc_rw; + void *xu_zc_priv; + } xu_zc; + } xu_ext; +} xuio_t; + +#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv +#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw + +/* + * same as uiomove() but doesn't modify uio structure. + * return in cbytes how many bytes were copied. + */ +static inline int uiocopy(const unsigned char *p, uint32_t n, + enum uio_rw rw, struct uio *uio, uint64_t *cbytes) +{ + int result; + struct uio *nuio = uio_duplicate(uio); + unsigned long long x = uio_resid(uio); + if (!nuio) + return (ENOMEM); + uio_setrw(nuio, rw); + result = spllib_uiomove(p, n, nuio); + *cbytes = (x - uio_resid(nuio)); + uio_free(nuio); + return (result); +} + +// Apple's uiomove puts the uio_rw in uio_create +#define uiomove(A, B, C, D) spllib_uiomove((A), (B), (D)) +#define uioskip(A, B) uio_update((A), (B)) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_UIO_H */ diff --git a/lib/libspl/include/os/macos/sys/vfs.h b/lib/libspl/include/os/macos/sys/vfs.h new file mode 100644 index 0000000000..a2ffcc08d8 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/vfs.h @@ -0,0 +1,26 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef ZFS_SYS_VFS_H_ +#define ZFS_SYS_VFS_H_ + +#endif /* !ZFS_SYS_VFS_H_ */ diff --git a/lib/libspl/include/os/macos/sys/xattr.h b/lib/libspl/include/os/macos/sys/xattr.h new file mode 100644 index 0000000000..045f681b1e --- /dev/null +++ b/lib/libspl/include/os/macos/sys/xattr.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
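[Illustrative note, not part of the patch.] Because Apple's uio carries the transfer direction inside the uio itself (set at uio_create() time), the uiomove()/uioskip() wrappers above drop or remap arguments rather than change behaviour. A hedged sketch of the intended call pattern, assuming the declarations in this header are in scope (the function name is hypothetical):

/*
 * Sketch only: uiocopy() works on a duplicate, so the caller's uio is only
 * advanced by the explicit uioskip() below (which maps to uio_update()).
 */
static int
copy_then_consume(const unsigned char *buf, uint32_t len,
    enum uio_rw rw, struct uio *uio)
{
	uint64_t copied = 0;
	int err;

	err = uiocopy(buf, len, rw, uio, &copied);
	if (err == 0)
		uioskip(uio, (uint32_t)copied);
	return (err);
}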
+ */ + +#ifndef _LIBSPL_SYS_XATTR_H +#define _LIBSPL_SYS_XATTR_H + +#include_next + +/* macOS has one more argument */ +#define setxattr(A, B, C, D, E) setxattr(A, B, C, D, E, 0) +#define getxattr(A, B, C, D, E) getxattr(A, B, C, D, E, 0) + +#endif diff --git a/lib/libspl/include/os/macos/sys/zfs_context_os.h b/lib/libspl/include/os/macos/sys/zfs_context_os.h new file mode 100644 index 0000000000..3324a1cf25 --- /dev/null +++ b/lib/libspl/include/os/macos/sys/zfs_context_os.h @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef ZFS_CONTEXT_OS_H_ +#define ZFS_CONTEXT_OS_H_ + +#include + +#define ZFS_EXPORTS_PATH "/etc/exports" +#define MNTTYPE_ZFS_SUBTYPE ('Z'<<24|'F'<<16|'S'<<8) + +struct spa_iokit; +typedef struct spa_iokit spa_iokit_t; + +typedef off_t loff_t; + +struct zfs_handle; + +extern void zfs_rollback_os(struct zfs_handle *zhp); +extern void libzfs_macos_wrapfd(int *srcfd, boolean_t send); + +#endif diff --git a/lib/libspl/include/os/macos/time.h b/lib/libspl/include/os/macos/time.h new file mode 100644 index 0000000000..1f7068fcbf --- /dev/null +++ b/lib/libspl/include/os/macos/time.h @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LIBSPL_TIME_H +#define _LIBSPL_TIME_H + +#include_next + +/* Linux also has a timer_create() API we need to emulate. 
*/ + +/* + * OsX version can probably be implemented by using: + * dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, queue); + * dispatch_source_set_event_handler(timer1, ^{vector1(timer1);}); + * dispatch_source_set_cancel_handler(timer1 + * dispatch_time_t start = dispatch_time(DISPATCH_TIME_NOW, NSEC_PER_SEC); + * dispatch_source_set_timer(timer1, start, NSEC_PER_SEC / 5, 0); + */ + +typedef void *timer_t; + +struct itimerspec { + struct timespec it_interval; /* timer period */ + struct timespec it_value; /* timer expiration */ +}; + +static inline int +timer_create(clockid_t clockid, struct sigevent *sevp, + timer_t *timerid) +{ + return (0); +} + +static inline int +timer_settime(timer_t id, int flags, + const struct itimerspec *its, struct itimerspec *remainvalue) +{ + return (0); +} + +static inline int +timer_delete(timer_t id) +{ + return (0); +} + +#endif diff --git a/lib/libspl/include/os/macos/unistd.h b/lib/libspl/include/os/macos/unistd.h new file mode 100644 index 0000000000..4a2814b9ec --- /dev/null +++ b/lib/libspl/include/os/macos/unistd.h @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#ifndef _LIBSPL_UNISTD_H +#define _LIBSPL_UNISTD_H + +#include_next +#include + +/* Handle Linux use of 64 names */ + +#define open64 open +#define pread64 pread +#define pwrite64 pwrite +#define ftruncate64 ftruncate +#define lseek64 lseek + + +static inline int +fdatasync(int fd) +{ + if (fcntl(fd, F_FULLFSYNC) == -1) + return (-1); + return (0); +} + +#endif diff --git a/lib/libspl/os/macos/getexecname.c b/lib/libspl/os/macos/getexecname.c new file mode 100644 index 0000000000..7e37958869 --- /dev/null +++ b/lib/libspl/os/macos/getexecname.c @@ -0,0 +1,31 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
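[Illustrative note, not part of the patch.] macOS has no native fdatasync(2); the shim above issues fcntl(F_FULLFSYNC), which asks the device to flush its cache to stable storage and is therefore stronger (and slower) than Linux fdatasync. A minimal hedged sketch of the shim in use (the path and function name are made up):

/* Sketch only; relies on the fdatasync() shim defined above. */
#include <fcntl.h>
#include <unistd.h>

static int
write_durably(const char *path, const void *buf, size_t len)
{
	int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0)
		return (-1);
	if (write(fd, buf, len) != (ssize_t)len ||
	    fdatasync(fd) != 0) {		/* fcntl(fd, F_FULLFSYNC) underneath */
		(void) close(fd);
		return (-1);
	}
	return (close(fd));
}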
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include + +const char * +getexecname(void) +{ + return (getprogname()); +} diff --git a/lib/libspl/os/macos/gethostid.c b/lib/libspl/os/macos/gethostid.c new file mode 100644 index 0000000000..bedea579ed --- /dev/null +++ b/lib/libspl/os/macos/gethostid.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2020, Jorgen Lundman + */ + +#include +#include +#include + +unsigned long +get_system_hostid(void) +{ + size_t len; + uint32_t myhostid = 0; + len = sizeof (myhostid); + sysctlbyname("kern.hostid", &myhostid, &len, NULL, 0); + return (myhostid); +} diff --git a/lib/libspl/os/macos/getmntany.c b/lib/libspl/os/macos/getmntany.c new file mode 100644 index 0000000000..f3fec9654e --- /dev/null +++ b/lib/libspl/os/macos/getmntany.c @@ -0,0 +1,462 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Ricardo Correia. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +#include +#include +#include +#include /* for isspace() */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DIFF(xx) ((mrefp->xx != NULL) && \ + (mgetp->xx == NULL || strcmp(mrefp->xx, mgetp->xx) != 0)) + +static struct statfs *gsfs = NULL; +static int allfs = 0; +/* + * We will also query the extended filesystem capabilities API, to lookup + * other mount options, for example, XATTR. We can not use the MNTNOUSERXATTR + * option due to VFS rejecting with EACCESS. 
+ */ + +#include +typedef struct attrlist attrlist_t; + +struct attrBufS { + u_int32_t length; + vol_capabilities_set_t caps; +} __attribute__((aligned(4), packed)); + + +DIR * +fdopendir(int fd) +{ + char fullpath[MAXPATHLEN]; + + if (fcntl(fd, F_GETPATH, fullpath) < 0) { + perror("fcntl"); + return (NULL); + } + if (close(fd) < 0) { + return (NULL); + } + + return (opendir(fullpath)); +} + +static int +chdir_block_begin(int newroot_fd) +{ + int cwdfd, error; + + cwdfd = open(".", O_RDONLY | O_DIRECTORY); + if (cwdfd == -1) + return (-1); + + if (fchdir(newroot_fd) == -1) { + error = errno; + (void) close(cwdfd); + errno = error; + return (-1); + } + return (cwdfd); +} + +static void +chdir_block_end(int cwdfd) +{ + int error = errno; + (void) fchdir(cwdfd); + (void) close(cwdfd); + errno = error; +} + +int +openat64(int dirfd, const char *path, int flags, ...) +{ + int cwdfd, filefd; + + if ((cwdfd = chdir_block_begin(dirfd)) == -1) + return (-1); + + if ((flags & O_CREAT) != 0) { + va_list ap; + int mode; + + va_start(ap, flags); + mode = va_arg(ap, int); + va_end(ap); + + filefd = open(path, flags, mode); + } else + filefd = open(path, flags); + + chdir_block_end(cwdfd); + return (filefd); +} + +int +fstatat64(int dirfd, const char *path, struct stat *statbuf, int flag) +{ + int cwdfd, error; + + if ((cwdfd = chdir_block_begin(dirfd)) == -1) + return (-1); + + if (flag == AT_SYMLINK_NOFOLLOW) + error = lstat(path, statbuf); + else + error = stat(path, statbuf); + + chdir_block_end(cwdfd); + return (error); +} + + +static char * +mntopt(char **p) +{ + char *cp = *p; + char *retstr; + + while (*cp && isspace(*cp)) + cp++; + + retstr = cp; + while (*cp && *cp != ',') + cp++; + + if (*cp) { + *cp = '\0'; + cp++; + } + + *p = cp; + return (retstr); +} + +char * +hasmntopt(struct mnttab *mnt, char *opt) +{ + char tmpopts[256]; + char *f, *opts = tmpopts; + + if (mnt->mnt_mntopts == NULL) + return (NULL); + (void) strlcpy(opts, mnt->mnt_mntopts, 256); + f = mntopt(&opts); + for (; *f; f = mntopt(&opts)) { + if (strncmp(opt, f, strlen(opt)) == 0) + return (f - tmpopts + mnt->mnt_mntopts); + } + return (NULL); +} + +static void +optadd(char *mntopts, size_t size, const char *opt) +{ + + if (mntopts[0] != '\0') + strlcat(mntopts, ",", size); + strlcat(mntopts, opt, size); +} + + +#include +#include +#include +#include +#include + + +char * +MYCFStringCopyUTF8String(CFStringRef aString) +{ + if (aString == NULL) + return (NULL); + + CFIndex length = CFStringGetLength(aString); + CFIndex maxSize = + CFStringGetMaximumSizeForEncoding(length, + kCFStringEncodingUTF8); + char *buffer = (char *)malloc(maxSize); + if (CFStringGetCString(aString, buffer, maxSize, + kCFStringEncodingUTF8)) { + return (buffer); + } + return (NULL); +} + +/* + * Given "/dev/disk6" connect to IOkit and fetch the dataset + * name "BOOM/lower", and use it instead. + */ +void +expand_disk_to_zfs(char *devname, int len) +{ + char *result = NULL; + CFMutableDictionaryRef matchingDict; + io_service_t service; + CFStringRef cfstr; + char *device; + + if (strncmp(devname, "/dev/disk", 9) != 0) + return; + + device = &devname[5]; + + matchingDict = IOBSDNameMatching(kIOMasterPortDefault, 0, device); + if (NULL == matchingDict) + return; + + /* + * Fetch the object with the matching BSD node name. + * Note that there should only be one match, so + * IOServiceGetMatchingService is used instead of + * IOServiceGetMatchingServices to simplify the code. 
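[Illustrative note, not part of the patch.] The openat64()/fstatat64() emulation above works by briefly fchdir()ing into the directory referred to by dirfd, doing the plain path-relative call, then restoring the previous working directory; the cwd change is process-wide, so it is not safe against concurrent threads. A hedged sketch of the intended call pattern (paths are made up):

/* Sketch only: resolve a child path via the shims above. */
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

extern int openat64(int, const char *, int, ...);
extern int fstatat64(int, const char *, struct stat *, int);

static int
open_relative_example(void)
{
	struct stat st;
	int dirfd, fd;

	dirfd = open("/Volumes/tank", O_RDONLY | O_DIRECTORY);
	if (dirfd < 0)
		return (-1);

	/* Both shims briefly fchdir() into dirfd. */
	if (fstatat64(dirfd, "some/file", &st, 0) != 0 ||
	    (fd = openat64(dirfd, "some/file", O_RDONLY)) < 0)
		fd = -1;

	(void) close(dirfd);
	return (fd);
}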
+ */ + service = IOServiceGetMatchingService(kIOMasterPortDefault, + matchingDict); + + if (IO_OBJECT_NULL == service) { + return; + } + + cfstr = IORegistryEntryCreateCFProperty(service, + CFSTR("ZFS Dataset"), kCFAllocatorDefault, 0); + if (cfstr) { + result = MYCFStringCopyUTF8String(cfstr); + CFRelease(cfstr); + } + + IOObjectRelease(service); + + if (result) { + strlcpy(devname, result, len); + free(result); + } +} + +void +statfs2mnttab(struct statfs *sfs, struct mnttab *mp) +{ + static char mntopts[MNTMAXSTR]; + long flags; + + mntopts[0] = '\0'; + + flags = sfs->f_flags; +#define OPTADD(opt) optadd(mntopts, sizeof (mntopts), (opt)) + if (flags & MNT_RDONLY) + OPTADD(MNTOPT_RO); + else + OPTADD(MNTOPT_RW); + if (flags & MNT_NOSUID) +#ifdef __FreeBSD__ + OPTADD(MNTOPT_NOSUID); +#elif defined(__APPLE__) + OPTADD(MNTOPT_NOSETUID); +#endif + else + OPTADD(MNTOPT_SETUID); + if (flags & MNT_UPDATE) + OPTADD(MNTOPT_REMOUNT); + if (flags & MNT_NOATIME) + OPTADD(MNTOPT_NOATIME); + else + OPTADD(MNTOPT_ATIME); + { + struct attrBufS attrBuf; + attrlist_t attrList; + + memset(&attrList, 0, sizeof (attrList)); + attrList.bitmapcount = ATTR_BIT_MAP_COUNT; + attrList.volattr = ATTR_VOL_INFO|ATTR_VOL_CAPABILITIES; + + if (getattrlist(sfs->f_mntonname, &attrList, &attrBuf, + sizeof (attrBuf), 0) == 0) { + + if (attrBuf.caps[VOL_CAPABILITIES_INTERFACES] & + VOL_CAP_INT_EXTENDED_ATTR) { + OPTADD(MNTOPT_XATTR); + } else { + OPTADD(MNTOPT_NOXATTR); + } // If EXTENDED + } // if getattrlist + } + if (flags & MNT_NOEXEC) + OPTADD(MNTOPT_NOEXEC); + else + OPTADD(MNTOPT_EXEC); + if (flags & MNT_NODEV) + OPTADD(MNTOPT_NODEVICES); + else + OPTADD(MNTOPT_DEVICES); + if (flags & MNT_DONTBROWSE) + OPTADD(MNTOPT_NOBROWSE); + else + OPTADD(MNTOPT_BROWSE); + if (flags & MNT_IGNORE_OWNERSHIP) + OPTADD(MNTOPT_NOOWNERS); + else + OPTADD(MNTOPT_OWNERS); + +#undef OPTADD + + // If a disk is /dev/diskX, lets see if it has "zfs_dataset_name" + // set, and if so, use it instead, for mount matching. 
+ expand_disk_to_zfs(sfs->f_mntfromname, sizeof (sfs->f_mntfromname)); + + mp->mnt_special = sfs->f_mntfromname; + mp->mnt_mountp = sfs->f_mntonname; + mp->mnt_fstype = sfs->f_fstypename; + mp->mnt_mntopts = mntopts; + mp->mnt_fssubtype = sfs->f_fssubtype; + +} + +static int +statfs_init(void) +{ + struct statfs *sfs; + int error; + + if (gsfs != NULL) { + free(gsfs); + gsfs = NULL; + } + allfs = getfsstat(NULL, 0, MNT_NOWAIT); + if (allfs == -1) + goto fail; + gsfs = malloc(sizeof (gsfs[0]) * allfs * 2); + if (gsfs == NULL) + goto fail; + allfs = getfsstat(gsfs, (long)(sizeof (gsfs[0]) * allfs * 2), + MNT_NOWAIT); + if (allfs == -1) + goto fail; + sfs = realloc(gsfs, allfs * sizeof (gsfs[0])); + if (sfs != NULL) + gsfs = sfs; + return (0); +fail: + error = errno; + if (gsfs != NULL) + free(gsfs); + gsfs = NULL; + allfs = 0; + return (error); +} + +int +getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp) +{ + int i, error; + + error = statfs_init(); + if (error != 0) + return (error); + + for (i = 0; i < allfs; i++) { + statfs2mnttab(&gsfs[i], mgetp); + if (mrefp->mnt_special != NULL && mgetp->mnt_special != NULL && + strcmp(mrefp->mnt_special, mgetp->mnt_special) != 0) { + continue; + } + if (mrefp->mnt_mountp != NULL && mgetp->mnt_mountp != NULL && + strcmp(mrefp->mnt_mountp, mgetp->mnt_mountp) != 0) { + continue; + } + if (mrefp->mnt_fstype != NULL && mgetp->mnt_fstype != NULL && + strcmp(mrefp->mnt_fstype, mgetp->mnt_fstype) != 0) { + continue; + } + return (0); + } + return (-1); +} + +int +getmntent(FILE *fp, struct mnttab *mp) +{ + static int index = -1; + int error = 0; + + if (index < 0) { + error = statfs_init(); + } + + if (error != 0) + return (error); + + index++; + + // If we have finished "reading" the mnttab, reset it to + // start from the beginning, and return EOF. + if (index >= allfs) { + index = -1; + return (-1); + } + + statfs2mnttab(&gsfs[index], mp); + return (0); +} + +int +getextmntent(const char *path, struct extmnttab *entry, struct stat64 *statbuf) +{ + struct statfs sfs; + + if (strlen(path) >= MAXPATHLEN) { + (void) fprintf(stderr, "invalid object; pathname too long\n"); + return (-1); + } + + if (stat64(path, statbuf) != 0) { + (void) fprintf(stderr, "cannot open '%s': %s\n", + path, strerror(errno)); + return (-1); + } + + if (statfs(path, &sfs) != 0) { + (void) fprintf(stderr, "%s: %s\n", path, + strerror(errno)); + return (-1); + } + statfs2mnttab(&sfs, (struct mnttab *)entry); + return (0); +} diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 8cbbae7445..873d6b9127 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -2718,15 +2718,19 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, #ifdef __APPLE__ /* - * On OSX by default we mount pools under /Volumes unless - * the dataset property mountpoint specifies otherwise. - * In addition to this, there is an undocumented environment - * variable __ZFS_MAIN_MOUNTPOINT_DIR, used mainly by the - * testing environment, as it expects "/" by default. + * On OSX by default we mount pools under /Volumes + * unless the dataset property mountpoint specifies + * otherwise. + * In addition to this, there is an undocumented + * environment variable __ZFS_MAIN_MOUNTPOINT_DIR, + * used mainly by the testing environment, as it + * expects "/" by default. 
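[Illustrative note, not part of the patch.] Concretely: with the built-in default of "/Volumes/", a dataset tank/data ends up at /Volumes/tank/data, while the test suite sets __ZFS_MAIN_MOUNTPOINT_DIR=/ so the same dataset resolves to /tank/data. A hedged sketch of just the fallback logic in isolation:

/* Sketch only: mirrors the environment-variable fallback above. */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *root = getenv("__ZFS_MAIN_MOUNTPOINT_DIR");

	if (root == NULL)
		root = "/Volumes/";

	/* e.g. "/Volumes/" + "tank/data" -> "/Volumes/tank/data" */
	(void) printf("%s%s\n", root, "tank/data");
	return (0);
}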
*/ char *default_mountpoint; - default_mountpoint = getenv("__ZFS_MAIN_MOUNTPOINT_DIR"); - if (!default_mountpoint) default_mountpoint = "/Volumes/"; + default_mountpoint = + getenv("__ZFS_MAIN_MOUNTPOINT_DIR"); + if (!default_mountpoint) + default_mountpoint = "/Volumes/"; if (relpath[0] == '\0') (void) snprintf(propbuf, proplen, "%s%s", @@ -2734,8 +2738,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, else (void) snprintf(propbuf, proplen, "%s%s%s%s", root, str, source == NULL || - source[0] == '\0' ? default_mountpoint : "/", - relpath); + source[0] == '\0' ? default_mountpoint : + "/", relpath); #else diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index f1524c384f..b0b61521a4 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -1088,6 +1088,12 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) } if (!sdd->dryrun) { + +#if defined(__APPLE__) + /* Can't do IO on pipes, possibly wrap fd in domain socket */ + libzfs_macos_wrapfd(&sdd->outfd, B_TRUE); +#endif + /* * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. @@ -2473,6 +2479,11 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, if (flags->dryrun) return (0); +#if defined(__APPLE__) + /* Can't do IO on pipes, possibly wrap fd in domain socket */ + libzfs_macos_wrapfd(&fd, B_TRUE); +#endif + /* * If progress reporting is requested, spawn a new thread to poll * ZFS_IOC_SEND_PROGRESS at a regular interval. @@ -2563,6 +2574,7 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags, return (zfs_standard_error(hdl, errno, errbuf)); } } + return (err != 0); } @@ -4629,6 +4641,11 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF); } +#if defined(__APPLE__) + /* Can't do IO on pipes, possibly wrap fd in domain socket */ + libzfs_macos_wrapfd(&infd, B_FALSE); +#endif + err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable, raw, infd, drr_noswap, -1, &read_bytes, &errflags, diff --git a/lib/libzfs/os/macos/libzfs_mount_os.c b/lib/libzfs/os/macos/libzfs_mount_os.c new file mode 100644 index 0000000000..f5401a3b1f --- /dev/null +++ b/lib/libzfs/os/macos/libzfs_mount_os.c @@ -0,0 +1,713 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2019 by Delphix. All rights reserved. + * Copyright 2016 Igor Kozhukhov + * Copyright 2017 RackTop Systems. 
+ * Copyright (c) 2018 Datto Inc. + * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libzfs_impl.h" +#include +#include + + +/* + * The default OpenZFS icon. Compare against known values to see if it needs + * updating. Allowing users to set own. + * No file: copy icon + * correct size: do nothing + * other size: user custom icon, do nothing + */ + +/* icon name on root of a mount */ +#define MOUNT_POINT_CUSTOM_ICON ".VolumeIcon.icns" + +/* source icon name from inside zfs.kext bundle */ +#define CUSTOM_ICON_PATH \ + KERNEL_MODPREFIX "/zfs.kext/Contents/Resources/VolumeIcon.icns" + +#include + + +/* + * On OSX we can set the icon to an Open ZFS specific one, just to be extra + * shiny + */ +static void +zfs_mount_seticon(const char *mountpoint) +{ + /* For a root file system, add a volume icon. */ + ssize_t attrsize; + uint16_t finderinfo[16]; + struct stat sbuf; + char *path = NULL; + FILE *dstfp = NULL, *srcfp = NULL; + unsigned char buf[1024]; + unsigned int red; + + if (asprintf(&path, "%s/%s", mountpoint, MOUNT_POINT_CUSTOM_ICON) == -1) + return; + + /* If we can stat it, and it has a size, leave it be. */ + if ((stat(path, &sbuf) == 0 && sbuf.st_size > 0)) + goto out; + + /* Looks like we should copy the icon over */ + + /* check if we can read in the default ZFS icon */ + srcfp = fopen(CUSTOM_ICON_PATH, "r"); + + /* No source icon */ + if (!srcfp) + goto out; + + /* Open the output icon for writing */ + dstfp = fopen(path, "w"); + if (!dstfp) + goto out; + + /* Copy icon */ + while ((red = fread(buf, 1, sizeof (buf), srcfp)) > 0) + (void) fwrite(buf, 1, red, dstfp); + + /* We have copied it, set icon */ + attrsize = getxattr(mountpoint, XATTR_FINDERINFO_NAME, &finderinfo, + sizeof (finderinfo), 0); + if (attrsize != sizeof (finderinfo)) + (void) memset(&finderinfo, 0, sizeof (finderinfo)); + if ((finderinfo[4] & BE_16(0x0400)) == 0) { + finderinfo[4] |= BE_16(0x0400); + (void) setxattr(mountpoint, XATTR_FINDERINFO_NAME, &finderinfo, + sizeof (finderinfo), 0); + } + + /* Now tell Finder to update */ +#if 0 + int fd = -1; + strlcpy(template, mountpoint, sizeof (template)); + strlcat(template, "/tempXXXXXX", sizeof (template)); + if ((fd = mkstemp(template)) != -1) { + unlink(template); // Just delete it right away + close(fd); + } +#endif + +out: + if (dstfp != NULL) + fclose(dstfp); + if (srcfp != NULL) + fclose(srcfp); + if (path != NULL) + free(path); +} + +/* + * zfs_init_libshare(zhandle, service) + * + * Initialize the libshare API if it hasn't already been initialized. + * In all cases it returns 0 if it succeeded and an error if not. The + * service value is which part(s) of the API to initialize and is a + * direct map to the libshare sa_init(service) interface. + */ +int +zfs_init_libshare(libzfs_handle_t *zhandle, int service) +{ + int ret = SA_OK; + + if (ret == SA_OK && zhandle->libzfs_shareflags & ZFSSHARE_MISS) { + /* + * We had a cache miss. Most likely it is a new ZFS + * dataset that was just created. We want to make sure + * so check timestamps to see if a different process + * has updated any of the configuration. If there was + * some non-ZFS change, we need to re-initialize the + * internal cache. 
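[Illustrative note, not part of the patch.] The 0x0400 bit written into the FinderInfo blob is Finder's kHasCustomIcon flag; without it the copied .VolumeIcon.icns is ignored. A hedged stand-alone sketch of only the flag-setting step, assuming the same 32-byte FinderInfo layout the code above uses (the function name is made up, and Apple's 6-argument xattr calls are used directly):

/* Sketch only: set the "has custom icon" Finder flag on a mountpoint. */
#include <stdint.h>
#include <string.h>
#include <sys/xattr.h>
#include <libkern/OSByteOrder.h>

static int
set_custom_icon_flag(const char *mountpoint)
{
	uint16_t finderinfo[16];	/* 32 bytes, same layout as above */

	if (getxattr(mountpoint, XATTR_FINDERINFO_NAME, finderinfo,
	    sizeof (finderinfo), 0, 0) != sizeof (finderinfo))
		(void) memset(finderinfo, 0, sizeof (finderinfo));

	/* finderinfo[4] holds the Finder flags; 0x0400 is kHasCustomIcon */
	finderinfo[4] |= OSSwapHostToBigInt16(0x0400);

	return (setxattr(mountpoint, XATTR_FINDERINFO_NAME, finderinfo,
	    sizeof (finderinfo), 0, 0));
}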
+ */ + zhandle->libzfs_shareflags &= ~ZFSSHARE_MISS; + if (sa_needs_refresh(zhandle->libzfs_sharehdl)) { + zfs_uninit_libshare(zhandle); + zhandle->libzfs_sharehdl = sa_init(service); + } + } + + if (ret == SA_OK && zhandle && zhandle->libzfs_sharehdl == NULL) + zhandle->libzfs_sharehdl = sa_init(service); + + if (ret == SA_OK && zhandle->libzfs_sharehdl == NULL) + ret = SA_NO_MEMORY; + return (ret); +} + + +/* + * Share the given filesystem according to the options in the specified + * protocol specific properties (sharenfs, sharesmb). We rely + * on "libshare" to do the dirty work for us. + */ +int +zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) +{ + char mountpoint[ZFS_MAXPROPLEN]; + char shareopts[ZFS_MAXPROPLEN]; + char sourcestr[ZFS_MAXPROPLEN]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + sa_share_t share; + zfs_share_proto_t *curr_proto; + zprop_source_t sourcetype; + int err, ret; + + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0)) + return (0); + + for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) { + /* + * Return success if there are no share options. + */ + if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop, + shareopts, sizeof (shareopts), &sourcetype, sourcestr, + ZFS_MAXPROPLEN, B_FALSE) != 0 || + strcmp(shareopts, "off") == 0) + continue; + + ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API); + if (ret != SA_OK) { + (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED, + dgettext(TEXT_DOMAIN, "cannot share '%s': %s"), + zfs_get_name(zhp), sa_errorstr(ret)); + return (-1); + } + + /* + * If the 'zoned' property is set, then zfs_is_mountable() + * will have already bailed out if we are in the global zone. + * But local zones cannot be NFS servers, so we ignore it for + * local zones as well. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) + continue; + + share = sa_find_share(hdl->libzfs_sharehdl, mountpoint); + if (share == NULL) { + /* + * This may be a new file system that was just + * created so isn't in the internal cache + * (second time through). Rather than + * reloading the entire configuration, we can + * assume ZFS has done the checking and it is + * safe to add this to the internal + * configuration. + */ + if (sa_zfs_process_share(hdl->libzfs_sharehdl, + NULL, NULL, mountpoint, + proto_table[*curr_proto].p_name, sourcetype, + shareopts, sourcestr, zhp->zfs_name) != SA_OK) { + (void) zfs_error_fmt(hdl, + proto_table[*curr_proto].p_share_err, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp)); + return (-1); + } + hdl->libzfs_shareflags |= ZFSSHARE_MISS; + share = sa_find_share(hdl->libzfs_sharehdl, + mountpoint); + } + if (share != NULL) { + err = sa_enable_share(share, + proto_table[*curr_proto].p_name); + if (err != SA_OK) { + (void) zfs_error_fmt(hdl, + proto_table[*curr_proto].p_share_err, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp)); + return (-1); + } + } else { + (void) zfs_error_fmt(hdl, + proto_table[*curr_proto].p_share_err, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp)); + return (-1); + } + + } + return (0); +} + +/* + * Unshare a filesystem by mountpoint. + */ +int +unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint, + zfs_share_proto_t proto) +{ + sa_share_t share; + int err; + char *mntpt; + + /* + * Mountpoint could get trashed if libshare calls getmntany + * which it does during API initialization, so strdup the + * value. 
+ */ + mntpt = zfs_strdup(hdl, mountpoint); + + /* make sure libshare initialized */ + if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) { + free(mntpt); /* don't need the copy anymore */ + return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, + dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), + name, sa_errorstr(err))); + } + + share = sa_find_share(hdl->libzfs_sharehdl, mntpt); + free(mntpt); /* don't need the copy anymore */ + + if (share != NULL) { + err = sa_disable_share(share, proto_table[proto].p_name); + if (err != SA_OK) { + return (zfs_error_fmt(hdl, + proto_table[proto].p_unshare_err, + dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), + name, sa_errorstr(err))); + } + } else { + return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, + dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"), + name)); + } + return (0); +} + +/* + * Search the sharetab for the given mountpoint and protocol, returning + * a zfs_share_type_t value. + */ +zfs_share_type_t +is_shared_impl(libzfs_handle_t *hdl, const char *mountpoint, + zfs_share_proto_t proto) +{ + char buf[MAXPATHLEN], *tab; + char *ptr; + + if (hdl->libzfs_sharetab == NULL) + return (SHARED_NOT_SHARED); + + /* Reopen ZFS_SHARETAB to prevent reading stale data from open file */ + if (freopen(ZFS_SHARETAB, "r", hdl->libzfs_sharetab) == NULL) + return (SHARED_NOT_SHARED); + + (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET); + + while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) { + + /* the mountpoint is the first entry on each line */ + if ((tab = strchr(buf, '\t')) == NULL) + continue; + + *tab = '\0'; + if (strcmp(buf, mountpoint) == 0) { + /* + * the protocol field is the third field + * skip over second field + */ + ptr = ++tab; + if ((tab = strchr(ptr, '\t')) == NULL) + continue; + ptr = ++tab; + if ((tab = strchr(ptr, '\t')) == NULL) + continue; + *tab = '\0'; + if (strcmp(ptr, + proto_table[proto].p_name) == 0) { + switch (proto) { + case PROTO_NFS: + return (SHARED_NFS); + case PROTO_SMB: + return (SHARED_SMB); + default: + return (0); + } + } + } + } + + return (SHARED_NOT_SHARED); +} + +/* + * if (zmount(zhp, zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags, + * MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) { + */ +int +do_mount(zfs_handle_t *zhp, const char *dir, char *optptr, int mflag) +{ + int rv; + const char *spec = zfs_get_name(zhp); + const char *fstype = MNTTYPE_ZFS; + struct zfs_mount_args mnt_args; + char *rpath = NULL; + zfs_cmd_t zc = { "\0" }; + int devdisk = ZFS_DEVDISK_POOLONLY; + int ispool = 0; // the pool dataset, that is + int optlen = 0; + + assert(spec != NULL); + assert(dir != NULL); + assert(fstype != NULL); + assert(mflag >= 0); + assert(strcmp(fstype, MNTTYPE_ZFS) == 0); + assert(dataptr == NULL); + assert(datalen == 0); + assert(optptr != NULL); + assert(optlen > 0); + + if (optptr != NULL) + optlen = strlen(optptr); + + /* + * Figure out if we want this mount as a /dev/diskX mount, if so + * ask kernel to create one for us, then use it to mount. 
+ */ + + // Use dataset name by default + mnt_args.fspec = spec; + + /* + * Lookup the dataset property devdisk, and depending on its + * setting, we need to create a /dev/diskX for the mount + */ + if (zhp) { + + /* If we are in zfs-tests, no devdisks */ + if (getenv("__ZFS_MAIN_MOUNTPOINT_DIR") != NULL) + devdisk = ZFS_DEVDISK_OFF; + else + devdisk = zfs_prop_get_int(zhp, ZFS_PROP_DEVDISK); + + if (zhp && zhp->zpool_hdl && + strcmp(zpool_get_name(zhp->zpool_hdl), + zfs_get_name(zhp)) == 0) + ispool = 1; + + if ((devdisk == ZFS_DEVDISK_ON) || + ((devdisk == ZFS_DEVDISK_POOLONLY) && + ispool)) { + (void) strlcpy(zc.zc_name, zhp->zfs_name, + sizeof (zc.zc_name)); + zc.zc_value[0] = 0; + + rv = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_PROXY_DATASET, + &zc); + +#ifdef DEBUG + if (rv) + fprintf(stderr, + "proxy dataset returns %d '%s'\n", + rv, zc.zc_value); +#endif + + // Mount using /dev/diskX, use temporary buffer to + // give it full name + if (rv == 0) { + snprintf(zc.zc_name, sizeof (zc.zc_name), + "/dev/%s", zc.zc_value); + mnt_args.fspec = zc.zc_name; + } + } + } + + mnt_args.mflag = mflag; + mnt_args.optptr = optptr; + mnt_args.optlen = optlen; + mnt_args.struct_size = sizeof (mnt_args); + + /* + * There is a bug in XNU where /var/tmp is resolved as + * "private/var/tmp" without the leading "/", and both mount(2) and + * diskutil mount avoid this by calling realpath() first. So we will + * do the same. + */ + rpath = realpath(dir, NULL); + + dprintf("%s calling mount with fstype %s, %s %s, fspec %s, mflag %d," + " optptr %s, optlen %d, devdisk %d, ispool %d\n", + __func__, fstype, (rpath ? "rpath" : "dir"), + (rpath ? rpath : dir), mnt_args.fspec, mflag, optptr, optlen, + devdisk, ispool); + rv = mount(fstype, rpath ? rpath : dir, 0, &mnt_args); + + if (rpath) free(rpath); + + /* Check if we need to create/update icon */ + if (rv == 0) + zfs_mount_seticon(dir); + + return (rv); +} + +int +do_unmount_impl(const char *mntpt, int flags) +{ + char force_opt[] = "force"; + char *argv[7] = { + "/usr/sbin/diskutil", + "unmount", + NULL, NULL, NULL, NULL }; + int rc, count = 2; + + if (flags & MS_FORCE) { + argv[count] = force_opt; + count++; + } + + argv[count] = (char *)mntpt; + rc = libzfs_run_process(argv[0], argv, STDOUT_VERBOSE|STDERR_VERBOSE); + + /* + * There is a bug, where we can not unmount, with the error + * already unmounted, even though it wasn't. But it is easy + * to work around by calling 'umount'. Until a real fix is done... + * re-test this: 202004/lundman + */ + if (rc != 0) { + char *argv[7] = { + "/sbin/umount", + NULL, NULL, NULL, NULL }; + int rc, count = 1; + + fprintf(stderr, "Fallback umount called\r\n"); + if (flags & MS_FORCE) { + argv[count] = "-f"; + count++; + } + argv[count] = (char *)mntpt; + rc = libzfs_run_process(argv[0], argv, + STDOUT_VERBOSE|STDERR_VERBOSE); + } + + return (rc ? EINVAL : 0); +} + + +void unmount_snapshots(libzfs_handle_t *hdl, const char *mntpt, int flags); + +int +do_unmount(libzfs_handle_t *hdl, const char *mntpt, int flags) +{ + /* + * On OSX, the kernel can not unmount all snapshots for us, as XNU + * rejects the unmount before it reaches ZFS. But we can easily handle + * unmounting snapshots from userland. + */ + unmount_snapshots(hdl, mntpt, flags); + + return (do_unmount_impl(mntpt, flags)); +} + +/* + * Given "/Volumes/BOOM" look for any lower mounts with ".zfs/snapshot/" + * in them - issue unmount. 
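[Illustrative note, not part of the patch.] The realpath() step above matters because /var is a symlink into /private and XNU otherwise records the mount path inconsistently; canonicalizing first, as mount(2) and diskutil do, sidesteps that. A tiny hedged illustration of the normalization:

/* Sketch only: shows why the mount target is canonicalized first. */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *rpath = realpath("/var/tmp", NULL);

	/* On macOS this prints "/private/var/tmp". */
	if (rpath != NULL) {
		(void) printf("%s\n", rpath);
		free(rpath);
	}
	return (0);
}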
+ */ +void +unmount_snapshots(libzfs_handle_t *hdl, const char *mntpt, int flags) +{ + struct mnttab entry; + int len = strlen(mntpt); + + while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { + /* Starts with our mountpoint ? */ + if (strncmp(mntpt, entry.mnt_mountp, len) == 0) { + /* The next part is "/.zfs/snapshot/" ? */ + if (strncmp("/.zfs/snapshot/", &entry.mnt_mountp[len], + 15) == 0) { + /* Unmount it */ + do_unmount_impl(entry.mnt_mountp, MS_FORCE); + } + } + } +} + +int +zfs_mount_delegation_check(void) +{ + return ((geteuid() != 0) ? EACCES : 0); +} + +static char * +zfs_snapshot_mountpoint(zfs_handle_t *zhp) +{ + char *dataset_name, *snapshot_mountpoint, *parent_mountpoint; + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_handle_t *parent; + char *r; + + dataset_name = zfs_strdup(hdl, zhp->zfs_name); + if (dataset_name == NULL) { + (void) fprintf(stderr, gettext("not enough memory")); + return (NULL); + } + + r = strrchr(dataset_name, '@'); + + if (r == NULL) { + (void) fprintf(stderr, gettext("snapshot '%s' " + "has no '@'\n"), zhp->zfs_name); + free(dataset_name); + return (NULL); + } + + r[0] = 0; + + /* Open the dataset */ + if ((parent = zfs_open(hdl, dataset_name, + ZFS_TYPE_FILESYSTEM)) == NULL) { + (void) fprintf(stderr, + gettext("unable to open parent dataset '%s'\n"), + dataset_name); + free(dataset_name); + return (NULL); + } + + if (!zfs_is_mounted(parent, &parent_mountpoint)) { + (void) fprintf(stderr, + gettext("parent dataset '%s' must be mounted\n"), + dataset_name); + free(dataset_name); + zfs_close(parent); + return (NULL); + } + + zfs_close(parent); + + snapshot_mountpoint = + zfs_asprintf(hdl, "%s/.zfs/snapshot/%s/", + parent_mountpoint, &r[1]); + + free(dataset_name); + free(parent_mountpoint); + + return (snapshot_mountpoint); +} + +/* + * Mount a snapshot; called from "zfs mount dataset@snapshot". + * Given "dataset@snapshot" construct mountpoint path of the + * style "/mountpoint/dataset/.zfs/snapshot/$name/". Ensure + * parent "dataset" is mounted, then issue mount for snapshot. 
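[Illustrative note, not part of the patch.] For example, with tank/data mounted at /Volumes/tank/data, zfs_snapshot_mountpoint() for tank/data@backup yields /Volumes/tank/data/.zfs/snapshot/backup/. A hedged sketch of the same string construction in isolation (names are made up):

/* Sketch only: builds a snapshot mountpoint the same way as above. */
#include <stdio.h>

static void
snapshot_path(char *out, size_t outlen, const char *parent_mnt,
    const char *snapname)
{
	/* parent mountpoint + "/.zfs/snapshot/" + the part after '@' */
	(void) snprintf(out, outlen, "%s/.zfs/snapshot/%s/",
	    parent_mnt, snapname);
}

int
main(void)
{
	char path[1024];

	snapshot_path(path, sizeof (path), "/Volumes/tank/data", "backup");
	(void) printf("%s\n", path);	/* /Volumes/tank/data/.zfs/snapshot/backup/ */
	return (0);
}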
+ */ +int +zfs_snapshot_mount(zfs_handle_t *zhp, const char *options, + int flags) +{ + int ret = 0; + char *mountpoint; + + /* + * The automounting will kick in, and zed mounts it - so + * we temporarily disable it + */ + uint64_t automount = 0; + uint64_t saved_automount = 0; + size_t len = sizeof (automount); + size_t slen = sizeof (saved_automount); + + /* Remember what the user has it set to */ + sysctlbyname("kstat.zfs.darwin.tunable.zfs_auto_snapshot", + &saved_automount, &slen, NULL, 0); + + /* Disable automounting */ + sysctlbyname("kstat.zfs.darwin.tunable.zfs_auto_snapshot", + NULL, NULL, &automount, len); + + if (zfs_is_mounted(zhp, NULL)) { + return (EBUSY); + } + + mountpoint = zfs_snapshot_mountpoint(zhp); + if (mountpoint == NULL) + return (EINVAL); + + ret = zfs_mount_at(zhp, options, MS_RDONLY | flags, + mountpoint); + + /* If zed is running, it can mount it before us */ + if (ret == -1 && errno == EINVAL) + ret = 0; + + if (ret == 0) { + (void) fprintf(stderr, + gettext("ZFS: snapshot mountpoint '%s'\n"), + mountpoint); + } + + free(mountpoint); + + /* Restore automount setting */ + sysctlbyname("kstat.zfs.darwin.tunable.zfs_auto_snapshot", + NULL, NULL, &saved_automount, len); + + return (ret); +} + +int +zfs_snapshot_unmount(zfs_handle_t *zhp, int flags) +{ + int ret = 0; + char *mountpoint; + + if (!zfs_is_mounted(zhp, NULL)) { + return (ENOENT); + } + + mountpoint = zfs_snapshot_mountpoint(zhp); + if (mountpoint == NULL) + return (EINVAL); + + ret = zfs_unmount(zhp, mountpoint, flags); + + free(mountpoint); + + return (ret); +} diff --git a/lib/libzfs/os/macos/libzfs_pool_os.c b/lib/libzfs/os/macos/libzfs_pool_os.c new file mode 100644 index 0000000000..f7685e2f01 --- /dev/null +++ b/lib/libzfs/os/macos/libzfs_pool_os.c @@ -0,0 +1,345 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright 2016 Igor Kozhukhov + * Copyright (c) 2018 Datto Inc. + * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. + * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2018, loli10K + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_namecheck.h" +#include "zfs_prop.h" +#include "libzfs_impl.h" +#include "zfs_comutil.h" +#include "zfeature_common.h" + +/* + * If the device has being dynamically expanded then we need to relabel + * the disk to use the new unallocated space. 
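[Illustrative note, not part of the patch.] Toggling the kstat.zfs.darwin.tunable.zfs_auto_snapshot tunable around the snapshot mount is plain sysctlbyname(3) usage: pass an old-value buffer to read, a new-value buffer to write. A hedged generic sketch with error checking added for illustration (the helper name is made up):

/* Sketch only: read a sysctl tunable, then overwrite it. */
#include <stdint.h>
#include <sys/sysctl.h>

static int
swap_tunable(const char *name, uint64_t newval, uint64_t *oldval)
{
	size_t len = sizeof (*oldval);

	/* read the current value so it can be restored later */
	if (sysctlbyname(name, oldval, &len, NULL, 0) != 0)
		return (-1);

	/* write the new value */
	return (sysctlbyname(name, NULL, NULL, &newval, sizeof (newval)));
}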
+ */ +int +zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg) +{ + int fd, error; + + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s': unable to open device: %d"), path, errno); + return (zfs_error(hdl, EZFS_OPENFAILED, msg)); + } + + /* + * It's possible that we might encounter an error if the device + * does not have any unallocated space left. If so, we simply + * ignore that error and continue on. + * + * Also, we don't call efi_rescan() - that would just return EBUSY. + * The module will do it for us in vdev_disk_open(). + */ + error = efi_use_whole_disk(fd); + + /* Flush the buffers to disk and invalidate the page cache. */ + (void) fsync(fd); +// (void) ioctl(fd, BLKFLSBUF); + + (void) close(fd); + if (error && error != VT_ENOSPC) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s': unable to read disk capacity"), path); + return (zfs_error(hdl, EZFS_NOCAP, msg)); + } + return (0); +} + +/* + * Read the EFI label from the config, if a label does not exist then + * pass back the error to the caller. If the caller has passed a non-NULL + * diskaddr argument then we set it to the starting address of the EFI + * partition. + */ +static int +read_efi_label(nvlist_t *config, diskaddr_t *sb) +{ + char *path; + int fd; + char diskname[MAXPATHLEN]; + int err = -1; + + if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) + return (err); + + (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT, + strrchr(path, '/')); + if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) { + struct dk_gpt *vtoc; + + if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { + if (sb != NULL) + *sb = vtoc->efi_parts[0].p_start; + efi_free(vtoc); + } + (void) close(fd); + } + return (err); +} + +/* + * determine where a partition starts on a disk in the current + * configuration + */ +static diskaddr_t +find_start_block(nvlist_t *config) +{ + nvlist_t **child; + uint_t c, children; + diskaddr_t sb = MAXOFFSET_T; + uint64_t wholedisk; + + if (nvlist_lookup_nvlist_array(config, + ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { + if (nvlist_lookup_uint64(config, + ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) != 0 || !wholedisk) { + return (MAXOFFSET_T); + } + if (read_efi_label(config, &sb) < 0) + sb = MAXOFFSET_T; + return (sb); + } + + for (c = 0; c < children; c++) { + sb = find_start_block(child[c]); + if (sb != MAXOFFSET_T) { + return (sb); + } + } + return (MAXOFFSET_T); +} + +static int +zpool_label_disk_check(char *path) +{ + struct dk_gpt *vtoc; + int fd, err; + + if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0) + return (errno); + + if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { + (void) close(fd); + return (err); + } + + if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { + efi_free(vtoc); + (void) close(fd); + return (EIDRM); + } + + efi_free(vtoc); + (void) close(fd); + return (0); +} + +/* + * Generate a unique partition name for the ZFS member. Partitions must + * have unique names to ensure udev will be able to create symlinks under + * /dev/disk/by-partlabel/ for all pool members. The partition names are + * of the form -. 
+ */ +static void +zpool_label_name(char *label_name, int label_size) +{ + uint64_t id = 0; + int fd; + + fd = open("/dev/urandom", O_RDONLY); + if (fd >= 0) { + if (read(fd, &id, sizeof (id)) != sizeof (id)) + id = 0; + + close(fd); + } + + if (id == 0) + id = (((uint64_t)rand()) << 32) | (uint64_t)rand(); + + snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id); +} + +/* + * Label an individual disk. The name provided is the short name, + * stripped of any leading /dev path. + */ +int +zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) +{ + char path[MAXPATHLEN]; + struct dk_gpt *vtoc; + int rval, fd; + size_t resv = EFI_MIN_RESV_SIZE; + uint64_t slice_size; + diskaddr_t start_block; + char errbuf[1024]; + + /* prepare an error message just in case */ + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); + + if (zhp) { + nvlist_t *nvroot; + + verify(nvlist_lookup_nvlist(zhp->zpool_config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + + if (zhp->zpool_start_block == 0) + start_block = find_start_block(nvroot); + else + start_block = zhp->zpool_start_block; + zhp->zpool_start_block = start_block; + } else { + /* new pool */ + start_block = NEW_START_BLOCK; + } + + (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); + + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { + /* + * This shouldn't happen. We've long since verified that this + * is a valid device. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "label '%s': unable to open device: %d"), path, errno); + return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); + } + + if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) { + /* + * The only way this can fail is if we run out of memory, or we + * were unable to read the disk's capacity + */ + if (errno == ENOMEM) + (void) no_memory(hdl); + + (void) close(fd); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "label '%s': unable to read disk capacity"), path); + + return (zfs_error(hdl, EZFS_NOCAP, errbuf)); + } + + slice_size = vtoc->efi_last_u_lba + 1; + slice_size -= EFI_MIN_RESV_SIZE; + if (start_block == MAXOFFSET_T) + start_block = NEW_START_BLOCK; + slice_size -= start_block; + slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT); + + vtoc->efi_parts[0].p_start = start_block; + vtoc->efi_parts[0].p_size = slice_size; + + /* + * Why we use V_USR: V_BACKUP confuses users, and is considered + * disposable by some EFI utilities (since EFI doesn't have a backup + * slice). V_UNASSIGNED is supposed to be used only for zero size + * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, + * etc. were all pretty specific. V_USR is as close to reality as we + * can get, in the absence of V_OTHER. + */ + vtoc->efi_parts[0].p_tag = V_USR; + zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN); + + vtoc->efi_parts[8].p_start = slice_size + start_block; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + + rval = efi_write(fd, vtoc); + + /* Flush the buffers to disk and invalidate the page cache. */ + (void) fsync(fd); +// (void) ioctl(fd, BLKFLSBUF); + + if (rval == 0) + rval = efi_rescan(fd); + + /* + * Some block drivers (like pcata) may not support EFI GPT labels. + * Print out a helpful error message directing the user to manually + * label the disk and give a specific slice. 
+ */ + if (rval != 0) { + (void) close(fd); + efi_free(vtoc); + + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using " + "parted(8) and then provide a specific slice: %d"), rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + + (void) close(fd); + efi_free(vtoc); + + (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); + (void) zfs_append_partition(path, MAXPATHLEN); + + /* Wait to udev to signal use the device has settled. */ + rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT); + if (rval) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to " + "detect device partitions on '%s': %d"), path, rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + + /* We can't be to paranoid. Read the label back and verify it. */ + (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); + rval = zpool_label_disk_check(path); + if (rval) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " + "EFI label on '%s' is damaged. Ensure\nthis device " + "is not in use, and is functioning properly: %d"), + path, rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + return (0); +} diff --git a/lib/libzfs/os/macos/libzfs_util_os.c b/lib/libzfs/os/macos/libzfs_util_os.c new file mode 100644 index 0000000000..cd732786ad --- /dev/null +++ b/lib/libzfs/os/macos/libzfs_util_os.c @@ -0,0 +1,575 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "libzfs_impl.h" +#include "zfs_prop.h" +#include +#include + +#define ZDIFF_SHARESDIR "/.zfs/shares/" + + +int +zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc) +{ + return (zfs_ioctl_fd(hdl->libzfs_fd, request, zc)); +} + +const char * +libzfs_error_init(int error) +{ + switch (error) { + case ENXIO: + return (dgettext(TEXT_DOMAIN, "The ZFS modules are not " + "loaded.\nTry running '/sbin/kextload zfs.kext' as root " + "to load them.")); + case ENOENT: + return (dgettext(TEXT_DOMAIN, "/dev/zfs and /proc/self/mounts " + "are required.\nTry running 'udevadm trigger' and 'mount " + "-t proc proc /proc' as root.")); + case ENOEXEC: + return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be " + "auto-loaded.\nTry running '/sbin/kextload zfs.kext' as " + "root to manually load them.")); + case EACCES: + return (dgettext(TEXT_DOMAIN, "Permission denied the " + "ZFS utilities must be run as root.")); + default: + return (dgettext(TEXT_DOMAIN, "Failed to initialize the " + "libzfs library.")); + } +} + +static int +libzfs_module_loaded(const char *module) +{ + const char path_prefix[] = "/dev/"; + char path[256]; + + memcpy(path, path_prefix, sizeof (path_prefix) - 1); + strcpy(path + sizeof (path_prefix) - 1, module); + + return (access(path, F_OK) == 0); +} + +/* + * Verify the required ZFS_DEV device is available and optionally attempt + * to load the ZFS modules. Under normal circumstances the modules + * should already have been loaded by some external mechanism. + * + * Environment variables: + * - ZFS_MODULE_LOADING="YES|yes|ON|on" - Attempt to load modules. + * - ZFS_MODULE_TIMEOUT="" - Seconds to wait for ZFS_DEV + */ +static int +libzfs_load_module_impl(const char *module) +{ + char *argv[4] = {"/sbin/kextload", (char *)module, (char *)0}; + char *load_str, *timeout_str; + long timeout = 10; /* seconds */ + long busy_timeout = 10; /* milliseconds */ + int load = 0, fd; + hrtime_t start; + + /* Optionally request module loading */ + if (!libzfs_module_loaded(module)) { + load_str = getenv("ZFS_MODULE_LOADING"); + if (load_str) { + if (!strncasecmp(load_str, "YES", strlen("YES")) || + !strncasecmp(load_str, "ON", strlen("ON"))) + load = 1; + else + load = 0; + } + + if (load) { + if (libzfs_run_process("/sbin/kextload", argv, 0)) + return (ENOEXEC); + } + + if (!libzfs_module_loaded(module)) + return (ENXIO); + } + + /* + * Device creation by udev is asynchronous and waiting may be + * required. Busy wait for 10ms and then fall back to polling every + * 10ms for the allowed timeout (default 10s, max 10m). This is + * done to optimize for the common case where the device is + * immediately available and to avoid penalizing the possible + * case where udev is slow or unable to create the device. 
+ */ + timeout_str = getenv("ZFS_MODULE_TIMEOUT"); + if (timeout_str) { + timeout = strtol(timeout_str, NULL, 0); + timeout = MAX(MIN(timeout, (10 * 60)), 0); /* 0 <= N <= 600 */ + } + + start = gethrtime(); + do { + fd = open(ZFS_DEV, O_RDWR); + if (fd >= 0) { + (void) close(fd); + return (0); + } else if (errno != ENOENT) { + return (errno); + } else if (NSEC2MSEC(gethrtime() - start) < busy_timeout) { + sched_yield(); + } else { + usleep(10 * MILLISEC); + } + } while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC)); + + return (ENOENT); +} + +int +libzfs_load_module(void) +{ + return (libzfs_load_module_impl(ZFS_DRIVER)); +} + +int +find_shares_object(differ_info_t *di) +{ + char fullpath[MAXPATHLEN]; + struct stat64 sb = { 0 }; + + (void) strlcpy(fullpath, di->dsmnt, MAXPATHLEN); + (void) strlcat(fullpath, ZDIFF_SHARESDIR, MAXPATHLEN); + + if (stat64(fullpath, &sb) != 0) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath); + return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf)); + } + + di->shares = (uint64_t)sb.st_ino; + return (0); +} + +/* + * Fill given version buffer with zfs kernel version read from ZFS_SYSFS_DIR + * Returns 0 on success, and -1 on error (with errno set) + */ +int +zfs_version_kernel(char *version, int len) +{ + size_t rlen = len; + + if (sysctlbyname("zfs.kext_version", + version, &rlen, NULL, 0) == -1) + return (-1); + + return (0); +} + +static int +execvPe(const char *name, const char *path, char * const *argv, + char * const *envp) +{ + const char **memp; + size_t cnt, lp, ln; + int eacces, save_errno; + char *cur, buf[MAXPATHLEN]; + const char *p, *bp; + struct stat sb; + + eacces = 0; + + /* If it's an absolute or relative path name, it's easy. */ + if (strchr(name, '/')) { + bp = name; + cur = NULL; + goto retry; + } + bp = buf; + + /* If it's an empty path name, fail in the usual POSIX way. */ + if (*name == '\0') { + errno = ENOENT; + return (-1); + } + + cur = alloca(strlen(path) + 1); + if (cur == NULL) { + errno = ENOMEM; + return (-1); + } + strcpy(cur, path); + while ((p = strsep(&cur, ":")) != NULL) { + /* + * It's a SHELL path -- double, leading and trailing colons + * mean the current directory. + */ + if (*p == '\0') { + p = "."; + lp = 1; + } else + lp = strlen(p); + ln = strlen(name); + + /* + * If the path is too long complain. This is a possible + * security issue; given a way to make the path too long + * the user may execute the wrong program. + */ + if (lp + ln + 2 > sizeof (buf)) { + (void) write(STDERR_FILENO, "execvP: ", 8); + (void) write(STDERR_FILENO, p, lp); + (void) write(STDERR_FILENO, ": path too long\n", + 16); + continue; + } + bcopy(p, buf, lp); + buf[lp] = '/'; + bcopy(name, buf + lp + 1, ln); + buf[lp + ln + 1] = '\0'; + +retry: + (void) execve(bp, argv, envp); + switch (errno) { + case E2BIG: + goto done; + case ELOOP: + case ENAMETOOLONG: + case ENOENT: + break; + case ENOEXEC: + for (cnt = 0; argv[cnt]; ++cnt) + ; + memp = alloca((cnt + 2) * sizeof (char *)); + if (memp == NULL) { + goto done; + } + memp[0] = "sh"; + memp[1] = bp; + bcopy(argv + 1, memp + 2, cnt * sizeof (char *)); + execve(_PATH_BSHELL, __DECONST(char **, memp), + envp); + goto done; + case ENOMEM: + goto done; + case ENOTDIR: + break; + case ETXTBSY: + /* + * We used to retry here, but sh(1) doesn't. + */ + goto done; + default: + /* + * EACCES may be for an inaccessible directory or + * a non-executable file. Call stat() to decide + * which. 
This also handles ambiguities for EFAULT + * and EIO, and undocumented errors like ESTALE. + * We hope that the race for a stat() is unimportant. + */ + save_errno = errno; + if (stat(bp, &sb) != 0) + break; + if (save_errno == EACCES) { + eacces = 1; + continue; + } + errno = save_errno; + goto done; + } + } + if (eacces) + errno = EACCES; + else + errno = ENOENT; +done: + return (-1); +} + +int +execvpe(const char *name, char * const argv[], char * const envp[]) +{ + const char *path; + + /* Get the path we're searching. */ + if ((path = getenv("PATH")) == NULL) + path = _PATH_DEFPATH; + + return (execvPe(name, path, argv, envp)); +} + +#include +#include +#include + +extern void libzfs_refresh_finder(char *); + +/* + * To tell Finder to refresh is relatively easy from Obj-C, but as this + * would be the only function to use Obj-C (and only .m), the following code: + * void libzfs_refresh_finder(char *mountpoint) + * { + * [[NSWorkspace sharedWorkspace] noteFileSystemChanged:[NSString + * stringWithUTF8String:mountpoint]]; + * } + * Has been converted to C to keep autoconf simpler. If in future we have + * more Obj-C source files, then we should re-address this. + */ +void +libzfs_refresh_finder(char *path) +{ + Class NSWorkspace = objc_getClass("NSWorkspace"); + Class NSString = objc_getClass("NSString"); + SEL stringWithUTF8String = sel_registerName("stringWithUTF8String:"); + SEL sharedWorkspace = sel_registerName("sharedWorkspace"); + SEL noteFileSystemChanged = sel_registerName("noteFileSystemChanged:"); + id ns_path = ((id(*)(Class, SEL, char *))objc_msgSend)(NSString, + stringWithUTF8String, path); + id workspace = ((id(*)(Class, SEL))objc_msgSend)(NSWorkspace, + sharedWorkspace); + ((id(*)(id, SEL, id))objc_msgSend)(workspace, noteFileSystemChanged, + ns_path); +} + +void +zfs_rollback_os(zfs_handle_t *zhp) +{ + char sourceloc[ZFS_MAX_DATASET_NAME_LEN]; + char mountpoint[ZFS_MAXPROPLEN]; + zprop_source_t sourcetype; + + if (zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type, + B_FALSE)) { + if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, + mountpoint, sizeof (mountpoint), + &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0) + libzfs_refresh_finder(mountpoint); + } +} + +struct pipe2file { + int from; + int to; +}; +typedef struct pipe2file pipe2file_t; + +#include + +static void * +pipe_io_relay(void *arg) +{ + pipe2file_t *p2f = (pipe2file_t *)arg; + int readfd, writefd; + unsigned char *buffer; + unsigned char space[1024]; + int size = 1024 * 1024; + int red, sent; + uint64_t total = 0; + + readfd = p2f->from; + writefd = p2f->to; + free(p2f); + p2f = NULL; + + buffer = malloc(size); + if (buffer == NULL) { + buffer = space; + size = sizeof (space); + } + + fprintf(stderr, "%s: thread up: read(%d) write(%d)\r\n", __func__, + readfd, writefd); + + for (;;) { + + red = read(readfd, buffer, size); + // fprintf(stderr, "%s: read(%d): %d (errno %d)\r\n", __func__, + // readfd, red, errno); + if (red == 0) + break; + if (red < 0 && errno != EWOULDBLOCK) + break; + sent = write(writefd, buffer, red); + // fprintf(stderr, "%s: write(%d): %d (errno %d)\r\n", __func__, + // writefd, sent, errno); + if (sent < 0) + break; + total += red; + } + + /* + * It seems unlikely that this code is ever reached, as the process + * calls exit() when done, and this thread is terminated. 
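+ * When it is reached, the copy loop above has stopped because read()
+ * returned 0 (the writer side was closed), read() failed with something
+ * other than EWOULDBLOCK, or write() failed.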
+ */ + + fprintf(stderr, "loop exit\r\n"); + + close(readfd); + close(writefd); + + if (buffer != space) + free(buffer); + + fprintf(stderr, "%s: thread done: %llu bytes\r\n", __func__, total); + return (NULL); +} + +/* + * XNU only lets us do IO on vnodes, not pipes, so create a Unix + * Domain socket, open it to get a vnode for the kernel, and spawn + * thread to relay IO. + */ +void +libzfs_macos_wrapfd(int *srcfd, boolean_t send) +{ + char template[100]; + int readfd = -1; + int writefd = -1; + int error; + struct stat sb; + pipe2file_t *p2f = NULL; + + fprintf(stderr, "%s: checking if we need pipe wrap\r\n", __func__); + + // Check if it is a pipe + error = fstat(*srcfd, &sb); + + if (error != 0) + return; + + if (!S_ISFIFO(sb.st_mode)) + return; + + p2f = (pipe2file_t *)malloc(sizeof (pipe2file_t)); + if (p2f == NULL) + return; + + fprintf(stderr, "%s: is pipe: work on fd %d\r\n", __func__, *srcfd); + + snprintf(template, sizeof (template), "/tmp/.zfs.pipe.XXXXXX"); + + mktemp(template); + + mkfifo(template, 0600); + + readfd = open(template, O_RDONLY | O_NONBLOCK); + + fprintf(stderr, "%s: readfd %d (%d)\r\n", __func__, readfd, error); + + writefd = open(template, O_WRONLY | O_NONBLOCK); + + fprintf(stderr, "%s: writefd %d (%d)\r\n", __func__, writefd, error); + + // set it to delete + unlink(template); + + // Check delayed so unlink() is always called. + if (readfd < 0) + goto out; + if (writefd < 0) + goto out; + + /* Open needs NONBLOCK, so switch back to BLOCK */ + int flags; + flags = fcntl(readfd, F_GETFL); + flags &= ~O_NONBLOCK; + fcntl(readfd, F_SETFL, flags); + flags = fcntl(writefd, F_GETFL); + flags &= ~O_NONBLOCK; + fcntl(writefd, F_SETFL, flags); + + // create IO thread + + // Send, kernel was to be given *srcfd - to write to. + // Instead we give it writefd. + // thread then uses read(readfd) -> write(*srcfd) + if (send) { + p2f->from = readfd; + p2f->to = *srcfd; + } else { + p2f->from = *srcfd; + p2f->to = writefd; + } + fprintf(stderr, "%s: spawning thread\r\n", __func__); + + // pthread kills all threads on exit, and pipe_io_relay may not + // have fully completed. + error = fork(); + if (error == 0) { + + // Close the fd we don't need + if (send) + close(writefd); + else + close(readfd); + + setsid(); + pipe_io_relay(p2f); + _exit(0); + } + + if (error < 0) + goto out; + + // Return open(file) fd to kernel only after all error cases + if (send) { + *srcfd = writefd; + close(readfd); + } else { + *srcfd = readfd; + close(writefd); + } + return; + +out: + if (p2f != NULL) + free(p2f); + + if (readfd >= 0) + close(readfd); + + if (writefd >= 0) + close(writefd); +} + +void +libzfs_set_pipe_max(int infd) +{ + /* macOS automatically resizes */ +} diff --git a/lib/libzutil/os/macos/zutil_compat.c b/lib/libzutil/os/macos/zutil_compat.c new file mode 100644 index 0000000000..b46263ac27 --- /dev/null +++ b/lib/libzutil/os/macos/zutil_compat.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#include +#include +#include +#include +#include +#include + +static int +zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag) +{ + int ret; + void *zc_c; + unsigned long ncmd; + zfs_iocparm_t zp; + + switch (cflag) { + case ZFS_CMD_COMPAT_NONE: + ncmd = _IOWR('Z', request, zfs_iocparm_t); + zp.zfs_cmd = (uint64_t)zc; + zp.zfs_cmd_size = sizeof (zfs_cmd_t); + zp.zfs_ioctl_version = ZFS_IOCVER_ZOF; + zp.zfs_ioc_error = 0; + + ret = ioctl(fd, ncmd, &zp); + + /* + * If ioctl worked, get actual rc from kernel, which goes + * into errno, and return -1 if not-zero. + */ + if (ret == 0) { + errno = zp.zfs_ioc_error; + if (zp.zfs_ioc_error != 0) + ret = -1; + } + return (ret); + + default: + abort(); + return (EINVAL); + } + + /* Pass-through ioctl, rarely used if at all */ + + ret = ioctl(fd, ncmd, zc_c); + ASSERT0(ret); + + zfs_cmd_compat_get(zc, (caddr_t)zc_c, cflag); + free(zc_c); + + return (ret); +} + +/* + * This is the macOS version of ioctl(). Because the XNU kernel + * handles copyin() and copyout(), we must return success from the + * ioctl() handler (or it will not copyout() for userland), + * and instead embed the error return value in the zc structure. + */ +int +zfs_ioctl_fd(int fd, unsigned long request, zfs_cmd_t *zc) +{ + size_t oldsize; + int ret, cflag = ZFS_CMD_COMPAT_NONE; + + oldsize = zc->zc_nvlist_dst_size; + ret = zcmd_ioctl_compat(fd, request, zc, cflag); + + if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) { + ret = -1; + errno = ENOMEM; + } + + return (ret); +} diff --git a/lib/libzutil/os/macos/zutil_device_path_os.c b/lib/libzutil/os/macos/zutil_device_path_os.c new file mode 100644 index 0000000000..dec0281be9 --- /dev/null +++ b/lib/libzutil/os/macos/zutil_device_path_os.c @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* + * We don't strip/append partitions on FreeBSD. + */ + +/* + * Note: The caller must free the returned string. + */ +char * +zfs_strip_partition(char *dev) +{ + return (strdup(dev)); +} + +int +zfs_append_partition(char *path, size_t max_len) +{ + return (strnlen(path, max_len)); +} + +/* + * Strip the path from a device name. + * On FreeBSD we only want to remove "/dev/" from the beginning of + * paths if present. 
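+ * The same applies on macOS: e.g. "/dev/disk2s1" is returned as
+ * "disk2s1", while a name without the "/dev/" prefix is returned
+ * unchanged.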
+ */ +char * +zfs_strip_path(char *path) +{ + if (strncmp(path, _PATH_DEV, sizeof (_PATH_DEV) - 1) == 0) + return (path + sizeof (_PATH_DEV) - 1); + else + return (path); +} + +char * +zfs_get_underlying_path(const char *dev_name) +{ + + if (dev_name == NULL) + return (NULL); + + return (realpath(dev_name, NULL)); +} + +boolean_t +zfs_dev_is_whole_disk(const char *dev_name) +{ + struct dk_gpt *label; + int fd; + + if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0) + return (B_FALSE); + + if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { + (void) close(fd); + return (B_FALSE); + } + + efi_free(label); + (void) close(fd); + + return (B_TRUE); +} + +/* + * Wait up to timeout_ms for udev to set up the device node. The device is + * considered ready when libudev determines it has been initialized, all of + * the device links have been verified to exist, and it has been allowed to + * settle. At this point the device the device can be accessed reliably. + * Depending on the complexity of the udev rules this process could take + * several seconds. + */ +int +zpool_label_disk_wait(const char *path, int timeout_ms) +{ + int settle_ms = 50; + long sleep_ms = 10; + hrtime_t start, settle; + struct stat64 statbuf; + + start = gethrtime(); + settle = 0; + + do { + errno = 0; + if ((stat64(path, &statbuf) == 0) && (errno == 0)) { + if (settle == 0) + settle = gethrtime(); + else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms) + return (0); + } else if (errno != ENOENT) { + return (errno); + } + + usleep(sleep_ms * MILLISEC); + } while (NSEC2MSEC(gethrtime() - start) < timeout_ms); + + return (ENODEV); +} + +/* ARGSUSED */ +boolean_t +is_mpath_whole_disk(const char *path) +{ + return (B_FALSE); +} + +/* + * Return B_TRUE if device is a device mapper or multipath device. + * Return B_FALSE if not. + */ +boolean_t +zfs_dev_is_dm(const char *dev_name) +{ + return (B_FALSE); +} diff --git a/lib/libzutil/os/macos/zutil_import_os.c b/lib/libzutil/os/macos/zutil_import_os.c new file mode 100644 index 0000000000..251096301c --- /dev/null +++ b/lib/libzutil/os/macos/zutil_import_os.c @@ -0,0 +1,483 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright 2015 RackTop Systems. + * Copyright (c) 2016, Intel Corporation. + */ + +/* + * Pool import support functions. + * + * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since + * these commands are expected to run in the global zone, we can assume + * that the devices are all readable when called. 
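+ *
+ * On macOS the directories scanned by default live under
+ * /private/var/run/disk (see zpool_default_import_path below); the
+ * search can also be narrowed by hand, for example with
+ * "zpool import -d /private/var/run/disk/by-id".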
+ * + * To import a pool, we rely on reading the configuration information from the + * ZFS label of each device. If we successfully read the label, then we + * organize the configuration information in the following hierarchy: + * + * pool guid -> toplevel vdev guid -> label txg + * + * Duplicate entries matching this same tuple will be discarded. Once we have + * examined every device, we pick the best label txg config for each toplevel + * vdev. We then arrange these toplevel vdevs into a complete pool config, and + * update any paths that have changed. Finally, we attempt to import the pool + * using our derived config, and record the results. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "zutil_import.h" + +#ifdef HAVE_LIBUDEV +#include +#include +#endif + +#define DEFAULT_IMPORT_PATH_SIZE 9 +#define DEV_BYID_PATH "/dev/disk/by-id/" + +static boolean_t +is_watchdog_dev(char *dev) +{ + /* For 'watchdog' dev */ + if (strcmp(dev, "watchdog") == 0) + return (B_TRUE); + + /* For 'watchdog */ + if (strstr(dev, "watchdog") == dev && isdigit(dev[8])) + return (B_TRUE); + + return (B_FALSE); +} + +int +zfs_dev_flush(int fd) +{ +// return (ioctl(fd, BLKFLSBUF)); + return (0); +} + +void +zpool_open_func(void *arg) +{ + rdsk_node_t *rn = arg; + libpc_handle_t *hdl = rn->rn_hdl; + struct stat64 statbuf; + nvlist_t *config; + char *bname, *dupname; + uint64_t vdev_guid = 0; + int error; + int num_labels = 0; + int fd; + + /* + * Skip devices with well known prefixes there can be side effects + * when opening devices which need to be avoided. + * + * hpet - High Precision Event Timer + * watchdog - Watchdog must be closed in a special way. + */ + dupname = zutil_strdup(hdl, rn->rn_name); + bname = basename(dupname); + error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname)); + free(dupname); + if (error) + return; + + /* + * Ignore failed stats. We only want regular files and block devices. + */ + if (stat64(rn->rn_name, &statbuf) != 0 || + (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) + return; + + fd = open(rn->rn_name, O_RDONLY); + if ((fd < 0) && (errno == EINVAL)) + fd = open(rn->rn_name, O_RDONLY); + if ((fd < 0) && (errno == EACCES)) + hdl->lpc_open_access_error = B_TRUE; + if (fd < 0) + return; + + /* + * This file is too small to hold a zpool + */ + if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) { + (void) close(fd); + return; + } + + error = zpool_read_label(fd, &config, &num_labels); + if (error != 0) { + (void) close(fd); + return; + } + + if (num_labels == 0) { + (void) close(fd); + nvlist_free(config); + return; + } + + /* + * Check that the vdev is for the expected guid. Additional entries + * are speculatively added based on the paths stored in the labels. + * Entries with valid paths but incorrect guids must be removed. + */ + error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); + if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) { + (void) close(fd); + nvlist_free(config); + return; + } + + (void) close(fd); + + rn->rn_config = config; + rn->rn_num_labels = num_labels; + + /* + * Add additional entries for paths described by this label. 
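+ * The devlink wait below honors ZPOOL_IMPORT_UDEV_TIMEOUT_MS (a value
+ * in milliseconds); an illustrative invocation such as
+ *	ZPOOL_IMPORT_UDEV_TIMEOUT_MS=5000 zpool import
+ * allows up to five seconds for the device links to settle.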
+ */ + if (rn->rn_labelpaths) { + char *path = NULL; + char *devid = NULL; + char *env = NULL; + rdsk_node_t *slice; + avl_index_t where; + int timeout; + int error; + + if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) + return; + + env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); + if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || + timeout < 0) { + timeout = DISK_LABEL_WAIT; + } + + /* + * Allow devlinks to stabilize so all paths are available. + */ + zpool_label_disk_wait(rn->rn_name, timeout); + + if (path != NULL) { + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zutil_strdup(hdl, path); + slice->rn_vdev_guid = vdev_guid; + slice->rn_avl = rn->rn_avl; + slice->rn_hdl = hdl; + slice->rn_order = IMPORT_ORDER_PREFERRED_1; + slice->rn_labelpaths = B_FALSE; + pthread_mutex_lock(rn->rn_lock); + if (avl_find(rn->rn_avl, slice, &where)) { + pthread_mutex_unlock(rn->rn_lock); + free(slice->rn_name); + free(slice); + } else { + avl_insert(rn->rn_avl, slice, where); + pthread_mutex_unlock(rn->rn_lock); + zpool_open_func(slice); + } + } + + if (devid != NULL) { + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); + error = asprintf(&slice->rn_name, "%s%s", + DEV_BYID_PATH, devid); + if (error == -1) { + free(slice); + return; + } + + slice->rn_vdev_guid = vdev_guid; + slice->rn_avl = rn->rn_avl; + slice->rn_hdl = hdl; + slice->rn_order = IMPORT_ORDER_PREFERRED_2; + slice->rn_labelpaths = B_FALSE; + pthread_mutex_lock(rn->rn_lock); + if (avl_find(rn->rn_avl, slice, &where)) { + pthread_mutex_unlock(rn->rn_lock); + free(slice->rn_name); + free(slice); + } else { + avl_insert(rn->rn_avl, slice, where); + pthread_mutex_unlock(rn->rn_lock); + zpool_open_func(slice); + } + } + } +} + +static char * +zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { + "/private/var/run/disk/by-id", + "/private/var/run/disk/by-path", +#ifndef __UNSAFE_DEFAULT_IMPORT_PATH__ + "/private/var/run/disk/by-serial" +#else + "/private/var/run/disk/by-serial", + "/dev" /* UNSAFE device names will change */ +#endif /* !__UNSAFE_DEFAULT_IMPORT_PATH__ */ +}; + +const char * const * +zpool_default_search_paths(size_t *count) +{ + *count = DEFAULT_IMPORT_PATH_SIZE; + return ((const char * const *)zpool_default_import_path); +} + +int +zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock, + avl_tree_t **slice_cache) +{ + int i, dirs; + struct dirent *dp; + char path[MAXPATHLEN]; + char *end, **dir; + size_t pathleft; + avl_index_t where; + rdsk_node_t *slice; + + dir = zpool_default_import_path; + dirs = DEFAULT_IMPORT_PATH_SIZE; + + /* + * Go through and read the label configuration information from every + * possible device, organizing the information according to pool GUID + * and toplevel GUID. 
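+ *
+ * The directories walked here come from zpool_default_import_path
+ * above; a search path that does not exist is silently skipped, while
+ * one that cannot be opened ends the scan with an error.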
+ */ + for (i = 0; i < dirs; i++) { + char rdsk[MAXPATHLEN]; + int dfd; + DIR *dirp; + + /* use realpath to normalize the path */ + if (realpath(dir[i], path) == 0) { + + /* it is safe to skip missing search paths */ + if (errno == ENOENT) + continue; + + return (EPERM); + } + end = &path[strlen(path)]; + *end++ = '/'; + *end = 0; + pathleft = &path[sizeof (path)] - end; + + (void) strlcpy(rdsk, path, sizeof (rdsk)); + + if ((dfd = open(rdsk, O_RDONLY)) < 0 || + (dirp = fdopendir(dfd)) == NULL) { + if (dfd >= 0) + (void) close(dfd); + return (ENOENT); + } + + *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t)); + avl_create(*slice_cache, slice_cache_compare, + sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); + + while ((dp = readdir(dirp)) != NULL) { + const char *name = dp->d_name; + if (name[0] == '.' && + (name[1] == 0 || (name[1] == '.' && name[2] == 0))) + continue; + + slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zutil_strdup(hdl, path); + slice->rn_vdev_guid = 0; + slice->rn_lock = lock; + slice->rn_avl = *slice_cache; + slice->rn_hdl = hdl; + slice->rn_labelpaths = B_FALSE; + slice->rn_order = IMPORT_ORDER_DEFAULT; + + pthread_mutex_lock(lock); + if (avl_find(*slice_cache, slice, &where)) { + free(slice->rn_name); + free(slice); + } else { + avl_insert(*slice_cache, slice, where); + } + pthread_mutex_unlock(lock); + } + + (void) closedir(dirp); + } + + return (0); +} + +/* + * Linux persistent device strings for vdev labels + * + * based on libudev for consistency with libudev disk add/remove events + */ + +typedef struct vdev_dev_strs { + char vds_devid[128]; + char vds_devphys[128]; +} vdev_dev_strs_t; + +/* ARGSUSED */ +int +zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) +{ + return (ENODATA); +} + +/* ARGSUSED */ +int +zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) +{ + return (ENODATA); +} + +/* + * Encode the persistent devices strings + * used for the vdev disk label + */ +static int +encode_device_strings(const char *path, vdev_dev_strs_t *ds, + boolean_t wholedisk) +{ + return (ENOENT); +} + +/* + * Update a leaf vdev's persistent device strings + * + * - only applies for a dedicated leaf vdev (aka whole disk) + * - updated during pool create|add|attach|import + * - used for matching device matching during auto-{online,expand,replace} + * - stored in a leaf disk config label (i.e. alongside 'path' NVP) + * - these strings are currently not used in kernel (i.e. for vdev_disk_open) + * + * single device node example: + * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1' + * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0' + * + * multipath device node example: + * devid: 'dm-uuid-mpath-35000c5006304de3f' + * + * We also store the enclosure sysfs path for turning on enclosure LEDs + * (if applicable): + * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' + */ +void +update_vdev_config_dev_strs(nvlist_t *nv) +{ + vdev_dev_strs_t vds; + char *env, *type, *path; + uint64_t wholedisk = 0; + char *upath, *spath; + + /* + * For the benefit of legacy ZFS implementations, allow + * for opting out of devid strings in the vdev label. + * + * example use: + * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer + * + * explanation: + * Older ZFS on Linux implementations had issues when attempting to + * display pool config VDEV names if a "devid" NVP value is present + * in the pool's config. 
+ * + * For example, a pool that originated on illumos platform would + * have a devid value in the config and "zpool status" would fail + * when listing the config. + * + * A pool can be stripped of any "devid" values on import or + * prevented from adding them on zpool create|add by setting + * ZFS_VDEV_DEVID_OPT_OUT. + */ + env = getenv("ZFS_VDEV_DEVID_OPT_OUT"); + if (env && (strtoul(env, NULL, 0) > 0 || + !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); + return; + } + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 || + strcmp(type, VDEV_TYPE_DISK) != 0) { + return; + } + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) + return; + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); + + /* + * Update device string values in the config nvlist. + */ + if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) { + (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid); + if (vds.vds_devphys[0] != '\0') { + (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, + vds.vds_devphys); + } + + /* Add enclosure sysfs path (if disk is in an enclosure). */ + upath = zfs_get_underlying_path(path); + spath = zfs_get_enclosure_sysfs_path(upath); + if (spath) + nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + spath); + else + nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + + free(upath); + free(spath); + } else { + /* Clear out any stale entries. */ + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + } +} diff --git a/module/icp/asm-x86_64/os/macos/aes/aes_aesni.S b/module/icp/asm-x86_64/os/macos/aes/aes_aesni.S new file mode 100644 index 0000000000..9be7f63151 --- /dev/null +++ b/module/icp/asm-x86_64/os/macos/aes/aes_aesni.S @@ -0,0 +1,855 @@ +/* + * ==================================================================== + * Written by Intel Corporation for the OpenSSL project to add support + * for Intel AES-NI instructions. Rights for redistribution and usage + * in source and binary forms are granted according to the OpenSSL + * license. + * + * Author: Huang Ying + * Vinodh Gopal + * Kahraman Akdemir + * + * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) + * instructions that are going to be introduced in the next generation + * of Intel processor, as of 2009. These instructions enable fast and + * secure data encryption and decryption, using the Advanced Encryption + * Standard (AES), defined by FIPS Publication number 197. The + * architecture introduces six instructions that offer full hardware + * support for AES. Four of them support high performance data + * encryption and decryption, and the other two instructions support + * the AES key expansion procedure. + * ==================================================================== + */ + +/* + * ==================================================================== + * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +/* + * ==================================================================== + * OpenSolaris OS modifications + * + * This source originates as files aes-intel.S and eng_aesni_asm.pl, in + * patches sent sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by + * Huang Ying of Intel to the openssl-dev mailing list under the subject + * of "Add support to Intel AES-NI instruction set for x86_64 platform". + * + * This OpenSolaris version has these major changes from the original source: + * + * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function + * definitions for lint. + * + * 2. Formatted code, added comments, and added #includes and #defines. + * + * 3. If bit CR0.TS is set, clear and set the TS bit, after and before + * calling kpreempt_disable() and kpreempt_enable(). + * If the TS bit is not set, Save and restore %xmm registers at the beginning + * and end of function calls (%xmm* registers are not saved and restored by + * during kernel thread preemption). + * + * 4. Renamed functions, reordered parameters, and changed return value + * to match OpenSolaris: + * + * OpenSSL interface: + * int intel_AES_set_encrypt_key(const unsigned char *userKey, + * const int bits, AES_KEY *key); + * int intel_AES_set_decrypt_key(const unsigned char *userKey, + * const int bits, AES_KEY *key); + * Return values for above are non-zero on error, 0 on success. 
+ * + * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, + * const AES_KEY *key); + * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, + * const AES_KEY *key); + * typedef struct aes_key_st { + * unsigned int rd_key[4 *(AES_MAXNR + 1)]; + * int rounds; + * unsigned int pad[3]; + * } AES_KEY; + * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules + * (ks32) instead of 64-bit (ks64). + * Number of rounds (aka round count) is at offset 240 of AES_KEY. + * + * OpenSolaris OS interface (#ifdefs removed for readability): + * int rijndael_key_setup_dec_intel(uint32_t rk[], + * const uint32_t cipherKey[], uint64_t keyBits); + * int rijndael_key_setup_enc_intel(uint32_t rk[], + * const uint32_t cipherKey[], uint64_t keyBits); + * Return values for above are 0 on error, number of rounds on success. + * + * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4]); + * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4]); + * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]; + * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t; + * + * typedef union { + * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)]; + * } aes_ks_t; + * typedef struct aes_key { + * aes_ks_t encr_ks, decr_ks; + * long double align128; + * int flags, nr, type; + * } aes_key_t; + * + * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text, + * ct is crypto text, and MAX_AES_NR is 14. + * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64. + * + * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary. + * ==================================================================== + * Mac OS X modifications + * 1. Removed CR0.TS / STTS / CLTS since the XNU kernel can apparently use floating point + * registers without this. + * + * ==================================================================== + */ + +#if defined(lint) || defined(__lint) + +#include + +/* ARGSUSED */ +void +aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], + uint32_t ct[4]) { +} +/* ARGSUSED */ +void +aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], + uint32_t pt[4]) { +} +/* ARGSUSED */ +int +rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], + uint64_t keyBits) { + return (0); +} +/* ARGSUSED */ +int +rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], + uint64_t keyBits) { + return (0); +} + + +#else /* lint */ + +#define _ASM +#include + +#if defined(_KERNEL) && !defined(__APPLE__) + /* + * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is, + * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it + * uses it to pass P2 to syscall. + * This also occurs with the STTS macro, but we dont care if + * P2 (%rsi) is modified just before function exit. + * The CLTS and STTS macros push and pop P1 (%rdi) already. + */ +#ifdef __xpv +#define PROTECTED_CLTS \ + push %rsi; \ + CLTS; \ + pop %rsi +#else +#define PROTECTED_CLTS \ + CLTS +#endif /* __xpv */ + +#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \ + push %rbp; \ + mov %rsp, %rbp; \ + movq %cr0, tmpreg; \ + testq $CR0_TS, tmpreg; \ + jnz 1f; \ + and $-XMM_ALIGN, %rsp; \ + sub $(XMM_SIZE * 2), %rsp; \ + movaps %xmm0, 16(%rsp); \ + movaps %xmm1, (%rsp); \ + jmp 2f; \ +1: \ + PROTECTED_CLTS; \ +2: + + /* + * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack, + * otherwise set CR0_TS. 
+ */ +#define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \ + testq $CR0_TS, tmpreg; \ + jnz 1f; \ + movaps (%rsp), %xmm1; \ + movaps 16(%rsp), %xmm0; \ + jmp 2f; \ +1: \ + STTS(tmpreg); \ +2: \ + mov %rbp, %rsp; \ + pop %rbp + + /* + * If CR0_TS is not set, align stack (with push %rbp) and push + * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS + */ +#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \ + push %rbp; \ + mov %rsp, %rbp; \ + movq %cr0, tmpreg; \ + testq $CR0_TS, tmpreg; \ + jnz 1f; \ + and $-XMM_ALIGN, %rsp; \ + sub $(XMM_SIZE * 7), %rsp; \ + movaps %xmm0, 96(%rsp); \ + movaps %xmm1, 80(%rsp); \ + movaps %xmm2, 64(%rsp); \ + movaps %xmm3, 48(%rsp); \ + movaps %xmm4, 32(%rsp); \ + movaps %xmm5, 16(%rsp); \ + movaps %xmm6, (%rsp); \ + jmp 2f; \ +1: \ + PROTECTED_CLTS; \ +2: + + + /* + * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack, + * otherwise set CR0_TS. + */ +#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \ + testq $CR0_TS, tmpreg; \ + jnz 1f; \ + movaps (%rsp), %xmm6; \ + movaps 16(%rsp), %xmm5; \ + movaps 32(%rsp), %xmm4; \ + movaps 48(%rsp), %xmm3; \ + movaps 64(%rsp), %xmm2; \ + movaps 80(%rsp), %xmm1; \ + movaps 96(%rsp), %xmm0; \ + jmp 2f; \ +1: \ + STTS(tmpreg); \ +2: \ + mov %rbp, %rsp; \ + pop %rbp + + +#else +#define PROTECTED_CLTS +#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) +#define SET_TS_OR_POP_XMM0_XMM1(tmpreg) +#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) +#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) +#endif /* _KERNEL */ + + +/* + * _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(), + * _key_expansion_256a(), _key_expansion_256b() + * + * Helper functions called by rijndael_key_setup_inc_intel(). + * Also used indirectly by rijndael_key_setup_dec_intel(). + * + * Input: + * %xmm0 User-provided cipher key + * %xmm1 Round constant + * Output: + * (%rcx) AES key + */ + +.align 4, 0x90 +_key_expansion_128: +_key_expansion_256a: + pshufd $0b11111111, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + movups %xmm0, (%rcx) + add $0x10, %rcx + ret + SET_SIZE(_key_expansion_128) + SET_SIZE(_key_expansion_256a) + +.align 4, 0x90 +_key_expansion_192a: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movups %xmm2, %xmm5 + movups %xmm2, %xmm6 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movups %xmm0, %xmm1 + shufps $0b01000100, %xmm0, %xmm6 + movups %xmm6, (%rcx) + shufps $0b01001110, %xmm2, %xmm1 + movups %xmm1, 0x10(%rcx) + add $0x20, %rcx + ret + SET_SIZE(_key_expansion_192a) + +.align 4, 0x90 +_key_expansion_192b: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movups %xmm2, %xmm5 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movups %xmm0, (%rcx) + add $0x10, %rcx + ret + SET_SIZE(_key_expansion_192b) + +.align 4, 0x90 +_key_expansion_256b: + pshufd $0b10101010, %xmm1, %xmm1 + shufps $0b00010000, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + shufps $0b10001100, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + pxor %xmm1, %xmm2 + movups %xmm2, (%rcx) + add $0x10, %rcx + ret + SET_SIZE(_key_expansion_256b) + + +/* + * rijndael_key_setup_enc_intel() + * Expand the cipher key into the encryption key schedule. 
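+ *
+ * A hypothetical OpenSolaris-style caller, sketched only to show the
+ * argument order and return convention documented below:
+ *
+ *	uint32_t ks[(14 + 1) * 4];
+ *	int nr = rijndael_key_setup_enc_intel(ks, cipherKey, 256);
+ *
+ * nr is 14 here on success, or 0 on error.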
+ * + * For kernel code, caller is responsible for ensuring kpreempt_disable() + * has been called. This is because %xmm registers are not saved/restored. + * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set + * on entry. Otherwise, if TS is not set, save and restore %xmm registers + * on the stack. + * + * OpenSolaris interface: + * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], + * uint64_t keyBits); + * Return value is 0 on error, number of rounds on success. + * + * Original Intel OpenSSL interface: + * int intel_AES_set_encrypt_key(const unsigned char *userKey, + * const int bits, AES_KEY *key); + * Return value is non-zero on error, 0 on success. + */ + +#ifdef OPENSSL_INTERFACE +#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key +#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key + +#define USERCIPHERKEY rdi /* P1, 64 bits */ +#define KEYSIZE32 esi /* P2, 32 bits */ +#define KEYSIZE64 rsi /* P2, 64 bits */ +#define AESKEY rdx /* P3, 64 bits */ + +#else /* OpenSolaris Interface */ +#define AESKEY rdi /* P1, 64 bits */ +#define USERCIPHERKEY rsi /* P2, 64 bits */ +#define KEYSIZE32 edx /* P3, 32 bits */ +#define KEYSIZE64 rdx /* P3, 64 bits */ +#endif /* OPENSSL_INTERFACE */ + +#define ROUNDS32 KEYSIZE32 /* temp */ +#define ROUNDS64 KEYSIZE64 /* temp */ +#define ENDAESKEY USERCIPHERKEY /* temp */ + +ENTRY_NP(rijndael_key_setup_enc_intel) +rijndael_key_setup_enc_intel_local: + CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10) + + // NULL pointer sanity check + test %USERCIPHERKEY, %USERCIPHERKEY + jz .Lenc_key_invalid_param + test %AESKEY, %AESKEY + jz .Lenc_key_invalid_param + + movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) + movups %xmm0, (%AESKEY) + lea 0x10(%AESKEY), %rcx // key addr + pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x + + cmp $256, %KEYSIZE32 + jnz .Lenc_key192 + + // AES 256: 14 rounds in encryption key schedule +#ifdef OPENSSL_INTERFACE + mov $14, %ROUNDS32 + movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14 +#endif /* OPENSSL_INTERFACE */ + + movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) + movups %xmm2, (%rcx) + add $0x10, %rcx + + aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + aeskeygenassist $0x1, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + aeskeygenassist $0x2, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + aeskeygenassist $0x4, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + aeskeygenassist $0x8, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + aeskeygenassist $0x10, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + aeskeygenassist $0x20, %xmm0, %xmm1 + call _key_expansion_256b + aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key + call _key_expansion_256a + + SET_TS_OR_POP_XMM0_TO_XMM6(%r10) +#ifdef OPENSSL_INTERFACE + xor %rax, %rax // return 0 (OK) +#else /* Open Solaris Interface */ + mov $14, %rax // return # rounds = 14 +#endif + ret + +.align 4 +.Lenc_key192: + cmp $192, %KEYSIZE32 + jnz .Lenc_key128 + + // AES 192: 12 rounds in encryption key schedule +#ifdef OPENSSL_INTERFACE + mov $12, %ROUNDS32 + movl %ROUNDS32, 240(%AESKEY) // 
key.rounds = 12 +#endif /* OPENSSL_INTERFACE */ + + movq 0x10(%USERCIPHERKEY), %xmm2 // other user key + aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key + call _key_expansion_192a + aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key + call _key_expansion_192b + aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key + call _key_expansion_192a + aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key + call _key_expansion_192b + aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key + call _key_expansion_192a + aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key + call _key_expansion_192b + aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key + call _key_expansion_192a + aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key + call _key_expansion_192b + + SET_TS_OR_POP_XMM0_TO_XMM6(%r10) +#ifdef OPENSSL_INTERFACE + xor %rax, %rax // return 0 (OK) +#else /* OpenSolaris Interface */ + mov $12, %rax // return # rounds = 12 +#endif + ret + +.align 4 +.Lenc_key128: + cmp $128, %KEYSIZE32 + jnz .Lenc_key_invalid_key_bits + + // AES 128: 10 rounds in encryption key schedule +#ifdef OPENSSL_INTERFACE + mov $10, %ROUNDS32 + movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10 +#endif /* OPENSSL_INTERFACE */ + + aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key + call _key_expansion_128 + + SET_TS_OR_POP_XMM0_TO_XMM6(%r10) +#ifdef OPENSSL_INTERFACE + xor %rax, %rax // return 0 (OK) +#else /* OpenSolaris Interface */ + mov $10, %rax // return # rounds = 10 +#endif + ret + +.Lenc_key_invalid_param: +#ifdef OPENSSL_INTERFACE + SET_TS_OR_POP_XMM0_TO_XMM6(%r10) + mov $-1, %rax // user key or AES key pointer is NULL + ret +#else + /* FALLTHROUGH */ +#endif /* OPENSSL_INTERFACE */ + +.Lenc_key_invalid_key_bits: + SET_TS_OR_POP_XMM0_TO_XMM6(%r10) +#ifdef OPENSSL_INTERFACE + mov $-2, %rax // keysize is invalid +#else /* Open Solaris Interface */ + xor %rax, %rax // a key pointer is NULL or invalid keysize +#endif /* OPENSSL_INTERFACE */ + + ret + SET_SIZE(rijndael_key_setup_enc_intel) + + +/* + * rijndael_key_setup_dec_intel() + * Expand the cipher key into the decryption key schedule. + * + * For kernel code, caller is responsible for ensuring kpreempt_disable() + * has been called. This is because %xmm registers are not saved/restored. + * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set + * on entry. Otherwise, if TS is not set, save and restore %xmm registers + * on the stack. + * + * OpenSolaris interface: + * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], + * uint64_t keyBits); + * Return value is 0 on error, number of rounds on success. 
+ * P1->P2, P2->P3, P3->P1 + * + * Original Intel OpenSSL interface: + * int intel_AES_set_decrypt_key(const unsigned char *userKey, + * const int bits, AES_KEY *key); + * Return value is non-zero on error, 0 on success. + */ +ENTRY_NP(rijndael_key_setup_dec_intel) + // Generate round keys used for encryption + call rijndael_key_setup_enc_intel_local + test %rax, %rax +#ifdef OPENSSL_INTERFACE + jnz .Ldec_key_exit // Failed if returned non-0 +#else /* OpenSolaris Interface */ + jz .Ldec_key_exit // Failed if returned 0 +#endif /* OPENSSL_INTERFACE */ + + CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) + + /* + * Convert round keys used for encryption + * to a form usable for decryption + */ +#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */ + mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14) + // (already set for OpenSSL) +#endif + + lea 0x10(%AESKEY), %rcx // key addr + shl $4, %ROUNDS32 + add %AESKEY, %ROUNDS64 + mov %ROUNDS64, %ENDAESKEY + +.align 4 +.Ldec_key_reorder_loop: + movups (%AESKEY), %xmm0 + movups (%ROUNDS64), %xmm1 + movups %xmm0, (%ROUNDS64) + movups %xmm1, (%AESKEY) + lea 0x10(%AESKEY), %AESKEY + lea -0x10(%ROUNDS64), %ROUNDS64 + cmp %AESKEY, %ROUNDS64 + ja .Ldec_key_reorder_loop + +.align 4 +.Ldec_key_inv_loop: + movups (%rcx), %xmm0 + // Convert an encryption round key to a form usable for decryption + // with the "AES Inverse Mix Columns" instruction + aesimc %xmm0, %xmm1 + movups %xmm1, (%rcx) + lea 0x10(%rcx), %rcx + cmp %ENDAESKEY, %rcx + jnz .Ldec_key_inv_loop + + SET_TS_OR_POP_XMM0_XMM1(%r10) + +.Ldec_key_exit: + // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error + // OpenSSL: rax = 0 for OK, or non-zero for error + ret + SET_SIZE(rijndael_key_setup_dec_intel) + + +/* + * aes_encrypt_intel() + * Encrypt a single block (in and out can overlap). + * + * For kernel code, caller is responsible for ensuring kpreempt_disable() + * has been called. This is because %xmm registers are not saved/restored. + * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set + * on entry. Otherwise, if TS is not set, save and restore %xmm registers + * on the stack. 
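+ *
+ * A hypothetical call using the OpenSolaris parameter order documented
+ * below, where ks and nr come from rijndael_key_setup_enc_intel():
+ *
+ *	aes_encrypt_intel(ks, nr, pt, ct);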
+ * + * Temporary register usage: + * %xmm0 State + * %xmm1 Key + * + * Original OpenSolaris Interface: + * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4]) + * + * Original Intel OpenSSL Interface: + * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, + * const AES_KEY *key) + */ + +#ifdef OPENSSL_INTERFACE +#define aes_encrypt_intel intel_AES_encrypt +#define aes_decrypt_intel intel_AES_decrypt + +#define INP rdi /* P1, 64 bits */ +#define OUTP rsi /* P2, 64 bits */ +#define KEYP rdx /* P3, 64 bits */ + +/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */ +#define NROUNDS32 ecx /* temporary, 32 bits */ +#define NROUNDS cl /* temporary, 8 bits */ + +#else /* OpenSolaris Interface */ +#define KEYP rdi /* P1, 64 bits */ +#define NROUNDS esi /* P2, 32 bits */ +#define INP rdx /* P3, 64 bits */ +#define OUTP rcx /* P4, 64 bits */ +#endif /* OPENSSL_INTERFACE */ + +#define STATE xmm0 /* temporary, 128 bits */ +#define KEY xmm1 /* temporary, 128 bits */ + +ENTRY_NP(aes_encrypt_intel) + CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) + + movups (%INP), %STATE // input + movups (%KEYP), %KEY // key +#ifdef OPENSSL_INTERFACE + mov 240(%KEYP), %NROUNDS32 // round count +#else /* OpenSolaris Interface */ + /* Round count is already present as P2 in %rsi/%esi */ +#endif /* OPENSSL_INTERFACE */ + + pxor %KEY, %STATE // round 0 + lea 0x30(%KEYP), %KEYP + cmp $12, %NROUNDS + jb .Lenc128 + lea 0x20(%KEYP), %KEYP + je .Lenc192 + + // AES 256 + lea 0x20(%KEYP), %KEYP + movups -0x60(%KEYP), %KEY + aesenc %KEY, %STATE + movups -0x50(%KEYP), %KEY + aesenc %KEY, %STATE + +.align 4 +.Lenc192: + // AES 192 and 256 + movups -0x40(%KEYP), %KEY + aesenc %KEY, %STATE + movups -0x30(%KEYP), %KEY + aesenc %KEY, %STATE + +.align 4 +.Lenc128: + // AES 128, 192, and 256 + movups -0x20(%KEYP), %KEY + aesenc %KEY, %STATE + movups -0x10(%KEYP), %KEY + aesenc %KEY, %STATE + movups (%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x10(%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x20(%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x30(%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x40(%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x50(%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x60(%KEYP), %KEY + aesenc %KEY, %STATE + movups 0x70(%KEYP), %KEY + aesenclast %KEY, %STATE // last round + movups %STATE, (%OUTP) // output + + SET_TS_OR_POP_XMM0_XMM1(%r10) + ret + SET_SIZE(aes_encrypt_intel) + + +/* + * aes_decrypt_intel() + * Decrypt a single block (in and out can overlap). + * + * For kernel code, caller is responsible for ensuring kpreempt_disable() + * has been called. This is because %xmm registers are not saved/restored. + * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set + * on entry. Otherwise, if TS is not set, save and restore %xmm registers + * on the stack. 
+ * + * Temporary register usage: + * %xmm0 State + * %xmm1 Key + * + * Original OpenSolaris Interface: + * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4])/ + * + * Original Intel OpenSSL Interface: + * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, + * const AES_KEY *key); + */ +ENTRY_NP(aes_decrypt_intel) + CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) + + movups (%INP), %STATE // input + movups (%KEYP), %KEY // key +#ifdef OPENSSL_INTERFACE + mov 240(%KEYP), %NROUNDS32 // round count +#else /* OpenSolaris Interface */ + /* Round count is already present as P2 in %rsi/%esi */ +#endif /* OPENSSL_INTERFACE */ + + pxor %KEY, %STATE // round 0 + lea 0x30(%KEYP), %KEYP + cmp $12, %NROUNDS + jb .Ldec128 + lea 0x20(%KEYP), %KEYP + je .Ldec192 + + // AES 256 + lea 0x20(%KEYP), %KEYP + movups -0x60(%KEYP), %KEY + aesdec %KEY, %STATE + movups -0x50(%KEYP), %KEY + aesdec %KEY, %STATE + +.align 4 +.Ldec192: + // AES 192 and 256 + movups -0x40(%KEYP), %KEY + aesdec %KEY, %STATE + movups -0x30(%KEYP), %KEY + aesdec %KEY, %STATE + +.align 4 +.Ldec128: + // AES 128, 192, and 256 + movups -0x20(%KEYP), %KEY + aesdec %KEY, %STATE + movups -0x10(%KEYP), %KEY + aesdec %KEY, %STATE + movups (%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x10(%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x20(%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x30(%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x40(%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x50(%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x60(%KEYP), %KEY + aesdec %KEY, %STATE + movups 0x70(%KEYP), %KEY + aesdeclast %KEY, %STATE // last round + movups %STATE, (%OUTP) // output + + SET_TS_OR_POP_XMM0_XMM1(%r10) + ret + SET_SIZE(aes_decrypt_intel) + +#endif /* lint || __lint */ diff --git a/module/icp/asm-x86_64/os/macos/aes/aes_amd64.S b/module/icp/asm-x86_64/os/macos/aes/aes_amd64.S new file mode 100644 index 0000000000..cdd9a861be --- /dev/null +++ b/module/icp/asm-x86_64/os/macos/aes/aes_amd64.S @@ -0,0 +1,900 @@ +/* + * --------------------------------------------------------------------------- + * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. + * + * LICENSE TERMS + * + * The free distribution and use of this software is allowed (with or without + * changes) provided that: + * + * 1. source code distributions include the above copyright notice, this + * list of conditions and the following disclaimer; + * + * 2. binary distributions include the above copyright notice, this list + * of conditions and the following disclaimer in their documentation; + * + * 3. the name of the copyright holder is not used to endorse products + * built using this software without specific written permission. + * + * DISCLAIMER + * + * This software is provided 'as is' with no explicit or implied warranties + * in respect of its properties, including, but not limited to, correctness + * and/or fitness for purpose. + * --------------------------------------------------------------------------- + * Issue 20/12/2007 + * + * I am grateful to Dag Arne Osvik for many discussions of the techniques that + * can be used to optimise AES assembler code on AMD64/EM64T architectures. + * Some of the techniques used in this implementation are the result of + * suggestions made by him for which I am most grateful. + * + * An AES implementation for AMD64 processors using the YASM assembler. This + * implementation provides only encryption, decryption and hence requires key + * scheduling support in C. 
It uses 8k bytes of tables but its encryption and + * decryption performance is very close to that obtained using large tables. + * It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions, + * which are as follows: + * ms windows gnu/linux/opensolaris os + * + * in_blk rcx rdi + * out_blk rdx rsi + * context (cx) r8 rdx + * + * preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 + * registers rdi - on both + * + * destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 + * registers - rdi on both + * + * The convention used here is that for gnu/linux/opensolaris os. + * + * This code provides the standard AES block size (128 bits, 16 bytes) and the + * three standard AES key sizes (128, 192 and 256 bits). It has the same call + * interface as my C implementation. It uses the Microsoft C AMD64 calling + * conventions in which the three parameters are placed in rcx, rdx and r8 + * respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. + * + * OpenSolaris Note: + * Modified to use GNU/Linux/Solaris calling conventions. + * That is parameters are placed in rdi, rsi, rdx, and rcx, respectively. + * + * AES_RETURN aes_encrypt(const unsigned char in_blk[], + * unsigned char out_blk[], const aes_encrypt_ctx cx[1])/ + * + * AES_RETURN aes_decrypt(const unsigned char in_blk[], + * unsigned char out_blk[], const aes_decrypt_ctx cx[1])/ + * + * AES_RETURN aes_encrypt_key(const unsigned char key[], + * const aes_encrypt_ctx cx[1])/ + * + * AES_RETURN aes_decrypt_key(const unsigned char key[], + * const aes_decrypt_ctx cx[1])/ + * + * AES_RETURN aes_encrypt_key(const unsigned char key[], + * unsigned int len, const aes_decrypt_ctx cx[1])/ + * + * AES_RETURN aes_decrypt_key(const unsigned char key[], + * unsigned int len, const aes_decrypt_ctx cx[1])/ + * + * where is 128, 102 or 256. In the last two calls the length can be in + * either bits or bytes. + * + * Comment in/out the following lines to obtain the desired subroutines. These + * selections MUST match those in the C header file aesopt.h + */ +#define AES_REV_DKS /* define if key decryption schedule is reversed */ + +#define LAST_ROUND_TABLES /* define for the faster version using extra tables */ + +/* + * The encryption key schedule has the following in memory layout where N is the + * number of rounds (10, 12 or 14): + * + * lo: | input key (round 0) | / each round is four 32-bit words + * | encryption round 1 | + * | encryption round 2 | + * .... + * | encryption round N-1 | + * hi: | encryption round N | + * + * The decryption key schedule is normally set up so that it has the same + * layout as above by actually reversing the order of the encryption key + * schedule in memory (this happens when AES_REV_DKS is set): + * + * lo: | decryption round 0 | = | encryption round N | + * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] + * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] + * .... .... 
+ * | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] + * hi: | decryption round N | = | input key (round 0) | + * + * with rounds except the first and last modified using inv_mix_column() + * But if AES_REV_DKS is NOT set the order of keys is left as it is for + * encryption so that it has to be accessed in reverse when used for + * decryption (although the inverse mix column modifications are done) + * + * lo: | decryption round 0 | = | input key (round 0) | + * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] + * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] + * .... .... + * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] + * hi: | decryption round N | = | encryption round N | + * + * This layout is faster when the assembler key scheduling provided here + * is used. + * + * End of user defines + */ + +/* + * --------------------------------------------------------------------------- + * OpenSolaris OS modifications + * + * This source originates from Brian Gladman file aes_amd64.asm + * in http://fp.gladman.plus.com/AES/aes-src-04-03-08.zip + * with these changes: + * + * 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and + * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION, + * AES_128, AES_192, AES_256, AES_VAR ifdefs. + * + * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define + * + * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef + * + * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax + * (operands reversed, literals prefixed with "$", registers prefixed with "%", + * and "[register+offset]", addressing changed to "offset(register)", + * parenthesis in constant expressions "()" changed to square brackets "[]", + * "." removed from local (numeric) labels, and other changes. + * Examples: + * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax + * mov rax,(4*20h) mov $[4*0x20],%rax + * mov rax,[ebx+20h] mov 0x20(%ebx),%rax + * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax + * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax + * + * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function + * definitions for lint. + * + * 6. Renamed functions and reordered parameters to match OpenSolaris: + * Original Gladman interface: + * int aes_encrypt(const unsigned char *in, + * unsigned char *out, const aes_encrypt_ctx cx[1])/ + * int aes_decrypt(const unsigned char *in, + * unsigned char *out, const aes_encrypt_ctx cx[1])/ + * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t, + * and a union type, inf., containing inf.l, a uint32_t and + * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is + * used and contains the key schedule length * 16 where key schedule length is + * 10, 12, or 14 bytes. + * + * OpenSolaris OS interface: + * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4])/ + * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4])/ + * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/ + * uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/ + * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text, + * ct is crypto text, and MAX_AES_NR is 14. + * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64. 
+ */ + +#if defined(lint) || defined(__lint) + +#include +/* ARGSUSED */ +void +aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], + uint32_t ct[4]) { +} +/* ARGSUSED */ +void +aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], + uint32_t pt[4]) { +} + + +#else + +#define _ASM +#include + +#define KS_LENGTH 60 + +#define raxd eax +#define rdxd edx +#define rcxd ecx +#define rbxd ebx +#define rsid esi +#define rdid edi + +#define raxb al +#define rdxb dl +#define rcxb cl +#define rbxb bl +#define rsib sil +#define rdib dil + +// finite field multiplies by {02}, {04} and {08} + +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) + +// finite field multiplies required in table generation + +#define f3(x) ((f2(x)) ^ (x)) +#define f9(x) ((f8(x)) ^ (x)) +#define fb(x) ((f8(x)) ^ (f2(x)) ^ (x)) +#define fd(x) ((f8(x)) ^ (f4(x)) ^ (x)) +#define fe(x) ((f8(x)) ^ (f4(x)) ^ (f2(x))) + +// macros for expanding S-box data + +#define u8(x) (f2(x)), (x), (x), (f3(x)), (f2(x)), (x), (x), (f3(x)) +#define v8(x) (fe(x)), (f9(x)), (fd(x)), (fb(x)), (fe(x)), (f9(x)), (fd(x)), (x) +#define w8(x) (x), 0, 0, 0, (x), 0, 0, 0 + +#define enc_vals(x) \ + .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \ + .byte x(0x30),x(0x01),x(0x67),x(0x2b),x(0xfe),x(0xd7),x(0xab),x(0x76); \ + .byte x(0xca),x(0x82),x(0xc9),x(0x7d),x(0xfa),x(0x59),x(0x47),x(0xf0); \ + .byte x(0xad),x(0xd4),x(0xa2),x(0xaf),x(0x9c),x(0xa4),x(0x72),x(0xc0); \ + .byte x(0xb7),x(0xfd),x(0x93),x(0x26),x(0x36),x(0x3f),x(0xf7),x(0xcc); \ + .byte x(0x34),x(0xa5),x(0xe5),x(0xf1),x(0x71),x(0xd8),x(0x31),x(0x15); \ + .byte x(0x04),x(0xc7),x(0x23),x(0xc3),x(0x18),x(0x96),x(0x05),x(0x9a); \ + .byte x(0x07),x(0x12),x(0x80),x(0xe2),x(0xeb),x(0x27),x(0xb2),x(0x75); \ + .byte x(0x09),x(0x83),x(0x2c),x(0x1a),x(0x1b),x(0x6e),x(0x5a),x(0xa0); \ + .byte x(0x52),x(0x3b),x(0xd6),x(0xb3),x(0x29),x(0xe3),x(0x2f),x(0x84); \ + .byte x(0x53),x(0xd1),x(0x00),x(0xed),x(0x20),x(0xfc),x(0xb1),x(0x5b); \ + .byte x(0x6a),x(0xcb),x(0xbe),x(0x39),x(0x4a),x(0x4c),x(0x58),x(0xcf); \ + .byte x(0xd0),x(0xef),x(0xaa),x(0xfb),x(0x43),x(0x4d),x(0x33),x(0x85); \ + .byte x(0x45),x(0xf9),x(0x02),x(0x7f),x(0x50),x(0x3c),x(0x9f),x(0xa8); \ + .byte x(0x51),x(0xa3),x(0x40),x(0x8f),x(0x92),x(0x9d),x(0x38),x(0xf5); \ + .byte x(0xbc),x(0xb6),x(0xda),x(0x21),x(0x10),x(0xff),x(0xf3),x(0xd2); \ + .byte x(0xcd),x(0x0c),x(0x13),x(0xec),x(0x5f),x(0x97),x(0x44),x(0x17); \ + .byte x(0xc4),x(0xa7),x(0x7e),x(0x3d),x(0x64),x(0x5d),x(0x19),x(0x73); \ + .byte x(0x60),x(0x81),x(0x4f),x(0xdc),x(0x22),x(0x2a),x(0x90),x(0x88); \ + .byte x(0x46),x(0xee),x(0xb8),x(0x14),x(0xde),x(0x5e),x(0x0b),x(0xdb); \ + .byte x(0xe0),x(0x32),x(0x3a),x(0x0a),x(0x49),x(0x06),x(0x24),x(0x5c); \ + .byte x(0xc2),x(0xd3),x(0xac),x(0x62),x(0x91),x(0x95),x(0xe4),x(0x79); \ + .byte x(0xe7),x(0xc8),x(0x37),x(0x6d),x(0x8d),x(0xd5),x(0x4e),x(0xa9); \ + .byte x(0x6c),x(0x56),x(0xf4),x(0xea),x(0x65),x(0x7a),x(0xae),x(0x08); \ + .byte x(0xba),x(0x78),x(0x25),x(0x2e),x(0x1c),x(0xa6),x(0xb4),x(0xc6); \ + .byte x(0xe8),x(0xdd),x(0x74),x(0x1f),x(0x4b),x(0xbd),x(0x8b),x(0x8a); \ + .byte x(0x70),x(0x3e),x(0xb5),x(0x66),x(0x48),x(0x03),x(0xf6),x(0x0e); \ + .byte x(0x61),x(0x35),x(0x57),x(0xb9),x(0x86),x(0xc1),x(0x1d),x(0x9e); \ + .byte x(0xe1),x(0xf8),x(0x98),x(0x11),x(0x69),x(0xd9),x(0x8e),x(0x94); \ + .byte 
x(0x9b),x(0x1e),x(0x87),x(0xe9),x(0xce),x(0x55),x(0x28),x(0xdf); \ + .byte x(0x8c),x(0xa1),x(0x89),x(0x0d),x(0xbf),x(0xe6),x(0x42),x(0x68); \ + .byte x(0x41),x(0x99),x(0x2d),x(0x0f),x(0xb0),x(0x54),x(0xbb),x(0x16) + +#define dec_vals(x) \ + .byte x(0x52),x(0x09),x(0x6a),x(0xd5),x(0x30),x(0x36),x(0xa5),x(0x38); \ + .byte x(0xbf),x(0x40),x(0xa3),x(0x9e),x(0x81),x(0xf3),x(0xd7),x(0xfb); \ + .byte x(0x7c),x(0xe3),x(0x39),x(0x82),x(0x9b),x(0x2f),x(0xff),x(0x87); \ + .byte x(0x34),x(0x8e),x(0x43),x(0x44),x(0xc4),x(0xde),x(0xe9),x(0xcb); \ + .byte x(0x54),x(0x7b),x(0x94),x(0x32),x(0xa6),x(0xc2),x(0x23),x(0x3d); \ + .byte x(0xee),x(0x4c),x(0x95),x(0x0b),x(0x42),x(0xfa),x(0xc3),x(0x4e); \ + .byte x(0x08),x(0x2e),x(0xa1),x(0x66),x(0x28),x(0xd9),x(0x24),x(0xb2); \ + .byte x(0x76),x(0x5b),x(0xa2),x(0x49),x(0x6d),x(0x8b),x(0xd1),x(0x25); \ + .byte x(0x72),x(0xf8),x(0xf6),x(0x64),x(0x86),x(0x68),x(0x98),x(0x16); \ + .byte x(0xd4),x(0xa4),x(0x5c),x(0xcc),x(0x5d),x(0x65),x(0xb6),x(0x92); \ + .byte x(0x6c),x(0x70),x(0x48),x(0x50),x(0xfd),x(0xed),x(0xb9),x(0xda); \ + .byte x(0x5e),x(0x15),x(0x46),x(0x57),x(0xa7),x(0x8d),x(0x9d),x(0x84); \ + .byte x(0x90),x(0xd8),x(0xab),x(0x00),x(0x8c),x(0xbc),x(0xd3),x(0x0a); \ + .byte x(0xf7),x(0xe4),x(0x58),x(0x05),x(0xb8),x(0xb3),x(0x45),x(0x06); \ + .byte x(0xd0),x(0x2c),x(0x1e),x(0x8f),x(0xca),x(0x3f),x(0x0f),x(0x02); \ + .byte x(0xc1),x(0xaf),x(0xbd),x(0x03),x(0x01),x(0x13),x(0x8a),x(0x6b); \ + .byte x(0x3a),x(0x91),x(0x11),x(0x41),x(0x4f),x(0x67),x(0xdc),x(0xea); \ + .byte x(0x97),x(0xf2),x(0xcf),x(0xce),x(0xf0),x(0xb4),x(0xe6),x(0x73); \ + .byte x(0x96),x(0xac),x(0x74),x(0x22),x(0xe7),x(0xad),x(0x35),x(0x85); \ + .byte x(0xe2),x(0xf9),x(0x37),x(0xe8),x(0x1c),x(0x75),x(0xdf),x(0x6e); \ + .byte x(0x47),x(0xf1),x(0x1a),x(0x71),x(0x1d),x(0x29),x(0xc5),x(0x89); \ + .byte x(0x6f),x(0xb7),x(0x62),x(0x0e),x(0xaa),x(0x18),x(0xbe),x(0x1b); \ + .byte x(0xfc),x(0x56),x(0x3e),x(0x4b),x(0xc6),x(0xd2),x(0x79),x(0x20); \ + .byte x(0x9a),x(0xdb),x(0xc0),x(0xfe),x(0x78),x(0xcd),x(0x5a),x(0xf4); \ + .byte x(0x1f),x(0xdd),x(0xa8),x(0x33),x(0x88),x(0x07),x(0xc7),x(0x31); \ + .byte x(0xb1),x(0x12),x(0x10),x(0x59),x(0x27),x(0x80),x(0xec),x(0x5f); \ + .byte x(0x60),x(0x51),x(0x7f),x(0xa9),x(0x19),x(0xb5),x(0x4a),x(0x0d); \ + .byte x(0x2d),x(0xe5),x(0x7a),x(0x9f),x(0x93),x(0xc9),x(0x9c),x(0xef); \ + .byte x(0xa0),x(0xe0),x(0x3b),x(0x4d),x(0xae),x(0x2a),x(0xf5),x(0xb0); \ + .byte x(0xc8),x(0xeb),x(0xbb),x(0x3c),x(0x83),x(0x53),x(0x99),x(0x61); \ + .byte x(0x17),x(0x2b),x(0x04),x(0x7e),x(0xba),x(0x77),x(0xd6),x(0x26); \ + .byte x(0xe1),x(0x69),x(0x14),x(0x63),x(0x55),x(0x21),x(0x0c),x(0x7d) + +#define tptr %rbp /* table pointer */ +#define kptr %r8 /* key schedule pointer */ +#define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */ +#define fk_ref(x, y) -16*x+fofs+4*y(kptr) + +#ifdef AES_REV_DKS +#define rofs 128 +#define ik_ref(x, y) -16*x+rofs+4*y(kptr) + +#else +#define rofs -128 +#define ik_ref(x, y) 16*x+rofs+4*y(kptr) +#endif /* AES_REV_DKS */ + +#define tab_0(x) (tptr,x,8) +#define tab_1(x) 3(tptr,x,8) +#define tab_2(x) 2(tptr,x,8) +#define tab_3(x) 1(tptr,x,8) +#define tab_f(x) 1(tptr,x,8) +#define tab_i(x) 7(tptr,x,8) + +#define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \ + mov fk_ref(round,0), p1; \ + mov fk_ref(round,1), p2; \ + mov fk_ref(round,2), p3; \ + mov fk_ref(round,3), p4; \ + \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + shr $16, %eax; \ + xor tab_0(%rsi), p1; \ + xor tab_1(%rdi), p4; \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + xor 
tab_2(%rsi), p3; \ + xor tab_3(%rdi), p2; \ + \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + shr $16, %ebx; \ + xor tab_0(%rsi), p2; \ + xor tab_1(%rdi), p1; \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + xor tab_2(%rsi), p4; \ + xor tab_3(%rdi), p3; \ + \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + shr $16, %ecx; \ + xor tab_0(%rsi), p3; \ + xor tab_1(%rdi), p2; \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + xor tab_2(%rsi), p1; \ + xor tab_3(%rdi), p4; \ + \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + shr $16, %edx; \ + xor tab_0(%rsi), p4; \ + xor tab_1(%rdi), p3; \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + xor tab_2(%rsi), p2; \ + xor tab_3(%rdi), p1; \ + \ + mov p1, %eax; \ + mov p2, %ebx; \ + mov p3, %ecx; \ + mov p4, %edx + +#ifdef LAST_ROUND_TABLES + +#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \ + add $2048, tptr; \ + mov fk_ref(round,0), p1; \ + mov fk_ref(round,1), p2; \ + mov fk_ref(round,2), p3; \ + mov fk_ref(round,3), p4; \ + \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + shr $16, %eax; \ + xor tab_0(%rsi), p1; \ + xor tab_1(%rdi), p4; \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + xor tab_2(%rsi), p3; \ + xor tab_3(%rdi), p2; \ + \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + shr $16, %ebx; \ + xor tab_0(%rsi), p2; \ + xor tab_1(%rdi), p1; \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + xor tab_2(%rsi), p4; \ + xor tab_3(%rdi), p3; \ + \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + shr $16, %ecx; \ + xor tab_0(%rsi), p3; \ + xor tab_1(%rdi), p2; \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + xor tab_2(%rsi), p1; \ + xor tab_3(%rdi), p4; \ + \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + shr $16, %edx; \ + xor tab_0(%rsi), p4; \ + xor tab_1(%rdi), p3; \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + xor tab_2(%rsi), p2; \ + xor tab_3(%rdi), p1 + +#else + +#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \ + mov fk_ref(round,0), p1; \ + mov fk_ref(round,1), p2; \ + mov fk_ref(round,2), p3; \ + mov fk_ref(round,3), p4; \ + \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + shr $16, %eax; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + xor %esi, p1; \ + rol $8, %edi; \ + xor %edi, p4; \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p3; \ + xor %edi, p2; \ + \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + shr $16, %ebx; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + xor %esi, p2; \ + rol $8, %edi; \ + xor %edi, p1; \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p4; \ + xor %edi, p3; \ + \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + shr $16, %ecx; \ + xor %esi, p3; \ + rol $8, %edi; \ + xor %edi, p2; \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p1; \ + xor %edi, p4; \ + \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + shr $16, %edx; \ + xor %esi, p4; \ + rol $8, %edi; \ + xor %edi, p3; \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + movzx tab_f(%rsi), %esi; \ + movzx tab_f(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p2; \ + xor %edi, p1 + +#endif /* LAST_ROUND_TABLES */ + +#define ii_rnd(p1, p2, p3, p4, round) /* normal inverse round */ \ + mov ik_ref(round,0), p1; \ + mov ik_ref(round,1), p2; \ + 
mov ik_ref(round,2), p3; \ + mov ik_ref(round,3), p4; \ + \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + shr $16, %eax; \ + xor tab_0(%rsi), p1; \ + xor tab_1(%rdi), p2; \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + xor tab_2(%rsi), p3; \ + xor tab_3(%rdi), p4; \ + \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + shr $16, %ebx; \ + xor tab_0(%rsi), p2; \ + xor tab_1(%rdi), p3; \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + xor tab_2(%rsi), p4; \ + xor tab_3(%rdi), p1; \ + \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + shr $16, %ecx; \ + xor tab_0(%rsi), p3; \ + xor tab_1(%rdi), p4; \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + xor tab_2(%rsi), p1; \ + xor tab_3(%rdi), p2; \ + \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + shr $16, %edx; \ + xor tab_0(%rsi), p4; \ + xor tab_1(%rdi), p1; \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + xor tab_2(%rsi), p2; \ + xor tab_3(%rdi), p3; \ + \ + mov p1, %eax; \ + mov p2, %ebx; \ + mov p3, %ecx; \ + mov p4, %edx + +#ifdef LAST_ROUND_TABLES + +#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \ + add $2048, tptr; \ + mov ik_ref(round,0), p1; \ + mov ik_ref(round,1), p2; \ + mov ik_ref(round,2), p3; \ + mov ik_ref(round,3), p4; \ + \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + shr $16, %eax; \ + xor tab_0(%rsi), p1; \ + xor tab_1(%rdi), p2; \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + xor tab_2(%rsi), p3; \ + xor tab_3(%rdi), p4; \ + \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + shr $16, %ebx; \ + xor tab_0(%rsi), p2; \ + xor tab_1(%rdi), p3; \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + xor tab_2(%rsi), p4; \ + xor tab_3(%rdi), p1; \ + \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + shr $16, %ecx; \ + xor tab_0(%rsi), p3; \ + xor tab_1(%rdi), p4; \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + xor tab_2(%rsi), p1; \ + xor tab_3(%rdi), p2; \ + \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + shr $16, %edx; \ + xor tab_0(%rsi), p4; \ + xor tab_1(%rdi), p1; \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + xor tab_2(%rsi), p2; \ + xor tab_3(%rdi), p3 + +#else + +#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \ + mov ik_ref(round,0), p1; \ + mov ik_ref(round,1), p2; \ + mov ik_ref(round,2), p3; \ + mov ik_ref(round,3), p4; \ + \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + shr $16, %eax; \ + xor %esi, p1; \ + rol $8, %edi; \ + xor %edi, p2; \ + movzx %al, %esi; \ + movzx %ah, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p3; \ + xor %edi, p4; \ + \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + shr $16, %ebx; \ + xor %esi, p2; \ + rol $8, %edi; \ + xor %edi, p3; \ + movzx %bl, %esi; \ + movzx %bh, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p4; \ + xor %edi, p1; \ + \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + shr $16, %ecx; \ + xor %esi, p3; \ + rol $8, %edi; \ + xor %edi, p4; \ + movzx %cl, %esi; \ + movzx %ch, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + rol $16, %esi; \ + rol $24, %edi; \ + xor %esi, p1; \ + xor %edi, p2; \ + \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + shr $16, %edx; \ + xor %esi, p4; \ + rol $8, %edi; \ + xor %edi, p1; \ + movzx %dl, %esi; \ + movzx %dh, %edi; \ + movzx tab_i(%rsi), %esi; \ + movzx tab_i(%rdi), %edi; \ + rol $16, %esi; 
\ + rol $24, %edi; \ + xor %esi, p2; \ + xor %edi, p3 + +#endif /* LAST_ROUND_TABLES */ + +/* + * OpenSolaris OS: + * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4])/ + * + * Original interface: + * int aes_encrypt(const unsigned char *in, + * unsigned char *out, const aes_encrypt_ctx cx[1])/ + */ + .align 6, 0x90 +enc_tab: + enc_vals(u8) +#ifdef LAST_ROUND_TABLES + // Last Round Tables: + enc_vals(w8) +#endif + + + ENTRY_NP(aes_encrypt_amd64) +#ifdef GLADMAN_INTERFACE + // Original interface + sub $[4*8], %rsp // gnu/linux/opensolaris binary interface + mov %rsi, (%rsp) // output pointer (P2) + mov %rdx, %r8 // context (P3) + + mov %rbx, 1*8(%rsp) // P1: input pointer in rdi + mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp) + mov %r12, 3*8(%rsp) // P3: context in r8 + movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16 + +#else + // OpenSolaris OS interface + sub $(4*8), %rsp // Make room on stack to save registers + mov %rcx, (%rsp) // Save output pointer (P4) on stack + mov %rdi, %r8 // context (P1) + mov %rdx, %rdi // P3: save input pointer + shl $4, %esi // P2: esi byte key length * 16 + + mov %rbx, 1*8(%rsp) // Save registers + mov %rbp, 2*8(%rsp) + mov %r12, 3*8(%rsp) + // P1: context in r8 + // P2: byte key length * 16 in esi + // P3: input pointer in rdi + // P4: output pointer in (rsp) +#endif /* GLADMAN_INTERFACE */ + + lea enc_tab(%rip), tptr + sub $fofs, kptr + + // Load input block into registers + mov (%rdi), %eax + mov 1*4(%rdi), %ebx + mov 2*4(%rdi), %ecx + mov 3*4(%rdi), %edx + + xor fofs(kptr), %eax + xor fofs+4(kptr), %ebx + xor fofs+8(kptr), %ecx + xor fofs+12(kptr), %edx + + lea (kptr,%rsi), kptr + // Jump based on byte key length * 16: + cmp $(10*16), %esi + je 3f + cmp $(12*16), %esi + je 2f + cmp $(14*16), %esi + je 1f + mov $-1, %rax // error + jmp 4f + + // Perform normal forward rounds +1: ff_rnd(%r9d, %r10d, %r11d, %r12d, 13) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 12) +2: ff_rnd(%r9d, %r10d, %r11d, %r12d, 11) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 10) +3: ff_rnd(%r9d, %r10d, %r11d, %r12d, 9) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 8) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 7) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 6) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 5) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 4) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 3) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 2) + ff_rnd(%r9d, %r10d, %r11d, %r12d, 1) + fl_rnd(%r9d, %r10d, %r11d, %r12d, 0) + + // Copy results + mov (%rsp), %rbx + mov %r9d, (%rbx) + mov %r10d, 4(%rbx) + mov %r11d, 8(%rbx) + mov %r12d, 12(%rbx) + xor %rax, %rax +4: // Restore registers + mov 1*8(%rsp), %rbx + mov 2*8(%rsp), %rbp + mov 3*8(%rsp), %r12 + add $(4*8), %rsp + ret + + SET_SIZE(aes_encrypt_amd64) + +/* + * OpenSolaris OS: + * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr, + * const uint32_t pt[4], uint32_t ct[4])/ + * + * Original interface: + * int aes_decrypt(const unsigned char *in, + * unsigned char *out, const aes_encrypt_ctx cx[1])/ + */ + .align 6, 0x90 +dec_tab: + dec_vals(v8) +#ifdef LAST_ROUND_TABLES + // Last Round Tables: + dec_vals(w8) +#endif + + + ENTRY_NP(aes_decrypt_amd64) +#ifdef GLADMAN_INTERFACE + // Original interface + sub $[4*8], %rsp // gnu/linux/opensolaris binary interface + mov %rsi, (%rsp) // output pointer (P2) + mov %rdx, %r8 // context (P3) + + mov %rbx, 1*8(%rsp) // P1: input pointer in rdi + mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp) + mov %r12, 3*8(%rsp) // P3: context in r8 + movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16 + +#else + // 
OpenSolaris OS interface + sub $(4*8), %rsp // Make room on stack to save registers + mov %rcx, (%rsp) // Save output pointer (P4) on stack + mov %rdi, %r8 // context (P1) + mov %rdx, %rdi // P3: save input pointer + shl $4, %esi // P2: esi byte key length * 16 + + mov %rbx, 1*8(%rsp) // Save registers + mov %rbp, 2*8(%rsp) + mov %r12, 3*8(%rsp) + // P1: context in r8 + // P2: byte key length * 16 in esi + // P3: input pointer in rdi + // P4: output pointer in (rsp) +#endif /* GLADMAN_INTERFACE */ + + lea dec_tab(%rip), tptr + sub $rofs, kptr + + // Load input block into registers + mov (%rdi), %eax + mov 1*4(%rdi), %ebx + mov 2*4(%rdi), %ecx + mov 3*4(%rdi), %edx + +#ifdef AES_REV_DKS + mov kptr, %rdi + lea (kptr,%rsi), kptr +#else + lea (kptr,%rsi), %rdi +#endif + + xor rofs(%rdi), %eax + xor rofs+4(%rdi), %ebx + xor rofs+8(%rdi), %ecx + xor rofs+12(%rdi), %edx + + // Jump based on byte key length * 16: + cmp $(10*16), %esi + je 3f + cmp $(12*16), %esi + je 2f + cmp $(14*16), %esi + je 1f + mov $-1, %rax // error + jmp 4f + + // Perform normal inverse rounds +1: ii_rnd(%r9d, %r10d, %r11d, %r12d, 13) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 12) +2: ii_rnd(%r9d, %r10d, %r11d, %r12d, 11) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 10) +3: ii_rnd(%r9d, %r10d, %r11d, %r12d, 9) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 8) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 7) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 6) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 5) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 4) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 3) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 2) + ii_rnd(%r9d, %r10d, %r11d, %r12d, 1) + il_rnd(%r9d, %r10d, %r11d, %r12d, 0) + + // Copy results + mov (%rsp), %rbx + mov %r9d, (%rbx) + mov %r10d, 4(%rbx) + mov %r11d, 8(%rbx) + mov %r12d, 12(%rbx) + xor %rax, %rax +4: // Restore registers + mov 1*8(%rsp), %rbx + mov 2*8(%rsp), %rbp + mov 3*8(%rsp), %r12 + add $(4*8), %rsp + ret + + SET_SIZE(aes_decrypt_amd64) +#endif /* lint || __lint */ diff --git a/module/icp/asm-x86_64/os/macos/modes/gcm_pclmulqdq.S b/module/icp/asm-x86_64/os/macos/modes/gcm_pclmulqdq.S new file mode 100644 index 0000000000..20f4d14c78 --- /dev/null +++ b/module/icp/asm-x86_64/os/macos/modes/gcm_pclmulqdq.S @@ -0,0 +1,334 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2009 Intel Corporation + * All Rights Reserved. + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains an accelerated + * Galois Field Multiplication implementation. + * + * PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, + * carry-less multiplication. 
More information about PCLMULQDQ can be + * found at: + * http://software.intel.com/en-us/articles/ + * carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + */ + +/* + * ==================================================================== + * OpenSolaris OS modifications + * + * This source originates as file galois_hash_asm.c from + * Intel Corporation dated September 21, 2009. + * + * This OpenSolaris version has these major changes from the original source: + * + * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, lint(1B) guards, and a dummy C function + * definition for lint. + * + * 2. Formatted code, added comments, and added #includes and #defines. + * + * 3. If bit CR0.TS is set, clear and set the TS bit, after and before + * calling kpreempt_disable() and kpreempt_enable(). + * If the TS bit is not set, Save and restore %xmm registers at the beginning + * and end of function calls (%xmm* registers are not saved and restored by + * during kernel thread preemption). + * + * 4. Removed code to perform hashing. This is already done with C macro + * GHASH in gcm.c. For better performance, this removed code should be + * reintegrated in the future to replace the C GHASH macro. + * + * 5. Added code to byte swap 16-byte input and output. + * + * 6. Folded in comments from the original C source with embedded assembly + * (SB_w_shift_xor.c) + * + * 7. Renamed function and reordered parameters to match OpenSolaris: + * Intel interface: + * void galois_hash_asm(unsigned char *hk, unsigned char *s, + * unsigned char *d, int length) + * OpenSolaris OS interface: + * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); + * ==================================================================== + */ + + +#if defined(lint) || defined(__lint) + +#include + +/* ARGSUSED */ +void +gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { +} + +#else /* lint */ + +#define _ASM +#include + +#if defined(_KERNEL) && !defined(__APPLE__) + /* + * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is, + * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it + * uses it to pass P2 to syscall. + * This also occurs with the STTS macro, but we dont care if + * P2 (%rsi) is modified just before function exit. + * The CLTS and STTS macros push and pop P1 (%rdi) already. + */ +#ifdef __xpv +#define PROTECTED_CLTS \ + push %rsi; \ + CLTS; \ + pop %rsi +#else +#define PROTECTED_CLTS \ + CLTS +#endif /* __xpv */ + + /* + * If CR0_TS is not set, align stack (with push %rbp) and push + * %xmm0 - %xmm10 on stack, otherwise clear CR0_TS + */ +#define CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg) \ + push %rbp; \ + mov %rsp, %rbp; \ + movq %cr0, tmpreg; \ + testq $CR0_TS, tmpreg; \ + jnz 1f; \ + and $-XMM_ALIGN, %rsp; \ + sub $(XMM_SIZE * 11), %rsp; \ + movaps %xmm0, 160(%rsp); \ + movaps %xmm1, 144(%rsp); \ + movaps %xmm2, 128(%rsp); \ + movaps %xmm3, 112(%rsp); \ + movaps %xmm4, 96(%rsp); \ + movaps %xmm5, 80(%rsp); \ + movaps %xmm6, 64(%rsp); \ + movaps %xmm7, 48(%rsp); \ + movaps %xmm8, 32(%rsp); \ + movaps %xmm9, 16(%rsp); \ + movaps %xmm10, (%rsp); \ + jmp 2f; \ +1: \ + PROTECTED_CLTS; \ +2: + + + /* + * If CR0_TS was not set above, pop %xmm0 - %xmm10 off stack, + * otherwise set CR0_TS. 
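+	 *
+	 * Usage sketch (illustrative only, with a hypothetical function name):
+	 * the two macros are intended to bracket any code that clobbers %xmm
+	 * registers, as gcm_mul_pclmulqdq below does:
+	 *
+	 *	ENTRY_NP(some_xmm_user)
+	 *	CLEAR_TS_OR_PUSH_XMM_REGISTERS(%r10)
+	 *	// ... work that uses %xmm0 - %xmm10 ...
+	 *	SET_TS_OR_POP_XMM_REGISTERS(%r10)
+	 *	ret
+	 *	SET_SIZE(some_xmm_user)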
+ */ +#define SET_TS_OR_POP_XMM_REGISTERS(tmpreg) \ + testq $CR0_TS, tmpreg; \ + jnz 1f; \ + movaps (%rsp), %xmm10; \ + movaps 16(%rsp), %xmm9; \ + movaps 32(%rsp), %xmm8; \ + movaps 48(%rsp), %xmm7; \ + movaps 64(%rsp), %xmm6; \ + movaps 80(%rsp), %xmm5; \ + movaps 96(%rsp), %xmm4; \ + movaps 112(%rsp), %xmm3; \ + movaps 128(%rsp), %xmm2; \ + movaps 144(%rsp), %xmm1; \ + movaps 160(%rsp), %xmm0; \ + jmp 2f; \ +1: \ + STTS(tmpreg); \ +2: \ + mov %rbp, %rsp; \ + pop %rbp + + +#else +#define PROTECTED_CLTS +#define CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg) +#define SET_TS_OR_POP_XMM_REGISTERS(tmpreg) +#endif /* _KERNEL */ + +/* + * Use this mask to byte-swap a 16-byte integer with the pshufb instruction + */ + +// static uint8_t byte_swap16_mask[] = { +// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; +.text +.align XMM_ALIGN_LOG +.Lbyte_swap16_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + + + +/* + * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); + * + * Perform a carry-less multiplication (that is, use XOR instead of the + * multiply operator) on P1 and P2 and place the result in P3. + * + * Byte swap the input and the output. + * + * Note: x_in, y, and res all point to a block of 20-byte numbers + * (an array of two 64-bit integers). + * + * Note2: For kernel code, caller is responsible for ensuring + * kpreempt_disable() has been called. This is because %xmm registers are + * not saved/restored. Clear and set the CR0.TS bit on entry and exit, + * respectively, if TS is set on entry. Otherwise, if TS is not set, + * save and restore %xmm registers on the stack. + * + * Note3: Original Intel definition: + * void galois_hash_asm(unsigned char *hk, unsigned char *s, + * unsigned char *d, int length) + * + * Note4: Register/parameter mapping: + * Intel: + * Parameter 1: %rcx (copied to %xmm0) hk or x_in + * Parameter 2: %rdx (copied to %xmm1) s or y + * Parameter 3: %rdi (result) d or res + * OpenSolaris: + * Parameter 1: %rdi (copied to %xmm0) x_in + * Parameter 2: %rsi (copied to %xmm1) y + * Parameter 3: %rdx (result) res + */ + +ENTRY_NP(gcm_mul_pclmulqdq) + CLEAR_TS_OR_PUSH_XMM_REGISTERS(%r10) + + // + // Copy Parameters + // + movdqu (%rdi), %xmm0 // P1 + movdqu (%rsi), %xmm1 // P2 + + // + // Byte swap 16-byte input + // + lea .Lbyte_swap16_mask(%rip), %rax + movups (%rax), %xmm10 + pshufb %xmm10, %xmm0 + pshufb %xmm10, %xmm1 + + + // + // Multiply with the hash key + // + movdqu %xmm0, %xmm3 + pclmulqdq $0, %xmm1, %xmm3 // xmm3 holds a0*b0 + + movdqu %xmm0, %xmm4 + pclmulqdq $16, %xmm1, %xmm4 // xmm4 holds a0*b1 + + movdqu %xmm0, %xmm5 + pclmulqdq $1, %xmm1, %xmm5 // xmm5 holds a1*b0 + movdqu %xmm0, %xmm6 + pclmulqdq $17, %xmm1, %xmm6 // xmm6 holds a1*b1 + + pxor %xmm5, %xmm4 // xmm4 holds a0*b1 + a1*b0 + + movdqu %xmm4, %xmm5 // move the contents of xmm4 to xmm5 + psrldq $8, %xmm4 // shift by xmm4 64 bits to the right + pslldq $8, %xmm5 // shift by xmm5 64 bits to the left + pxor %xmm5, %xmm3 + pxor %xmm4, %xmm6 // Register pair holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. 
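+	// The 256-bit carry-less product now sits in xmm6:xmm3.  GHASH keeps
+	// field elements bit-reflected, so the product of two reflected
+	// operands lands one bit short of the reflected result; the 1-bit
+	// left shift across xmm6:xmm3 below restores the alignment.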
+ movdqu %xmm3, %xmm7 + movdqu %xmm6, %xmm8 + pslld $1, %xmm3 + pslld $1, %xmm6 + psrld $31, %xmm7 + psrld $31, %xmm8 + movdqu %xmm7, %xmm9 + pslldq $4, %xmm8 + pslldq $4, %xmm7 + psrldq $12, %xmm9 + por %xmm7, %xmm3 + por %xmm8, %xmm6 + por %xmm9, %xmm6 + + // + // First phase of the reduction + // + // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts + // independently. + movdqu %xmm3, %xmm7 + movdqu %xmm3, %xmm8 + movdqu %xmm3, %xmm9 + pslld $31, %xmm7 // packed right shift shifting << 31 + pslld $30, %xmm8 // packed right shift shifting << 30 + pslld $25, %xmm9 // packed right shift shifting << 25 + pxor %xmm8, %xmm7 // xor the shifted versions + pxor %xmm9, %xmm7 + movdqu %xmm7, %xmm8 + pslldq $12, %xmm7 + psrldq $4, %xmm8 + pxor %xmm7, %xmm3 // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these + // shift operations. + movdqu %xmm3, %xmm2 + movdqu %xmm3, %xmm4 // packed left shifting >> 1 + movdqu %xmm3, %xmm5 + psrld $1, %xmm2 + psrld $2, %xmm4 // packed left shifting >> 2 + psrld $7, %xmm5 // packed left shifting >> 7 + pxor %xmm4, %xmm2 // xor the shifted versions + pxor %xmm5, %xmm2 + pxor %xmm8, %xmm2 + pxor %xmm2, %xmm3 + pxor %xmm3, %xmm6 // the result is in xmm6 + + // + // Byte swap 16-byte result + // + pshufb %xmm10, %xmm6 // %xmm10 has the swap mask + + // + // Store the result + // + movdqu %xmm6, (%rdx) // P3 + + + // + // Cleanup and Return + // + SET_TS_OR_POP_XMM_REGISTERS(%r10) + ret + SET_SIZE(gcm_mul_pclmulqdq) + +#endif /* lint || __lint */ diff --git a/module/icp/asm-x86_64/os/macos/sha1/sha1-x86_64.S b/module/icp/asm-x86_64/os/macos/sha1/sha1-x86_64.S new file mode 100644 index 0000000000..cb923784a7 --- /dev/null +++ b/module/icp/asm-x86_64/os/macos/sha1/sha1-x86_64.S @@ -0,0 +1,1353 @@ +/* + * !/usr/bin/env perl + * + * ==================================================================== + * Written by Andy Polyakov for the OpenSSL + * project. The module is, however, dual licensed under OpenSSL and + * CRYPTOGAMS licenses depending on where you obtain it. For further + * details see http://www.openssl.org/~appro/cryptogams/. + * ==================================================================== + * + * sha1_block procedure for x86_64. + * + * It was brought to my attention that on EM64T compiler-generated code + * was far behind 32-bit assembler implementation. This is unlike on + * Opteron where compiler-generated code was only 15% behind 32-bit + * assembler, which originally made it hard to motivate the effort. + * There was suggestion to mechanically translate 32-bit code, but I + * dismissed it, reasoning that x86_64 offers enough register bank + * capacity to fully utilize SHA-1 parallelism. Therefore this fresh + * implementation:-) However! While 64-bit code does performs better + * on Opteron, I failed to beat 32-bit assembler on EM64T core. Well, + * x86_64 does offer larger *addressable* bank, but out-of-order core + * reaches for even more registers through dynamic aliasing, and EM64T + * core must have managed to run-time optimize even 32-bit code just as + * good as 64-bit one. Performance improvement is summarized in the + * following table: + * + * gcc 3.4 32-bit asm cycles/byte + * Opteron +45% +20% 6.8 + * Xeon P4 +65% +0% 9.9 + * Core2 +60% +10% 7.0 + * + * + * OpenSolaris OS modifications + * + * Sun elects to use this software under the BSD license. 
+ * + * This source originates from OpenSSL file sha1-x86_64.pl at + * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz + * (presumably for future OpenSSL release 0.9.8h), with these changes: + * + * 1. Added perl "use strict" and declared variables. + * + * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. + * + * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) + * assemblers). + * + */ + +/* + * This file was generated by a perl script (sha1-x86_64.pl). The comments from + * the original file have been pasted above. + */ + +#if defined(lint) || defined(__lint) +#include +#include + + +/* ARGSUSED */ +void +sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks) +{ +} + +#else +#define _ASM +#include +ENTRY_NP(sha1_block_data_order) + push %rbx + push %rbp + push %r12 + mov %rsp,%rax + mov %rdi,%r8 # reassigned argument + sub $72,%rsp + mov %rsi,%r9 # reassigned argument + and $-64,%rsp + mov %rdx,%r10 # reassigned argument + mov %rax,64(%rsp) + + mov 0(%r8),%edx + mov 4(%r8),%esi + mov 8(%r8),%edi + mov 12(%r8),%ebp + mov 16(%r8),%r11d +.align 4 +.Lloop: + mov 0(%r9),%eax + bswap %eax + mov %eax,0(%rsp) + lea 0x5a827999(%eax,%r11d),%r12d + mov %edi,%ebx + mov 4(%r9),%eax + mov %edx,%r11d + xor %ebp,%ebx + bswap %eax + rol $5,%r11d + and %esi,%ebx + mov %eax,4(%rsp) + add %r11d,%r12d + xor %ebp,%ebx + rol $30,%esi + add %ebx,%r12d + lea 0x5a827999(%eax,%ebp),%r11d + mov %esi,%ebx + mov 8(%r9),%eax + mov %r12d,%ebp + xor %edi,%ebx + bswap %eax + rol $5,%ebp + and %edx,%ebx + mov %eax,8(%rsp) + add %ebp,%r11d + xor %edi,%ebx + rol $30,%edx + add %ebx,%r11d + lea 0x5a827999(%eax,%edi),%ebp + mov %edx,%ebx + mov 12(%r9),%eax + mov %r11d,%edi + xor %esi,%ebx + bswap %eax + rol $5,%edi + and %r12d,%ebx + mov %eax,12(%rsp) + add %edi,%ebp + xor %esi,%ebx + rol $30,%r12d + add %ebx,%ebp + lea 0x5a827999(%eax,%esi),%edi + mov %r12d,%ebx + mov 16(%r9),%eax + mov %ebp,%esi + xor %edx,%ebx + bswap %eax + rol $5,%esi + and %r11d,%ebx + mov %eax,16(%rsp) + add %esi,%edi + xor %edx,%ebx + rol $30,%r11d + add %ebx,%edi + lea 0x5a827999(%eax,%edx),%esi + mov %r11d,%ebx + mov 20(%r9),%eax + mov %edi,%edx + xor %r12d,%ebx + bswap %eax + rol $5,%edx + and %ebp,%ebx + mov %eax,20(%rsp) + add %edx,%esi + xor %r12d,%ebx + rol $30,%ebp + add %ebx,%esi + lea 0x5a827999(%eax,%r12d),%edx + mov %ebp,%ebx + mov 24(%r9),%eax + mov %esi,%r12d + xor %r11d,%ebx + bswap %eax + rol $5,%r12d + and %edi,%ebx + mov %eax,24(%rsp) + add %r12d,%edx + xor %r11d,%ebx + rol $30,%edi + add %ebx,%edx + lea 0x5a827999(%eax,%r11d),%r12d + mov %edi,%ebx + mov 28(%r9),%eax + mov %edx,%r11d + xor %ebp,%ebx + bswap %eax + rol $5,%r11d + and %esi,%ebx + mov %eax,28(%rsp) + add %r11d,%r12d + xor %ebp,%ebx + rol $30,%esi + add %ebx,%r12d + lea 0x5a827999(%eax,%ebp),%r11d + mov %esi,%ebx + mov 32(%r9),%eax + mov %r12d,%ebp + xor %edi,%ebx + bswap %eax + rol $5,%ebp + and %edx,%ebx + mov %eax,32(%rsp) + add %ebp,%r11d + xor %edi,%ebx + rol $30,%edx + add %ebx,%r11d + lea 0x5a827999(%eax,%edi),%ebp + mov %edx,%ebx + mov 36(%r9),%eax + mov %r11d,%edi + xor %esi,%ebx + bswap %eax + rol $5,%edi + and %r12d,%ebx + mov %eax,36(%rsp) + add %edi,%ebp + xor %esi,%ebx + rol $30,%r12d + add %ebx,%ebp + lea 0x5a827999(%eax,%esi),%edi + mov %r12d,%ebx + mov 40(%r9),%eax + mov %ebp,%esi + xor %edx,%ebx + bswap %eax + rol $5,%esi + and %r11d,%ebx + mov %eax,40(%rsp) + add %esi,%edi + xor %edx,%ebx + rol $30,%r11d + add %ebx,%edi + 
lea 0x5a827999(%eax,%edx),%esi + mov %r11d,%ebx + mov 44(%r9),%eax + mov %edi,%edx + xor %r12d,%ebx + bswap %eax + rol $5,%edx + and %ebp,%ebx + mov %eax,44(%rsp) + add %edx,%esi + xor %r12d,%ebx + rol $30,%ebp + add %ebx,%esi + lea 0x5a827999(%eax,%r12d),%edx + mov %ebp,%ebx + mov 48(%r9),%eax + mov %esi,%r12d + xor %r11d,%ebx + bswap %eax + rol $5,%r12d + and %edi,%ebx + mov %eax,48(%rsp) + add %r12d,%edx + xor %r11d,%ebx + rol $30,%edi + add %ebx,%edx + lea 0x5a827999(%eax,%r11d),%r12d + mov %edi,%ebx + mov 52(%r9),%eax + mov %edx,%r11d + xor %ebp,%ebx + bswap %eax + rol $5,%r11d + and %esi,%ebx + mov %eax,52(%rsp) + add %r11d,%r12d + xor %ebp,%ebx + rol $30,%esi + add %ebx,%r12d + lea 0x5a827999(%eax,%ebp),%r11d + mov %esi,%ebx + mov 56(%r9),%eax + mov %r12d,%ebp + xor %edi,%ebx + bswap %eax + rol $5,%ebp + and %edx,%ebx + mov %eax,56(%rsp) + add %ebp,%r11d + xor %edi,%ebx + rol $30,%edx + add %ebx,%r11d + lea 0x5a827999(%eax,%edi),%ebp + mov %edx,%ebx + mov 60(%r9),%eax + mov %r11d,%edi + xor %esi,%ebx + bswap %eax + rol $5,%edi + and %r12d,%ebx + mov %eax,60(%rsp) + add %edi,%ebp + xor %esi,%ebx + rol $30,%r12d + add %ebx,%ebp + lea 0x5a827999(%eax,%esi),%edi + mov 0(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 8(%rsp),%eax + xor %edx,%ebx + rol $5,%esi + xor 32(%rsp),%eax + and %r11d,%ebx + add %esi,%edi + xor 52(%rsp),%eax + xor %edx,%ebx + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,0(%rsp) + lea 0x5a827999(%eax,%edx),%esi + mov 4(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 12(%rsp),%eax + xor %r12d,%ebx + rol $5,%edx + xor 36(%rsp),%eax + and %ebp,%ebx + add %edx,%esi + xor 56(%rsp),%eax + xor %r12d,%ebx + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + mov %eax,4(%rsp) + lea 0x5a827999(%eax,%r12d),%edx + mov 8(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 16(%rsp),%eax + xor %r11d,%ebx + rol $5,%r12d + xor 40(%rsp),%eax + and %edi,%ebx + add %r12d,%edx + xor 60(%rsp),%eax + xor %r11d,%ebx + rol $30,%edi + add %ebx,%edx + rol $1,%eax + mov %eax,8(%rsp) + lea 0x5a827999(%eax,%r11d),%r12d + mov 12(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 20(%rsp),%eax + xor %ebp,%ebx + rol $5,%r11d + xor 44(%rsp),%eax + and %esi,%ebx + add %r11d,%r12d + xor 0(%rsp),%eax + xor %ebp,%ebx + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,12(%rsp) + lea 0x5a827999(%eax,%ebp),%r11d + mov 16(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 24(%rsp),%eax + xor %edi,%ebx + rol $5,%ebp + xor 48(%rsp),%eax + and %edx,%ebx + add %ebp,%r11d + xor 4(%rsp),%eax + xor %edi,%ebx + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,16(%rsp) + lea 0x6ed9eba1(%eax,%edi),%ebp + mov 20(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 28(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 52(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 8(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,20(%rsp) + lea 0x6ed9eba1(%eax,%esi),%edi + mov 24(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 32(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 56(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 12(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,24(%rsp) + lea 0x6ed9eba1(%eax,%edx),%esi + mov 28(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 36(%rsp),%eax + xor %ebp,%ebx + rol $5,%edx + xor 60(%rsp),%eax + xor %r12d,%ebx + add %edx,%esi + xor 16(%rsp),%eax + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + mov %eax,28(%rsp) + lea 0x6ed9eba1(%eax,%r12d),%edx + mov 32(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 40(%rsp),%eax + xor %edi,%ebx + 
rol $5,%r12d + xor 0(%rsp),%eax + xor %r11d,%ebx + add %r12d,%edx + xor 20(%rsp),%eax + rol $30,%edi + add %ebx,%edx + rol $1,%eax + mov %eax,32(%rsp) + lea 0x6ed9eba1(%eax,%r11d),%r12d + mov 36(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 44(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 4(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 24(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,36(%rsp) + lea 0x6ed9eba1(%eax,%ebp),%r11d + mov 40(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 48(%rsp),%eax + xor %edx,%ebx + rol $5,%ebp + xor 8(%rsp),%eax + xor %edi,%ebx + add %ebp,%r11d + xor 28(%rsp),%eax + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,40(%rsp) + lea 0x6ed9eba1(%eax,%edi),%ebp + mov 44(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 52(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 12(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 32(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,44(%rsp) + lea 0x6ed9eba1(%eax,%esi),%edi + mov 48(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 56(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 16(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 36(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,48(%rsp) + lea 0x6ed9eba1(%eax,%edx),%esi + mov 52(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 60(%rsp),%eax + xor %ebp,%ebx + rol $5,%edx + xor 20(%rsp),%eax + xor %r12d,%ebx + add %edx,%esi + xor 40(%rsp),%eax + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + mov %eax,52(%rsp) + lea 0x6ed9eba1(%eax,%r12d),%edx + mov 56(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 0(%rsp),%eax + xor %edi,%ebx + rol $5,%r12d + xor 24(%rsp),%eax + xor %r11d,%ebx + add %r12d,%edx + xor 44(%rsp),%eax + rol $30,%edi + add %ebx,%edx + rol $1,%eax + mov %eax,56(%rsp) + lea 0x6ed9eba1(%eax,%r11d),%r12d + mov 60(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 4(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 28(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 48(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,60(%rsp) + lea 0x6ed9eba1(%eax,%ebp),%r11d + mov 0(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 8(%rsp),%eax + xor %edx,%ebx + rol $5,%ebp + xor 32(%rsp),%eax + xor %edi,%ebx + add %ebp,%r11d + xor 52(%rsp),%eax + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,0(%rsp) + lea 0x6ed9eba1(%eax,%edi),%ebp + mov 4(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 12(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 36(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 56(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,4(%rsp) + lea 0x6ed9eba1(%eax,%esi),%edi + mov 8(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 16(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 40(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 60(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,8(%rsp) + lea 0x6ed9eba1(%eax,%edx),%esi + mov 12(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 20(%rsp),%eax + xor %ebp,%ebx + rol $5,%edx + xor 44(%rsp),%eax + xor %r12d,%ebx + add %edx,%esi + xor 0(%rsp),%eax + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + mov %eax,12(%rsp) + lea 0x6ed9eba1(%eax,%r12d),%edx + mov 16(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 24(%rsp),%eax + xor %edi,%ebx + rol $5,%r12d + xor 48(%rsp),%eax + xor %r11d,%ebx + add %r12d,%edx + xor 4(%rsp),%eax + rol $30,%edi + add %ebx,%edx + rol $1,%eax + mov %eax,16(%rsp) + lea 0x6ed9eba1(%eax,%r11d),%r12d + mov 20(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 
28(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 52(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 8(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,20(%rsp) + lea 0x6ed9eba1(%eax,%ebp),%r11d + mov 24(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 32(%rsp),%eax + xor %edx,%ebx + rol $5,%ebp + xor 56(%rsp),%eax + xor %edi,%ebx + add %ebp,%r11d + xor 12(%rsp),%eax + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,24(%rsp) + lea 0x6ed9eba1(%eax,%edi),%ebp + mov 28(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 36(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 60(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 16(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,28(%rsp) + lea 0x6ed9eba1(%eax,%esi),%edi + mov 32(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 40(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 0(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 20(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,32(%rsp) + lea -0x70e44324(%eax,%edx),%esi + mov 36(%rsp),%eax + mov %ebp,%ebx + mov %ebp,%ecx + xor 44(%rsp),%eax + mov %edi,%edx + and %r11d,%ebx + xor 4(%rsp),%eax + or %r11d,%ecx + rol $5,%edx + xor 24(%rsp),%eax + and %r12d,%ecx + add %edx,%esi + rol $1,%eax + or %ecx,%ebx + rol $30,%ebp + mov %eax,36(%rsp) + add %ebx,%esi + lea -0x70e44324(%eax,%r12d),%edx + mov 40(%rsp),%eax + mov %edi,%ebx + mov %edi,%ecx + xor 48(%rsp),%eax + mov %esi,%r12d + and %ebp,%ebx + xor 8(%rsp),%eax + or %ebp,%ecx + rol $5,%r12d + xor 28(%rsp),%eax + and %r11d,%ecx + add %r12d,%edx + rol $1,%eax + or %ecx,%ebx + rol $30,%edi + mov %eax,40(%rsp) + add %ebx,%edx + lea -0x70e44324(%eax,%r11d),%r12d + mov 44(%rsp),%eax + mov %esi,%ebx + mov %esi,%ecx + xor 52(%rsp),%eax + mov %edx,%r11d + and %edi,%ebx + xor 12(%rsp),%eax + or %edi,%ecx + rol $5,%r11d + xor 32(%rsp),%eax + and %ebp,%ecx + add %r11d,%r12d + rol $1,%eax + or %ecx,%ebx + rol $30,%esi + mov %eax,44(%rsp) + add %ebx,%r12d + lea -0x70e44324(%eax,%ebp),%r11d + mov 48(%rsp),%eax + mov %edx,%ebx + mov %edx,%ecx + xor 56(%rsp),%eax + mov %r12d,%ebp + and %esi,%ebx + xor 16(%rsp),%eax + or %esi,%ecx + rol $5,%ebp + xor 36(%rsp),%eax + and %edi,%ecx + add %ebp,%r11d + rol $1,%eax + or %ecx,%ebx + rol $30,%edx + mov %eax,48(%rsp) + add %ebx,%r11d + lea -0x70e44324(%eax,%edi),%ebp + mov 52(%rsp),%eax + mov %r12d,%ebx + mov %r12d,%ecx + xor 60(%rsp),%eax + mov %r11d,%edi + and %edx,%ebx + xor 20(%rsp),%eax + or %edx,%ecx + rol $5,%edi + xor 40(%rsp),%eax + and %esi,%ecx + add %edi,%ebp + rol $1,%eax + or %ecx,%ebx + rol $30,%r12d + mov %eax,52(%rsp) + add %ebx,%ebp + lea -0x70e44324(%eax,%esi),%edi + mov 56(%rsp),%eax + mov %r11d,%ebx + mov %r11d,%ecx + xor 0(%rsp),%eax + mov %ebp,%esi + and %r12d,%ebx + xor 24(%rsp),%eax + or %r12d,%ecx + rol $5,%esi + xor 44(%rsp),%eax + and %edx,%ecx + add %esi,%edi + rol $1,%eax + or %ecx,%ebx + rol $30,%r11d + mov %eax,56(%rsp) + add %ebx,%edi + lea -0x70e44324(%eax,%edx),%esi + mov 60(%rsp),%eax + mov %ebp,%ebx + mov %ebp,%ecx + xor 4(%rsp),%eax + mov %edi,%edx + and %r11d,%ebx + xor 28(%rsp),%eax + or %r11d,%ecx + rol $5,%edx + xor 48(%rsp),%eax + and %r12d,%ecx + add %edx,%esi + rol $1,%eax + or %ecx,%ebx + rol $30,%ebp + mov %eax,60(%rsp) + add %ebx,%esi + lea -0x70e44324(%eax,%r12d),%edx + mov 0(%rsp),%eax + mov %edi,%ebx + mov %edi,%ecx + xor 8(%rsp),%eax + mov %esi,%r12d + and %ebp,%ebx + xor 32(%rsp),%eax + or %ebp,%ecx + rol $5,%r12d + xor 52(%rsp),%eax + and %r11d,%ecx + add %r12d,%edx + rol $1,%eax + or %ecx,%ebx + rol $30,%edi 
+ mov %eax,0(%rsp) + add %ebx,%edx + lea -0x70e44324(%eax,%r11d),%r12d + mov 4(%rsp),%eax + mov %esi,%ebx + mov %esi,%ecx + xor 12(%rsp),%eax + mov %edx,%r11d + and %edi,%ebx + xor 36(%rsp),%eax + or %edi,%ecx + rol $5,%r11d + xor 56(%rsp),%eax + and %ebp,%ecx + add %r11d,%r12d + rol $1,%eax + or %ecx,%ebx + rol $30,%esi + mov %eax,4(%rsp) + add %ebx,%r12d + lea -0x70e44324(%eax,%ebp),%r11d + mov 8(%rsp),%eax + mov %edx,%ebx + mov %edx,%ecx + xor 16(%rsp),%eax + mov %r12d,%ebp + and %esi,%ebx + xor 40(%rsp),%eax + or %esi,%ecx + rol $5,%ebp + xor 60(%rsp),%eax + and %edi,%ecx + add %ebp,%r11d + rol $1,%eax + or %ecx,%ebx + rol $30,%edx + mov %eax,8(%rsp) + add %ebx,%r11d + lea -0x70e44324(%eax,%edi),%ebp + mov 12(%rsp),%eax + mov %r12d,%ebx + mov %r12d,%ecx + xor 20(%rsp),%eax + mov %r11d,%edi + and %edx,%ebx + xor 44(%rsp),%eax + or %edx,%ecx + rol $5,%edi + xor 0(%rsp),%eax + and %esi,%ecx + add %edi,%ebp + rol $1,%eax + or %ecx,%ebx + rol $30,%r12d + mov %eax,12(%rsp) + add %ebx,%ebp + lea -0x70e44324(%eax,%esi),%edi + mov 16(%rsp),%eax + mov %r11d,%ebx + mov %r11d,%ecx + xor 24(%rsp),%eax + mov %ebp,%esi + and %r12d,%ebx + xor 48(%rsp),%eax + or %r12d,%ecx + rol $5,%esi + xor 4(%rsp),%eax + and %edx,%ecx + add %esi,%edi + rol $1,%eax + or %ecx,%ebx + rol $30,%r11d + mov %eax,16(%rsp) + add %ebx,%edi + lea -0x70e44324(%eax,%edx),%esi + mov 20(%rsp),%eax + mov %ebp,%ebx + mov %ebp,%ecx + xor 28(%rsp),%eax + mov %edi,%edx + and %r11d,%ebx + xor 52(%rsp),%eax + or %r11d,%ecx + rol $5,%edx + xor 8(%rsp),%eax + and %r12d,%ecx + add %edx,%esi + rol $1,%eax + or %ecx,%ebx + rol $30,%ebp + mov %eax,20(%rsp) + add %ebx,%esi + lea -0x70e44324(%eax,%r12d),%edx + mov 24(%rsp),%eax + mov %edi,%ebx + mov %edi,%ecx + xor 32(%rsp),%eax + mov %esi,%r12d + and %ebp,%ebx + xor 56(%rsp),%eax + or %ebp,%ecx + rol $5,%r12d + xor 12(%rsp),%eax + and %r11d,%ecx + add %r12d,%edx + rol $1,%eax + or %ecx,%ebx + rol $30,%edi + mov %eax,24(%rsp) + add %ebx,%edx + lea -0x70e44324(%eax,%r11d),%r12d + mov 28(%rsp),%eax + mov %esi,%ebx + mov %esi,%ecx + xor 36(%rsp),%eax + mov %edx,%r11d + and %edi,%ebx + xor 60(%rsp),%eax + or %edi,%ecx + rol $5,%r11d + xor 16(%rsp),%eax + and %ebp,%ecx + add %r11d,%r12d + rol $1,%eax + or %ecx,%ebx + rol $30,%esi + mov %eax,28(%rsp) + add %ebx,%r12d + lea -0x70e44324(%eax,%ebp),%r11d + mov 32(%rsp),%eax + mov %edx,%ebx + mov %edx,%ecx + xor 40(%rsp),%eax + mov %r12d,%ebp + and %esi,%ebx + xor 0(%rsp),%eax + or %esi,%ecx + rol $5,%ebp + xor 20(%rsp),%eax + and %edi,%ecx + add %ebp,%r11d + rol $1,%eax + or %ecx,%ebx + rol $30,%edx + mov %eax,32(%rsp) + add %ebx,%r11d + lea -0x70e44324(%eax,%edi),%ebp + mov 36(%rsp),%eax + mov %r12d,%ebx + mov %r12d,%ecx + xor 44(%rsp),%eax + mov %r11d,%edi + and %edx,%ebx + xor 4(%rsp),%eax + or %edx,%ecx + rol $5,%edi + xor 24(%rsp),%eax + and %esi,%ecx + add %edi,%ebp + rol $1,%eax + or %ecx,%ebx + rol $30,%r12d + mov %eax,36(%rsp) + add %ebx,%ebp + lea -0x70e44324(%eax,%esi),%edi + mov 40(%rsp),%eax + mov %r11d,%ebx + mov %r11d,%ecx + xor 48(%rsp),%eax + mov %ebp,%esi + and %r12d,%ebx + xor 8(%rsp),%eax + or %r12d,%ecx + rol $5,%esi + xor 28(%rsp),%eax + and %edx,%ecx + add %esi,%edi + rol $1,%eax + or %ecx,%ebx + rol $30,%r11d + mov %eax,40(%rsp) + add %ebx,%edi + lea -0x70e44324(%eax,%edx),%esi + mov 44(%rsp),%eax + mov %ebp,%ebx + mov %ebp,%ecx + xor 52(%rsp),%eax + mov %edi,%edx + and %r11d,%ebx + xor 12(%rsp),%eax + or %r11d,%ecx + rol $5,%edx + xor 32(%rsp),%eax + and %r12d,%ecx + add %edx,%esi + rol $1,%eax + or %ecx,%ebx + rol $30,%ebp + mov 
%eax,44(%rsp) + add %ebx,%esi + lea -0x70e44324(%eax,%r12d),%edx + mov 48(%rsp),%eax + mov %edi,%ebx + mov %edi,%ecx + xor 56(%rsp),%eax + mov %esi,%r12d + and %ebp,%ebx + xor 16(%rsp),%eax + or %ebp,%ecx + rol $5,%r12d + xor 36(%rsp),%eax + and %r11d,%ecx + add %r12d,%edx + rol $1,%eax + or %ecx,%ebx + rol $30,%edi + mov %eax,48(%rsp) + add %ebx,%edx + lea -0x359d3e2a(%eax,%r11d),%r12d + mov 52(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 60(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 20(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 40(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,52(%rsp) + lea -0x359d3e2a(%eax,%ebp),%r11d + mov 56(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 0(%rsp),%eax + xor %edx,%ebx + rol $5,%ebp + xor 24(%rsp),%eax + xor %edi,%ebx + add %ebp,%r11d + xor 44(%rsp),%eax + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,56(%rsp) + lea -0x359d3e2a(%eax,%edi),%ebp + mov 60(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 4(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 28(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 48(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,60(%rsp) + lea -0x359d3e2a(%eax,%esi),%edi + mov 0(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 8(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 32(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 52(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,0(%rsp) + lea -0x359d3e2a(%eax,%edx),%esi + mov 4(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 12(%rsp),%eax + xor %ebp,%ebx + rol $5,%edx + xor 36(%rsp),%eax + xor %r12d,%ebx + add %edx,%esi + xor 56(%rsp),%eax + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + mov %eax,4(%rsp) + lea -0x359d3e2a(%eax,%r12d),%edx + mov 8(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 16(%rsp),%eax + xor %edi,%ebx + rol $5,%r12d + xor 40(%rsp),%eax + xor %r11d,%ebx + add %r12d,%edx + xor 60(%rsp),%eax + rol $30,%edi + add %ebx,%edx + rol $1,%eax + mov %eax,8(%rsp) + lea -0x359d3e2a(%eax,%r11d),%r12d + mov 12(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 20(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 44(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 0(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,12(%rsp) + lea -0x359d3e2a(%eax,%ebp),%r11d + mov 16(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 24(%rsp),%eax + xor %edx,%ebx + rol $5,%ebp + xor 48(%rsp),%eax + xor %edi,%ebx + add %ebp,%r11d + xor 4(%rsp),%eax + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,16(%rsp) + lea -0x359d3e2a(%eax,%edi),%ebp + mov 20(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 28(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 52(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 8(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,20(%rsp) + lea -0x359d3e2a(%eax,%esi),%edi + mov 24(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 32(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 56(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 12(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,24(%rsp) + lea -0x359d3e2a(%eax,%edx),%esi + mov 28(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 36(%rsp),%eax + xor %ebp,%ebx + rol $5,%edx + xor 60(%rsp),%eax + xor %r12d,%ebx + add %edx,%esi + xor 16(%rsp),%eax + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + mov %eax,28(%rsp) + lea -0x359d3e2a(%eax,%r12d),%edx + mov 32(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 40(%rsp),%eax + xor %edi,%ebx + rol $5,%r12d + xor 0(%rsp),%eax + xor 
%r11d,%ebx + add %r12d,%edx + xor 20(%rsp),%eax + rol $30,%edi + add %ebx,%edx + rol $1,%eax + mov %eax,32(%rsp) + lea -0x359d3e2a(%eax,%r11d),%r12d + mov 36(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 44(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 4(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 24(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + mov %eax,36(%rsp) + lea -0x359d3e2a(%eax,%ebp),%r11d + mov 40(%rsp),%eax + mov %esi,%ebx + mov %r12d,%ebp + xor 48(%rsp),%eax + xor %edx,%ebx + rol $5,%ebp + xor 8(%rsp),%eax + xor %edi,%ebx + add %ebp,%r11d + xor 28(%rsp),%eax + rol $30,%edx + add %ebx,%r11d + rol $1,%eax + mov %eax,40(%rsp) + lea -0x359d3e2a(%eax,%edi),%ebp + mov 44(%rsp),%eax + mov %edx,%ebx + mov %r11d,%edi + xor 52(%rsp),%eax + xor %r12d,%ebx + rol $5,%edi + xor 12(%rsp),%eax + xor %esi,%ebx + add %edi,%ebp + xor 32(%rsp),%eax + rol $30,%r12d + add %ebx,%ebp + rol $1,%eax + mov %eax,44(%rsp) + lea -0x359d3e2a(%eax,%esi),%edi + mov 48(%rsp),%eax + mov %r12d,%ebx + mov %ebp,%esi + xor 56(%rsp),%eax + xor %r11d,%ebx + rol $5,%esi + xor 16(%rsp),%eax + xor %edx,%ebx + add %esi,%edi + xor 36(%rsp),%eax + rol $30,%r11d + add %ebx,%edi + rol $1,%eax + mov %eax,48(%rsp) + lea -0x359d3e2a(%eax,%edx),%esi + mov 52(%rsp),%eax + mov %r11d,%ebx + mov %edi,%edx + xor 60(%rsp),%eax + xor %ebp,%ebx + rol $5,%edx + xor 20(%rsp),%eax + xor %r12d,%ebx + add %edx,%esi + xor 40(%rsp),%eax + rol $30,%ebp + add %ebx,%esi + rol $1,%eax + lea -0x359d3e2a(%eax,%r12d),%edx + mov 56(%rsp),%eax + mov %ebp,%ebx + mov %esi,%r12d + xor 0(%rsp),%eax + xor %edi,%ebx + rol $5,%r12d + xor 24(%rsp),%eax + xor %r11d,%ebx + add %r12d,%edx + xor 44(%rsp),%eax + rol $30,%edi + add %ebx,%edx + rol $1,%eax + lea -0x359d3e2a(%eax,%r11d),%r12d + mov 60(%rsp),%eax + mov %edi,%ebx + mov %edx,%r11d + xor 4(%rsp),%eax + xor %esi,%ebx + rol $5,%r11d + xor 28(%rsp),%eax + xor %ebp,%ebx + add %r11d,%r12d + xor 48(%rsp),%eax + rol $30,%esi + add %ebx,%r12d + rol $1,%eax + lea -0x359d3e2a(%eax,%ebp),%r11d + mov %esi,%ebx + mov %r12d,%ebp + xor %edx,%ebx + rol $5,%ebp + xor %edi,%ebx + add %ebp,%r11d + rol $30,%edx + add %ebx,%r11d + // Update and save state information in SHA-1 context + add 0(%r8),%r11d + add 4(%r8),%r12d + add 8(%r8),%edx + add 12(%r8),%esi + add 16(%r8),%edi + mov %r11d,0(%r8) + mov %r12d,4(%r8) + mov %edx,8(%r8) + mov %esi,12(%r8) + mov %edi,16(%r8) + + xchg %r11d,%edx # mov %r11d,%edx + xchg %r12d,%esi # mov %r12d,%esi + xchg %r11d,%edi # mov %edx,%edi + xchg %r12d,%ebp # mov %esi,%ebp + # mov %edi,%r11d + lea 64(%r9),%r9 + sub $1,%r10 + jnz .Lloop + mov 64(%rsp),%rsp + pop %r12 + pop %rbp + pop %rbx + ret +SET_SIZE(sha1_block_data_order) + +.data +.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by " + +#endif /* lint || __lint */ + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif diff --git a/module/icp/asm-x86_64/os/macos/sha2/sha256_impl.S b/module/icp/asm-x86_64/os/macos/sha2/sha256_impl.S new file mode 100644 index 0000000000..0b0f3444fa --- /dev/null +++ b/module/icp/asm-x86_64/os/macos/sha2/sha256_impl.S @@ -0,0 +1,2058 @@ + +/* + * ==================================================================== + * Written by Andy Polyakov for the OpenSSL + * project. Rights for redistribution and usage in source and binary + * forms are granted according to the OpenSSL license. + * ==================================================================== + * + * sha256/512_block procedure for x86_64. + * + * 40% improvement over compiler-generated code on Opteron. 
On EM64T + * sha256 was observed to run >80% faster and sha512 - >40%. No magical + * tricks, just straight implementation... I really wonder why gcc + * [being armed with inline assembler] fails to generate as fast code. + * The only thing which is cool about this module is that it's very + * same instruction sequence used for both SHA-256 and SHA-512. In + * former case the instructions operate on 32-bit operands, while in + * latter - on 64-bit ones. All I had to do is to get one flavor right, + * the other one passed the test right away:-) + * + * sha256_block runs in ~1005 cycles on Opteron, which gives you + * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock + * frequency in GHz. sha512_block runs in ~1275 cycles, which results + * in 128*1000/1275=100MBps per GHz. Is there room for improvement? + * Well, if you compare it to IA-64 implementation, which maintains + * X[16] in register bank[!], tends to 4 instructions per CPU clock + * cycle and runs in 1003 cycles, 1275 is very good result for 3-way + * issue Opteron pipeline and X[16] maintained in memory. So that *if* + * there is a way to improve it, *then* the only way would be to try to + * offload X[16] updates to SSE unit, but that would require "deeper" + * loop unroll, which in turn would naturally cause size blow-up, not + * to mention increased complexity! And once again, only *if* it's + * actually possible to noticeably improve overall ILP, instruction + * level parallelism, on a given CPU implementation in this case. + * + * Special note on Intel EM64T. While Opteron CPU exhibits perfect + * perfromance ratio of 1.5 between 64- and 32-bit flavors [see above], + * [currently available] EM64T CPUs apparently are far from it. On the + * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit + * sha256_block:-( This is presumably because 64-bit shifts/rotates + * apparently are not atomic instructions, but implemented in microcode. + */ + +/* + * OpenSolaris OS modifications + * + * Sun elects to use this software under the BSD license. + * + * This source originates from OpenSSL file sha512-x86_64.pl at + * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz + * (presumably for future OpenSSL release 0.9.8h), with these changes: + * + * 1. Added perl "use strict" and declared variables. + * + * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. + * + * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) + * assemblers). Replaced the .picmeup macro with assembler code. + * + * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", + * at the beginning of SHA2_CTX (the next field is 8-byte aligned). + */ + +/* + * This file was generated by a perl script (sha512-x86_64.pl) that were + * used to generate sha256 and sha512 variants from the same code base. + * The comments from the original file have been pasted above. 
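+ *
+ * Illustrative sketch of the layout assumed by change 4 above (the member
+ * names below are an assumption, not copied from the OpenSolaris headers):
+ *
+ *	typedef struct {
+ *		uint32_t algotype;	// OpenSolaris-specific leading field
+ *		// plus 4 bytes of padding so the next member is 8-byte aligned
+ *		// ... hash state and remaining members start at byte offset 8
+ *	} SHA2_CTX;
+ *
+ * which is why the code below does "add $8,%rdi" before loading the state
+ * words from the context.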
+ */ + +#if defined(lint) || defined(__lint) +#include +#include + +/* ARGSUSED */ +void +SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) +{ +} + + +#else +#define _ASM +#include + +ENTRY_NP(SHA256TransformBlocks) + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%rbp # copy %rsp + shl $4,%rdx # num*16 + sub $16*4+4*8,%rsp + lea (%rsi,%rdx,4),%rdx # inp+num*16*4 + and $-64,%rsp # align stack frame + add $8,%rdi # Skip OpenSolaris field, "algotype" + mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg + mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg + mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg + mov %rbp,16*4+3*8(%rsp) # save copy of %rsp + + //.picmeup %rbp + // The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts + // the address of the "next" instruction into the target register + // (%rbp). This generates these 2 instructions: + lea .Llea(%rip),%rbp + //nop // .picmeup generates a nop for mod 8 alignment--not needed here + +.Llea: + lea K256-.(%rbp),%rbp + + mov 4*0(%rdi),%eax + mov 4*1(%rdi),%ebx + mov 4*2(%rdi),%ecx + mov 4*3(%rdi),%edx + mov 4*4(%rdi),%r8d + mov 4*5(%rdi),%r9d + mov 4*6(%rdi),%r10d + mov 4*7(%rdi),%r11d + jmp .Lloop + +.align 4, 0x90 +.Lloop: + xor %rdi,%rdi + mov 4*0(%rsi),%r12d + bswap %r12d + mov %r8d,%r13d + mov %r8d,%r14d + mov %r9d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r10d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r8d,%r15d # (f^g)&e + mov %r12d,0(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r11d,%r12d # T1+=h + + mov %eax,%r11d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %eax,%r13d + mov %eax,%r14d + + ror $2,%r11d + ror $13,%r13d + mov %eax,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r11d + ror $9,%r13d + or %ecx,%r14d # a|c + + xor %r13d,%r11d # h=Sigma0(a) + and %ecx,%r15d # a&c + add %r12d,%edx # d+=T1 + + and %ebx,%r14d # (a|c)&b + add %r12d,%r11d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r11d # h+=Maj(a,b,c) + mov 4*1(%rsi),%r12d + bswap %r12d + mov %edx,%r13d + mov %edx,%r14d + mov %r8d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r9d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %edx,%r15d # (f^g)&e + mov %r12d,4(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r10d,%r12d # T1+=h + + mov %r11d,%r10d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r11d,%r13d + mov %r11d,%r14d + + ror $2,%r10d + ror $13,%r13d + mov %r11d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r10d + ror $9,%r13d + or %ebx,%r14d # a|c + + xor %r13d,%r10d # h=Sigma0(a) + and %ebx,%r15d # a&c + add %r12d,%ecx # d+=T1 + + and %eax,%r14d # (a|c)&b + add %r12d,%r10d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r10d # h+=Maj(a,b,c) + mov 4*2(%rsi),%r12d + bswap %r12d + mov %ecx,%r13d + mov %ecx,%r14d + mov %edx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r8d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ecx,%r15d # (f^g)&e + mov %r12d,8(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r9d,%r12d # T1+=h + + mov %r10d,%r9d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r10d,%r13d + mov %r10d,%r14d + + ror $2,%r9d + ror $13,%r13d + mov %r10d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r9d + ror $9,%r13d + or %eax,%r14d # 
a|c + + xor %r13d,%r9d # h=Sigma0(a) + and %eax,%r15d # a&c + add %r12d,%ebx # d+=T1 + + and %r11d,%r14d # (a|c)&b + add %r12d,%r9d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r9d # h+=Maj(a,b,c) + mov 4*3(%rsi),%r12d + bswap %r12d + mov %ebx,%r13d + mov %ebx,%r14d + mov %ecx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %edx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ebx,%r15d # (f^g)&e + mov %r12d,12(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r8d,%r12d # T1+=h + + mov %r9d,%r8d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r9d,%r13d + mov %r9d,%r14d + + ror $2,%r8d + ror $13,%r13d + mov %r9d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r8d + ror $9,%r13d + or %r11d,%r14d # a|c + + xor %r13d,%r8d # h=Sigma0(a) + and %r11d,%r15d # a&c + add %r12d,%eax # d+=T1 + + and %r10d,%r14d # (a|c)&b + add %r12d,%r8d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r8d # h+=Maj(a,b,c) + mov 4*4(%rsi),%r12d + bswap %r12d + mov %eax,%r13d + mov %eax,%r14d + mov %ebx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ecx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %eax,%r15d # (f^g)&e + mov %r12d,16(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %edx,%r12d # T1+=h + + mov %r8d,%edx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r8d,%r13d + mov %r8d,%r14d + + ror $2,%edx + ror $13,%r13d + mov %r8d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%edx + ror $9,%r13d + or %r10d,%r14d # a|c + + xor %r13d,%edx # h=Sigma0(a) + and %r10d,%r15d # a&c + add %r12d,%r11d # d+=T1 + + and %r9d,%r14d # (a|c)&b + add %r12d,%edx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%edx # h+=Maj(a,b,c) + mov 4*5(%rsi),%r12d + bswap %r12d + mov %r11d,%r13d + mov %r11d,%r14d + mov %eax,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ebx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r11d,%r15d # (f^g)&e + mov %r12d,20(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ecx,%r12d # T1+=h + + mov %edx,%ecx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %edx,%r13d + mov %edx,%r14d + + ror $2,%ecx + ror $13,%r13d + mov %edx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ecx + ror $9,%r13d + or %r9d,%r14d # a|c + + xor %r13d,%ecx # h=Sigma0(a) + and %r9d,%r15d # a&c + add %r12d,%r10d # d+=T1 + + and %r8d,%r14d # (a|c)&b + add %r12d,%ecx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ecx # h+=Maj(a,b,c) + mov 4*6(%rsi),%r12d + bswap %r12d + mov %r10d,%r13d + mov %r10d,%r14d + mov %r11d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %eax,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r10d,%r15d # (f^g)&e + mov %r12d,24(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ebx,%r12d # T1+=h + + mov %ecx,%ebx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ecx,%r13d + mov %ecx,%r14d + + ror $2,%ebx + ror $13,%r13d + mov %ecx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ebx + ror $9,%r13d + or %r8d,%r14d # a|c + + xor %r13d,%ebx # h=Sigma0(a) + and %r8d,%r15d # a&c + add %r12d,%r9d # d+=T1 + + and %edx,%r14d # (a|c)&b + add %r12d,%ebx # h+=T1 + + or %r15d,%r14d # 
Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ebx # h+=Maj(a,b,c) + mov 4*7(%rsi),%r12d + bswap %r12d + mov %r9d,%r13d + mov %r9d,%r14d + mov %r10d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r11d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r9d,%r15d # (f^g)&e + mov %r12d,28(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %eax,%r12d # T1+=h + + mov %ebx,%eax + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ebx,%r13d + mov %ebx,%r14d + + ror $2,%eax + ror $13,%r13d + mov %ebx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%eax + ror $9,%r13d + or %edx,%r14d # a|c + + xor %r13d,%eax # h=Sigma0(a) + and %edx,%r15d # a&c + add %r12d,%r8d # d+=T1 + + and %ecx,%r14d # (a|c)&b + add %r12d,%eax # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%eax # h+=Maj(a,b,c) + mov 4*8(%rsi),%r12d + bswap %r12d + mov %r8d,%r13d + mov %r8d,%r14d + mov %r9d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r10d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r8d,%r15d # (f^g)&e + mov %r12d,32(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r11d,%r12d # T1+=h + + mov %eax,%r11d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %eax,%r13d + mov %eax,%r14d + + ror $2,%r11d + ror $13,%r13d + mov %eax,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r11d + ror $9,%r13d + or %ecx,%r14d # a|c + + xor %r13d,%r11d # h=Sigma0(a) + and %ecx,%r15d # a&c + add %r12d,%edx # d+=T1 + + and %ebx,%r14d # (a|c)&b + add %r12d,%r11d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r11d # h+=Maj(a,b,c) + mov 4*9(%rsi),%r12d + bswap %r12d + mov %edx,%r13d + mov %edx,%r14d + mov %r8d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r9d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %edx,%r15d # (f^g)&e + mov %r12d,36(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r10d,%r12d # T1+=h + + mov %r11d,%r10d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r11d,%r13d + mov %r11d,%r14d + + ror $2,%r10d + ror $13,%r13d + mov %r11d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r10d + ror $9,%r13d + or %ebx,%r14d # a|c + + xor %r13d,%r10d # h=Sigma0(a) + and %ebx,%r15d # a&c + add %r12d,%ecx # d+=T1 + + and %eax,%r14d # (a|c)&b + add %r12d,%r10d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r10d # h+=Maj(a,b,c) + mov 4*10(%rsi),%r12d + bswap %r12d + mov %ecx,%r13d + mov %ecx,%r14d + mov %edx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r8d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ecx,%r15d # (f^g)&e + mov %r12d,40(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r9d,%r12d # T1+=h + + mov %r10d,%r9d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r10d,%r13d + mov %r10d,%r14d + + ror $2,%r9d + ror $13,%r13d + mov %r10d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r9d + ror $9,%r13d + or %eax,%r14d # a|c + + xor %r13d,%r9d # h=Sigma0(a) + and %eax,%r15d # a&c + add %r12d,%ebx # d+=T1 + + and %r11d,%r14d # (a|c)&b + add %r12d,%r9d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r9d # h+=Maj(a,b,c) + mov 4*11(%rsi),%r12d + bswap %r12d + mov %ebx,%r13d + mov %ebx,%r14d + 
mov %ecx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %edx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ebx,%r15d # (f^g)&e + mov %r12d,44(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r8d,%r12d # T1+=h + + mov %r9d,%r8d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r9d,%r13d + mov %r9d,%r14d + + ror $2,%r8d + ror $13,%r13d + mov %r9d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r8d + ror $9,%r13d + or %r11d,%r14d # a|c + + xor %r13d,%r8d # h=Sigma0(a) + and %r11d,%r15d # a&c + add %r12d,%eax # d+=T1 + + and %r10d,%r14d # (a|c)&b + add %r12d,%r8d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r8d # h+=Maj(a,b,c) + mov 4*12(%rsi),%r12d + bswap %r12d + mov %eax,%r13d + mov %eax,%r14d + mov %ebx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ecx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %eax,%r15d # (f^g)&e + mov %r12d,48(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %edx,%r12d # T1+=h + + mov %r8d,%edx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r8d,%r13d + mov %r8d,%r14d + + ror $2,%edx + ror $13,%r13d + mov %r8d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%edx + ror $9,%r13d + or %r10d,%r14d # a|c + + xor %r13d,%edx # h=Sigma0(a) + and %r10d,%r15d # a&c + add %r12d,%r11d # d+=T1 + + and %r9d,%r14d # (a|c)&b + add %r12d,%edx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%edx # h+=Maj(a,b,c) + mov 4*13(%rsi),%r12d + bswap %r12d + mov %r11d,%r13d + mov %r11d,%r14d + mov %eax,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ebx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r11d,%r15d # (f^g)&e + mov %r12d,52(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ecx,%r12d # T1+=h + + mov %edx,%ecx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %edx,%r13d + mov %edx,%r14d + + ror $2,%ecx + ror $13,%r13d + mov %edx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ecx + ror $9,%r13d + or %r9d,%r14d # a|c + + xor %r13d,%ecx # h=Sigma0(a) + and %r9d,%r15d # a&c + add %r12d,%r10d # d+=T1 + + and %r8d,%r14d # (a|c)&b + add %r12d,%ecx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ecx # h+=Maj(a,b,c) + mov 4*14(%rsi),%r12d + bswap %r12d + mov %r10d,%r13d + mov %r10d,%r14d + mov %r11d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %eax,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r10d,%r15d # (f^g)&e + mov %r12d,56(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ebx,%r12d # T1+=h + + mov %ecx,%ebx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ecx,%r13d + mov %ecx,%r14d + + ror $2,%ebx + ror $13,%r13d + mov %ecx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ebx + ror $9,%r13d + or %r8d,%r14d # a|c + + xor %r13d,%ebx # h=Sigma0(a) + and %r8d,%r15d # a&c + add %r12d,%r9d # d+=T1 + + and %edx,%r14d # (a|c)&b + add %r12d,%ebx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ebx # h+=Maj(a,b,c) + mov 4*15(%rsi),%r12d + bswap %r12d + mov %r9d,%r13d + mov %r9d,%r14d + mov %r10d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r11d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r9d,%r15d # (f^g)&e + mov %r12d,60(%rsp) + + xor %r14d,%r13d 
# Sigma1(e) + xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %eax,%r12d # T1+=h + + mov %ebx,%eax + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ebx,%r13d + mov %ebx,%r14d + + ror $2,%eax + ror $13,%r13d + mov %ebx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%eax + ror $9,%r13d + or %edx,%r14d # a|c + + xor %r13d,%eax # h=Sigma0(a) + and %edx,%r15d # a&c + add %r12d,%r8d # d+=T1 + + and %ecx,%r14d # (a|c)&b + add %r12d,%eax # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%eax # h+=Maj(a,b,c) + jmp .Lrounds_16_xx +.align 4, 0x90 +.Lrounds_16_xx: + mov 4(%rsp),%r13d + mov 56(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 36(%rsp),%r12d + + add 0(%rsp),%r12d + mov %r8d,%r13d + mov %r8d,%r14d + mov %r9d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r10d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r8d,%r15d # (f^g)&e + mov %r12d,0(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r11d,%r12d # T1+=h + + mov %eax,%r11d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %eax,%r13d + mov %eax,%r14d + + ror $2,%r11d + ror $13,%r13d + mov %eax,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r11d + ror $9,%r13d + or %ecx,%r14d # a|c + + xor %r13d,%r11d # h=Sigma0(a) + and %ecx,%r15d # a&c + add %r12d,%edx # d+=T1 + + and %ebx,%r14d # (a|c)&b + add %r12d,%r11d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r11d # h+=Maj(a,b,c) + mov 8(%rsp),%r13d + mov 60(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 40(%rsp),%r12d + + add 4(%rsp),%r12d + mov %edx,%r13d + mov %edx,%r14d + mov %r8d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r9d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %edx,%r15d # (f^g)&e + mov %r12d,4(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r10d,%r12d # T1+=h + + mov %r11d,%r10d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r11d,%r13d + mov %r11d,%r14d + + ror $2,%r10d + ror $13,%r13d + mov %r11d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r10d + ror $9,%r13d + or %ebx,%r14d # a|c + + xor %r13d,%r10d # h=Sigma0(a) + and %ebx,%r15d # a&c + add %r12d,%ecx # d+=T1 + + and %eax,%r14d # (a|c)&b + add %r12d,%r10d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r10d # h+=Maj(a,b,c) + mov 12(%rsp),%r13d + mov 0(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 44(%rsp),%r12d + + add 8(%rsp),%r12d + mov %ecx,%r13d + mov %ecx,%r14d + mov %edx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r8d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ecx,%r15d # (f^g)&e + mov 
%r12d,8(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r9d,%r12d # T1+=h + + mov %r10d,%r9d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r10d,%r13d + mov %r10d,%r14d + + ror $2,%r9d + ror $13,%r13d + mov %r10d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r9d + ror $9,%r13d + or %eax,%r14d # a|c + + xor %r13d,%r9d # h=Sigma0(a) + and %eax,%r15d # a&c + add %r12d,%ebx # d+=T1 + + and %r11d,%r14d # (a|c)&b + add %r12d,%r9d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r9d # h+=Maj(a,b,c) + mov 16(%rsp),%r13d + mov 4(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 48(%rsp),%r12d + + add 12(%rsp),%r12d + mov %ebx,%r13d + mov %ebx,%r14d + mov %ecx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %edx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ebx,%r15d # (f^g)&e + mov %r12d,12(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r8d,%r12d # T1+=h + + mov %r9d,%r8d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r9d,%r13d + mov %r9d,%r14d + + ror $2,%r8d + ror $13,%r13d + mov %r9d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r8d + ror $9,%r13d + or %r11d,%r14d # a|c + + xor %r13d,%r8d # h=Sigma0(a) + and %r11d,%r15d # a&c + add %r12d,%eax # d+=T1 + + and %r10d,%r14d # (a|c)&b + add %r12d,%r8d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r8d # h+=Maj(a,b,c) + mov 20(%rsp),%r13d + mov 8(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 52(%rsp),%r12d + + add 16(%rsp),%r12d + mov %eax,%r13d + mov %eax,%r14d + mov %ebx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ecx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %eax,%r15d # (f^g)&e + mov %r12d,16(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %edx,%r12d # T1+=h + + mov %r8d,%edx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r8d,%r13d + mov %r8d,%r14d + + ror $2,%edx + ror $13,%r13d + mov %r8d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%edx + ror $9,%r13d + or %r10d,%r14d # a|c + + xor %r13d,%edx # h=Sigma0(a) + and %r10d,%r15d # a&c + add %r12d,%r11d # d+=T1 + + and %r9d,%r14d # (a|c)&b + add %r12d,%edx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%edx # h+=Maj(a,b,c) + mov 24(%rsp),%r13d + mov 12(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 56(%rsp),%r12d + + add 20(%rsp),%r12d + mov %r11d,%r13d + mov %r11d,%r14d + mov %eax,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ebx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r11d,%r15d # (f^g)&e + mov %r12d,20(%rsp) + + 
xor %r14d,%r13d # Sigma1(e) + xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ecx,%r12d # T1+=h + + mov %edx,%ecx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %edx,%r13d + mov %edx,%r14d + + ror $2,%ecx + ror $13,%r13d + mov %edx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ecx + ror $9,%r13d + or %r9d,%r14d # a|c + + xor %r13d,%ecx # h=Sigma0(a) + and %r9d,%r15d # a&c + add %r12d,%r10d # d+=T1 + + and %r8d,%r14d # (a|c)&b + add %r12d,%ecx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ecx # h+=Maj(a,b,c) + mov 28(%rsp),%r13d + mov 16(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 60(%rsp),%r12d + + add 24(%rsp),%r12d + mov %r10d,%r13d + mov %r10d,%r14d + mov %r11d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %eax,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r10d,%r15d # (f^g)&e + mov %r12d,24(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ebx,%r12d # T1+=h + + mov %ecx,%ebx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ecx,%r13d + mov %ecx,%r14d + + ror $2,%ebx + ror $13,%r13d + mov %ecx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ebx + ror $9,%r13d + or %r8d,%r14d # a|c + + xor %r13d,%ebx # h=Sigma0(a) + and %r8d,%r15d # a&c + add %r12d,%r9d # d+=T1 + + and %edx,%r14d # (a|c)&b + add %r12d,%ebx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ebx # h+=Maj(a,b,c) + mov 32(%rsp),%r13d + mov 20(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 0(%rsp),%r12d + + add 28(%rsp),%r12d + mov %r9d,%r13d + mov %r9d,%r14d + mov %r10d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r11d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r9d,%r15d # (f^g)&e + mov %r12d,28(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %eax,%r12d # T1+=h + + mov %ebx,%eax + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ebx,%r13d + mov %ebx,%r14d + + ror $2,%eax + ror $13,%r13d + mov %ebx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%eax + ror $9,%r13d + or %edx,%r14d # a|c + + xor %r13d,%eax # h=Sigma0(a) + and %edx,%r15d # a&c + add %r12d,%r8d # d+=T1 + + and %ecx,%r14d # (a|c)&b + add %r12d,%eax # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%eax # h+=Maj(a,b,c) + mov 36(%rsp),%r13d + mov 24(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 4(%rsp),%r12d + + add 32(%rsp),%r12d + mov %r8d,%r13d + mov %r8d,%r14d + mov %r9d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r10d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r8d,%r15d # (f^g)&e + mov %r12d,32(%rsp) + + xor %r14d,%r13d # 
Sigma1(e) + xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r11d,%r12d # T1+=h + + mov %eax,%r11d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %eax,%r13d + mov %eax,%r14d + + ror $2,%r11d + ror $13,%r13d + mov %eax,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r11d + ror $9,%r13d + or %ecx,%r14d # a|c + + xor %r13d,%r11d # h=Sigma0(a) + and %ecx,%r15d # a&c + add %r12d,%edx # d+=T1 + + and %ebx,%r14d # (a|c)&b + add %r12d,%r11d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r11d # h+=Maj(a,b,c) + mov 40(%rsp),%r13d + mov 28(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 8(%rsp),%r12d + + add 36(%rsp),%r12d + mov %edx,%r13d + mov %edx,%r14d + mov %r8d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r9d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %edx,%r15d # (f^g)&e + mov %r12d,36(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r10d,%r12d # T1+=h + + mov %r11d,%r10d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r11d,%r13d + mov %r11d,%r14d + + ror $2,%r10d + ror $13,%r13d + mov %r11d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r10d + ror $9,%r13d + or %ebx,%r14d # a|c + + xor %r13d,%r10d # h=Sigma0(a) + and %ebx,%r15d # a&c + add %r12d,%ecx # d+=T1 + + and %eax,%r14d # (a|c)&b + add %r12d,%r10d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r10d # h+=Maj(a,b,c) + mov 44(%rsp),%r13d + mov 32(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 12(%rsp),%r12d + + add 40(%rsp),%r12d + mov %ecx,%r13d + mov %ecx,%r14d + mov %edx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r8d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ecx,%r15d # (f^g)&e + mov %r12d,40(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r9d,%r12d # T1+=h + + mov %r10d,%r9d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r10d,%r13d + mov %r10d,%r14d + + ror $2,%r9d + ror $13,%r13d + mov %r10d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r9d + ror $9,%r13d + or %eax,%r14d # a|c + + xor %r13d,%r9d # h=Sigma0(a) + and %eax,%r15d # a&c + add %r12d,%ebx # d+=T1 + + and %r11d,%r14d # (a|c)&b + add %r12d,%r9d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r9d # h+=Maj(a,b,c) + mov 48(%rsp),%r13d + mov 36(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 16(%rsp),%r12d + + add 44(%rsp),%r12d + mov %ebx,%r13d + mov %ebx,%r14d + mov %ecx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %edx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %ebx,%r15d # (f^g)&e + mov %r12d,44(%rsp) + + xor %r14d,%r13d # Sigma1(e) 
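(Reading aid, not part of the patch: every unrolled block in this file instantiates the same SHA-256 round; only the register assignment rotates from round to round. Below is a minimal C sketch of one round and of the .Lrounds_16_xx schedule step, assuming the standard FIPS 180-4 definitions that the per-instruction comments (Sigma1(e), Ch(e,f,g)=((f^g)&e)^g, Maj(a,b,c)=((a|c)&b)|(a&c), "T1+=K[round]") already spell out. The names rotr32, sha256_round, sha256_schedule, W and Kt are illustrative only and do not appear in this file.)

    #include <stdint.h>

    static inline uint32_t rotr32(uint32_t x, unsigned n)
    {
        return ((x >> n) | (x << (32 - n)));
    }

    /* The boolean/rotate pieces named in the per-instruction comments. */
    #define Ch(e, f, g)   ((((f) ^ (g)) & (e)) ^ (g))
    #define Maj(a, b, c)  ((((a) | (c)) & (b)) | ((a) & (c)))
    #define Sigma0(a)     (rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22))
    #define Sigma1(e)     (rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25))
    #define sigma0(x)     (rotr32(x, 7) ^ rotr32(x, 18) ^ ((x) >> 3))
    #define sigma1(x)     (rotr32(x, 17) ^ rotr32(x, 19) ^ ((x) >> 10))

    /*
     * One round.  The assembly keeps a..h in registers and renames them
     * between rounds instead of shifting an array, but it computes
     * exactly these two sums ("T1+=..." and "h=Sigma0(a)" / "h+=Maj").
     */
    static void
    sha256_round(uint32_t s[8], uint32_t Kt, uint32_t Wt)
    {
        uint32_t T1 = s[7] + Sigma1(s[4]) + Ch(s[4], s[5], s[6]) + Kt + Wt;
        uint32_t T2 = Sigma0(s[0]) + Maj(s[0], s[1], s[2]);

        s[7] = s[6]; s[6] = s[5]; s[5] = s[4];
        s[4] = s[3] + T1;                 /* "d+=T1" */
        s[3] = s[2]; s[2] = s[1]; s[1] = s[0];
        s[0] = T1 + T2;                   /* "h+=T1", "h+=Maj(a,b,c)" */
    }

    /* .Lrounds_16_xx: schedule expansion over the 16-word window on the stack. */
    static uint32_t
    sha256_schedule(const uint32_t W[16], int i)
    {
        return (sigma1(W[(i + 14) & 0xf]) + W[(i + 9) & 0xf] +
            sigma0(W[(i + 1) & 0xf]) + W[i & 0xf]);
    }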
+ xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %r8d,%r12d # T1+=h + + mov %r9d,%r8d + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r9d,%r13d + mov %r9d,%r14d + + ror $2,%r8d + ror $13,%r13d + mov %r9d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%r8d + ror $9,%r13d + or %r11d,%r14d # a|c + + xor %r13d,%r8d # h=Sigma0(a) + and %r11d,%r15d # a&c + add %r12d,%eax # d+=T1 + + and %r10d,%r14d # (a|c)&b + add %r12d,%r8d # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%r8d # h+=Maj(a,b,c) + mov 52(%rsp),%r13d + mov 40(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 20(%rsp),%r12d + + add 48(%rsp),%r12d + mov %eax,%r13d + mov %eax,%r14d + mov %ebx,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ecx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %eax,%r15d # (f^g)&e + mov %r12d,48(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %edx,%r12d # T1+=h + + mov %r8d,%edx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %r8d,%r13d + mov %r8d,%r14d + + ror $2,%edx + ror $13,%r13d + mov %r8d,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%edx + ror $9,%r13d + or %r10d,%r14d # a|c + + xor %r13d,%edx # h=Sigma0(a) + and %r10d,%r15d # a&c + add %r12d,%r11d # d+=T1 + + and %r9d,%r14d # (a|c)&b + add %r12d,%edx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%edx # h+=Maj(a,b,c) + mov 56(%rsp),%r13d + mov 44(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 24(%rsp),%r12d + + add 52(%rsp),%r12d + mov %r11d,%r13d + mov %r11d,%r14d + mov %eax,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %ebx,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r11d,%r15d # (f^g)&e + mov %r12d,52(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %ecx,%r12d # T1+=h + + mov %edx,%ecx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %edx,%r13d + mov %edx,%r14d + + ror $2,%ecx + ror $13,%r13d + mov %edx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ecx + ror $9,%r13d + or %r9d,%r14d # a|c + + xor %r13d,%ecx # h=Sigma0(a) + and %r9d,%r15d # a&c + add %r12d,%r10d # d+=T1 + + and %r8d,%r14d # (a|c)&b + add %r12d,%ecx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ecx # h+=Maj(a,b,c) + mov 60(%rsp),%r13d + mov 48(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 28(%rsp),%r12d + + add 56(%rsp),%r12d + mov %r10d,%r13d + mov %r10d,%r14d + mov %r11d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %eax,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r10d,%r15d # (f^g)&e + mov %r12d,56(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %eax,%r15d # 
Ch(e,f,g)=((f^g)&e)^g + add %ebx,%r12d # T1+=h + + mov %ecx,%ebx + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ecx,%r13d + mov %ecx,%r14d + + ror $2,%ebx + ror $13,%r13d + mov %ecx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%ebx + ror $9,%r13d + or %r8d,%r14d # a|c + + xor %r13d,%ebx # h=Sigma0(a) + and %r8d,%r15d # a&c + add %r12d,%r9d # d+=T1 + + and %edx,%r14d # (a|c)&b + add %r12d,%ebx # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%ebx # h+=Maj(a,b,c) + mov 0(%rsp),%r13d + mov 52(%rsp),%r12d + + mov %r13d,%r15d + + shr $3,%r13d + ror $7,%r15d + + xor %r15d,%r13d + ror $11,%r15d + + xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) + mov %r12d,%r14d + + shr $10,%r12d + ror $17,%r14d + + xor %r14d,%r12d + ror $2,%r14d + + xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) + + add %r13d,%r12d + + add 32(%rsp),%r12d + + add 60(%rsp),%r12d + mov %r9d,%r13d + mov %r9d,%r14d + mov %r10d,%r15d + + ror $6,%r13d + ror $11,%r14d + xor %r11d,%r15d # f^g + + xor %r14d,%r13d + ror $14,%r14d + and %r9d,%r15d # (f^g)&e + mov %r12d,60(%rsp) + + xor %r14d,%r13d # Sigma1(e) + xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g + add %eax,%r12d # T1+=h + + mov %ebx,%eax + add %r13d,%r12d # T1+=Sigma1(e) + + add %r15d,%r12d # T1+=Ch(e,f,g) + mov %ebx,%r13d + mov %ebx,%r14d + + ror $2,%eax + ror $13,%r13d + mov %ebx,%r15d + add (%rbp,%rdi,4),%r12d # T1+=K[round] + + xor %r13d,%eax + ror $9,%r13d + or %edx,%r14d # a|c + + xor %r13d,%eax # h=Sigma0(a) + and %edx,%r15d # a&c + add %r12d,%r8d # d+=T1 + + and %ecx,%r14d # (a|c)&b + add %r12d,%eax # h+=T1 + + or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14d,%eax # h+=Maj(a,b,c) + cmp $64,%rdi + jb .Lrounds_16_xx + + mov 16*4+0*8(%rsp),%rdi + lea 16*4(%rsi),%rsi + + add 4*0(%rdi),%eax + add 4*1(%rdi),%ebx + add 4*2(%rdi),%ecx + add 4*3(%rdi),%edx + add 4*4(%rdi),%r8d + add 4*5(%rdi),%r9d + add 4*6(%rdi),%r10d + add 4*7(%rdi),%r11d + + cmp 16*4+2*8(%rsp),%rsi + + mov %eax,4*0(%rdi) + mov %ebx,4*1(%rdi) + mov %ecx,4*2(%rdi) + mov %edx,4*3(%rdi) + mov %r8d,4*4(%rdi) + mov %r9d,4*5(%rdi) + mov %r10d,4*6(%rdi) + mov %r11d,4*7(%rdi) + jb .Lloop + + mov 16*4+3*8(%rsp),%rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + + ret +SET_SIZE(SHA256TransformBlocks) + +.align 6, 0x90 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#endif /* !lint && !__lint */ diff --git a/module/icp/asm-x86_64/os/macos/sha2/sha512_impl.S b/module/icp/asm-x86_64/os/macos/sha2/sha512_impl.S new file mode 100644 index 0000000000..1b51f9d5b4 --- /dev/null +++ b/module/icp/asm-x86_64/os/macos/sha2/sha512_impl.S @@ -0,0 +1,2082 @@ +/* + * 
==================================================================== + * Written by Andy Polyakov for the OpenSSL + * project. Rights for redistribution and usage in source and binary + * forms are granted according to the OpenSSL license. + * ==================================================================== + * + * sha256/512_block procedure for x86_64. + * + * 40% improvement over compiler-generated code on Opteron. On EM64T + * sha256 was observed to run >80% faster and sha512 - >40%. No magical + * tricks, just straight implementation... I really wonder why gcc + * [being armed with inline assembler] fails to generate as fast code. + * The only thing which is cool about this module is that it's very + * same instruction sequence used for both SHA-256 and SHA-512. In + * former case the instructions operate on 32-bit operands, while in + * latter - on 64-bit ones. All I had to do is to get one flavor right, + * the other one passed the test right away:-) + * + * sha256_block runs in ~1005 cycles on Opteron, which gives you + * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock + * frequency in GHz. sha512_block runs in ~1275 cycles, which results + * in 128*1000/1275=100MBps per GHz. Is there room for improvement? + * Well, if you compare it to IA-64 implementation, which maintains + * X[16] in register bank[!], tends to 4 instructions per CPU clock + * cycle and runs in 1003 cycles, 1275 is very good result for 3-way + * issue Opteron pipeline and X[16] maintained in memory. So that *if* + * there is a way to improve it, *then* the only way would be to try to + * offload X[16] updates to SSE unit, but that would require "deeper" + * loop unroll, which in turn would naturally cause size blow-up, not + * to mention increased complexity! And once again, only *if* it's + * actually possible to noticeably improve overall ILP, instruction + * level parallelism, on a given CPU implementation in this case. + * + * Special note on Intel EM64T. While Opteron CPU exhibits perfect + * perfromance ratio of 1.5 between 64- and 32-bit flavors [see above], + * [currently available] EM64T CPUs apparently are far from it. On the + * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit + * sha256_block:-( This is presumably because 64-bit shifts/rotates + * apparently are not atomic instructions, but implemented in microcode. + */ + +/* + * OpenSolaris OS modifications + * + * Sun elects to use this software under the BSD license. + * + * This source originates from OpenSSL file sha512-x86_64.pl at + * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz + * (presumably for future OpenSSL release 0.9.8h), with these changes: + * + * 1. Added perl "use strict" and declared variables. + * + * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from + * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. + * + * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) + * assemblers). Replaced the .picmeup macro with assembler code. + * + * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", + * at the beginning of SHA2_CTX (the next field is 8-byte aligned). + */ + +/* + * This file was generated by a perl script (sha512-x86_64.pl) that were + * used to generate sha256 and sha512 variants from the same code base. + * The comments from the original file have been pasted above. 
+ */ + + +#if defined(lint) || defined(__lint) +#include +#include + +/* ARGSUSED */ +void +SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) +{ +} + + +#else +#define _ASM +#include + +ENTRY_NP(SHA512TransformBlocks) + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%rbp # copy %rsp + shl $4,%rdx # num*16 + sub $16*8+4*8,%rsp + lea (%rsi,%rdx,8),%rdx # inp+num*16*8 + and $-64,%rsp # align stack frame + add $8,%rdi # Skip OpenSolaris field, "algotype" + mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg + mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg + mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg + mov %rbp,16*8+3*8(%rsp) # save copy of %rsp + + //.picmeup %rbp + // The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts + // the address of the "next" instruction into the target register + // (%rbp). This generates these 2 instructions: + lea .Llea(%rip),%rbp + //nop // .picmeup generates a nop for mod 8 alignment--not needed here + +.Llea: + lea K512-.(%rbp),%rbp + + mov 8*0(%rdi),%rax + mov 8*1(%rdi),%rbx + mov 8*2(%rdi),%rcx + mov 8*3(%rdi),%rdx + mov 8*4(%rdi),%r8 + mov 8*5(%rdi),%r9 + mov 8*6(%rdi),%r10 + mov 8*7(%rdi),%r11 + jmp .Lloop + +.align 4, 0x90 +.Lloop: + xor %rdi,%rdi + mov 8*0(%rsi),%r12 + bswap %r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,0(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 8*1(%rsi),%r12 + bswap %r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,8(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 8*2(%rsi),%r12 + bswap %r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,16(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + 
or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 8*3(%rsi),%r12 + bswap %r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,24(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 8*4(%rsi),%r12 + bswap %r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,32(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 8*5(%rsi),%r12 + bswap %r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,40(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 8*6(%rsi),%r12 + bswap %r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,48(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 8*7(%rsi),%r12 + bswap %r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,56(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor 
%r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + mov 8*8(%rsi),%r12 + bswap %r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,64(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 8*9(%rsi),%r12 + bswap %r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,72(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 8*10(%rsi),%r12 + bswap %r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,80(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 8*11(%rsi),%r12 + bswap %r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,88(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and 
%r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 8*12(%rsi),%r12 + bswap %r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,96(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 8*13(%rsi),%r12 + bswap %r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,104(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 8*14(%rsi),%r12 + bswap %r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,112(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 8*15(%rsi),%r12 + bswap %r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,120(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + jmp .Lrounds_16_xx +.align 4, 0x90 +.Lrounds_16_xx: + mov 8(%rsp),%r13 + mov 112(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + 
+ xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 72(%rsp),%r12 + + add 0(%rsp),%r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,0(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 16(%rsp),%r13 + mov 120(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 80(%rsp),%r12 + + add 8(%rsp),%r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,8(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 + ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 24(%rsp),%r13 + mov 0(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 88(%rsp),%r12 + + add 16(%rsp),%r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,16(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 32(%rsp),%r13 + mov 8(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 
96(%rsp),%r12 + + add 24(%rsp),%r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,24(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 40(%rsp),%r13 + mov 16(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 104(%rsp),%r12 + + add 32(%rsp),%r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,32(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b + add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 48(%rsp),%r13 + mov 24(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 112(%rsp),%r12 + + add 40(%rsp),%r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,40(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 56(%rsp),%r13 + mov 32(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 120(%rsp),%r12 + + add 48(%rsp),%r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,48(%rsp) + + 
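(Reading aid, not part of the patch: this sha512 flavor follows the same round and schedule structure as the SHA-256 sketch earlier; the words are 64-bit, the schedule window is sixteen 8-byte slots at 0(%rsp)..120(%rsp), the table is K512, and SHA-512 runs 80 rounds rather than 64. Only the rotate/shift counts change; a minimal C sketch of just those changed pieces follows, with rotr64 and the macro names again being illustrative only.)

    #include <stdint.h>

    static inline uint64_t rotr64(uint64_t x, unsigned n)
    {
        return ((x >> n) | (x << (64 - n)));
    }

    /* Rotate/shift counts visible in the ror/shr instructions of this file. */
    #define Sigma0_512(a)  (rotr64(a, 28) ^ rotr64(a, 34) ^ rotr64(a, 39))
    #define Sigma1_512(e)  (rotr64(e, 14) ^ rotr64(e, 18) ^ rotr64(e, 41))
    #define sigma0_512(x)  (rotr64(x, 1) ^ rotr64(x, 8) ^ ((x) >> 7))
    #define sigma1_512(x)  (rotr64(x, 19) ^ rotr64(x, 61) ^ ((x) >> 6))
    /*
     * Ch and Maj keep the same boolean forms as in the SHA-256 sketch;
     * the round itself is unchanged apart from the 64-bit width.
     */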
xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 64(%rsp),%r13 + mov 40(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 0(%rsp),%r12 + + add 56(%rsp),%r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,56(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + mov 72(%rsp),%r13 + mov 48(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 8(%rsp),%r12 + + add 64(%rsp),%r12 + mov %r8,%r13 + mov %r8,%r14 + mov %r9,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r10,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r8,%r15 # (f^g)&e + mov %r12,64(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r11,%r12 # T1+=h + + mov %rax,%r11 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rax,%r13 + mov %rax,%r14 + + ror $28,%r11 + ror $34,%r13 + mov %rax,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r11 + ror $5,%r13 + or %rcx,%r14 # a|c + + xor %r13,%r11 # h=Sigma0(a) + and %rcx,%r15 # a&c + add %r12,%rdx # d+=T1 + + and %rbx,%r14 # (a|c)&b + add %r12,%r11 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r11 # h+=Maj(a,b,c) + mov 80(%rsp),%r13 + mov 56(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 16(%rsp),%r12 + + add 72(%rsp),%r12 + mov %rdx,%r13 + mov %rdx,%r14 + mov %r8,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r9,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rdx,%r15 # (f^g)&e + mov %r12,72(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r10,%r12 # T1+=h + + mov %r11,%r10 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r11,%r13 + mov %r11,%r14 + + ror $28,%r10 
+ ror $34,%r13 + mov %r11,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r10 + ror $5,%r13 + or %rbx,%r14 # a|c + + xor %r13,%r10 # h=Sigma0(a) + and %rbx,%r15 # a&c + add %r12,%rcx # d+=T1 + + and %rax,%r14 # (a|c)&b + add %r12,%r10 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r10 # h+=Maj(a,b,c) + mov 88(%rsp),%r13 + mov 64(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 24(%rsp),%r12 + + add 80(%rsp),%r12 + mov %rcx,%r13 + mov %rcx,%r14 + mov %rdx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r8,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rcx,%r15 # (f^g)&e + mov %r12,80(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r9,%r12 # T1+=h + + mov %r10,%r9 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r10,%r13 + mov %r10,%r14 + + ror $28,%r9 + ror $34,%r13 + mov %r10,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r9 + ror $5,%r13 + or %rax,%r14 # a|c + + xor %r13,%r9 # h=Sigma0(a) + and %rax,%r15 # a&c + add %r12,%rbx # d+=T1 + + and %r11,%r14 # (a|c)&b + add %r12,%r9 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r9 # h+=Maj(a,b,c) + mov 96(%rsp),%r13 + mov 72(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 32(%rsp),%r12 + + add 88(%rsp),%r12 + mov %rbx,%r13 + mov %rbx,%r14 + mov %rcx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rdx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rbx,%r15 # (f^g)&e + mov %r12,88(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %r8,%r12 # T1+=h + + mov %r9,%r8 + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r9,%r13 + mov %r9,%r14 + + ror $28,%r8 + ror $34,%r13 + mov %r9,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%r8 + ror $5,%r13 + or %r11,%r14 # a|c + + xor %r13,%r8 # h=Sigma0(a) + and %r11,%r15 # a&c + add %r12,%rax # d+=T1 + + and %r10,%r14 # (a|c)&b + add %r12,%r8 # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%r8 # h+=Maj(a,b,c) + mov 104(%rsp),%r13 + mov 80(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 40(%rsp),%r12 + + add 96(%rsp),%r12 + mov %rax,%r13 + mov %rax,%r14 + mov %rbx,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rcx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %rax,%r15 # (f^g)&e + mov %r12,96(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rdx,%r12 # T1+=h + + mov %r8,%rdx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %r8,%r13 + mov %r8,%r14 + + ror $28,%rdx + ror $34,%r13 + mov %r8,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rdx + ror $5,%r13 + or %r10,%r14 # a|c + + xor %r13,%rdx # h=Sigma0(a) + and %r10,%r15 # a&c + add %r12,%r11 # d+=T1 + + and %r9,%r14 # (a|c)&b 
+ add %r12,%rdx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rdx # h+=Maj(a,b,c) + mov 112(%rsp),%r13 + mov 88(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 48(%rsp),%r12 + + add 104(%rsp),%r12 + mov %r11,%r13 + mov %r11,%r14 + mov %rax,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rbx,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r11,%r15 # (f^g)&e + mov %r12,104(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rcx,%r12 # T1+=h + + mov %rdx,%rcx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rdx,%r13 + mov %rdx,%r14 + + ror $28,%rcx + ror $34,%r13 + mov %rdx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rcx + ror $5,%r13 + or %r9,%r14 # a|c + + xor %r13,%rcx # h=Sigma0(a) + and %r9,%r15 # a&c + add %r12,%r10 # d+=T1 + + and %r8,%r14 # (a|c)&b + add %r12,%rcx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rcx # h+=Maj(a,b,c) + mov 120(%rsp),%r13 + mov 96(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 56(%rsp),%r12 + + add 112(%rsp),%r12 + mov %r10,%r13 + mov %r10,%r14 + mov %r11,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %rax,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r10,%r15 # (f^g)&e + mov %r12,112(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rbx,%r12 # T1+=h + + mov %rcx,%rbx + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rcx,%r13 + mov %rcx,%r14 + + ror $28,%rbx + ror $34,%r13 + mov %rcx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rbx + ror $5,%r13 + or %r8,%r14 # a|c + + xor %r13,%rbx # h=Sigma0(a) + and %r8,%r15 # a&c + add %r12,%r9 # d+=T1 + + and %rdx,%r14 # (a|c)&b + add %r12,%rbx # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rbx # h+=Maj(a,b,c) + mov 0(%rsp),%r13 + mov 104(%rsp),%r12 + + mov %r13,%r15 + + shr $7,%r13 + ror $1,%r15 + + xor %r15,%r13 + ror $7,%r15 + + xor %r15,%r13 # sigma0(X[(i+1)&0xf]) + mov %r12,%r14 + + shr $6,%r12 + ror $19,%r14 + + xor %r14,%r12 + ror $42,%r14 + + xor %r14,%r12 # sigma1(X[(i+14)&0xf]) + + add %r13,%r12 + + add 64(%rsp),%r12 + + add 120(%rsp),%r12 + mov %r9,%r13 + mov %r9,%r14 + mov %r10,%r15 + + ror $14,%r13 + ror $18,%r14 + xor %r11,%r15 # f^g + + xor %r14,%r13 + ror $23,%r14 + and %r9,%r15 # (f^g)&e + mov %r12,120(%rsp) + + xor %r14,%r13 # Sigma1(e) + xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g + add %rax,%r12 # T1+=h + + mov %rbx,%rax + add %r13,%r12 # T1+=Sigma1(e) + + add %r15,%r12 # T1+=Ch(e,f,g) + mov %rbx,%r13 + mov %rbx,%r14 + + ror $28,%rax + ror $34,%r13 + mov %rbx,%r15 + add (%rbp,%rdi,8),%r12 # T1+=K[round] + + xor %r13,%rax + ror $5,%r13 + or %rdx,%r14 # a|c + + xor %r13,%rax # h=Sigma0(a) + and %rdx,%r15 # a&c + add %r12,%r8 # d+=T1 + + and %rcx,%r14 # (a|c)&b + add %r12,%rax # h+=T1 + + or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) + lea 1(%rdi),%rdi # round++ + + add %r14,%rax # h+=Maj(a,b,c) + cmp $80,%rdi + jb .Lrounds_16_xx + + mov 16*8+0*8(%rsp),%rdi + lea 
16*8(%rsi),%rsi + + add 8*0(%rdi),%rax + add 8*1(%rdi),%rbx + add 8*2(%rdi),%rcx + add 8*3(%rdi),%rdx + add 8*4(%rdi),%r8 + add 8*5(%rdi),%r9 + add 8*6(%rdi),%r10 + add 8*7(%rdi),%r11 + + cmp 16*8+2*8(%rsp),%rsi + + mov %rax,8*0(%rdi) + mov %rbx,8*1(%rdi) + mov %rcx,8*2(%rdi) + mov %rdx,8*3(%rdi) + mov %r8,8*4(%rdi) + mov %r9,8*5(%rdi) + mov %r10,8*6(%rdi) + mov %r11,8*7(%rdi) + jb .Lloop + + mov 16*8+3*8(%rsp),%rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + + ret +SET_SIZE(SHA512TransformBlocks) + +.align 6, 0x90 +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#endif /* !lint && !__lint */ diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 503d4e48b4..b6de853b1b 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -1199,7 +1199,8 @@ taskq_destroy(taskq_t *tq) } EXPORT_SYMBOL(taskq_destroy); -int EMPTY_TASKQ(taskq_t *tq) +int +EMPTY_TASKQ(taskq_t *tq) { #ifdef _KERNEL return (tq->tq_lowest_id == tq->tq_next_id); diff --git a/module/os/macos/.gitignore b/module/os/macos/.gitignore new file mode 100644 index 0000000000..14140f47af --- /dev/null +++ b/module/os/macos/.gitignore @@ -0,0 +1 @@ +*.in diff --git a/module/os/macos/Makefile.am b/module/os/macos/Makefile.am new file mode 100644 index 0000000000..5a729e5ad2 --- /dev/null +++ b/module/os/macos/Makefile.am @@ -0,0 +1,6 @@ +# Makefile used only by macOS. Should define no dependencies for +# other platforms. 
+ +if BUILD_MACOS +SUBDIRS=kernel spl zfs +endif diff --git a/module/os/macos/README.md b/module/os/macos/README.md new file mode 100644 index 0000000000..45c21cf4a0 --- /dev/null +++ b/module/os/macos/README.md @@ -0,0 +1,8 @@ + +OpenZFS on OS X, the [macOS](https://openzfsonosx.org) port of [Open ZFS](https://openzfs.org) + +Please use the [OpenZFSOnOsX](https://github.com/openzfsonosx/openzfs) +repository for support, troubleshooting, and GitHub issues. + +For more information on compiling, please visit the +[wiki](https://openzfsonosx.org/wiki/Install#Initial_installation_from_source) diff --git a/module/os/macos/kernel/.gitignore b/module/os/macos/kernel/.gitignore new file mode 100644 index 0000000000..f0d4d14070 --- /dev/null +++ b/module/os/macos/kernel/.gitignore @@ -0,0 +1,5 @@ +allsymbols +kernelexports +kernelexports_32 +kernelexports_64 +kextsymboltool diff --git a/module/os/macos/kernel/Info.plist b/module/os/macos/kernel/Info.plist new file mode 100644 index 0000000000..5dd4c6b416 --- /dev/null +++ b/module/os/macos/kernel/Info.plist @@ -0,0 +1,34 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + KernelExports + CFBundleGetInfoString + Mach Kernel Pseudoextension, Apple Computer Inc, 12.5.0 + CFBundleIdentifier + net.lundman.kernel.dependencies + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + Mach Kernel Pseudoextension + CFBundlePackageType + KEXT + CFBundleShortVersionString + 12.5.0 + CFBundleSignature + ???? + CFBundleVersion + 12.5.0 + OSBundleCompatibleVersion + 8.0.0d0 + OSKernelResource + + OSBundleAllowUserLoad + + OSBundleRequired + Root + + diff --git a/module/os/macos/kernel/Makefile.am b/module/os/macos/kernel/Makefile.am new file mode 100644 index 0000000000..499bc3ef3a --- /dev/null +++ b/module/os/macos/kernel/Makefile.am @@ -0,0 +1,25 @@ + +AUTOMAKE_OPTIONS = subdir-objects + +noinst_PROGRAMS = kextsymboltool + +kextsymboltool_SOURCES = \ + kextsymboltool.c + +kextsymboltool_CPPFLAGS = +kextsymboltool_CFLAGS = +kextsymboltool_LDFLAGS = -lstdc++ + +kernelexports: zfs.exports | kextsymboltool + ./kextsymboltool -arch x86_64 -import allsymbols -export zfs.exports -output kernelexports_64 + ./kextsymboltool -arch i386 -import allsymbols -export zfs.exports -output kernelexports_32 + lipo -create kernelexports_32 kernelexports_64 -output kernelexports + +clean: + rm -f kernelexports kernelexports_32 kernelexports_64 allsymbols + rm -f kextsymboltool.o kextsymboltool + +allsymbols: + $(NM) -gj $(MACH_KERNEL) > allsymbols + +all:kextsymboltool allsymbols kernelexports diff --git a/module/os/macos/kernel/README.txt b/module/os/macos/kernel/README.txt new file mode 100644 index 0000000000..104f668bbe --- /dev/null +++ b/module/os/macos/kernel/README.txt @@ -0,0 +1,10 @@ + +Not all symbols are exported by default in OS X, and we have to do +a little magic to get around that. + +This uses Apple's open-source kextsymboltool.c utility, and a dump of all the +symbols in the kernel, to produce a link helper kext. + +We most likely need to improve it to handle kernel versions +a little more flexibly. + diff --git a/module/os/macos/kernel/kextsymboltool.c b/module/os/macos/kernel/kextsymboltool.c new file mode 100644 index 0000000000..19ffb2f306 --- /dev/null +++ b/module/os/macos/kernel/kextsymboltool.c @@ -0,0 +1,912 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#pragma mark Typedefs, Enums, Constants +/********************************************************************* +* Typedefs, Enums, Constants +*********************************************************************/ +typedef enum { + kErrorNone = 0, + kError, + kErrorFileAccess, + kErrorDiskFull, + kErrorDuplicate +} ToolError; + +#pragma mark Function Protos +/********************************************************************* +* Function Protos +*********************************************************************/ +__private_extern__ ToolError +readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize); + +__private_extern__ ToolError +writeFile(int fd, const void * data, size_t length); + +extern char* __cxa_demangle (const char* mangled_name, + char* buf, + size_t* n, + int* status); + +#pragma mark Functions +/********************************************************************* +*********************************************************************/ +__private_extern__ ToolError +writeFile(int fd, const void * data, size_t length) +{ + ToolError err; + + if (length != (size_t)write(fd, data, length)) + err = kErrorDiskFull; + else + err = kErrorNone; + + if (kErrorNone != err) + perror("couldn't write output"); + + return( err ); +} + +/********************************************************************* +*********************************************************************/ +__private_extern__ ToolError +readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize) +{ + ToolError err = kErrorFileAccess; + int fd; + struct stat stat_buf; + + *objAddr = 0; + *objSize = 0; + + do + { + if((fd = open(path, O_RDONLY)) == -1) + continue; + + if(fstat(fd, &stat_buf) == -1) + continue; + + if (0 == (stat_buf.st_mode & S_IFREG)) + continue; + + /* Don't try to map an empty file, it fails now due to conformance + * stuff (PR 4611502). 
+ */ + if (0 == stat_buf.st_size) { + err = kErrorNone; + continue; + } + + *objSize = stat_buf.st_size; + + *objAddr = (vm_offset_t)mmap(NULL /* address */, *objSize, + PROT_READ|PROT_WRITE, MAP_FILE|MAP_PRIVATE /* flags */, + fd, 0 /* offset */); + + if ((void *)*objAddr == MAP_FAILED) { + *objAddr = 0; + *objSize = 0; + continue; + } + + err = kErrorNone; + + } while( false ); + + if (-1 != fd) + { + close(fd); + } + if (kErrorNone != err) + { + fprintf(stderr, "couldn't read %s: %s\n", path, strerror(errno)); + } + + return( err ); +} + + +enum { kExported = 0x00000001, kObsolete = 0x00000002 }; + +struct symbol { + char * name; + unsigned int name_len; + char * indirect; + unsigned int indirect_len; + unsigned int flags; + struct symbol * list; + unsigned int list_count; +}; + +static bool issymchar( char c ) +{ + return ((c > ' ') && (c <= '~') && (c != ':') && (c != '#')); +} + +static bool iswhitespace( char c ) +{ + return ((c == ' ') || (c == '\t')); +} + +/* + * Function for qsort for comparing symbol list names. + */ +static int +qsort_cmp(const void * _left, const void * _right) +{ + struct symbol * left = (struct symbol *) _left; + struct symbol * right = (struct symbol *) _right; + + return (strcmp(left->name, right->name)); +} + +/* + * Function for bsearch for finding a symbol name. + */ + +static int +bsearch_cmp( const void * _key, const void * _cmp) +{ + char * key = (char *)_key; + struct symbol * cmp = (struct symbol *) _cmp; + + return(strcmp(key, cmp->name)); +} + +struct bsearch_key +{ + char * name; + unsigned int name_len; +}; + +static int +bsearch_cmp_prefix( const void * _key, const void * _cmp) +{ + struct bsearch_key * key = (struct bsearch_key *)_key; + struct symbol * cmp = (struct symbol *) _cmp; + + return(strncmp(key->name, cmp->name, key->name_len)); +} + +static uint32_t +count_symbols(char * file, vm_size_t file_size) +{ + uint32_t nsyms = 0; + char * scan; + char * eol; + char * next; + + for (scan = file; true; scan = next) { + + eol = memchr(scan, '\n', file_size - (scan - file)); + if (eol == NULL) { + break; + } + next = eol + 1; + + /* Skip empty lines. + */ + if (eol == scan) { + continue; + } + + /* Skip comment lines. + */ + if (scan[0] == '#') { + continue; + } + + /* Scan past any non-symbol characters at the beginning of the line. */ + while ((scan < eol) && !issymchar(*scan)) { + scan++; + } + + /* No symbol on line? Move along. + */ + if (scan == eol) { + continue; + } + + /* Skip symbols starting with '.'. + */ + if (scan[0] == '.') { + continue; + } + nsyms++; + } + + return nsyms; +} + +static uint32_t +store_symbols(char * file, vm_size_t file_size, struct symbol * symbols, uint32_t idx, uint32_t max_symbols) +{ + char * scan; + char * line; + char * eol; + char * next; + + uint32_t strtabsize; + + strtabsize = 0; + + for (scan = file, line = file; true; scan = next, line = next) { + + char * name = NULL; + char * name_term = NULL; + unsigned int name_len = 0; + char * indirect = NULL; + char * indirect_term = NULL; + unsigned int indirect_len = 0; + char * option = NULL; + char * option_term = NULL; + unsigned int option_len = 0; + char optionstr[256]; + boolean_t obsolete = 0; + + eol = memchr(scan, '\n', file_size - (scan - file)); + if (eol == NULL) { + break; + } + next = eol + 1; + + /* Skip empty lines. + */ + if (eol == scan) { + continue; + } + + *eol = '\0'; + + /* Skip comment lines. + */ + if (scan[0] == '#') { + continue; + } + + /* Scan past any non-symbol characters at the beginning of the line. 
*/ + while ((scan < eol) && !issymchar(*scan)) { + scan++; + } + + /* No symbol on line? Move along. + */ + if (scan == eol) { + continue; + } + + /* Skip symbols starting with '.'. + */ + if (scan[0] == '.') { + continue; + } + + name = scan; + + /* Find the end of the symbol. + */ + while ((*scan != '\0') && issymchar(*scan)) { + scan++; + } + + /* Note char past end of symbol. + */ + name_term = scan; + + /* Stored length must include the terminating nul char. + */ + name_len = name_term - name + 1; + + /* Now look for an indirect. + */ + if (*scan != '\0') { + while ((*scan != '\0') && iswhitespace(*scan)) { + scan++; + } + if (*scan == ':') { + scan++; + while ((*scan != '\0') && iswhitespace(*scan)) { + scan++; + } + if (issymchar(*scan)) { + indirect = scan; + + /* Find the end of the symbol. + */ + while ((*scan != '\0') && issymchar(*scan)) { + scan++; + } + + /* Note char past end of symbol. + */ + indirect_term = scan; + + /* Stored length must include the terminating nul char. + */ + indirect_len = indirect_term - indirect + 1; + + } else if (*scan == '\0') { + fprintf(stderr, "bad format in symbol line: %s\n", line); + exit(1); + } + } else if (*scan != '\0' && *scan != '-') { + fprintf(stderr, "bad format in symbol line: %s\n", line); + exit(1); + } + } + + /* Look for options. + */ + if (*scan != '\0') { + while ((*scan != '\0') && iswhitespace(*scan)) { + scan++; + } + + if (*scan == '-') { + scan++; + + if (isalpha(*scan)) { + option = scan; + + /* Find the end of the option. + */ + while ((*scan != '\0') && isalpha(*scan)) { + scan++; + } + + /* Note char past end of option. + */ + option_term = scan; + option_len = option_term - option; + + if (option_len >= sizeof(optionstr)) { + fprintf(stderr, "option too long in symbol line: %s\n", line); + exit(1); + } + memcpy(optionstr, option, option_len); + optionstr[option_len] = '\0'; + + /* Find the option. + */ + if (!strncmp(optionstr, "obsolete", option_len)) { + obsolete = TRUE; + } + + } else if (*scan == '\0') { + fprintf(stderr, "bad format in symbol line: %s\n", line); + exit(1); + } + + } + + } + + if(idx >= max_symbols) { + fprintf(stderr, "symbol[%d/%d] overflow: %s\n", idx, max_symbols, line); + exit(1); + } + + *name_term = '\0'; + if (indirect_term) { + *indirect_term = '\0'; + } + + symbols[idx].name = name; + symbols[idx].name_len = name_len; + symbols[idx].indirect = indirect; + symbols[idx].indirect_len = indirect_len; + symbols[idx].flags = (obsolete) ? 
kObsolete : 0; + + strtabsize += symbols[idx].name_len + symbols[idx].indirect_len; + idx++; + } + + return strtabsize; +} + +/********************************************************************* +*********************************************************************/ +int main(int argc, char * argv[]) +{ + ToolError err; + int i, fd; + const char * output_name = NULL; + uint32_t zero = 0, num_files = 0; + uint32_t filenum; + uint32_t strx, strtabsize, strtabpad; + struct symbol * import_symbols; + struct symbol * export_symbols; + uint32_t num_import_syms, num_export_syms; + uint32_t result_count, num_removed_syms; + uint32_t import_idx, export_idx; + const NXArchInfo * host_arch; + const NXArchInfo * target_arch; + boolean_t require_imports = true; + boolean_t diff = false; + + + struct file { + vm_offset_t mapped; + vm_size_t mapped_size; + uint32_t nsyms; + boolean_t import; + const char * path; + }; + struct file files[64]; + + host_arch = NXGetLocalArchInfo(); + target_arch = host_arch; + + for( i = 1; i < argc; i += 2) + { + boolean_t import; + + if (!strcmp("-sect", argv[i])) + { + require_imports = false; + i--; + continue; + } + if (!strcmp("-diff", argv[i])) + { + require_imports = false; + diff = true; + i--; + continue; + } + + if (i == (argc - 1)) + { + fprintf(stderr, "bad arguments: %s\n", argv[i]); + exit(1); + } + + if (!strcmp("-arch", argv[i])) + { + target_arch = NXGetArchInfoFromName(argv[i + 1]); + if (!target_arch) + { + fprintf(stderr, "unknown architecture name: %s\n", argv[i+1]); + exit(1); + } + continue; + } + if (!strcmp("-output", argv[i])) + { + output_name = argv[i+1]; + continue; + } + + if (!strcmp("-import", argv[i])) + import = true; + else if (!strcmp("-export", argv[i])) + import = false; + else + { + fprintf(stderr, "unknown option: %s\n", argv[i]); + exit(1); + } + + err = readFile(argv[i+1], &files[num_files].mapped, &files[num_files].mapped_size); + if (kErrorNone != err) + exit(1); + + if (files[num_files].mapped && files[num_files].mapped_size) + { + files[num_files].import = import; + files[num_files].path = argv[i+1]; + num_files++; + } + } + + if (!output_name) + { + fprintf(stderr, "no output file\n"); + exit(1); + } + + num_import_syms = 0; + num_export_syms = 0; + for (filenum = 0; filenum < num_files; filenum++) + { + files[filenum].nsyms = count_symbols((char *) files[filenum].mapped, files[filenum].mapped_size); + if (files[filenum].import) + num_import_syms += files[filenum].nsyms; + else + num_export_syms += files[filenum].nsyms; + } + if (!num_export_syms) + { + fprintf(stderr, "no export names\n"); + exit(1); + } + + import_symbols = calloc(num_import_syms, sizeof(struct symbol)); + export_symbols = calloc(num_export_syms, sizeof(struct symbol)); + + import_idx = 0; + export_idx = 0; + + for (filenum = 0; filenum < num_files; filenum++) + { + if (files[filenum].import) + { + store_symbols((char *) files[filenum].mapped, files[filenum].mapped_size, + import_symbols, import_idx, num_import_syms); + import_idx += files[filenum].nsyms; + } + else + { + store_symbols((char *) files[filenum].mapped, files[filenum].mapped_size, + export_symbols, export_idx, num_export_syms); + export_idx += files[filenum].nsyms; + } + if (false && !files[filenum].nsyms) + { + fprintf(stderr, "warning: file %s contains no names\n", files[filenum].path); + } + } + + + qsort(import_symbols, num_import_syms, sizeof(struct symbol), &qsort_cmp); + qsort(export_symbols, num_export_syms, sizeof(struct symbol), &qsort_cmp); + + result_count = 0; + num_removed_syms 
= 0; + strtabsize = 4; + if (num_import_syms) + { + for (export_idx = 0; export_idx < num_export_syms; export_idx++) + { + struct symbol * result; + char * name; + size_t len; + boolean_t wild; + + name = export_symbols[export_idx].indirect; + len = export_symbols[export_idx].indirect_len; + if (!name) + { + name = export_symbols[export_idx].name; + len = export_symbols[export_idx].name_len; + } + wild = ((len > 2) && ('*' == name[len-=2])); + if (wild) + { + struct bsearch_key key; + key.name = name; + key.name_len = len; + result = bsearch(&key, import_symbols, + num_import_syms, sizeof(struct symbol), &bsearch_cmp_prefix); + + if (result) + { + struct symbol * first; + struct symbol * last; + + strtabsize += (result->name_len + result->indirect_len); + + first = result; + while (--first >= &import_symbols[0]) + { + if (bsearch_cmp_prefix(&key, first)) + break; + strtabsize += (first->name_len + first->indirect_len); + } + first++; + + last = result; + while (++last < (&import_symbols[0] + num_import_syms)) + { + if (bsearch_cmp_prefix(&key, last)) + break; + strtabsize += (last->name_len + last->indirect_len); + } + result_count += last - first; + result = first; + export_symbols[export_idx].list = first; + export_symbols[export_idx].list_count = last - first; + export_symbols[export_idx].flags |= kExported; + } + } + else + result = bsearch(name, import_symbols, + num_import_syms, sizeof(struct symbol), &bsearch_cmp); + + if (!result && require_imports) + { + int status; + char * demangled_result = + __cxa_demangle(export_symbols[export_idx].name + 1, NULL, NULL, &status); + fprintf(stderr, "exported name not in import list: %s\n", + demangled_result ? demangled_result : export_symbols[export_idx].name); +// fprintf(stderr, " : %s\n", export_symbols[export_idx].name); + if (demangled_result) { + free(demangled_result); + } + num_removed_syms++; + } + if (diff) + { + if (!result) + result = &export_symbols[export_idx]; + else + result = NULL; + } + if (result && !wild) + { + export_symbols[export_idx].flags |= kExported; + strtabsize += (export_symbols[export_idx].name_len + export_symbols[export_idx].indirect_len); + result_count++; + export_symbols[export_idx].list = &export_symbols[export_idx]; + export_symbols[export_idx].list_count = 1; + } + } + } + strtabpad = (strtabsize + 3) & ~3; + + if (require_imports && num_removed_syms) + { + err = kError; + goto finish; + } + + fd = open(output_name, O_WRONLY|O_CREAT|O_TRUNC, 0755); + if (-1 == fd) + { + perror("couldn't write output"); + err = kErrorFileAccess; + goto finish; + } + + struct symtab_command symcmd; + struct uuid_command uuidcmd; + + symcmd.cmd = LC_SYMTAB; + symcmd.cmdsize = sizeof(symcmd); + symcmd.symoff = sizeof(symcmd) + sizeof(uuidcmd); + symcmd.nsyms = result_count; + symcmd.strsize = strtabpad; + + uuidcmd.cmd = LC_UUID; + uuidcmd.cmdsize = sizeof(uuidcmd); + uuid_generate(uuidcmd.uuid); + + if (CPU_ARCH_ABI64 & target_arch->cputype) + { + struct mach_header_64 hdr; + hdr.magic = MH_MAGIC_64; + hdr.cputype = target_arch->cputype; + hdr.cpusubtype = target_arch->cpusubtype; + hdr.filetype = MH_KEXT_BUNDLE; + hdr.ncmds = 2; + hdr.sizeofcmds = sizeof(symcmd) + sizeof(uuidcmd); + hdr.flags = MH_INCRLINK; + + symcmd.symoff += sizeof(hdr); + symcmd.stroff = result_count * sizeof(struct nlist_64) + + symcmd.symoff; + + if (target_arch->byteorder != host_arch->byteorder) + swap_mach_header_64(&hdr, target_arch->byteorder); + err = writeFile(fd, &hdr, sizeof(hdr)); + } + else + { + struct mach_header hdr; + hdr.magic = 
MH_MAGIC; + hdr.cputype = target_arch->cputype; + hdr.cpusubtype = target_arch->cpusubtype; + hdr.filetype = (target_arch->cputype == CPU_TYPE_I386) ? MH_OBJECT : MH_KEXT_BUNDLE; + hdr.ncmds = 2; + hdr.sizeofcmds = sizeof(symcmd) + sizeof(uuidcmd); + hdr.flags = MH_INCRLINK; + + symcmd.symoff += sizeof(hdr); + symcmd.stroff = result_count * sizeof(struct nlist) + + symcmd.symoff; + + if (target_arch->byteorder != host_arch->byteorder) + swap_mach_header(&hdr, target_arch->byteorder); + err = writeFile(fd, &hdr, sizeof(hdr)); + } + + if (kErrorNone != err) + goto finish; + + if (target_arch->byteorder != host_arch->byteorder) { + swap_symtab_command(&symcmd, target_arch->byteorder); + swap_uuid_command(&uuidcmd, target_arch->byteorder); + } + err = writeFile(fd, &symcmd, sizeof(symcmd)); + if (kErrorNone != err) + goto finish; + err = writeFile(fd, &uuidcmd, sizeof(uuidcmd)); + if (kErrorNone != err) + goto finish; + + strx = 4; + for (export_idx = 0; export_idx < num_export_syms; export_idx++) + { + if (!export_symbols[export_idx].name) + continue; + if (!(kExported & export_symbols[export_idx].flags)) + continue; + + if (export_idx + && export_symbols[export_idx - 1].name + && !strcmp(export_symbols[export_idx - 1].name, export_symbols[export_idx].name)) + { + fprintf(stderr, "duplicate export: %s\n", export_symbols[export_idx - 1].name); + err = kErrorDuplicate; + goto finish; + } + + for (import_idx = 0; import_idx < export_symbols[export_idx].list_count; import_idx++) + { + + if (export_symbols[export_idx].list != &export_symbols[export_idx]) + { + printf("wild: %s, %s\n", export_symbols[export_idx].name, + export_symbols[export_idx].list[import_idx].name); + } + if (CPU_ARCH_ABI64 & target_arch->cputype) + { + struct nlist_64 nl; + + nl.n_sect = 0; + nl.n_desc = 0; + nl.n_un.n_strx = strx; + strx += export_symbols[export_idx].list[import_idx].name_len; + + if (export_symbols[export_idx].flags & kObsolete) { + nl.n_desc |= N_DESC_DISCARDED; + } + + if (export_symbols[export_idx].list[import_idx].indirect) + { + nl.n_type = N_INDR | N_EXT; + nl.n_value = strx; + strx += export_symbols[export_idx].list[import_idx].indirect_len; + } + else + { + nl.n_type = N_UNDF | N_EXT; + nl.n_value = 0; + } + + if (target_arch->byteorder != host_arch->byteorder) + swap_nlist_64(&nl, 1, target_arch->byteorder); + + err = writeFile(fd, &nl, sizeof(nl)); + } + else + { + struct nlist nl; + + nl.n_sect = 0; + nl.n_desc = 0; + nl.n_un.n_strx = strx; + strx += export_symbols[export_idx].list[import_idx].name_len; + + if (export_symbols[export_idx].flags & kObsolete) { + nl.n_desc |= N_DESC_DISCARDED; + } + + if (export_symbols[export_idx].list[import_idx].indirect) + { + nl.n_type = N_INDR | N_EXT; + nl.n_value = strx; + strx += export_symbols[export_idx].list[import_idx].indirect_len; + } + else + { + nl.n_type = N_UNDF | N_EXT; + nl.n_value = 0; + } + + if (target_arch->byteorder != host_arch->byteorder) + swap_nlist(&nl, 1, target_arch->byteorder); + + err = writeFile(fd, &nl, sizeof(nl)); + } + } + + if (kErrorNone != err) + goto finish; + } + + strx = sizeof(uint32_t); + err = writeFile(fd, &zero, strx); + if (kErrorNone != err) + goto finish; + + for (export_idx = 0; export_idx < num_export_syms; export_idx++) + { + if (!export_symbols[export_idx].name) + continue; + + for (import_idx = 0; import_idx < export_symbols[export_idx].list_count; import_idx++) + { + err = writeFile(fd, export_symbols[export_idx].list[import_idx].name, + export_symbols[export_idx].list[import_idx].name_len); + if (kErrorNone 
!= err) + goto finish; + if (export_symbols[export_idx].list[import_idx].indirect) + { + err = writeFile(fd, export_symbols[export_idx].list[import_idx].indirect, + export_symbols[export_idx].list[import_idx].indirect_len); + if (kErrorNone != err) + goto finish; + } + } + } + + err = writeFile(fd, &zero, strtabpad - strtabsize); + if (kErrorNone != err) + goto finish; + + close(fd); + + +finish: + for (filenum = 0; filenum < num_files; filenum++) { + // unmap file + if (files[filenum].mapped_size) + { + munmap((caddr_t)files[filenum].mapped, files[filenum].mapped_size); + files[filenum].mapped = 0; + files[filenum].mapped_size = 0; + } + + } + + if (kErrorNone != err) + { + if (output_name) + unlink(output_name); + exit(1); + } + else + exit(0); + return(0); +} + diff --git a/module/os/macos/kernel/version.plist b/module/os/macos/kernel/version.plist new file mode 100644 index 0000000000..93dfa2a162 --- /dev/null +++ b/module/os/macos/kernel/version.plist @@ -0,0 +1,16 @@ + + + + + BuildVersion + 1 + CFBundleShortVersionString + 12.0.0 + CFBundleVersion + 12.0.0 + ProjectName + xnu + SourceVersion + 2050007009000000 + + diff --git a/module/os/macos/kernel/zfs.exports b/module/os/macos/kernel/zfs.exports new file mode 100644 index 0000000000..82752554d0 --- /dev/null +++ b/module/os/macos/kernel/zfs.exports @@ -0,0 +1,32 @@ +_cpu_number +_fp_lookup +_fd_rdwr +_hostname +_kernel_memory_allocate +_virtual_space_start +_virtual_space_end +_vm_page_free_wanted +_vm_page_free_count +_vm_page_free_min +_vm_page_speculative_count +_VFS_ROOT +_vm_pool_low +_fp_drop +_fp_drop_written +_fo_read +_fo_write +_system_inshutdown +_cache_purgevfs +_vfs_context_kernel +_build_path +_kvtophys +__mh_execute_header +_gLoadedKextSummaries +_VNOP_LOOKUP +_vnode_notify +_vfs_get_notify_attributes +_kauth_cred_getgroups +_rootvnode +_cpuid_info +_vnode_iocount +_kx_qsort diff --git a/module/os/macos/spl/Makefile.am b/module/os/macos/spl/Makefile.am new file mode 100644 index 0000000000..4bdcf4aaec --- /dev/null +++ b/module/os/macos/spl/Makefile.am @@ -0,0 +1,59 @@ + +# Anyone remember why we made this a library? +libspl_la_CPPFLAGS= \ + -Wall \ + -nostdinc \ + -mkernel \ + -fno-builtin-printf \ + -D_KERNEL \ + -DKERNEL \ + -DKERNEL_PRIVATE \ + -DDRIVER_PRIVATE \ + -DAPPLE \ + -DNeXT \ + -I$(top_srcdir)/include/os/macos/spl \ + -I$(top_srcdir)/include \ + -I@KERNEL_HEADERS@/Headers \ + -I@KERNEL_HEADERS@/PrivateHeaders + +libspl_la_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@ + +libspl_la_LDFLAGS= \ + -Xlinker \ + -kext \ + -nostdlib \ + -lkmodc++ \ + -lkmod \ + -lcc_kext + +libspl_la_LIBS = -lnone + +# If we don't set this to nothing, it adds "-lz -liconv" +LIBS = + +noinst_LTLIBRARIES = libspl.la + +libspl_la_SOURCES = \ + spl-atomic.c \ + spl-condvar.c \ + spl-cred.c \ + spl-ddi.c \ + spl-err.c \ + spl-kmem.c \ + spl-kstat.c \ + spl-list.c \ + spl-mutex.c \ + spl-osx.c \ + spl-policy.c \ + spl-proc.c \ + spl-processor.c \ + spl-proc_list.c \ + spl-rwlock.c \ + spl-seg_kmem.c \ + spl-taskq.c \ + spl-thread.c \ + spl-time.c \ + spl-tsd.c \ + spl-vmem.c \ + spl-vnode.c \ + spl-xdr.c diff --git a/module/os/macos/spl/README.md b/module/os/macos/spl/README.md new file mode 100644 index 0000000000..cc8dbf288e --- /dev/null +++ b/module/os/macos/spl/README.md @@ -0,0 +1,14 @@ +The Solaris Porting Layer, SPL, is a macOS kernel module which provides a +compatibility layer used by the macOS port of Open ZFS. + +# Installation + +The latest version of the SPL is maintained as part of this repository. 
+Only when building ZFS version 1.9.4 or earlier must an external SPL release +be used. These releases can be found at: + + * Version 1.9.4: https://github.com/openzfsonosx/spl/tree/spl-1.9.4-release + +# Release + +The SPL is released under a CDDL license. diff --git a/module/os/macos/spl/spl-atomic.c b/module/os/macos/spl/spl-atomic.c new file mode 100644 index 0000000000..973f462fdf --- /dev/null +++ b/module/os/macos/spl/spl-atomic.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Solaris Porting Layer (SPL) Atomic Implementation. + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#include +#include +#include + + +#include +#include +#include +#include + +void * +atomic_cas_ptr(volatile void *target, void *cmp, void *new) +{ +#ifdef __LP64__ + return (void *)__sync_val_compare_and_swap((uint64_t *)target, + (uint64_t)cmp, (uint64_t)new); +#else + return (void *)__sync_val_compare_and_swap((uint32_t *)target, cmp, + new); +#endif +} diff --git a/module/os/macos/spl/spl-condvar.c b/module/os/macos/spl/spl-condvar.c new file mode 100644 index 0000000000..e8618161b3 --- /dev/null +++ b/module/os/macos/spl/spl-condvar.c @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + +#include +#include +#include + +/* + * cv_timedwait() is similar to cv_wait() except that it additionally expects + * a timeout value specified in ticks. When woken by cv_signal() or + * cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is + * returned. + * + * cv_timedwait_sig() behaves the same as cv_timedwait() but blocks + * interruptibly and can be woken by a signal (EINTR, ERESTART). When + * this occurs 0 is returned. + * + * cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait() + * and cv_timedwait_sig() which should be used when waiting for outstanding + * IO to complete. 
They are responsible for updating the iowait accounting + * when this is supported by the platform. + * + * cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution + * versions of cv_timedwait() and cv_timedwait_sig(). They expect the timeout + * to be specified as a hrtime_t allowing for timeouts of less than a tick. + * + * N.B. The return values differ slightly from the illumos implementation + * which returns the time remaining, instead of 1, when woken. They both + * return -1 on timeout. Consumers which need to know the time remaining + * are responsible for tracking it themselves. + */ + +#ifdef SPL_DEBUG_MUTEX +void spl_wdlist_settime(void *mpleak, uint64_t value); +#endif + +void +spl_cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) +{ +} + +void +spl_cv_destroy(kcondvar_t *cvp) +{ +} + +void +spl_cv_signal(kcondvar_t *cvp) +{ + wakeup_one((caddr_t)cvp); +} + +void +spl_cv_broadcast(kcondvar_t *cvp) +{ + wakeup((caddr_t)cvp); +} + + +/* + * Block on the indicated condition variable and + * release the associated mutex while blocked. + */ +int +spl_cv_wait(kcondvar_t *cvp, kmutex_t *mp, int flags, const char *msg) +{ + int result; + + if (msg != NULL && msg[0] == '&') + ++msg; /* skip over '&' prefixes */ + +#ifdef SPL_DEBUG_MUTEX + spl_wdlist_settime(mp->leak, 0); +#endif + mp->m_owner = NULL; + result = msleep(cvp, (lck_mtx_t *)&mp->m_lock, flags, msg, 0); + mp->m_owner = current_thread(); +#ifdef SPL_DEBUG_MUTEX + spl_wdlist_settime(mp->leak, gethrestime_sec()); +#endif + + /* + * 1 - condvar got cv_signal()/cv_broadcast() + * 0 - received signal (kill -signal) + */ + return (result == EINTR ? 0 : 1); +} + +/* + * Same as cv_wait except the thread will unblock at 'tim' + * (an absolute time) if it hasn't already unblocked. + * + * Returns 1 when woken by cv_signal()/cv_broadcast(), 0 when interrupted + * by a signal, and -1 on timeout (the time remaining is not returned; + * see the note at the top of this file). + */ +int +spl_cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t tim, int flags, + const char *msg) +{ + struct timespec ts; + int result; + + if (msg != NULL && msg[0] == '&') + ++msg; /* skip over '&' prefixes */ + + clock_t timenow = zfs_lbolt(); + + /* Already expired? */ + if (timenow >= tim) + return (-1); + + tim -= timenow; + + ts.tv_sec = (tim / hz); + ts.tv_nsec = (tim % hz) * NSEC_PER_SEC / hz; + + /* Both sec and nsec zero is a blocking call in XNU. (Not poll) */ + if (ts.tv_sec == 0 && ts.tv_nsec == 0) + ts.tv_nsec = 1000; + +#ifdef SPL_DEBUG_MUTEX + spl_wdlist_settime(mp->leak, 0); +#endif + + mp->m_owner = NULL; + result = msleep(cvp, (lck_mtx_t *)&mp->m_lock, flags, msg, &ts); + + /* msleep grabs the mutex, even if timeout/signal */ + mp->m_owner = current_thread(); + +#ifdef SPL_DEBUG_MUTEX + spl_wdlist_settime(mp->leak, gethrestime_sec()); +#endif + + switch (result) { + + case EINTR: /* Signal */ + case ERESTART: + return (0); + + case EWOULDBLOCK: /* Timeout */ + return (-1); + } + + return (1); +} + + +/* + * Compatibility wrapper for the cv_timedwait_hires() Illumos interface. + */ +int +cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, + hrtime_t res, int flag) +{ + struct timespec ts; + int result; + + if (res > 1) { + /* + * Align expiration to the specified resolution. + */ + if (flag & CALLOUT_FLAG_ROUNDUP) + tim += res - 1; + tim = (tim / res) * res; + } + + if ((flag & CALLOUT_FLAG_ABSOLUTE)) { + hrtime_t timenow = gethrtime(); + + /* Already expired?
*/ + if (timenow >= tim) + return (-1); + + tim -= timenow; + } + + ts.tv_sec = NSEC2SEC(tim); + ts.tv_nsec = tim - SEC2NSEC(ts.tv_sec); + + /* Both sec and nsec set to zero is a blocking call in XNU. */ + if (ts.tv_sec == 0 && ts.tv_nsec == 0) + ts.tv_nsec = 1000; + +#ifdef SPL_DEBUG_MUTEX + spl_wdlist_settime(mp->leak, 0); +#endif + + mp->m_owner = NULL; + result = msleep(cvp, (lck_mtx_t *)&mp->m_lock, + flag, "cv_timedwait_hires", &ts); + mp->m_owner = current_thread(); +#ifdef SPL_DEBUG_MUTEX + spl_wdlist_settime(mp->leak, gethrestime_sec()); +#endif + + switch (result) { + + case EINTR: /* Signal */ + case ERESTART: + return (0); + + case EWOULDBLOCK: /* Timeout */ + return (-1); + } + + return (1); +} diff --git a/module/os/macos/spl/spl-cred.c b/module/os/macos/spl/spl-cred.c new file mode 100644 index 0000000000..e7d9da0360 --- /dev/null +++ b/module/os/macos/spl/spl-cred.c @@ -0,0 +1,166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#include +#include +#include + +/* Return the effective user id */ +uid_t +crgetuid(const cred_t *cr) +{ + if (!cr) + return (0); + return (kauth_cred_getuid((kauth_cred_t)cr)); +} + +/* Return the real user id */ +uid_t +crgetruid(const cred_t *cr) +{ + if (!cr) + return (0); + return (kauth_cred_getruid((kauth_cred_t)cr)); +} + +/* Return the saved user id */ +uid_t +crgetsuid(const cred_t *cr) +{ + if (!cr) + return (0); + return (kauth_cred_getsvuid((kauth_cred_t)cr)); +} + +/* Return the filesystem user id */ +uid_t +crgetfsuid(const cred_t *cr) +{ + if (!cr) + return (0); + return (-1); +} + +/* Return the effective group id */ +gid_t +crgetgid(const cred_t *cr) +{ + if (!cr) + return (0); + return (kauth_cred_getgid((kauth_cred_t)cr)); +} + +/* Return the real group id */ +gid_t +crgetrgid(const cred_t *cr) +{ + if (!cr) + return (0); + return (kauth_cred_getrgid((kauth_cred_t)cr)); +} + +/* Return the saved group id */ +gid_t +crgetsgid(const cred_t *cr) +{ + if (!cr) + return (0); + return (kauth_cred_getsvgid((kauth_cred_t)cr)); +} + +/* Return the filesystem group id */ +gid_t +crgetfsgid(const cred_t *cr) +{ + return (-1); +} + + +extern int kauth_cred_getgroups(kauth_cred_t _cred, gid_t *_groups, + int *_groupcount); +/* + * Unfortunately, to get the count of groups, we have to call XNU which + * memcpy's them over. No real clean way to get around that, but at least + * these calls are done sparingly. 
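+ *
+ * A minimal usage sketch (hypothetical caller, not part of this change)
+ * pairing the count with the allocated copy of the group list:
+ *
+ *	int n = crgetngroups(cr);
+ *	gid_t *gids = crgetgroups(cr);
+ *	if (gids != NULL) {
+ *		... inspect gids[0 .. n-1] ...
+ *		crgetgroupsfree(gids);
+ *	}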
+ */ +int +crgetngroups(const cred_t *cr) +{ + gid_t gids[NGROUPS]; + int count = NGROUPS; + int ret; + + ret = kauth_cred_getgroups((kauth_cred_t)cr, gids, &count); + + if (!ret) + return (count); + + return (0); +} + + +/* + * We always allocate NGROUPs here, since we don't know how many there will + * be until after the call. Unlike IllumOS, the ptr returned is allocated + * and must be returned by a call to crgetgroupsfree(). + */ +gid_t * +crgetgroups(const cred_t *cr) +{ + gid_t *gids; + int count = NGROUPS; + + gids = kmem_zalloc(sizeof (gid_t) * count, KM_SLEEP); + if (!gids) + return (NULL); + + kauth_cred_getgroups((kauth_cred_t)cr, gids, &count); + + return (gids); +} + +void +crgetgroupsfree(gid_t *gids) +{ + if (!gids) + return; + kmem_free(gids, sizeof (gid_t) * NGROUPS); +} + +/* + * Return true if "cr" belongs in group "gid". + */ +int +spl_cred_ismember_gid(cred_t *cr, gid_t gid) +{ + int ret = 0; // Is not member. + kauth_cred_ismember_gid((kauth_cred_t)cr, gid, &ret); + if (ret == 1) + return (TRUE); + return (FALSE); +} diff --git a/module/os/macos/spl/spl-ddi.c b/module/os/macos/spl/spl-ddi.c new file mode 100644 index 0000000000..0f74af6e9f --- /dev/null +++ b/module/os/macos/spl/spl-ddi.c @@ -0,0 +1,383 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include + + +/* + * Allocate a set of pointers to 'n_items' objects of size 'size' + * bytes. Each pointer is initialized to nil. + * + * The 'size' and 'n_items' values are stashed in the opaque + * handle returned to the caller. + * + * This implementation interprets 'set of pointers' to mean 'array + * of pointers' but note that nothing in the interface definition + * precludes an implementation that uses, for example, a linked list. + * However there should be a small efficiency gain from using an array + * at lookup time. + * + * NOTE As an optimization, we make our growable array allocations in + * powers of two (bytes), since that's how much kmem_alloc (currently) + * gives us anyway. It should save us some free/realloc's .. + * + * As a further optimization, we make the growable array start out + * with MIN_N_ITEMS in it. 
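+ *
+ * As a rough usage sketch (names such as my_state_t, statep and instance
+ * are hypothetical, not part of this change), the expected call sequence
+ * for this soft-state API is:
+ *
+ *	static void *statep;
+ *
+ *	ddi_soft_state_init(&statep, sizeof (my_state_t), 4);
+ *	if (ddi_soft_state_zalloc(statep, instance) == DDI_SUCCESS) {
+ *		my_state_t *sp = ddi_get_soft_state(statep, instance);
+ *		... use sp, then ...
+ *		ddi_soft_state_free(statep, instance);
+ *	}
+ *	ddi_soft_state_fini(&statep);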
+ */ + + +int +ddi_soft_state_init(void **state_p, size_t size, size_t n_items) +{ + struct i_ddi_soft_state *ss; + + if (state_p == NULL || *state_p != NULL || size == 0) + return (EINVAL); + + ss = kmem_zalloc(sizeof (*ss), KM_SLEEP); + mutex_init(&ss->lock, NULL, MUTEX_DRIVER, NULL); + ss->size = size; + + if (n_items < MIN_N_ITEMS) + ss->n_items = MIN_N_ITEMS; + else { + int bitlog; + + if ((bitlog = ddi_fls(n_items)) == ddi_ffs(n_items)) + bitlog--; + ss->n_items = 1 << bitlog; + } + + ASSERT(ss->n_items >= n_items); + + ss->array = kmem_zalloc(ss->n_items * sizeof (void *), KM_SLEEP); + + *state_p = ss; + + return (0); +} + + +/* + * Allocate a state structure of size 'size' to be associated + * with item 'item'. + * + * In this implementation, the array is extended to + * allow the requested offset, if needed. + */ +int +ddi_soft_state_zalloc(void *state, int item) +{ + struct i_ddi_soft_state *ss; + void **array; + void *new_element; + + if ((ss = state) == NULL || item < 0) + return (DDI_FAILURE); + + mutex_enter(&ss->lock); + if (ss->size == 0) { + mutex_exit(&ss->lock); + cmn_err(CE_WARN, "ddi_soft_state_zalloc: bad handle"); + return (DDI_FAILURE); + } + + array = ss->array; /* NULL if ss->n_items == 0 */ + ASSERT(ss->n_items != 0 && array != NULL); + + /* + * refuse to tread on an existing element + */ + if (item < ss->n_items && array[item] != NULL) { + mutex_exit(&ss->lock); + return (DDI_FAILURE); + } + + /* + * Allocate a new element to plug in + */ + new_element = kmem_zalloc(ss->size, KM_SLEEP); + + /* + * Check if the array is big enough, if not, grow it. + */ + if (item >= ss->n_items) { + void **new_array; + size_t new_n_items; + struct i_ddi_soft_state *dirty; + + /* + * Allocate a new array of the right length, copy + * all the old pointers to the new array, then + * if it exists at all, put the old array on the + * dirty list. + * + * Note that we can't kmem_free() the old array. + * + * Why -- well the 'get' operation is 'mutex-free', so we + * can't easily catch a suspended thread that is just about + * to dereference the array we just grew out of. So we + * cons up a header and put it on a list of 'dirty' + * pointer arrays. (Dirty in the sense that there may + * be suspended threads somewhere that are in the middle + * of referencing them). Fortunately, we -can- garbage + * collect it all at ddi_soft_state_fini time. + */ + new_n_items = ss->n_items; + while (new_n_items < (1 + item)) + new_n_items <<= 1; /* double array size .. */ + + ASSERT(new_n_items >= (1 + item)); /* sanity check! */ + + new_array = kmem_zalloc(new_n_items * sizeof (void *), + KM_SLEEP); + /* + * Copy the pointers into the new array + */ + bcopy(array, new_array, ss->n_items * sizeof (void *)); + + /* + * Save the old array on the dirty list + */ + dirty = kmem_zalloc(sizeof (*dirty), KM_SLEEP); + dirty->array = ss->array; + dirty->n_items = ss->n_items; + dirty->next = ss->next; + ss->next = dirty; + + ss->array = (array = new_array); + ss->n_items = new_n_items; + } + + ASSERT(array != NULL && item < ss->n_items && array[item] == NULL); + + array[item] = new_element; + + mutex_exit(&ss->lock); + return (DDI_SUCCESS); +} + + +/* + * Fetch a pointer to the allocated soft state structure. + * + * This is designed to be cheap. + * + * There's an argument that there should be more checking for + * nil pointers and out of bounds on the array.. but we do a lot + * of that in the alloc/free routines. + * + * An array has the convenience that we don't need to lock read-access + * to it c.f. 
a linked list. However our "expanding array" strategy + * means that we should hold a readers lock on the i_ddi_soft_state + * structure. + * + * However, from a performance viewpoint, we need to do it without + * any locks at all -- this also makes it a leaf routine. The algorithm + * is 'lock-free' because we only discard the pointer arrays at + * ddi_soft_state_fini() time. + */ +void * +ddi_get_soft_state(void *state, int item) +{ + struct i_ddi_soft_state *ss = state; + + ASSERT(ss != NULL && item >= 0); + + if (item < ss->n_items && ss->array != NULL) + return (ss->array[item]); + return (NULL); +} + +/* + * Free the state structure corresponding to 'item.' Freeing an + * element that has either gone or was never allocated is not + * considered an error. Note that we free the state structure, but + * we don't shrink our pointer array, or discard 'dirty' arrays, + * since even a few pointers don't really waste too much memory. + * + * Passing an item number that is out of bounds, or a null pointer will + * provoke an error message. + */ +void +ddi_soft_state_free(void *state, int item) +{ + struct i_ddi_soft_state *ss; + void **array; + void *element; + static char msg[] = "ddi_soft_state_free:"; + + if ((ss = state) == NULL) { + cmn_err(CE_WARN, "%s null handle", + msg); + return; + } + + element = NULL; + + mutex_enter(&ss->lock); + + if ((array = ss->array) == NULL || ss->size == 0) { + cmn_err(CE_WARN, "%s bad handle", + msg); + } else if (item < 0 || item >= ss->n_items) { + cmn_err(CE_WARN, "%s item %d not in range [0..%lu]", + msg, item, ss->n_items - 1); + } else if (array[item] != NULL) { + element = array[item]; + array[item] = NULL; + } + + mutex_exit(&ss->lock); + + if (element) + kmem_free(element, ss->size); +} + + +/* + * Free the entire set of pointers, and any + * soft state structures contained therein. + * + * Note that we don't grab the ss->lock mutex, even though + * we're inspecting the various fields of the data structure. + * + * There is an implicit assumption that this routine will + * never run concurrently with any of the above on this + * particular state structure i.e. by the time the driver + * calls this routine, there should be no other threads + * running in the driver. + */ +void +ddi_soft_state_fini(void **state_p) +{ + struct i_ddi_soft_state *ss, *dirty; + int item; + static char msg[] = "ddi_soft_state_fini:"; + + if (state_p == NULL || (ss = *state_p) == NULL) + return; + + if (ss->size == 0) { + cmn_err(CE_WARN, "%s bad handle", + msg); + return; + } + + if (ss->n_items > 0) { + for (item = 0; item < ss->n_items; item++) + ddi_soft_state_free(ss, item); + kmem_free(ss->array, ss->n_items * sizeof (void *)); + } + + /* + * Now delete any dirty arrays from previous 'grow' operations + */ + for (dirty = ss->next; dirty; dirty = ss->next) { + ss->next = dirty->next; + kmem_free(dirty->array, dirty->n_items * sizeof (void *)); + kmem_free(dirty, sizeof (*dirty)); + } + + mutex_destroy(&ss->lock); + kmem_free(ss, sizeof (*ss)); + + *state_p = NULL; +} + +int +ddi_create_minor_node(dev_info_t *dip, char *name, int spec_type, + minor_t minor_num, char *node_type, int flag) +{ + dev_t dev; + int error = 0; + char *r, *dup; + + dev = makedev(flag, minor_num); + dip->dev = dev; + + /* + * http://lists.apple.com/archives/darwin-kernel/2007/Nov/msg00038.html + * + * devfs_make_name() has an off-by-one error when using directories + * and it appears Apple does not want to fix it. 
+ * + * We then change "/" to "_" and create more Apple-like /dev names + * + */ + MALLOC(dup, char *, strlen(name)+1, M_TEMP, M_WAITOK); + if (dup == NULL) + return (ENOMEM); + bcopy(name, dup, strlen(name)); + dup[strlen(name)] = '\0'; + + for (r = dup; + (r = strchr(r, '/')); + *r = '_') + /* empty */; + + dip->devc = NULL; + dip->devb = NULL; + + if (spec_type == S_IFCHR) + dip->devc = devfs_make_node(dev, DEVFS_CHAR, + UID_ROOT, GID_OPERATOR, + 0600, "rdisk_%s", dup); + else + dip->devb = devfs_make_node(dev, DEVFS_BLOCK, + UID_ROOT, GID_OPERATOR, + 0600, "disk_%s", dup); + FREE(dup, M_TEMP); + + return (error); +} + +void +ddi_remove_minor_node(dev_info_t *dip, char *name) +{ + if (dip->devc) { + devfs_remove(dip->devc); + dip->devc = NULL; + } + if (dip->devb) { + devfs_remove(dip->devb); + dip->devb = NULL; + } +} + +int +strspn(const char *string, + register char *charset) +{ + register const char *p, *q; + + for (q = string; *q != '\0'; ++q) { + for (p = charset; *p != '\0' && *p != *q; ++p) + ; + if (*p == '\0') + break; + } + return (q-string); +} diff --git a/module/os/macos/spl/spl-debug.c b/module/os/macos/spl/spl-debug.c new file mode 100644 index 0000000000..28ec1612d4 --- /dev/null +++ b/module/os/macos/spl/spl-debug.c @@ -0,0 +1,10 @@ +#include + + + +/* Debug log support enabled */ +__attribute__((noinline)) int assfail(const char *str, const char *file, + unsigned int line) __attribute__((optnone)) +{ + return (1); /* Must return true for ASSERT macro */ +} diff --git a/module/os/macos/spl/spl-err.c b/module/os/macos/spl/spl-err.c new file mode 100644 index 0000000000..455bf2c8b9 --- /dev/null +++ b/module/os/macos/spl/spl-err.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + +#include +#include +#include + +void +vcmn_err(int ce, const char *fmt, va_list ap) +{ + char msg[MAXMSGLEN]; + + vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); + + switch (ce) { + case CE_IGNORE: + break; + case CE_CONT: + printf("%s", msg); + break; + case CE_NOTE: + printf("SPL: Notice: %s\n", msg); + break; + case CE_WARN: + printf("SPL: Warning: %s\n", msg); + break; + case CE_PANIC: + PANIC("%s", msg); + break; + } +} /* vcmn_err() */ + +void +cmn_err(int ce, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vcmn_err(ce, fmt, ap); + va_end(ap); +} /* cmn_err() */ + + +int +spl_panic(const char *file, const char *func, int line, const char *fmt, ...) 
+{ + char msg[MAXMSGLEN]; + va_list ap; + + va_start(ap, fmt); + (void) vsnprintf(msg, sizeof (msg), fmt, ap); + va_end(ap); + + printf("%s", msg); + panic("%s", msg); + + /* Unreachable */ + return (1); +} diff --git a/module/os/macos/spl/spl-kmem.c b/module/os/macos/spl/spl-kmem.c new file mode 100644 index 0000000000..98a89823d3 --- /dev/null +++ b/module/os/macos/spl/spl-kmem.c @@ -0,0 +1,6828 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013, 2020 Jorgen Lundman + * Copyright (C) 2014 Brendon Humphrey + * Copyright (C) 2017 Sean Doran + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// =============================================================== +// Options +// =============================================================== +// #define PRINT_CACHE_STATS 1 + +// Uncomment to turn on kmems' debug features. +#define dprintf if (0) printf + +// =============================================================== +// OS Interface +// =============================================================== + +// This variable is a count of the number of threads +// blocked waiting for memory pages to become free. +// We are using wake indications on this event as a +// indication of paging activity, and therefore as a +// proxy to the machine experiencing memory pressure. +// +// xnu vm variables + +// 0 by default smd +extern volatile unsigned int vm_page_free_wanted; + +// 3500 kern.vm_page_free_min, rarely changes +extern unsigned int vm_page_free_min; + +// will tend to vm_page_free_min smd +extern volatile unsigned int vm_page_free_count; + +#define SMALL_PRESSURE_INCURSION_PAGES (vm_page_free_min >> 5) + +static kcondvar_t spl_free_thread_cv; +static kmutex_t spl_free_thread_lock; +static boolean_t spl_free_thread_exit; +static volatile _Atomic int64_t spl_free; +int64_t spl_free_delta_ema; + +static volatile _Atomic int64_t spl_free_manual_pressure = 0; +static volatile _Atomic boolean_t spl_free_fast_pressure = FALSE; +static _Atomic bool spl_free_maybe_reap_flag = false; +static _Atomic uint64_t spl_free_last_pressure = 0; + +// Start and end address of kernel memory +extern vm_offset_t virtual_space_start; +extern vm_offset_t virtual_space_end; + +// Can be polled to determine if the VM is experiecing +// a shortage of free pages. +extern int vm_pool_low(void); + +// Which CPU are we executing on? 
+extern int cpu_number(void); + +// Invoke the kernel debugger +extern void Debugger(const char *message); + +// Read from /dev/random +void read_random(void *buffer, uint_t numbytes); + +// =============================================================== +// Non Illumos Variables +// =============================================================== + +// Flag to cause tasks and threads to terminate as +// the kmem module is preparing to unload. +static int shutting_down = 0; + +// Amount of RAM in machine +uint64_t physmem = 0; + +// Size in bytes of the memory allocated in seg_kmem +extern uint64_t segkmem_total_mem_allocated; + +// Number of active threads +extern uint64_t zfs_threads; +extern uint64_t zfs_active_mutex; +extern uint64_t zfs_active_rwlock; + +extern uint64_t total_memory; +extern uint64_t real_total_memory; + +#define MULT 1 + +static const char *KMEM_VA_PREFIX = "kmem_va"; +static const char *KMEM_MAGAZINE_PREFIX = "kmem_magazine_"; + +// =============================================================== +// Illumos Variables +// =============================================================== + +struct kmem_cache_kstat { + kstat_named_t kmc_buf_size; + kstat_named_t kmc_align; + kstat_named_t kmc_chunk_size; + kstat_named_t kmc_slab_size; + kstat_named_t kmc_alloc; + kstat_named_t kmc_alloc_fail; + kstat_named_t kmc_free; + kstat_named_t kmc_depot_alloc; + kstat_named_t kmc_depot_free; + kstat_named_t kmc_depot_contention; + kstat_named_t kmc_slab_alloc; + kstat_named_t kmc_slab_free; + kstat_named_t kmc_buf_constructed; + kstat_named_t kmc_buf_avail; + kstat_named_t kmc_buf_inuse; + kstat_named_t kmc_buf_total; + kstat_named_t kmc_buf_max; + kstat_named_t kmc_slab_create; + kstat_named_t kmc_slab_destroy; + kstat_named_t kmc_vmem_source; + kstat_named_t kmc_hash_size; + kstat_named_t kmc_hash_lookup_depth; + kstat_named_t kmc_hash_rescale; + kstat_named_t kmc_full_magazines; + kstat_named_t kmc_empty_magazines; + kstat_named_t kmc_magazine_size; + kstat_named_t kmc_reap; /* number of kmem_cache_reap() calls */ + kstat_named_t kmc_defrag; /* attempts to defrag all partial slabs */ + kstat_named_t kmc_scan; /* attempts to defrag one partial slab */ + kstat_named_t kmc_move_callbacks; /* sum of yes, no, later, dn, dk */ + kstat_named_t kmc_move_yes; + kstat_named_t kmc_move_no; + kstat_named_t kmc_move_later; + kstat_named_t kmc_move_dont_need; + kstat_named_t kmc_move_dont_know; /* obj unrecognized by client ... */ + kstat_named_t kmc_move_hunt_found; /* ... 
but found in mag layer */ + kstat_named_t kmc_move_slabs_freed; /* slabs freed by consolidator */ + kstat_named_t kmc_move_reclaimable; /* buffers, if consolidator ran */ + kstat_named_t kmc_no_vba_success; + kstat_named_t kmc_no_vba_fail; + kstat_named_t kmc_arc_no_grow_set; + kstat_named_t kmc_arc_no_grow; +} kmem_cache_kstat = { + { "buf_size", KSTAT_DATA_UINT64 }, + { "align", KSTAT_DATA_UINT64 }, + { "chunk_size", KSTAT_DATA_UINT64 }, + { "slab_size", KSTAT_DATA_UINT64 }, + { "alloc", KSTAT_DATA_UINT64 }, + { "alloc_fail", KSTAT_DATA_UINT64 }, + { "free", KSTAT_DATA_UINT64 }, + { "depot_alloc", KSTAT_DATA_UINT64 }, + { "depot_free", KSTAT_DATA_UINT64 }, + { "depot_contention", KSTAT_DATA_UINT64 }, + { "slab_alloc", KSTAT_DATA_UINT64 }, + { "slab_free", KSTAT_DATA_UINT64 }, + { "buf_constructed", KSTAT_DATA_UINT64 }, + { "buf_avail", KSTAT_DATA_UINT64 }, + { "buf_inuse", KSTAT_DATA_UINT64 }, + { "buf_total", KSTAT_DATA_UINT64 }, + { "buf_max", KSTAT_DATA_UINT64 }, + { "slab_create", KSTAT_DATA_UINT64 }, + { "slab_destroy", KSTAT_DATA_UINT64 }, + { "vmem_source", KSTAT_DATA_UINT64 }, + { "hash_size", KSTAT_DATA_UINT64 }, + { "hash_lookup_depth", KSTAT_DATA_UINT64 }, + { "hash_rescale", KSTAT_DATA_UINT64 }, + { "full_magazines", KSTAT_DATA_UINT64 }, + { "empty_magazines", KSTAT_DATA_UINT64 }, + { "magazine_size", KSTAT_DATA_UINT64 }, + { "reap", KSTAT_DATA_UINT64 }, + { "defrag", KSTAT_DATA_UINT64 }, + { "scan", KSTAT_DATA_UINT64 }, + { "move_callbacks", KSTAT_DATA_UINT64 }, + { "move_yes", KSTAT_DATA_UINT64 }, + { "move_no", KSTAT_DATA_UINT64 }, + { "move_later", KSTAT_DATA_UINT64 }, + { "move_dont_need", KSTAT_DATA_UINT64 }, + { "move_dont_know", KSTAT_DATA_UINT64 }, + { "move_hunt_found", KSTAT_DATA_UINT64 }, + { "move_slabs_freed", KSTAT_DATA_UINT64 }, + { "move_reclaimable", KSTAT_DATA_UINT64 }, + { "no_vba_success", KSTAT_DATA_UINT64 }, + { "no_vba_fail", KSTAT_DATA_UINT64 }, + { "arc_no_grow_set", KSTAT_DATA_UINT64 }, + { "arc_no_grow", KSTAT_DATA_UINT64 }, +}; + +static kmutex_t kmem_cache_kstat_lock; + +/* + * The default set of caches to back kmem_alloc(). + * These sizes should be reevaluated periodically. + * + * We want allocations that are multiples of the coherency granularity + * (64 bytes) to be satisfied from a cache which is a multiple of 64 + * bytes, so that it will be 64-byte aligned. For all multiples of 64, + * the next 1 greater than or equal to it must be a + * multiple of 64. + * + * We split the table into two sections: size <= 4k and size > 4k. This + * saves a lot of space and cache footprint in our cache tables. 
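+ *
+ * For example (illustrative): with this table a 72-byte kmem_alloc()
+ * request is satisfied from the 80-byte (5 * 16) cache and a 100-byte
+ * request from the 112-byte (7 * 16) cache, i.e. the smallest listed
+ * size that fits.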
+ */ +static const int kmem_alloc_sizes[] = { + 1 * 8, + 2 * 8, + 3 * 8, + 4 * 8, 5 * 8, 6 * 8, 7 * 8, + 4 * 16, 5 * 16, 6 * 16, 7 * 16, + 4 * 32, 5 * 32, 6 * 32, 7 * 32, + 4 * 64, 5 * 64, 6 * 64, 7 * 64, + 4 * 128, 9*64, 5 * 128, 6 * 128, 13*64, 7 * 128, + P2ALIGN(8192 / 8, 64), + P2ALIGN(8192 / 7, 64), + P2ALIGN(8192 / 6, 64), + P2ALIGN(8192 / 5, 64), + P2ALIGN(8192 / 4, 64), + P2ALIGN(8192 / 3, 64), + P2ALIGN(8192 / 2, 64), +}; + +static const int kmem_big_alloc_sizes[] = { + 2 * 4096, 3 * 4096, + 2 * 8192, 3 * 8192, + 4 * 8192, 5 * 8192, 6 * 8192, 7 * 8192, + 8 * 8192, 9 * 8192, 10 * 8192, 11 * 8192, + 12 * 8192, 13 * 8192, 14 * 8192, 15 * 8192, + 16 * 8192 +}; + +#define KMEM_MAXBUF 4096 +#define KMEM_BIG_MAXBUF_32BIT 32768 +#define KMEM_BIG_MAXBUF 131072 + +#define KMEM_BIG_MULTIPLE 4096 /* big_alloc_sizes must be a multiple */ +#define KMEM_BIG_SHIFT 12 /* lg(KMEM_BIG_MULTIPLE) */ + +static kmem_cache_t *kmem_alloc_table[KMEM_MAXBUF >> KMEM_ALIGN_SHIFT]; +static kmem_cache_t *kmem_big_alloc_table[KMEM_BIG_MAXBUF >> KMEM_BIG_SHIFT]; + +#define KMEM_ALLOC_TABLE_MAX (KMEM_MAXBUF >> KMEM_ALIGN_SHIFT) +static size_t kmem_big_alloc_table_max = 0; /* # of filled elements */ + +static kmem_magtype_t kmem_magtype[] = { + { 1, 8, 3200, 65536 }, + { 3, 16, 256, 32768 }, + { 7, 32, 64, 16384 }, + { 15, 64, 0, 8192 }, + { 31, 64, 0, 4096 }, + { 47, 64, 0, 2048 }, + { 63, 64, 0, 1024 }, + { 95, 64, 0, 512 }, + { 143, 64, 0, 0 }, +}; + +static uint32_t kmem_reaping; +static uint32_t kmem_reaping_idspace; + +/* + * kmem tunables + */ +static struct timespec kmem_reap_interval = {15, 0}; +int kmem_depot_contention = 3; /* max failed tryenters per real interval */ +pgcnt_t kmem_reapahead = 0; /* start reaping N pages before pageout */ +int kmem_panic = 1; /* whether to panic on error */ +int kmem_logging = 0; /* kmem_log_enter() override */ +uint32_t kmem_mtbf = 0; /* mean time between failures [default: off] */ +size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */ +size_t kmem_content_log_size; /* content log size [2% of memory] */ +size_t kmem_failure_log_size; /* failure log [4 pages per CPU] */ +size_t kmem_slab_log_size; /* slab create log [4 pages per CPU] */ +size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */ +size_t kmem_lite_minsize = 0; /* minimum buffer size for KMF_LITE */ +size_t kmem_lite_maxalign = 8192; /* maximum buffer alignment for KMF_LITE */ +int kmem_lite_pcs = 4; /* number of PCs to store in KMF_LITE mode */ +size_t kmem_maxverify; /* maximum bytes to inspect in debug routines */ +size_t kmem_minfirewall; /* hardware-enforced redzone threshold */ + +size_t kmem_max_cached = KMEM_BIG_MAXBUF; /* maximum kmem_alloc cache */ + +/* + * Be aware that KMF_AUDIT does not release memory, and you will eventually + * grind to a halt. But it is useful to enable if you can trigger a memory + * fault, and wish to see the calling stack. 
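+ *
+ * A sketch of how these flags might be used while debugging (not a
+ * recommendation for production builds): set, for example,
+ *
+ *	int kmem_flags = KMF_DEADBEEF | KMF_REDZONE | KMF_CONTENTS | KMF_AUDIT;
+ *
+ * in a DEBUG build, reproduce the fault, and then inspect
+ * kmem_panic_info and the transaction/content logs for the offending
+ * allocation's stack.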
+ */ +#ifdef DEBUG +// can be 0 or KMF_LITE +// or KMF_DEADBEEF | KMF_REDZONE | KMF_CONTENTS +// with or without KMF_AUDIT +int kmem_flags = KMF_DEADBEEF | KMF_REDZONE | KMF_LITE; +#else +int kmem_flags = 0; +#endif +int kmem_ready; + +static kmem_cache_t *kmem_slab_cache; +static kmem_cache_t *kmem_bufctl_cache; +static kmem_cache_t *kmem_bufctl_audit_cache; + +static kmutex_t kmem_cache_lock; /* inter-cache linkage only */ +static list_t kmem_caches; +extern vmem_t *heap_arena; +static taskq_t *kmem_taskq; +static kmutex_t kmem_flags_lock; +static vmem_t *kmem_metadata_arena; +static vmem_t *kmem_msb_arena; /* arena for metadata caches */ +static vmem_t *kmem_cache_arena; +static vmem_t *kmem_hash_arena; +static vmem_t *kmem_log_arena; +static vmem_t *kmem_oversize_arena; +static vmem_t *kmem_va_arena; +static vmem_t *kmem_default_arena; +static vmem_t *kmem_firewall_arena; + +/* + * kmem slab consolidator thresholds (tunables) + */ +size_t kmem_frag_minslabs = 101; /* minimum total slabs */ +size_t kmem_frag_numer = 1; /* free buffers (numerator) */ +size_t kmem_frag_denom = KMEM_VOID_FRACTION; /* buffers (denominator) */ +/* + * Maximum number of slabs from which to move buffers during a single + * maintenance interval while the system is not low on memory. + */ +size_t kmem_reclaim_max_slabs = 4; // smd 1 +/* + * Number of slabs to scan backwards from the end of the partial slab list + * when searching for buffers to relocate. + */ +size_t kmem_reclaim_scan_range = 48; // smd 12 + +/* consolidator knobs */ +static boolean_t kmem_move_noreap; +static boolean_t kmem_move_blocked; +static boolean_t kmem_move_fulltilt; +static boolean_t kmem_move_any_partial; + +#ifdef DEBUG +/* + * kmem consolidator debug tunables: + * Ensure code coverage by occasionally running the consolidator even when the + * caches are not fragmented (they may never be). These intervals are mean time + * in cache maintenance intervals (kmem_cache_update). 
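+ *
+ * With the 15-second kmem_reap_interval defined above, the defaults
+ * below (kmem_mtb_move = 20, kmem_mtb_reap = 240) correspond to forcing
+ * one slab defrag roughly every 5 minutes and a full defrag pass
+ * roughly every hour, as noted next to each tunable.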
+ */ +uint32_t kmem_mtb_move = 20; /* defrag 1 slab (~5min) */ +uint32_t kmem_mtb_reap = 240; /* defrag all slabs (~1hrs) */ +uint32_t kmem_mtb_reap_count = 0; +#endif /* DEBUG */ + +static kmem_cache_t *kmem_defrag_cache; +static kmem_cache_t *kmem_move_cache; +static taskq_t *kmem_move_taskq; + +static void kmem_cache_scan(kmem_cache_t *); +static void kmem_cache_defrag(kmem_cache_t *); +static void kmem_slab_prefill(kmem_cache_t *, kmem_slab_t *); + + +kmem_log_header_t *kmem_transaction_log; +kmem_log_header_t *kmem_content_log; +kmem_log_header_t *kmem_failure_log; +kmem_log_header_t *kmem_slab_log; + +static int kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */ + +#define KMEM_BUFTAG_LITE_ENTER(bt, count, caller) \ +if ((count) > 0) { \ +pc_t *_s = ((kmem_buftag_lite_t *)(bt))->bt_history; \ +pc_t *_e; \ +/* memmove() the old entries down one notch */ \ +for (_e = &_s[(count) - 1]; _e > _s; _e--) \ +*_e = *(_e - 1); \ +*_s = (uintptr_t)(caller); \ +} + +#define KMERR_MODIFIED 0 /* buffer modified while on freelist */ +#define KMERR_REDZONE 1 /* redzone violation (write past end of buf) */ +#define KMERR_DUPFREE 2 /* freed a buffer twice */ +#define KMERR_BADADDR 3 /* freed a bad (unallocated) address */ +#define KMERR_BADBUFTAG 4 /* buftag corrupted */ +#define KMERR_BADBUFCTL 5 /* bufctl corrupted */ +#define KMERR_BADCACHE 6 /* freed a buffer to the wrong cache */ +#define KMERR_BADSIZE 7 /* alloc size != free size */ +#define KMERR_BADBASE 8 /* buffer base address wrong */ + +struct { + hrtime_t kmp_timestamp; /* timestamp of panic */ + int kmp_error; /* type of kmem error */ + void *kmp_buffer; /* buffer that induced panic */ + void *kmp_realbuf; /* real start address for buffer */ + kmem_cache_t *kmp_cache; /* buffer's cache according to client */ + kmem_cache_t *kmp_realcache; /* actual cache containing buffer */ + kmem_slab_t *kmp_slab; /* slab accoring to kmem_findslab() */ + kmem_bufctl_t *kmp_bufctl; /* bufctl */ +} kmem_panic_info; + +extern uint64_t stat_osif_malloc_success; +extern uint64_t stat_osif_malloc_bytes; +extern uint64_t stat_osif_free; +extern uint64_t stat_osif_free_bytes; + +extern uint64_t spl_bucket_non_pow2_allocs; + +// stats for spl_root_allocator(); +extern uint64_t spl_root_allocator_calls; +extern uint64_t spl_root_allocator_large_bytes_asked; +extern uint64_t spl_root_allocator_small_bytes_asked; +extern uint64_t spl_root_allocator_minalloc_bytes_asked; +extern uint64_t spl_root_allocator_extra_pass; +extern uint64_t spl_root_allocator_recovered; +extern uint64_t spl_root_allocator_recovered_bytes; + +extern uint64_t spl_vmem_unconditional_allocs; +extern uint64_t spl_vmem_unconditional_alloc_bytes; +extern uint64_t spl_vmem_conditional_allocs; +extern uint64_t spl_vmem_conditional_alloc_bytes; +extern uint64_t spl_vmem_conditional_alloc_deny; +extern uint64_t spl_vmem_conditional_alloc_deny_bytes; + +extern uint64_t spl_xat_success; +extern uint64_t spl_xat_late_success; +extern uint64_t spl_xat_late_success_nosleep; +extern uint64_t spl_xat_pressured; +extern uint64_t spl_xat_bailed; +extern uint64_t spl_xat_bailed_contended; +extern uint64_t spl_xat_lastalloc; +extern uint64_t spl_xat_lastfree; +extern uint64_t spl_xat_forced; +extern uint64_t spl_xat_sleep; +extern uint64_t spl_xat_late_deny; +extern uint64_t spl_xat_no_waiters; +extern uint64_t spl_xft_wait; + +extern uint64_t spl_vba_parent_memory_appeared; +extern uint64_t spl_vba_parent_memory_blocked; +extern uint64_t spl_vba_hiprio_blocked; +extern uint64_t spl_vba_cv_timeout; +extern 
uint64_t spl_vba_loop_timeout; +extern uint64_t spl_vba_cv_timeout_blocked; +extern uint64_t spl_vba_loop_timeout_blocked; +extern uint64_t spl_vba_sleep; +extern uint64_t spl_vba_loop_entries; + +extern uint64_t spl_bucket_tunable_large_span; +extern uint64_t spl_bucket_tunable_small_span; +extern void spl_set_bucket_tunable_large_span(uint64_t); +extern void spl_set_bucket_tunable_small_span(uint64_t); + +extern _Atomic uint64_t spl_arc_no_grow_bits; +extern uint64_t spl_arc_no_grow_count; + +extern uint64_t spl_frag_max_walk; +extern uint64_t spl_frag_walked_out; +extern uint64_t spl_frag_walk_cnt; + +uint64_t spl_buckets_mem_free = 0; +uint64_t spl_arc_reclaim_avoided = 0; + +uint64_t kmem_free_to_slab_when_fragmented = 0; + +typedef struct spl_stats { + kstat_named_t spl_os_alloc; + kstat_named_t spl_active_threads; + kstat_named_t spl_active_mutex; + kstat_named_t spl_active_rwlock; + kstat_named_t spl_active_tsd; + kstat_named_t spl_free_wake_count; + kstat_named_t spl_spl_free; + kstat_named_t spl_spl_free_manual_pressure; + kstat_named_t spl_spl_free_fast_pressure; + kstat_named_t spl_spl_free_delta_ema; + kstat_named_t spl_spl_free_negative_count; + kstat_named_t spl_osif_malloc_success; + kstat_named_t spl_osif_malloc_bytes; + kstat_named_t spl_osif_free; + kstat_named_t spl_osif_free_bytes; + kstat_named_t spl_bucket_non_pow2_allocs; + + kstat_named_t spl_vmem_unconditional_allocs; + kstat_named_t spl_vmem_unconditional_alloc_bytes; + kstat_named_t spl_vmem_conditional_allocs; + kstat_named_t spl_vmem_conditional_alloc_bytes; + kstat_named_t spl_vmem_conditional_alloc_deny; + kstat_named_t spl_vmem_conditional_alloc_deny_bytes; + + kstat_named_t spl_xat_success; + kstat_named_t spl_xat_late_success; + kstat_named_t spl_xat_late_success_nosleep; + kstat_named_t spl_xat_pressured; + kstat_named_t spl_xat_bailed; + kstat_named_t spl_xat_bailed_contended; + kstat_named_t spl_xat_lastalloc; + kstat_named_t spl_xat_lastfree; + kstat_named_t spl_xat_forced; + kstat_named_t spl_xat_sleep; + kstat_named_t spl_xat_late_deny; + kstat_named_t spl_xat_no_waiters; + kstat_named_t spl_xft_wait; + + kstat_named_t spl_vba_parent_memory_appeared; + kstat_named_t spl_vba_parent_memory_blocked; + kstat_named_t spl_vba_hiprio_blocked; + kstat_named_t spl_vba_cv_timeout; + kstat_named_t spl_vba_loop_timeout; + kstat_named_t spl_vba_cv_timeout_blocked; + kstat_named_t spl_vba_loop_timeout_blocked; + kstat_named_t spl_vba_sleep; + kstat_named_t spl_vba_loop_entries; + + kstat_named_t spl_bucket_tunable_large_span; + kstat_named_t spl_bucket_tunable_small_span; + + kstat_named_t spl_buckets_mem_free; + kstat_named_t spl_arc_no_grow_bits; + kstat_named_t spl_arc_no_grow_count; + kstat_named_t spl_frag_max_walk; + kstat_named_t spl_frag_walked_out; + kstat_named_t spl_frag_walk_cnt; + kstat_named_t spl_arc_reclaim_avoided; + + kstat_named_t kmem_free_to_slab_when_fragmented; +} spl_stats_t; + +static spl_stats_t spl_stats = { + {"os_mem_alloc", KSTAT_DATA_UINT64}, + {"active_threads", KSTAT_DATA_UINT64}, + {"active_mutex", KSTAT_DATA_UINT64}, + {"active_rwlock", KSTAT_DATA_UINT64}, + {"active_tsd", KSTAT_DATA_UINT64}, + {"spl_free_wake_count", KSTAT_DATA_UINT64}, + {"spl_spl_free", KSTAT_DATA_INT64}, + {"spl_spl_free_manual_pressure", KSTAT_DATA_UINT64}, + {"spl_spl_free_fast_pressure", KSTAT_DATA_UINT64}, + {"spl_spl_free_delta_ema", KSTAT_DATA_UINT64}, + {"spl_spl_free_negative_count", KSTAT_DATA_UINT64}, + {"spl_osif_malloc_success", KSTAT_DATA_UINT64}, + {"spl_osif_malloc_bytes", KSTAT_DATA_UINT64}, 
+ {"spl_osif_free", KSTAT_DATA_UINT64}, + {"spl_osif_free_bytes", KSTAT_DATA_UINT64}, + {"spl_bucket_non_pow2_allocs", KSTAT_DATA_UINT64}, + + {"vmem_unconditional_allocs", KSTAT_DATA_UINT64}, + {"vmem_unconditional_alloc_bytes", KSTAT_DATA_UINT64}, + {"vmem_conditional_allocs", KSTAT_DATA_UINT64}, + {"vmem_conditional_alloc_bytes", KSTAT_DATA_UINT64}, + {"vmem_conditional_alloc_deny", KSTAT_DATA_UINT64}, + {"vmem_conditional_alloc_deny_bytes", KSTAT_DATA_UINT64}, + + {"spl_xat_success", KSTAT_DATA_UINT64}, + {"spl_xat_late_success", KSTAT_DATA_UINT64}, + {"spl_xat_late_success_nosleep", KSTAT_DATA_UINT64}, + {"spl_xat_pressured", KSTAT_DATA_UINT64}, + {"spl_xat_bailed", KSTAT_DATA_UINT64}, + {"spl_xat_bailed_contended", KSTAT_DATA_UINT64}, + {"spl_xat_lastalloc", KSTAT_DATA_UINT64}, + {"spl_xat_lastfree", KSTAT_DATA_UINT64}, + {"spl_xat_forced", KSTAT_DATA_UINT64}, + {"spl_xat_sleep", KSTAT_DATA_UINT64}, + {"spl_xat_late_deny", KSTAT_DATA_UINT64}, + {"spl_xat_no_waiters", KSTAT_DATA_UINT64}, + {"spl_xft_wait", KSTAT_DATA_UINT64}, + + {"spl_vba_parent_memory_appeared", KSTAT_DATA_UINT64}, + {"spl_vba_parent_memory_blocked", KSTAT_DATA_UINT64}, + {"spl_vba_hiprio_blocked", KSTAT_DATA_UINT64}, + {"spl_vba_cv_timeout", KSTAT_DATA_UINT64}, + {"spl_vba_loop_timeout", KSTAT_DATA_UINT64}, + {"spl_vba_cv_timeout_blocked", KSTAT_DATA_UINT64}, + {"spl_vba_loop_timeout_blocked", KSTAT_DATA_UINT64}, + {"spl_vba_sleep", KSTAT_DATA_UINT64}, + {"spl_vba_loop_entries", KSTAT_DATA_UINT64}, + + {"spl_tunable_large_span", KSTAT_DATA_UINT64}, + {"spl_tunable_small_span", KSTAT_DATA_UINT64}, + + {"spl_buckets_mem_free", KSTAT_DATA_UINT64}, + {"spl_arc_no_grow_bits", KSTAT_DATA_UINT64}, + {"spl_arc_no_grow_count", KSTAT_DATA_UINT64}, + + {"spl_vmem_frag_max_walk", KSTAT_DATA_UINT64}, + {"spl_vmem_frag_walked_out", KSTAT_DATA_UINT64}, + {"spl_vmem_frag_walk_cnt", KSTAT_DATA_UINT64}, + {"spl_arc_reclaim_avoided", KSTAT_DATA_UINT64}, + + {"kmem_free_to_slab_when_fragmented", KSTAT_DATA_UINT64}, +}; + +static kstat_t *spl_ksp = 0; + +// Stub out caller() +caddr_t +caller() +{ + return ((caddr_t)(0)); +} + +void * +calloc(size_t n, size_t s) +{ + return (zfs_kmem_zalloc(n * s, KM_NOSLEEP)); +} + +#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9') + +#define IS_ALPHA(c) \ +(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) + +/* + * Get bytes from the /dev/random generator. Returns 0 + * on success. Returns EAGAIN if there is insufficient entropy. + */ +int +random_get_bytes(uint8_t *ptr, size_t len) +{ + read_random(ptr, len); + return (0); +} + +/* + * BGH - Missing from OSX? + * + * Convert a string into a valid C identifier by replacing invalid + * characters with '_'. Also makes sure the string is nul-terminated + * and takes up at most n bytes. 
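+ *
+ * For example, strident_canon() turns "zfs-stat.1" into "zfs_stat_1",
+ * and a leading digit is also replaced, so "2pool" becomes "_pool".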
+ */ +void +strident_canon(char *s, size_t n) +{ + char c; + char *end = s + n - 1; + + if ((c = *s) == 0) + return; + + if (!IS_ALPHA(c) && c != '_') + *s = '_'; + + while (s < end && ((c = *(++s)) != 0)) { + if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_') + *s = '_'; + } + *s = 0; +} + +int +strident_valid(const char *id) +{ + int c = *id++; + + if (!IS_ALPHA(c) && c != '_') + return (0); + while ((c = *id++) != 0) { + if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_') + return (0); + } + return (1); +} + +static void +copy_pattern(uint64_t pattern, void *buf_arg, size_t size) +{ + uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); + uint64_t *buf = buf_arg; + + while (buf < bufend) + *buf++ = pattern; +} + +static void * +verify_pattern(uint64_t pattern, void *buf_arg, size_t size) +{ + uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); + uint64_t *buf; + + for (buf = buf_arg; buf < bufend; buf++) + if (*buf != pattern) + return (buf); + return (NULL); +} + +static void * +verify_and_copy_pattern(uint64_t old, uint64_t new, void *buf_arg, size_t size) +{ + uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); + uint64_t *buf; + + for (buf = buf_arg; buf < bufend; buf++) { + if (*buf != old) { + copy_pattern(old, buf_arg, + (char *)buf - (char *)buf_arg); + return (buf); + } + *buf = new; + } + + return (NULL); +} + +static void +kmem_cache_applyall(void (*func)(kmem_cache_t *), taskq_t *tq, int tqflag) +{ + kmem_cache_t *cp; + + mutex_enter(&kmem_cache_lock); + for (cp = list_head(&kmem_caches); cp != NULL; + cp = list_next(&kmem_caches, cp)) + if (tq != NULL) + (void) taskq_dispatch(tq, (task_func_t *)func, cp, + tqflag); + else + func(cp); + mutex_exit(&kmem_cache_lock); +} + +static void +kmem_cache_applyall_id(void (*func)(kmem_cache_t *), taskq_t *tq, int tqflag) +{ + kmem_cache_t *cp; + + mutex_enter(&kmem_cache_lock); + for (cp = list_head(&kmem_caches); cp != NULL; + cp = list_next(&kmem_caches, cp)) { + if (!(cp->cache_cflags & KMC_IDENTIFIER)) + continue; + if (tq != NULL) + (void) taskq_dispatch(tq, (task_func_t *)func, cp, + tqflag); + else + func(cp); + } + mutex_exit(&kmem_cache_lock); +} + +/* + * Debugging support. Given a buffer address, find its slab. 
+ */ +static kmem_slab_t * +kmem_findslab(kmem_cache_t *cp, void *buf) +{ + kmem_slab_t *sp; + + mutex_enter(&cp->cache_lock); + for (sp = list_head(&cp->cache_complete_slabs); sp != NULL; + sp = list_next(&cp->cache_complete_slabs, sp)) { + if (KMEM_SLAB_MEMBER(sp, buf)) { + mutex_exit(&cp->cache_lock); + return (sp); + } + } + for (sp = avl_first(&cp->cache_partial_slabs); sp != NULL; + sp = AVL_NEXT(&cp->cache_partial_slabs, sp)) { + if (KMEM_SLAB_MEMBER(sp, buf)) { + mutex_exit(&cp->cache_lock); + return (sp); + } + } + mutex_exit(&cp->cache_lock); + + return (NULL); +} + +static void +kmem_error(int error, kmem_cache_t *cparg, void *bufarg) +{ + kmem_buftag_t *btp = NULL; + kmem_bufctl_t *bcp = NULL; + kmem_cache_t *cp = cparg; + kmem_slab_t *sp; + uint64_t *off; + void *buf = bufarg; + + kmem_logging = 0; /* stop logging when a bad thing happens */ + + kmem_panic_info.kmp_timestamp = gethrtime(); + + sp = kmem_findslab(cp, buf); + if (sp == NULL) { + for (cp = list_tail(&kmem_caches); cp != NULL; + cp = list_prev(&kmem_caches, cp)) { + if ((sp = kmem_findslab(cp, buf)) != NULL) + break; + } + } + + if (sp == NULL) { + cp = NULL; + error = KMERR_BADADDR; + } else { + if (cp != cparg) + error = KMERR_BADCACHE; + else + buf = (char *)bufarg - + ((uintptr_t)bufarg - + (uintptr_t)sp->slab_base) % cp->cache_chunksize; + if (buf != bufarg) + error = KMERR_BADBASE; + if (cp->cache_flags & KMF_BUFTAG) + btp = KMEM_BUFTAG(cp, buf); + if (cp->cache_flags & KMF_HASH) { + mutex_enter(&cp->cache_lock); + for (bcp = *KMEM_HASH(cp, buf); bcp; bcp = bcp->bc_next) + if (bcp->bc_addr == buf) + break; + mutex_exit(&cp->cache_lock); + if (bcp == NULL && btp != NULL) + bcp = btp->bt_bufctl; + if (kmem_findslab(cp->cache_bufctl_cache, bcp) == + NULL || P2PHASE((uintptr_t)bcp, KMEM_ALIGN) || + bcp->bc_addr != buf) { + error = KMERR_BADBUFCTL; + bcp = NULL; + } + } + } + + kmem_panic_info.kmp_error = error; + kmem_panic_info.kmp_buffer = bufarg; + kmem_panic_info.kmp_realbuf = buf; + kmem_panic_info.kmp_cache = cparg; + kmem_panic_info.kmp_realcache = cp; + kmem_panic_info.kmp_slab = sp; + kmem_panic_info.kmp_bufctl = bcp; + + printf("SPL: kernel memory allocator: "); + + switch (error) { + + case KMERR_MODIFIED: + printf("buffer modified after being freed\n"); + off = verify_pattern(KMEM_FREE_PATTERN, buf, + cp->cache_verify); + if (off == NULL) /* shouldn't happen */ + off = buf; + printf("SPL: modification occurred at offset 0x%lx " + "(0x%llx replaced by 0x%llx)\n", + (uintptr_t)off - (uintptr_t)buf, + (longlong_t)KMEM_FREE_PATTERN, (longlong_t)*off); + break; + + case KMERR_REDZONE: + printf("redzone violation: write past end of buffer\n"); + break; + + case KMERR_BADADDR: + printf("invalid free: buffer not in cache\n"); + break; + + case KMERR_DUPFREE: + printf("duplicate free: buffer freed twice\n"); + break; + + case KMERR_BADBUFTAG: + printf("boundary tag corrupted\n"); + printf("SPL: bcp ^ bxstat = %lx, should be %lx\n", + (intptr_t)btp->bt_bufctl ^ btp->bt_bxstat, + KMEM_BUFTAG_FREE); + break; + + case KMERR_BADBUFCTL: + printf("bufctl corrupted\n"); + break; + + case KMERR_BADCACHE: + printf("buffer freed to wrong cache\n"); + printf("SPL: buffer was allocated from %s,\n", + cp->cache_name); + printf("SPL: caller attempting free to %s.\n", + cparg->cache_name); + break; + + case KMERR_BADSIZE: + printf("bad free: free size (%u) != alloc size (%u)\n", + KMEM_SIZE_DECODE(((uint32_t *)btp)[0]), + KMEM_SIZE_DECODE(((uint32_t *)btp)[1])); + break; + + case KMERR_BADBASE: + printf("bad free: free 
address (%p) != alloc address" + " (%p)\n", bufarg, buf); + break; + } + + printf("SPL: buffer=%p bufctl=%p cache: %s\n", + bufarg, (void *)bcp, cparg->cache_name); + + if (bcp != NULL && (cp->cache_flags & KMF_AUDIT) && + error != KMERR_BADBUFCTL) { + int d; + timestruc_t ts = {0, 0}; + kmem_bufctl_audit_t *bcap = (kmem_bufctl_audit_t *)bcp; + + hrt2ts(kmem_panic_info.kmp_timestamp - bcap->bc_timestamp, &ts); + printf("SPL: previous transaction on buffer %p:\n", buf); + printf("SPL: thread=%p time=T-%ld.%09ld slab=%p cache: %s\n", + (void *)bcap->bc_thread, ts.tv_sec, ts.tv_nsec, + (void *)sp, cp->cache_name); + for (d = 0; d < MIN(bcap->bc_depth, KMEM_STACK_DEPTH); d++) { + print_symbol(bcap->bc_stack[d]); + } + } + + if (kmem_panic > 0) { + extern void IODelay(unsigned microseconds); // lh_cpu[max_ncpus]; + int i; + + /* + * Make sure that lhp->lh_cpu[] is nicely aligned + * to prevent false sharing of cache lines. + */ + lhsize = P2ROUNDUP(lhsize, KMEM_ALIGN); + lhp = vmem_xalloc(kmem_log_arena, lhsize, 64, P2NPHASE(lhsize, 64), 0, + NULL, NULL, VM_SLEEP); + bzero(lhp, lhsize); + + mutex_init(&lhp->lh_lock, NULL, MUTEX_DEFAULT, NULL); + lhp->lh_nchunks = nchunks; + lhp->lh_chunksize = P2ROUNDUP(logsize / nchunks + 1, PAGESIZE); + lhp->lh_base = vmem_alloc(kmem_log_arena, + lhp->lh_chunksize * nchunks, VM_SLEEP); + lhp->lh_free = vmem_alloc(kmem_log_arena, + nchunks * sizeof (int), VM_SLEEP); + bzero(lhp->lh_base, lhp->lh_chunksize * nchunks); + + for (i = 0; i < max_ncpus; i++) { + kmem_cpu_log_header_t *clhp = &lhp->lh_cpu[i]; + mutex_init(&clhp->clh_lock, NULL, MUTEX_DEFAULT, NULL); + clhp->clh_chunk = i; + } + + for (i = max_ncpus; i < nchunks; i++) + lhp->lh_free[i] = i; + + lhp->lh_head = max_ncpus; + lhp->lh_tail = 0; + + return (lhp); +} + + +static void +kmem_log_fini(kmem_log_header_t *lhp) +{ + int nchunks = 4 * max_ncpus; + size_t lhsize = (size_t)&((kmem_log_header_t *)0)->lh_cpu[max_ncpus]; + int i; + + + + for (i = 0; i < max_ncpus; i++) { + kmem_cpu_log_header_t *clhp = &lhp->lh_cpu[i]; + mutex_destroy(&clhp->clh_lock); + } + + vmem_free(kmem_log_arena, lhp->lh_free, nchunks * sizeof (int)); + + vmem_free(kmem_log_arena, lhp->lh_base, lhp->lh_chunksize * nchunks); + + mutex_destroy(&lhp->lh_lock); + + lhsize = P2ROUNDUP(lhsize, KMEM_ALIGN); + vmem_xfree(kmem_log_arena, lhp, lhsize); +} + + +static void * +kmem_log_enter(kmem_log_header_t *lhp, void *data, size_t size) +{ + void *logspace; + + kmem_cpu_log_header_t *clhp = &lhp->lh_cpu[cpu_number()]; + + // if (lhp == NULL || kmem_logging == 0 || panicstr) + if (lhp == NULL || kmem_logging == 0) + return (NULL); + + mutex_enter(&clhp->clh_lock); + clhp->clh_hits++; + if (size > clhp->clh_avail) { + mutex_enter(&lhp->lh_lock); + lhp->lh_hits++; + lhp->lh_free[lhp->lh_tail] = clhp->clh_chunk; + lhp->lh_tail = (lhp->lh_tail + 1) % lhp->lh_nchunks; + clhp->clh_chunk = lhp->lh_free[lhp->lh_head]; + lhp->lh_head = (lhp->lh_head + 1) % lhp->lh_nchunks; + clhp->clh_current = lhp->lh_base + + clhp->clh_chunk * lhp->lh_chunksize; + clhp->clh_avail = lhp->lh_chunksize; + if (size > lhp->lh_chunksize) + size = lhp->lh_chunksize; + mutex_exit(&lhp->lh_lock); + } + logspace = clhp->clh_current; + clhp->clh_current += size; + clhp->clh_avail -= size; + bcopy(data, logspace, size); + mutex_exit(&clhp->clh_lock); + return (logspace); +} + +#define KMEM_AUDIT(lp, cp, bcp) \ +{ \ +kmem_bufctl_audit_t *_bcp = (kmem_bufctl_audit_t *)(bcp); \ +_bcp->bc_timestamp = gethrtime(); \ +_bcp->bc_thread = spl_current_thread(); \ +_bcp->bc_depth = 
getpcstack(_bcp->bc_stack, KMEM_STACK_DEPTH); \ +_bcp->bc_lastlog = kmem_log_enter((lp), _bcp, sizeof (*_bcp)); \ +} + +static void +kmem_log_event(kmem_log_header_t *lp, kmem_cache_t *cp, + kmem_slab_t *sp, void *addr) +{ + kmem_bufctl_audit_t bca; + + bzero(&bca, sizeof (kmem_bufctl_audit_t)); + bca.bc_addr = addr; + bca.bc_slab = sp; + KMEM_AUDIT(lp, cp, &bca); +} + +/* + * Create a new slab for cache cp. + */ +static kmem_slab_t * +kmem_slab_create(kmem_cache_t *cp, int kmflag) +{ + size_t slabsize = cp->cache_slabsize; + size_t chunksize = cp->cache_chunksize; + int cache_flags = cp->cache_flags; + size_t color, chunks; + char *buf, *slab; + kmem_slab_t *sp; + kmem_bufctl_t *bcp; + vmem_t *vmp = cp->cache_arena; + + ASSERT(MUTEX_NOT_HELD(&cp->cache_lock)); + + color = cp->cache_color + cp->cache_align; + if (color > cp->cache_maxcolor) + color = cp->cache_mincolor; + cp->cache_color = color; + + slab = vmem_alloc(vmp, slabsize, kmflag & KM_VMFLAGS); + + if (slab == NULL) + goto vmem_alloc_failure; + + ASSERT(P2PHASE((uintptr_t)slab, vmp->vm_quantum) == 0); + + /* + * Reverify what was already checked in kmem_cache_set_move(), since the + * consolidator depends (for correctness) on slabs being initialized + * with the 0xbaddcafe memory pattern (setting a low order bit usable by + * clients to distinguish uninitialized memory from known objects). + */ + ASSERT((cp->cache_move == NULL) || !(cp->cache_cflags & KMC_NOTOUCH)); + if (!(cp->cache_cflags & KMC_NOTOUCH)) + copy_pattern(KMEM_UNINITIALIZED_PATTERN, slab, slabsize); + + if (cache_flags & KMF_HASH) { + if ((sp = kmem_cache_alloc(kmem_slab_cache, kmflag)) == NULL) + goto slab_alloc_failure; + chunks = (slabsize - color) / chunksize; + } else { + sp = KMEM_SLAB(cp, slab); + chunks = (slabsize - sizeof (kmem_slab_t) - color) / chunksize; + } + + sp->slab_cache = cp; + sp->slab_head = NULL; + sp->slab_refcnt = 0; + sp->slab_base = buf = slab + color; + sp->slab_chunks = chunks; + sp->slab_stuck_offset = (uint32_t)-1; + sp->slab_later_count = 0; + sp->slab_flags = 0; + sp->slab_create_time = gethrtime(); + + ASSERT(chunks > 0); + while (chunks-- != 0) { + if (cache_flags & KMF_HASH) { + bcp = kmem_cache_alloc(cp->cache_bufctl_cache, kmflag); + if (bcp == NULL) + goto bufctl_alloc_failure; + if (cache_flags & KMF_AUDIT) { + kmem_bufctl_audit_t *bcap = + (kmem_bufctl_audit_t *)bcp; + bzero(bcap, sizeof (kmem_bufctl_audit_t)); + bcap->bc_cache = cp; + } + bcp->bc_addr = buf; + bcp->bc_slab = sp; + } else { + bcp = KMEM_BUFCTL(cp, buf); + } + if (cache_flags & KMF_BUFTAG) { + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + btp->bt_redzone = KMEM_REDZONE_PATTERN; + btp->bt_bufctl = bcp; + btp->bt_bxstat = (intptr_t)bcp ^ KMEM_BUFTAG_FREE; + if (cache_flags & KMF_DEADBEEF) { + copy_pattern(KMEM_FREE_PATTERN, buf, + cp->cache_verify); + } + } + bcp->bc_next = sp->slab_head; + sp->slab_head = bcp; + buf += chunksize; + } + + kmem_log_event(kmem_slab_log, cp, sp, slab); + + return (sp); + +bufctl_alloc_failure: + + while ((bcp = sp->slab_head) != NULL) { + sp->slab_head = bcp->bc_next; + kmem_cache_free(cp->cache_bufctl_cache, bcp); + } + kmem_cache_free(kmem_slab_cache, sp); + +slab_alloc_failure: + + vmem_free(vmp, slab, slabsize); + +vmem_alloc_failure: + + if (0 == (kmflag & KM_NO_VBA)) { + kmem_log_event(kmem_failure_log, cp, NULL, NULL); + atomic_inc_64(&cp->cache_alloc_fail); + } + + return (NULL); +} + +/* + * Destroy a slab. 
+ */ +static void +kmem_slab_destroy(kmem_cache_t *cp, kmem_slab_t *sp) +{ + vmem_t *vmp = cp->cache_arena; + void *slab = (void *)P2ALIGN((uintptr_t)sp->slab_base, vmp->vm_quantum); + + ASSERT(MUTEX_NOT_HELD(&cp->cache_lock)); + ASSERT(sp->slab_refcnt == 0); + + if (cp->cache_flags & KMF_HASH) { + kmem_bufctl_t *bcp; + while ((bcp = sp->slab_head) != NULL) { + sp->slab_head = bcp->bc_next; + kmem_cache_free(cp->cache_bufctl_cache, bcp); + } + kmem_cache_free(kmem_slab_cache, sp); + } + kpreempt(KPREEMPT_SYNC); + vmem_free(vmp, slab, cp->cache_slabsize); +} + +static void * +kmem_slab_alloc_impl(kmem_cache_t *cp, kmem_slab_t *sp, boolean_t prefill) +{ + kmem_bufctl_t *bcp, **hash_bucket; + void *buf; + boolean_t new_slab = (sp->slab_refcnt == 0); + + ASSERT(MUTEX_HELD(&cp->cache_lock)); + /* + * kmem_slab_alloc() drops cache_lock when it creates a new slab, so we + * can't ASSERT(avl_is_empty(&cp->cache_partial_slabs)) here when the + * slab is newly created. + */ + ASSERT(new_slab || (KMEM_SLAB_IS_PARTIAL(sp) && + (sp == avl_first(&cp->cache_partial_slabs)))); + ASSERT(sp->slab_cache == cp); + + cp->cache_slab_alloc++; + cp->cache_bufslab--; + sp->slab_refcnt++; + + bcp = sp->slab_head; + sp->slab_head = bcp->bc_next; + + if (cp->cache_flags & KMF_HASH) { + /* + * Add buffer to allocated-address hash table. + */ + buf = bcp->bc_addr; + hash_bucket = KMEM_HASH(cp, buf); + bcp->bc_next = *hash_bucket; + *hash_bucket = bcp; + if ((cp->cache_flags & (KMF_AUDIT | KMF_BUFTAG)) == KMF_AUDIT) { + KMEM_AUDIT(kmem_transaction_log, cp, bcp); + } + } else { + buf = KMEM_BUF(cp, bcp); + } + + ASSERT(KMEM_SLAB_MEMBER(sp, buf)); + + if (sp->slab_head == NULL) { + ASSERT(KMEM_SLAB_IS_ALL_USED(sp)); + if (new_slab) { + ASSERT(sp->slab_chunks == 1); + } else { + ASSERT(sp->slab_chunks > 1); /* the slab was partial */ + avl_remove(&cp->cache_partial_slabs, sp); + sp->slab_later_count = 0; /* clear history */ + sp->slab_flags &= ~KMEM_SLAB_NOMOVE; + sp->slab_stuck_offset = (uint32_t)-1; + } + list_insert_head(&cp->cache_complete_slabs, sp); + cp->cache_complete_slab_count++; + return (buf); + } + + ASSERT(KMEM_SLAB_IS_PARTIAL(sp)); + /* + * Peek to see if the magazine layer is enabled before + * we prefill. We're not holding the cpu cache lock, + * so the peek could be wrong, but there's no harm in it. + */ + if (new_slab && prefill && (cp->cache_flags & KMF_PREFILL) && + (KMEM_CPU_CACHE(cp)->cc_magsize != 0)) { + kmem_slab_prefill(cp, sp); + return (buf); + } + + if (new_slab) { + avl_add(&cp->cache_partial_slabs, sp); + return (buf); + } + + /* + * The slab is now more allocated than it was, so the + * order remains unchanged. + */ + ASSERT(!avl_update(&cp->cache_partial_slabs, sp)); + return (buf); +} + +/* + * Allocate a raw (unconstructed) buffer from cp's slab layer. + */ +static void * +kmem_slab_alloc(kmem_cache_t *cp, int kmflag) +{ + kmem_slab_t *sp; + void *buf; + boolean_t test_destructor; + + mutex_enter(&cp->cache_lock); + test_destructor = (cp->cache_slab_alloc == 0); + sp = avl_first(&cp->cache_partial_slabs); + if (sp == NULL) { + ASSERT(cp->cache_bufslab == 0); + + /* + * The freelist is empty. Create a new slab. 
+ */ + mutex_exit(&cp->cache_lock); + if ((sp = kmem_slab_create(cp, kmflag)) == NULL) { + return (NULL); + } + mutex_enter(&cp->cache_lock); + cp->cache_slab_create++; + if ((cp->cache_buftotal += sp->slab_chunks) > cp->cache_bufmax) + cp->cache_bufmax = cp->cache_buftotal; + cp->cache_bufslab += sp->slab_chunks; + } + + buf = kmem_slab_alloc_impl(cp, sp, B_TRUE); + ASSERT((cp->cache_slab_create - cp->cache_slab_destroy) == + (cp->cache_complete_slab_count + + avl_numnodes(&cp->cache_partial_slabs) + + (cp->cache_defrag == NULL ? 0 : cp->cache_defrag->kmd_deadcount))); + mutex_exit(&cp->cache_lock); + + if (test_destructor && cp->cache_destructor != NULL) { + copy_pattern(KMEM_UNINITIALIZED_PATTERN, buf, + cp->cache_bufsize); + if (cp->cache_flags & KMF_DEADBEEF) { + copy_pattern(KMEM_FREE_PATTERN, buf, cp->cache_verify); + } + } + + return (buf); +} + +static void kmem_slab_move_yes(kmem_cache_t *, kmem_slab_t *, void *); + +/* + * Free a raw (unconstructed) buffer to cp's slab layer. + */ +static void +kmem_slab_free(kmem_cache_t *cp, void *buf) +{ + kmem_slab_t *sp; + kmem_bufctl_t *bcp, **prev_bcpp; + + ASSERT(buf != NULL); + + mutex_enter(&cp->cache_lock); + cp->cache_slab_free++; + + if (cp->cache_flags & KMF_HASH) { + /* + * Look up buffer in allocated-address hash table. + */ + prev_bcpp = KMEM_HASH(cp, buf); + while ((bcp = *prev_bcpp) != NULL) { + if (bcp->bc_addr == buf) { + *prev_bcpp = bcp->bc_next; + sp = bcp->bc_slab; + break; + } + cp->cache_lookup_depth++; + prev_bcpp = &bcp->bc_next; + } + } else { + bcp = KMEM_BUFCTL(cp, buf); + sp = KMEM_SLAB(cp, buf); + } + + if (bcp == NULL || sp->slab_cache != cp || !KMEM_SLAB_MEMBER(sp, buf)) { + mutex_exit(&cp->cache_lock); + kmem_error(KMERR_BADADDR, cp, buf); + return; + } + + if (KMEM_SLAB_OFFSET(sp, buf) == sp->slab_stuck_offset) { + /* + * If this is the buffer that prevented the consolidator from + * clearing the slab, we can reset the slab flags now that the + * buffer is freed. (It makes sense to do this in + * kmem_cache_free(), where the client gives up ownership of the + * buffer, but on the hot path the test is too expensive.) + */ + kmem_slab_move_yes(cp, sp, buf); + } + + if ((cp->cache_flags & (KMF_AUDIT | KMF_BUFTAG)) == KMF_AUDIT) { + if (cp->cache_flags & KMF_CONTENTS) + ((kmem_bufctl_audit_t *)bcp)->bc_contents = + kmem_log_enter(kmem_content_log, buf, + cp->cache_contents); + KMEM_AUDIT(kmem_transaction_log, cp, bcp); + } + + bcp->bc_next = sp->slab_head; + sp->slab_head = bcp; + + cp->cache_bufslab++; + ASSERT(sp->slab_refcnt >= 1); + + if (--sp->slab_refcnt == 0) { + /* + * There are no outstanding allocations from this slab, + * so we can reclaim the memory. + */ + if (sp->slab_chunks == 1) { + list_remove(&cp->cache_complete_slabs, sp); + cp->cache_complete_slab_count--; + } else { + avl_remove(&cp->cache_partial_slabs, sp); + } + + cp->cache_buftotal -= sp->slab_chunks; + cp->cache_bufslab -= sp->slab_chunks; + /* + * Defer releasing the slab to the virtual memory subsystem + * while there is a pending move callback, since we guarantee + * that buffers passed to the move callback have only been + * touched by kmem or by the client itself. Since the memory + * patterns baddcafe (uninitialized) and deadbeef (freed) both + * set at least one of the two lowest order bits, the client can + * test those bits in the move callback to determine whether or + * not it knows about the buffer (assuming that the client also + * sets one of those low order bits whenever it frees a buffer). 
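+ *
+ * As an illustration: the uninitialized (0xbaddcafe...) and freed
+ * (0xdeadbeef...) patterns each have at least one of their two lowest
+ * bits set, so a client whose live objects begin with a pointer that is
+ * at least 4-byte aligned can test those bits in its move callback to
+ * recognize memory that is currently owned by kmem rather than by the
+ * client.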
+ */ + if (cp->cache_defrag == NULL || + (avl_is_empty(&cp->cache_defrag->kmd_moves_pending) && + !(sp->slab_flags & KMEM_SLAB_MOVE_PENDING))) { + cp->cache_slab_destroy++; + mutex_exit(&cp->cache_lock); + kmem_slab_destroy(cp, sp); + } else { + list_t *deadlist = + &cp->cache_defrag->kmd_deadlist; + /* + * Slabs are inserted at both ends of the + * deadlist to distinguish between slabs + * freed while move callbacks are pending + * (list head) and a slab freed while the + * lock is dropped in kmem_move_buffers() + * (list tail) so that in both cases + * slab_destroy() is called from the + * right context. + */ + if (sp->slab_flags & KMEM_SLAB_MOVE_PENDING) { + list_insert_tail(deadlist, sp); + } else { + list_insert_head(deadlist, sp); + } + cp->cache_defrag->kmd_deadcount++; + mutex_exit(&cp->cache_lock); + } + return; + } + + if (bcp->bc_next == NULL) { + /* Transition the slab from completely allocated to partial. */ + ASSERT(sp->slab_refcnt == (sp->slab_chunks - 1)); + ASSERT(sp->slab_chunks > 1); + list_remove(&cp->cache_complete_slabs, sp); + cp->cache_complete_slab_count--; + avl_add(&cp->cache_partial_slabs, sp); + } else { + (void) avl_update_gt(&cp->cache_partial_slabs, sp); + } + + ASSERT((cp->cache_slab_create - cp->cache_slab_destroy) == + (cp->cache_complete_slab_count + + avl_numnodes(&cp->cache_partial_slabs) + + (cp->cache_defrag == NULL ? 0 : cp->cache_defrag->kmd_deadcount))); + mutex_exit(&cp->cache_lock); +} + +/* + * Return -1 if kmem_error, 1 if constructor fails, 0 if successful. + */ +static int +kmem_cache_alloc_debug(kmem_cache_t *cp, void *buf, int kmflag, int construct, + caddr_t caller) +{ + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + kmem_bufctl_audit_t *bcp = (kmem_bufctl_audit_t *)btp->bt_bufctl; + uint32_t mtbf; + + if (btp->bt_bxstat != ((intptr_t)bcp ^ KMEM_BUFTAG_FREE)) { + kmem_error(KMERR_BADBUFTAG, cp, buf); + return (-1); + } + + btp->bt_bxstat = (intptr_t)bcp ^ KMEM_BUFTAG_ALLOC; + + if ((cp->cache_flags & KMF_HASH) && bcp->bc_addr != buf) { + kmem_error(KMERR_BADBUFCTL, cp, buf); + return (-1); + } + + if (cp->cache_flags & KMF_DEADBEEF) { + if (!construct && (cp->cache_flags & KMF_LITE)) { + if (*(uint64_t *)buf != KMEM_FREE_PATTERN) { + kmem_error(KMERR_MODIFIED, cp, buf); + return (-1); + } + if (cp->cache_constructor != NULL) + *(uint64_t *)buf = btp->bt_redzone; + else + *(uint64_t *)buf = KMEM_UNINITIALIZED_PATTERN; + } else { + construct = 1; + if (verify_and_copy_pattern(KMEM_FREE_PATTERN, + KMEM_UNINITIALIZED_PATTERN, buf, + cp->cache_verify)) { + kmem_error(KMERR_MODIFIED, cp, buf); + return (-1); + } + } + } + btp->bt_redzone = KMEM_REDZONE_PATTERN; + + if ((mtbf = kmem_mtbf | cp->cache_mtbf) != 0 && + gethrtime() % mtbf == 0 && + (kmflag & (KM_NOSLEEP | KM_PANIC)) == KM_NOSLEEP) { + kmem_log_event(kmem_failure_log, cp, NULL, NULL); + if (!construct && cp->cache_destructor != NULL) + cp->cache_destructor(buf, cp->cache_private); + } else { + mtbf = 0; + } + + if (mtbf || (construct && cp->cache_constructor != NULL && + cp->cache_constructor(buf, cp->cache_private, kmflag) != 0)) { + atomic_inc_64(&cp->cache_alloc_fail); + btp->bt_bxstat = (intptr_t)bcp ^ KMEM_BUFTAG_FREE; + if (cp->cache_flags & KMF_DEADBEEF) + copy_pattern(KMEM_FREE_PATTERN, buf, cp->cache_verify); + kmem_slab_free(cp, buf); + return (1); + } + + if (cp->cache_flags & KMF_AUDIT) { + KMEM_AUDIT(kmem_transaction_log, cp, bcp); + } + + if ((cp->cache_flags & KMF_LITE) && + !(cp->cache_cflags & KMC_KMEM_ALLOC)) { + KMEM_BUFTAG_LITE_ENTER(btp, kmem_lite_count, caller); + 
} + + return (0); +} + +static int +kmem_cache_free_debug(kmem_cache_t *cp, void *buf, caddr_t caller) +{ + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + kmem_bufctl_audit_t *bcp = (kmem_bufctl_audit_t *)btp->bt_bufctl; + kmem_slab_t *sp; + + if (btp->bt_bxstat != ((intptr_t)bcp ^ KMEM_BUFTAG_ALLOC)) { + if (btp->bt_bxstat == ((intptr_t)bcp ^ KMEM_BUFTAG_FREE)) { + kmem_error(KMERR_DUPFREE, cp, buf); + return (-1); + } + sp = kmem_findslab(cp, buf); + if (sp == NULL || sp->slab_cache != cp) + kmem_error(KMERR_BADADDR, cp, buf); + else + kmem_error(KMERR_REDZONE, cp, buf); + return (-1); + } + + btp->bt_bxstat = (intptr_t)bcp ^ KMEM_BUFTAG_FREE; + + if ((cp->cache_flags & KMF_HASH) && bcp->bc_addr != buf) { + kmem_error(KMERR_BADBUFCTL, cp, buf); + return (-1); + } + + if (btp->bt_redzone != KMEM_REDZONE_PATTERN) { + kmem_error(KMERR_REDZONE, cp, buf); + return (-1); + } + + if (cp->cache_flags & KMF_AUDIT) { + if (cp->cache_flags & KMF_CONTENTS) + bcp->bc_contents = kmem_log_enter(kmem_content_log, + buf, cp->cache_contents); + KMEM_AUDIT(kmem_transaction_log, cp, bcp); + } + + if ((cp->cache_flags & KMF_LITE) && + !(cp->cache_cflags & KMC_KMEM_ALLOC)) { + KMEM_BUFTAG_LITE_ENTER(btp, kmem_lite_count, caller); + } + + if (cp->cache_flags & KMF_DEADBEEF) { + if (cp->cache_flags & KMF_LITE) + btp->bt_redzone = *(uint64_t *)buf; + else if (cp->cache_destructor != NULL) + cp->cache_destructor(buf, cp->cache_private); + + copy_pattern(KMEM_FREE_PATTERN, buf, cp->cache_verify); + } + + return (0); +} + +/* + * Free each object in magazine mp to cp's slab layer, and free mp itself. + */ +static void +kmem_magazine_destroy(kmem_cache_t *cp, kmem_magazine_t *mp, int nrounds) +{ + int round; + + ASSERT(!list_link_active(&cp->cache_link) || + taskq_member(kmem_taskq, curthread)); + + for (round = 0; round < nrounds; round++) { + void *buf = mp->mag_round[round]; + + if (cp->cache_flags & KMF_DEADBEEF) { + if (verify_pattern(KMEM_FREE_PATTERN, buf, + cp->cache_verify) != NULL) { + kmem_error(KMERR_MODIFIED, cp, buf); + continue; + } + if ((cp->cache_flags & KMF_LITE) && + cp->cache_destructor != NULL) { + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + *(uint64_t *)buf = btp->bt_redzone; + cp->cache_destructor(buf, cp->cache_private); + *(uint64_t *)buf = KMEM_FREE_PATTERN; + } + } else if (cp->cache_destructor != NULL) { + cp->cache_destructor(buf, cp->cache_private); + } + + kmem_slab_free(cp, buf); + kpreempt(KPREEMPT_SYNC); + } + ASSERT(KMEM_MAGAZINE_VALID(cp, mp)); + kmem_cache_free(cp->cache_magtype->mt_cache, mp); +} + +/* + * Allocate a magazine from the depot. + */ +static kmem_magazine_t * +kmem_depot_alloc(kmem_cache_t *cp, kmem_maglist_t *mlp) +{ + kmem_magazine_t *mp; + + /* + * If we can't get the depot lock without contention, + * update our contention count. We use the depot + * contention rate to determine whether we need to + * increase the magazine size for better scalability. + */ + if (!mutex_tryenter(&cp->cache_depot_lock)) { + mutex_enter(&cp->cache_depot_lock); + cp->cache_depot_contention++; + } + + if ((mp = mlp->ml_list) != NULL) { + ASSERT(KMEM_MAGAZINE_VALID(cp, mp)); + mlp->ml_list = mp->mag_next; + if (--mlp->ml_total < mlp->ml_min) + mlp->ml_min = mlp->ml_total; + mlp->ml_alloc++; + } + + mutex_exit(&cp->cache_depot_lock); + + return (mp); +} + +/* + * Free a magazine to the depot. 
+ */
+static void
+kmem_depot_free(kmem_cache_t *cp, kmem_maglist_t *mlp, kmem_magazine_t *mp)
+{
+ mutex_enter(&cp->cache_depot_lock);
+ ASSERT(KMEM_MAGAZINE_VALID(cp, mp));
+ mp->mag_next = mlp->ml_list;
+ mlp->ml_list = mp;
+ mlp->ml_total++;
+ mutex_exit(&cp->cache_depot_lock);
+}
+
+/*
+ * Update the working set statistics for cp's depot.
+ */
+static void
+kmem_depot_ws_update(kmem_cache_t *cp)
+{
+ mutex_enter(&cp->cache_depot_lock);
+ cp->cache_full.ml_reaplimit = cp->cache_full.ml_min;
+ cp->cache_full.ml_min = cp->cache_full.ml_total;
+ cp->cache_empty.ml_reaplimit = cp->cache_empty.ml_min;
+ cp->cache_empty.ml_min = cp->cache_empty.ml_total;
+ mutex_exit(&cp->cache_depot_lock);
+}
+
+/*
+ * Set the working set statistics for cp's depot to zero. (Everything is
+ * eligible for reaping.)
+ */
+void
+kmem_depot_ws_zero(kmem_cache_t *cp)
+{
+ mutex_enter(&cp->cache_depot_lock);
+ cp->cache_full.ml_reaplimit = cp->cache_full.ml_total;
+ cp->cache_full.ml_min = cp->cache_full.ml_total;
+ cp->cache_empty.ml_reaplimit = cp->cache_empty.ml_total;
+ cp->cache_empty.ml_min = cp->cache_empty.ml_total;
+ mutex_exit(&cp->cache_depot_lock);
+}
+
+/*
+ * The number of bytes to reap before we call kpreempt(). A small value
+ * (1MB) causes us to preempt reaping up to hundreds of times per second,
+ * while a very large value (1GB) causes this to have virtually no effect.
+ * The default here is 64MB.
+ */
+size_t kmem_reap_preempt_bytes = 64 * 1024 * 1024;
+
+
+/*
+ * Reap all magazines that have fallen out of the depot's working set.
+ */
+static void
+kmem_depot_ws_reap(kmem_cache_t *cp)
+{
+ size_t bytes = 0;
+ long reap;
+ kmem_magazine_t *mp;
+
+ ASSERT(!list_link_active(&cp->cache_link) ||
+ taskq_member(kmem_taskq, curthread));
+
+ reap = MIN(cp->cache_full.ml_reaplimit, cp->cache_full.ml_min);
+ while (reap-- &&
+ (mp = kmem_depot_alloc(cp, &cp->cache_full)) != NULL) {
+ kmem_magazine_destroy(cp, mp, cp->cache_magtype->mt_magsize);
+ bytes += cp->cache_magtype->mt_magsize * cp->cache_bufsize;
+ if (bytes > kmem_reap_preempt_bytes) {
+ kpreempt(KPREEMPT_SYNC);
+ bytes = 0;
+ }
+ }
+
+ reap = MIN(cp->cache_empty.ml_reaplimit, cp->cache_empty.ml_min);
+ while (reap-- &&
+ (mp = kmem_depot_alloc(cp, &cp->cache_empty)) != NULL) {
+ kmem_magazine_destroy(cp, mp, 0);
+ bytes += cp->cache_magtype->mt_magsize * cp->cache_bufsize;
+ if (bytes > kmem_reap_preempt_bytes) {
+ kpreempt(KPREEMPT_SYNC);
+ bytes = 0;
+ }
+ }
+}
+
+static void
+kmem_cpu_reload(kmem_cpu_cache_t *ccp, kmem_magazine_t *mp, int rounds)
+{
+ ASSERT((ccp->cc_loaded == NULL && ccp->cc_rounds == -1) ||
+ (ccp->cc_loaded && ccp->cc_rounds + rounds == ccp->cc_magsize));
+ ASSERT(ccp->cc_magsize > 0);
+
+ ccp->cc_ploaded = ccp->cc_loaded;
+ ccp->cc_prounds = ccp->cc_rounds;
+ ccp->cc_loaded = mp;
+ ccp->cc_rounds = rounds;
+}
+
+/*
+ * Intercept kmem alloc/free calls during crash dump in order to avoid
+ * changing kmem state while memory is being saved to the dump device.
+ * Otherwise, ::kmem_verify will report "corrupt buffers". Note that
+ * there are no locks because only one CPU calls kmem during a crash
+ * dump. To enable this feature, first create the associated vmem
+ * arena with VMC_DUMPSAFE.
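+ *
+ * A minimal sketch (arena and function names here are illustrative,
+ * not from this file): an arena created with something like
+ *
+ *	vmem_create("my_dumpsafe_arena", NULL, 0, PAGESIZE,
+ *	    segkmem_alloc, segkmem_free, heap_arena, 0,
+ *	    VM_SLEEP | VMC_DUMPSAFE);
+ *
+ * makes every kmem cache backed by it eligible for the pre-reserved
+ * dump heap set up by kmem_dump_init() below; kmem_dump_begin() checks
+ * cache_arena->vm_cflags for VMC_DUMPSAFE.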
+ */ +static void *kmem_dump_start; /* start of pre-reserved heap */ +static void *kmem_dump_end; /* end of heap area */ +static void *kmem_dump_curr; /* current free heap pointer */ +static size_t kmem_dump_size; /* size of heap area */ + +/* append to each buf created in the pre-reserved heap */ +typedef struct kmem_dumpctl { + void *kdc_next; /* cache dump free list linkage */ +} kmem_dumpctl_t; + +#define KMEM_DUMPCTL(cp, buf) \ +((kmem_dumpctl_t *)P2ROUNDUP((uintptr_t)(buf) + (cp)->cache_bufsize, \ +sizeof (void *))) + +/* Keep some simple stats. */ +#define KMEM_DUMP_LOGS (100) + +typedef struct kmem_dump_log { + kmem_cache_t *kdl_cache; + uint_t kdl_allocs; /* # of dump allocations */ + uint_t kdl_frees; /* # of dump frees */ + uint_t kdl_alloc_fails; /* # of allocation failures */ + uint_t kdl_free_nondump; /* # of non-dump frees */ + uint_t kdl_unsafe; /* cache was used, but unsafe */ +} kmem_dump_log_t; + +static kmem_dump_log_t *kmem_dump_log; +static int kmem_dump_log_idx; + +#define KDI_LOG(cp, stat) { \ +kmem_dump_log_t *kdl; \ +if ((kdl = (kmem_dump_log_t *)((cp)->cache_dumplog)) != NULL) { \ +kdl->stat++; \ +} else if (kmem_dump_log_idx < KMEM_DUMP_LOGS) { \ +kdl = &kmem_dump_log[kmem_dump_log_idx++]; \ +kdl->stat++; \ +kdl->kdl_cache = (cp); \ +(cp)->cache_dumplog = kdl; \ +} \ +} + +/* set non zero for full report */ +uint_t kmem_dump_verbose = 0; + +/* stats for overize heap */ +uint_t kmem_dump_oversize_allocs = 0; +uint_t kmem_dump_oversize_max = 0; + +static void +kmem_dumppr(char **pp, char *e, const char *format, ...) +{ + char *p = *pp; + + if (p < e) { + int n; + va_list ap; + + va_start(ap, format); + n = vsnprintf(p, e - p, format, ap); + va_end(ap); + *pp = p + n; + } +} + +/* + * Called when dumpadm(1M) configures dump parameters. + */ +void +kmem_dump_init(size_t size) +{ + if (kmem_dump_start != NULL) + zfs_kmem_free(kmem_dump_start, kmem_dump_size); + + if (kmem_dump_log == NULL) + kmem_dump_log = + (kmem_dump_log_t *)zfs_kmem_zalloc( + KMEM_DUMP_LOGS * sizeof (kmem_dump_log_t), KM_SLEEP); + + kmem_dump_start = zfs_kmem_alloc(size, KM_SLEEP); + + if (kmem_dump_start != NULL) { + kmem_dump_size = size; + kmem_dump_curr = kmem_dump_start; + kmem_dump_end = (void *)((char *)kmem_dump_start + size); + copy_pattern(KMEM_UNINITIALIZED_PATTERN, kmem_dump_start, size); + } else { + kmem_dump_size = 0; + kmem_dump_curr = NULL; + kmem_dump_end = NULL; + } +} + +/* + * Set flag for each kmem_cache_t if is safe to use alternate dump + * memory. Called just before panic crash dump starts. Set the flag + * for the calling CPU. 
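KMEM_DUMPCTL() above finds the per-buffer control header by rounding the address just past the buffer up to the next pointer boundary. A small self-contained sketch of that arithmetic (editorial, not part of the patch), re-deriving P2ROUNDUP() for illustration:

#include <stdint.h>
#include <stdio.h>

#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	uintptr_t buf = 0x1000;		/* hypothetical buffer address */
	size_t bufsize = 37;		/* hypothetical cache_bufsize */

	/* round (buf + bufsize) up to the next pointer-sized boundary */
	uintptr_t ctl = P2ROUNDUP(buf + bufsize, sizeof (void *));
	printf("buffer ends at %#lx, dumpctl at %#lx\n",
	    (unsigned long)(buf + bufsize), (unsigned long)ctl);
	return (0);
}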
+ */ +void +kmem_dump_begin(void) +{ + if (kmem_dump_start != NULL) { + kmem_cache_t *cp; + + for (cp = list_head(&kmem_caches); cp != NULL; + cp = list_next(&kmem_caches, cp)) { + kmem_cpu_cache_t *ccp = KMEM_CPU_CACHE(cp); + + if (cp->cache_arena->vm_cflags & VMC_DUMPSAFE) { + cp->cache_flags |= KMF_DUMPDIVERT; + ccp->cc_flags |= KMF_DUMPDIVERT; + ccp->cc_dump_rounds = ccp->cc_rounds; + ccp->cc_dump_prounds = ccp->cc_prounds; + ccp->cc_rounds = ccp->cc_prounds = -1; + } else { + cp->cache_flags |= KMF_DUMPUNSAFE; + ccp->cc_flags |= KMF_DUMPUNSAFE; + } + } + } +} + +/* + * finished dump intercept + * print any warnings on the console + * return verbose information to dumpsys() in the given buffer + */ +size_t +kmem_dump_finish(char *buf, size_t size) +{ + int kdi_idx; + int kdi_end = kmem_dump_log_idx; + int percent = 0; + int header = 0; + int warn = 0; + size_t used; + kmem_cache_t *cp; + kmem_dump_log_t *kdl; + char *e = buf + size; + char *p = buf; + + if (kmem_dump_size == 0 || kmem_dump_verbose == 0) + return (0); + + used = (char *)kmem_dump_curr - (char *)kmem_dump_start; + percent = (used * 100) / kmem_dump_size; + + kmem_dumppr(&p, e, "%% heap used,%d\n", percent); + kmem_dumppr(&p, e, "used bytes,%ld\n", used); + kmem_dumppr(&p, e, "heap size,%ld\n", kmem_dump_size); + kmem_dumppr(&p, e, "Oversize allocs,%d\n", + kmem_dump_oversize_allocs); + kmem_dumppr(&p, e, "Oversize max size,%ld\n", + kmem_dump_oversize_max); + + for (kdi_idx = 0; kdi_idx < kdi_end; kdi_idx++) { + kdl = &kmem_dump_log[kdi_idx]; + cp = kdl->kdl_cache; + if (cp == NULL) + break; + if (kdl->kdl_alloc_fails) + ++warn; + if (header == 0) { + kmem_dumppr(&p, e, + "Cache Name,Allocs,Frees,Alloc Fails," + "Nondump Frees,Unsafe Allocs/Frees\n"); + header = 1; + } + kmem_dumppr(&p, e, "%s,%d,%d,%d,%d,%d\n", + cp->cache_name, kdl->kdl_allocs, kdl->kdl_frees, + kdl->kdl_alloc_fails, kdl->kdl_free_nondump, + kdl->kdl_unsafe); + } + + /* return buffer size used */ + if (p < e) + bzero(p, e - p); + return (p - buf); +} + +/* + * Allocate a constructed object from alternate dump memory. + */ +void * +kmem_cache_alloc_dump(kmem_cache_t *cp, int kmflag) +{ + void *buf; + void *curr; + char *bufend; + + /* return a constructed object */ + if ((buf = cp->cache_dumpfreelist) != NULL) { + cp->cache_dumpfreelist = KMEM_DUMPCTL(cp, buf)->kdc_next; + KDI_LOG(cp, kdl_allocs); + return (buf); + } + + /* create a new constructed object */ + curr = kmem_dump_curr; + buf = (void *)P2ROUNDUP((uintptr_t)curr, cp->cache_align); + bufend = (char *)KMEM_DUMPCTL(cp, buf) + sizeof (kmem_dumpctl_t); + + /* hat layer objects cannot cross a page boundary */ + if (cp->cache_align < PAGESIZE) { + char *page = (char *)P2ROUNDUP((uintptr_t)buf, PAGESIZE); + if (bufend > page) { + bufend += page - (char *)buf; + buf = (void *)page; + } + } + + /* fall back to normal alloc if reserved area is used up */ + if (bufend > (char *)kmem_dump_end) { + kmem_dump_curr = kmem_dump_end; + KDI_LOG(cp, kdl_alloc_fails); + return (NULL); + } + + /* + * Must advance curr pointer before calling a constructor that + * may also allocate memory. 
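kmem_cache_alloc_dump() below is essentially a bump-pointer allocator over the pre-reserved dump region: align the cursor, check for exhaustion, advance, and let the caller fall back to the normal slab path when the region runs out. A minimal standalone sketch of that pattern (editorial; names and sizes are hypothetical, and align must be a power of two):

#include <stddef.h>
#include <stdint.h>

static char dump_heap[64 * 1024];
static char *dump_curr = dump_heap;
static char *dump_end = dump_heap + sizeof (dump_heap);

static void *
dump_alloc(size_t size, size_t align)
{
	/* round the cursor up to the requested (power-of-two) alignment */
	uintptr_t p = ((uintptr_t)dump_curr + align - 1) & ~(align - 1);

	if (p + size > (uintptr_t)dump_end)
		return (NULL);		/* caller falls back to the slab layer */
	dump_curr = (char *)(p + size);
	return ((void *)p);
}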
+ */ + kmem_dump_curr = bufend; + + /* run constructor */ + if (cp->cache_constructor != NULL && + cp->cache_constructor(buf, cp->cache_private, kmflag) + != 0) { +#ifdef DEBUG + printf("name='%s' cache=0x%p: kmem cache constructor failed\n", + cp->cache_name, (void *)cp); +#endif + /* reset curr pointer iff no allocs were done */ + if (kmem_dump_curr == bufend) + kmem_dump_curr = curr; + + /* fall back to normal alloc if the constructor fails */ + KDI_LOG(cp, kdl_alloc_fails); + return (NULL); + } + + KDI_LOG(cp, kdl_allocs); + return (buf); +} + +/* + * Free a constructed object in alternate dump memory. + */ +int +kmem_cache_free_dump(kmem_cache_t *cp, void *buf) +{ + /* save constructed buffers for next time */ + if ((char *)buf >= (char *)kmem_dump_start && + (char *)buf < (char *)kmem_dump_end) { + KMEM_DUMPCTL(cp, buf)->kdc_next = cp->cache_dumpfreelist; + cp->cache_dumpfreelist = buf; + KDI_LOG(cp, kdl_frees); + return (0); + } + + /* count all non-dump buf frees */ + KDI_LOG(cp, kdl_free_nondump); + + /* just drop buffers that were allocated before dump started */ + if (kmem_dump_curr < kmem_dump_end) + return (0); + + /* fall back to normal free if reserved area is used up */ + return (1); +} + +/* + * Allocate a constructed object from cache cp. + */ +void * +kmem_cache_alloc(kmem_cache_t *cp, int kmflag) +{ + kmem_cpu_cache_t *ccp = KMEM_CPU_CACHE(cp); + kmem_magazine_t *fmp; + void *buf; + mutex_enter(&ccp->cc_lock); + for (;;) { + /* + * If there's an object available in the current CPU's + * loaded magazine, just take it and return. + */ + if (ccp->cc_rounds > 0) { + buf = ccp->cc_loaded->mag_round[--ccp->cc_rounds]; + ccp->cc_alloc++; + mutex_exit(&ccp->cc_lock); + if (ccp->cc_flags & (KMF_BUFTAG | KMF_DUMPUNSAFE)) { + if (ccp->cc_flags & KMF_DUMPUNSAFE) { + ASSERT(!(ccp->cc_flags & + KMF_DUMPDIVERT)); + KDI_LOG(cp, kdl_unsafe); + } + if ((ccp->cc_flags & KMF_BUFTAG) && + kmem_cache_alloc_debug(cp, buf, kmflag, 0, + caller()) != 0) { + if (kmflag & KM_NOSLEEP) + return (NULL); + mutex_enter(&ccp->cc_lock); + continue; + } + } + return (buf); + } + + /* + * The loaded magazine is empty. If the previously loaded + * magazine was full, exchange them and try again. + */ + if (ccp->cc_prounds > 0) { + kmem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds); + continue; + } + + /* + * Return an alternate buffer at dump time to preserve + * the heap. + */ + if (ccp->cc_flags & (KMF_DUMPDIVERT | KMF_DUMPUNSAFE)) { + if (ccp->cc_flags & KMF_DUMPUNSAFE) { + ASSERT(!(ccp->cc_flags & KMF_DUMPDIVERT)); + /* log it so that we can warn about it */ + KDI_LOG(cp, kdl_unsafe); + } else { + if ((buf = kmem_cache_alloc_dump(cp, kmflag)) != + NULL) { + mutex_exit(&ccp->cc_lock); + return (buf); + } + break; /* fall back to slab layer */ + } + } + + /* + * If the magazine layer is disabled, break out now. + */ + if (ccp->cc_magsize == 0) + break; + + /* + * Try to get a full magazine from the depot. + */ + fmp = kmem_depot_alloc(cp, &cp->cache_full); + if (fmp != NULL) { + if (ccp->cc_ploaded != NULL) + kmem_depot_free(cp, &cp->cache_empty, + ccp->cc_ploaded); + kmem_cpu_reload(ccp, fmp, ccp->cc_magsize); + continue; + } + + /* + * There are no full magazines in the depot, + * so fall through to the slab layer. + */ + break; + } + mutex_exit(&ccp->cc_lock); + + /* + * We couldn't allocate a constructed object from the magazine layer, + * so get a raw buffer from the slab layer and apply its constructor. 
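The allocation loop above tries, in order, the CPU's loaded magazine, the previously loaded magazine, and a full magazine fetched from the depot, before giving up and going to the slab layer. The following is a condensed editorial restatement of just that control flow, using the types and helpers defined in this file; it is a reading aid, not a drop-in replacement (the cc_lock handling and the debug/dump branches are omitted):

static void *
magazine_alloc_fastpath(kmem_cache_t *cp, kmem_cpu_cache_t *ccp)
{
	kmem_magazine_t *fmp;

	for (;;) {
		/* 1. take a round from the loaded magazine */
		if (ccp->cc_rounds > 0)
			return (ccp->cc_loaded->mag_round[--ccp->cc_rounds]);

		/* 2. loaded magazine is empty; swap in the previous one */
		if (ccp->cc_prounds > 0) {
			kmem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds);
			continue;
		}

		/* magazines disabled for this cache */
		if (ccp->cc_magsize == 0)
			return (NULL);

		/* 3. trade the previous (empty) magazine for a full one */
		if ((fmp = kmem_depot_alloc(cp, &cp->cache_full)) == NULL)
			return (NULL);	/* caller goes to the slab layer */
		if (ccp->cc_ploaded != NULL)
			kmem_depot_free(cp, &cp->cache_empty, ccp->cc_ploaded);
		kmem_cpu_reload(ccp, fmp, ccp->cc_magsize);
	}
}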
+ */ + buf = kmem_slab_alloc(cp, kmflag); + + if (buf == NULL) + return (NULL); + + if (cp->cache_flags & KMF_BUFTAG) { + /* + * Make kmem_cache_alloc_debug() apply the constructor for us. + */ + int rc = kmem_cache_alloc_debug(cp, buf, kmflag, 1, caller()); + if (rc != 0) { + if (kmflag & KM_NOSLEEP) + return (NULL); + /* + * kmem_cache_alloc_debug() detected corruption + * but didn't panic (kmem_panic <= 0). We should not be + * here because the constructor failed (indicated by a + * return code of 1). Try again. + */ + ASSERT(rc == -1); + return (kmem_cache_alloc(cp, kmflag)); + } + return (buf); + } + + if (cp->cache_constructor != NULL && + cp->cache_constructor(buf, cp->cache_private, kmflag) != 0) { + atomic_inc_64(&cp->cache_alloc_fail); + kmem_slab_free(cp, buf); + return (NULL); + } + + return (buf); +} + +/* + * The freed argument tells whether or not kmem_cache_free_debug() has already + * been called so that we can avoid the duplicate free error. For example, a + * buffer on a magazine has already been freed by the client but is still + * constructed. + */ +static void +kmem_slab_free_constructed(kmem_cache_t *cp, void *buf, boolean_t freed) +{ + if (!freed && (cp->cache_flags & KMF_BUFTAG)) + if (kmem_cache_free_debug(cp, buf, caller()) == -1) + return; + + /* + * Note that if KMF_DEADBEEF is in effect and KMF_LITE is not, + * kmem_cache_free_debug() will have already applied the destructor. + */ + if ((cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) != KMF_DEADBEEF && + cp->cache_destructor != NULL) { + if (cp->cache_flags & KMF_DEADBEEF) { /* KMF_LITE implied */ + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + *(uint64_t *)buf = btp->bt_redzone; + cp->cache_destructor(buf, cp->cache_private); + *(uint64_t *)buf = KMEM_FREE_PATTERN; + } else { + cp->cache_destructor(buf, cp->cache_private); + } + } + + kmem_slab_free(cp, buf); +} + +/* + * Used when there's no room to free a buffer to the per-CPU cache. + * Drops and re-acquires &ccp->cc_lock, and returns non-zero if the + * caller should try freeing to the per-CPU cache again. + * Note that we don't directly install the magazine in the cpu cache, + * since its state may have changed wildly while the lock was dropped. + */ +static int +kmem_cpucache_magazine_alloc(kmem_cpu_cache_t *ccp, kmem_cache_t *cp) +{ + kmem_magazine_t *emp; + kmem_magtype_t *mtp; + + ASSERT(MUTEX_HELD(&ccp->cc_lock)); + ASSERT(((uint_t)ccp->cc_rounds == ccp->cc_magsize || + ((uint_t)ccp->cc_rounds == -1)) && + ((uint_t)ccp->cc_prounds == ccp->cc_magsize || + ((uint_t)ccp->cc_prounds == -1))); + + emp = kmem_depot_alloc(cp, &cp->cache_empty); + if (emp != NULL) { + if (ccp->cc_ploaded != NULL) + kmem_depot_free(cp, &cp->cache_full, + ccp->cc_ploaded); + kmem_cpu_reload(ccp, emp, 0); + return (1); + } + /* + * There are no empty magazines in the depot, + * so try to allocate a new one. We must drop all locks + * across kmem_cache_alloc() because lower layers may + * attempt to allocate from this cache. + */ + mtp = cp->cache_magtype; + mutex_exit(&ccp->cc_lock); + emp = kmem_cache_alloc(mtp->mt_cache, KM_NOSLEEP); + mutex_enter(&ccp->cc_lock); + + if (emp != NULL) { + /* + * We successfully allocated an empty magazine. + * However, we had to drop ccp->cc_lock to do it, + * so the cache's magazine size may have changed. + * If so, free the magazine and try again. 
+ */ + if (ccp->cc_magsize != mtp->mt_magsize) { + mutex_exit(&ccp->cc_lock); + kmem_cache_free(mtp->mt_cache, emp); + mutex_enter(&ccp->cc_lock); + return (1); + } + + /* + * We got a magazine of the right size. Add it to + * the depot and try the whole dance again. + */ + kmem_depot_free(cp, &cp->cache_empty, emp); + return (1); + } + + /* + * We couldn't allocate an empty magazine, + * so fall through to the slab layer. + */ + return (0); +} + +/* + * If the cache's parent arena is a leaf arena (i.e., it imports all its memory) + * then we can consider it fragmented if either there is 1 GiB free in the arena + * or one eighth of the arena is free. + * + * This is useful in kmem_cache_free{_debug} to determine whether to free to the + * slab layer if the loaded magazine is full. + */ +static inline boolean_t +kmem_cache_parent_arena_fragmented(kmem_cache_t *cp) +{ + const vmem_kstat_t *kp = &cp->cache_arena->vm_kstat; + const int64_t vk_import = kp->vk_mem_import.value.ui64; + const int64_t vk_inuse = kp->vk_mem_inuse.value.ui64; + const int64_t vk_total = kp->vk_mem_total.value.ui64; + + if (vk_import == vk_total && vk_inuse < vk_total) { + const int64_t vk_free = vk_total - vk_inuse; + const int64_t highthresh = 1024LL*1024LL*1024LL; + // we are fragmented if we have 1GiB free + if (vk_free >= highthresh) + return (B_TRUE); + // we are fragmented if at least 1/8 of the + // total arena space is free + if (vk_free > 0 && vk_total > 0) { + const int64_t eighth_total = vk_total / 8; + if (vk_free >= eighth_total) + return (B_TRUE); + } + } + return (B_FALSE); +} + +/* + * Free a constructed object to cache cp. + */ +void +kmem_cache_free(kmem_cache_t *cp, void *buf) +{ + kmem_cpu_cache_t *ccp = KMEM_CPU_CACHE(cp); + + /* + * The client must not free either of the buffers passed to the move + * callback function. + */ + ASSERT(cp->cache_defrag == NULL || + cp->cache_defrag->kmd_thread != spl_current_thread() || + (buf != cp->cache_defrag->kmd_from_buf && + buf != cp->cache_defrag->kmd_to_buf)); + + if (ccp->cc_flags & (KMF_BUFTAG | KMF_DUMPDIVERT | KMF_DUMPUNSAFE)) { + if (ccp->cc_flags & KMF_DUMPUNSAFE) { + ASSERT(!(ccp->cc_flags & KMF_DUMPDIVERT)); + /* log it so that we can warn about it */ + KDI_LOG(cp, kdl_unsafe); + } else if (KMEM_DUMPCC(ccp) && !kmem_cache_free_dump(cp, buf)) { + return; + } + if (ccp->cc_flags & KMF_BUFTAG) { + if (kmem_cache_free_debug(cp, buf, caller()) == -1) + return; + } + } + + mutex_enter(&ccp->cc_lock); + /* + * Any changes to this logic should be reflected in kmem_slab_prefill() + */ + for (;;) { + /* + * If there's a slot available in the current CPU's + * loaded magazine, just put the object there and return. + */ + if ((uint_t)ccp->cc_rounds < ccp->cc_magsize) { + ccp->cc_loaded->mag_round[ccp->cc_rounds++] = buf; + ccp->cc_free++; + mutex_exit(&ccp->cc_lock); + return; + } + + /* + * If the magazine layer is disabled, break out now. + */ + if (ccp->cc_magsize == 0) { + break; + } + + /* + * The magazine layer is on, but the loaded magazine is now + * full (of allocatable constructed elements). + * + * If the cache's arena is badly fragmented, break out now; + * this frees to the slab layer. + * + * Note: this is not reflected in kmem_slab_prefill() which + * deals with a freshly allocated slab. + */ + + if (kmem_free_to_slab_when_fragmented == 1 && + kmem_cache_parent_arena_fragmented(cp)) + break; + + /* + * The loaded magazine is full. If the previously loaded + * magazine was empty, exchange them and try again. 
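kmem_cache_parent_arena_fragmented() above treats a leaf arena as fragmented once at least 1 GiB, or at least one eighth of the arena, is free. A standalone sketch evaluating those two thresholds on made-up figures (editorial, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int64_t vk_total = 6LL * 1024 * 1024 * 1024;	/* arena size: 6 GiB */
	int64_t vk_inuse = 5LL * 1024 * 1024 * 1024;	/* allocated: 5 GiB */
	int64_t vk_free = vk_total - vk_inuse;		/* free: 1 GiB */

	int fragmented =
	    (vk_free >= 1024LL * 1024 * 1024) ||	/* at least 1 GiB free */
	    (vk_free >= vk_total / 8);			/* at least 1/8 free */

	printf("free=%lld -> fragmented=%d\n", (long long)vk_free, fragmented);
	return (0);
}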
+ */ + if (ccp->cc_prounds == 0) { + kmem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds); + continue; + } + + if (!kmem_cpucache_magazine_alloc(ccp, cp)) { + /* + * We couldn't free our constructed object to the + * magazine layer, so apply its destructor and free it + * to the slab layer. + */ + break; + } + } + mutex_exit(&ccp->cc_lock); + kpreempt(KPREEMPT_SYNC); + kmem_slab_free_constructed(cp, buf, B_TRUE); +} + +/* + * Free a constructed object to cache cp. + * Do not free to the magazine layer. + * This is essentially just kmem_cache_free() without + * the for(;;) loop or the ccp critical section. + */ +void +kmem_cache_free_to_slab(kmem_cache_t *cp, void *buf) +{ + kmem_cpu_cache_t *ccp = KMEM_CPU_CACHE(cp); + + /* + * The client must not free either of the buffers passed to the move + * callback function. + */ + ASSERT(cp->cache_defrag == NULL || + cp->cache_defrag->kmd_thread != spl_current_thread() || + (buf != cp->cache_defrag->kmd_from_buf && + buf != cp->cache_defrag->kmd_to_buf)); + + if (ccp->cc_flags & (KMF_BUFTAG | KMF_DUMPDIVERT | KMF_DUMPUNSAFE)) { + if (ccp->cc_flags & KMF_DUMPUNSAFE) { + ASSERT(!(ccp->cc_flags & KMF_DUMPDIVERT)); + /* log it so that we can warn about it */ + KDI_LOG(cp, kdl_unsafe); + } else if (KMEM_DUMPCC(ccp) && !kmem_cache_free_dump(cp, buf)) { + return; + } + if (ccp->cc_flags & KMF_BUFTAG) { + if (kmem_cache_free_debug(cp, buf, caller()) == -1) + return; + } + } + + /* omitted the for(;;) loop from kmem_cache_free */ + /* also do not take ccp mutex */ + + kmem_slab_free_constructed(cp, buf, B_TRUE); +} + +static void +kmem_slab_prefill(kmem_cache_t *cp, kmem_slab_t *sp) +{ + kmem_cpu_cache_t *ccp = KMEM_CPU_CACHE(cp); + + kmem_bufctl_t *next, *head; + size_t nbufs; + + /* + * Completely allocate the newly created slab and put the pre-allocated + * buffers in magazines. Any of the buffers that cannot be put in + * magazines must be returned to the slab. + */ + ASSERT(MUTEX_HELD(&cp->cache_lock)); + ASSERT(cp->cache_constructor == NULL); + ASSERT(sp->slab_cache == cp); + ASSERT(sp->slab_refcnt == 1); + ASSERT(sp->slab_head != NULL && sp->slab_chunks > sp->slab_refcnt); + ASSERT(avl_find(&cp->cache_partial_slabs, sp, NULL) == NULL); + + head = sp->slab_head; + nbufs = (sp->slab_chunks - sp->slab_refcnt); + sp->slab_head = NULL; + sp->slab_refcnt += nbufs; + cp->cache_bufslab -= nbufs; + cp->cache_slab_alloc += nbufs; + list_insert_head(&cp->cache_complete_slabs, sp); + cp->cache_complete_slab_count++; + mutex_exit(&cp->cache_lock); + mutex_enter(&ccp->cc_lock); + + while (head != NULL) { + void *buf = KMEM_BUF(cp, head); + /* + * If there's a slot available in the current CPU's + * loaded magazine, just put the object there and + * continue. + */ + if ((uint_t)ccp->cc_rounds < ccp->cc_magsize) { + ccp->cc_loaded->mag_round[ccp->cc_rounds++] = + buf; + ccp->cc_free++; + nbufs--; + head = head->bc_next; + continue; + } + + /* + * The loaded magazine is full. If the previously + * loaded magazine was empty, exchange them and try + * again. + */ + if (ccp->cc_prounds == 0) { + kmem_cpu_reload(ccp, ccp->cc_ploaded, + ccp->cc_prounds); + continue; + } + + /* + * If the magazine layer is disabled, break out now. 
+ */ + + if (ccp->cc_magsize == 0) { + break; + } + + if (!kmem_cpucache_magazine_alloc(ccp, cp)) + break; + } + mutex_exit(&ccp->cc_lock); + if (nbufs != 0) { + ASSERT(head != NULL); + + /* + * If there was a failure, return remaining objects to + * the slab + */ + while (head != NULL) { + ASSERT(nbufs != 0); + next = head->bc_next; + head->bc_next = NULL; + kmem_slab_free(cp, KMEM_BUF(cp, head)); + head = next; + nbufs--; + } + } + ASSERT(head == NULL); + ASSERT(nbufs == 0); + mutex_enter(&cp->cache_lock); +} + +void * +zfs_kmem_zalloc(size_t size, int kmflag) +{ + size_t index; + void *buf; + + if ((index = ((size - 1) >> KMEM_ALIGN_SHIFT)) < KMEM_ALLOC_TABLE_MAX) { + kmem_cache_t *cp = kmem_alloc_table[index]; + buf = kmem_cache_alloc(cp, kmflag); + if (buf != NULL) { + if ((cp->cache_flags & KMF_BUFTAG) && !KMEM_DUMP(cp)) { + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + ((uint8_t *)buf)[size] = KMEM_REDZONE_BYTE; + ((uint32_t *)btp)[1] = KMEM_SIZE_ENCODE(size); + + if (cp->cache_flags & KMF_LITE) { + KMEM_BUFTAG_LITE_ENTER(btp, + kmem_lite_count, caller()); + } + } + bzero(buf, size); + } + } else { + buf = zfs_kmem_alloc(size, kmflag); + if (buf != NULL) + bzero(buf, size); + } + return (buf); +} + +void * +zfs_kmem_alloc(size_t size, int kmflag) +{ + size_t index; + kmem_cache_t *cp; + void *buf; + + if (size == 0) + return (KMEM_ZERO_SIZE_PTR); + + if ((index = ((size - 1) >> KMEM_ALIGN_SHIFT)) < KMEM_ALLOC_TABLE_MAX) { + cp = kmem_alloc_table[index]; + /* fall through to kmem_cache_alloc() */ + + } else if ((index = ((size - 1) >> KMEM_BIG_SHIFT)) < + kmem_big_alloc_table_max) { + cp = kmem_big_alloc_table[index]; + /* fall through to kmem_cache_alloc() */ + + } else { + + buf = vmem_alloc(kmem_oversize_arena, size, + kmflag & KM_VMFLAGS); + if (buf == NULL) + kmem_log_event(kmem_failure_log, NULL, NULL, + (void *)size); + else if (KMEM_DUMP(kmem_slab_cache)) { + /* stats for dump intercept */ + kmem_dump_oversize_allocs++; + if (size > kmem_dump_oversize_max) + kmem_dump_oversize_max = size; + } + return (buf); + } + + buf = kmem_cache_alloc(cp, kmflag); + if ((cp->cache_flags & KMF_BUFTAG) && !KMEM_DUMP(cp) && buf != NULL) { + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + ((uint8_t *)buf)[size] = KMEM_REDZONE_BYTE; + ((uint32_t *)btp)[1] = KMEM_SIZE_ENCODE(size); + + if (cp->cache_flags & KMF_LITE) { + KMEM_BUFTAG_LITE_ENTER(btp, kmem_lite_count, caller()); + } + } + return (buf); +} + +void +zfs_kmem_free(void *buf, size_t size) +{ + size_t index; + kmem_cache_t *cp; + + if (size == 0 || buf == KMEM_ZERO_SIZE_PTR || buf == NULL) + return; + + if ((index = (size - 1) >> KMEM_ALIGN_SHIFT) < KMEM_ALLOC_TABLE_MAX) { + cp = kmem_alloc_table[index]; + /* fall through to kmem_cache_free() */ + + } else if ((index = ((size - 1) >> KMEM_BIG_SHIFT)) < + kmem_big_alloc_table_max) { + cp = kmem_big_alloc_table[index]; + /* fall through to kmem_cache_free() */ + + } else { + vmem_free(kmem_oversize_arena, buf, size); + return; + } + + if ((cp->cache_flags & KMF_BUFTAG) && !KMEM_DUMP(cp)) { + kmem_buftag_t *btp = KMEM_BUFTAG(cp, buf); + uint32_t *ip = (uint32_t *)btp; + if (ip[1] != KMEM_SIZE_ENCODE(size)) { + if (*(uint64_t *)buf == KMEM_FREE_PATTERN) { + kmem_error(KMERR_DUPFREE, cp, buf); + return; + } + if (KMEM_SIZE_VALID(ip[1])) { + ip[0] = KMEM_SIZE_ENCODE(size); + kmem_error(KMERR_BADSIZE, cp, buf); + } else { + kmem_error(KMERR_REDZONE, cp, buf); + } + return; + } + if (((uint8_t *)buf)[size] != KMEM_REDZONE_BYTE) { + kmem_error(KMERR_REDZONE, cp, buf); + return; + } + 
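zfs_kmem_alloc() and zfs_kmem_zalloc() above map a request size to a cache with a table lookup: index = (size - 1) >> KMEM_ALIGN_SHIFT. A standalone sketch of that mapping (editorial, not part of the patch), assuming a shift of 3, i.e. 8-byte granularity, purely for illustration:

#include <stdio.h>

#define	EX_ALIGN_SHIFT	3	/* hypothetical stand-in for KMEM_ALIGN_SHIFT */

int
main(void)
{
	size_t sizes[] = { 1, 8, 9, 16, 24, 64 };

	for (size_t i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++) {
		size_t index = (sizes[i] - 1) >> EX_ALIGN_SHIFT;
		printf("size %zu -> kmem_alloc_table[%zu]\n", sizes[i], index);
	}
	return (0);
}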
btp->bt_redzone = KMEM_REDZONE_PATTERN; + if (cp->cache_flags & KMF_LITE) { + KMEM_BUFTAG_LITE_ENTER(btp, kmem_lite_count, + caller()); + } + } + kmem_cache_free(cp, buf); +} + +/* + * Try to allocate at least `size' bytes of memory without sleeping or + * panicking. Return actual allocated size in `asize'. If allocation failed, + * try final allocation with sleep or panic allowed. + */ +void * +kmem_alloc_tryhard(size_t size, size_t *asize, int kmflag) +{ + void *p; + + *asize = P2ROUNDUP(size, KMEM_ALIGN); + do { + p = kmem_alloc(*asize, (kmflag | KM_NOSLEEP) & ~KM_PANIC); + if (p != NULL) + return (p); + *asize += KMEM_ALIGN; + } while (*asize <= PAGESIZE); + + *asize = P2ROUNDUP(size, KMEM_ALIGN); + return (zfs_kmem_alloc(*asize, kmflag)); +} + +/* + * Reclaim all unused memory from a cache. + */ +static void +kmem_cache_reap(kmem_cache_t *cp) +{ + ASSERT(taskq_member(kmem_taskq, curthread)); + + cp->cache_reap++; + + /* + * Ask the cache's owner to free some memory if possible. + * The idea is to handle things like the inode cache, which + * typically sits on a bunch of memory that it doesn't truly + * *need*. Reclaim policy is entirely up to the owner; this + * callback is just an advisory plea for help. + */ + if (cp->cache_reclaim != NULL) { + long delta; + + /* + * Reclaimed memory should be reapable (not included in the + * depot's working set). + */ + delta = cp->cache_full.ml_total; + cp->cache_reclaim(cp->cache_private); + delta = cp->cache_full.ml_total - delta; + if (delta > 0) { + mutex_enter(&cp->cache_depot_lock); + cp->cache_full.ml_reaplimit += delta; + cp->cache_full.ml_min += delta; + mutex_exit(&cp->cache_depot_lock); + } + } + + kmem_depot_ws_reap(cp); + + if (cp->cache_defrag != NULL && !kmem_move_noreap) { + kmem_cache_defrag(cp); + } +} + +static void +kmem_reap_timeout(void *flag_arg) +{ + uint32_t *flag = (uint32_t *)flag_arg; + + ASSERT(flag == &kmem_reaping || flag == &kmem_reaping_idspace); + *flag = 0; +} + +static void +kmem_reap_done(void *flag) +{ + (void) bsd_timeout(kmem_reap_timeout, flag, &kmem_reap_interval); +} + +static void +kmem_reap_start(void *flag) +{ + ASSERT(flag == &kmem_reaping || flag == &kmem_reaping_idspace); + + if (flag == &kmem_reaping) { + kmem_cache_applyall(kmem_cache_reap, kmem_taskq, TQ_NOSLEEP); + /* + * if we have segkp under heap, reap segkp cache. + */ + } + else + kmem_cache_applyall_id(kmem_cache_reap, kmem_taskq, TQ_NOSLEEP); + + /* + * We use taskq_dispatch() to schedule a timeout to clear + * the flag so that kmem_reap() becomes self-throttling: + * we won't reap again until the current reap completes *and* + * at least kmem_reap_interval ticks have elapsed. + */ + if (!taskq_dispatch(kmem_taskq, kmem_reap_done, flag, TQ_NOSLEEP)) + kmem_reap_done(flag); +} + +static void +kmem_reap_common(void *flag_arg) +{ + uint32_t *flag = (uint32_t *)flag_arg; + + + if (MUTEX_HELD(&kmem_cache_lock) || kmem_taskq == NULL || + atomic_cas_32(flag, 0, 1) != 0) + return; + + /* + * It may not be kosher to do memory allocation when a reap is called + * is called (for example, if vmem_populate() is in the call chain). + * So we start the reap going with a TQ_NOALLOC dispatch. If the + * dispatch fails, we reset the flag, and the next reap will try again. + */ + if (!taskq_dispatch(kmem_taskq, kmem_reap_start, flag, TQ_NOALLOC)) + *flag = 0; +} + +/* + * Reclaim all unused memory from all caches. Called from the VM system + * when memory gets tight. 
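kmem_reap_common() above admits only one reap at a time by doing a compare-and-swap on a flag; the flag is not cleared until a timeout fires after the reap completes, which is what makes kmem_reap() self-throttling. A standalone analogue of that gate using C11 atomics (editorial; names are hypothetical):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint reaping;

static bool
reap_try_begin(void)
{
	unsigned expected = 0;

	/* exactly one caller wins until reap_end() clears the flag */
	return (atomic_compare_exchange_strong(&reaping, &expected, 1));
}

static void
reap_end(void)	/* called from a timeout after the reap interval elapses */
{
	atomic_store(&reaping, 0);
}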
+ */ +void +kmem_reap(void) +{ + kmem_reap_common(&kmem_reaping); +} + +/* + * Reclaim all unused memory from identifier arenas, called when a vmem + * arena not back by memory is exhausted. Since reaping memory-backed caches + * cannot help with identifier exhaustion, we avoid both a large amount of + * work and unwanted side-effects from reclaim callbacks. + */ +void +kmem_reap_idspace(void) +{ + kmem_reap_common(&kmem_reaping_idspace); +} + +/* + * Purge all magazines from a cache and set its magazine limit to zero. + * All calls are serialized by the kmem_taskq lock, except for the final + * call from kmem_cache_destroy(). + */ +static void +kmem_cache_magazine_purge(kmem_cache_t *cp) +{ + kmem_cpu_cache_t *ccp; + kmem_magazine_t *mp, *pmp; + int rounds, prounds, cpu_seqid; + + ASSERT(!list_link_active(&cp->cache_link) || + taskq_member(kmem_taskq, curthread)); + ASSERT(MUTEX_NOT_HELD(&cp->cache_lock)); + + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) { + ccp = &cp->cache_cpu[cpu_seqid]; + + mutex_enter(&ccp->cc_lock); + mp = ccp->cc_loaded; + pmp = ccp->cc_ploaded; + rounds = ccp->cc_rounds; + prounds = ccp->cc_prounds; + ccp->cc_loaded = NULL; + ccp->cc_ploaded = NULL; + ccp->cc_rounds = -1; + ccp->cc_prounds = -1; + ccp->cc_magsize = 0; + mutex_exit(&ccp->cc_lock); + + if (mp) + kmem_magazine_destroy(cp, mp, rounds); + + if (pmp) + kmem_magazine_destroy(cp, pmp, prounds); + } + + kmem_depot_ws_zero(cp); + kmem_depot_ws_reap(cp); +} + +/* + * Enable per-cpu magazines on a cache. + */ +static void +kmem_cache_magazine_enable(kmem_cache_t *cp) +{ + int cpu_seqid; + + if (cp->cache_flags & KMF_NOMAGAZINE) + return; + + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) { + kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid]; + mutex_enter(&ccp->cc_lock); + ccp->cc_magsize = cp->cache_magtype->mt_magsize; + mutex_exit(&ccp->cc_lock); + } + +} + +static void +kmem_cache_magazine_disable(kmem_cache_t *cp) +{ + int cpu_seqid; + + if (cp->cache_flags & KMF_NOMAGAZINE) + return; + + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) { + kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid]; + mutex_enter(&ccp->cc_lock); + ccp->cc_magsize = 0; + mutex_exit(&ccp->cc_lock); + } + +} + +/* + * Allow our caller to determine if there are running reaps. + * + * This call is very conservative and may return B_TRUE even when + * reaping activity isn't active. If it returns B_FALSE, then reaping + * activity is definitely inactive. + */ +boolean_t +kmem_cache_reap_active(void) +{ + return (!taskq_empty(kmem_taskq)); +} + +/* + * Reap (almost) everything right now. + */ +void +kmem_cache_reap_now(kmem_cache_t *cp) +{ + ASSERT(list_link_active(&cp->cache_link)); + + kmem_depot_ws_zero(cp); + + (void) taskq_dispatch(kmem_taskq, + (task_func_t *)kmem_depot_ws_reap, cp, TQ_SLEEP); +} + +/* + * Recompute a cache's magazine size. The trade-off is that larger magazines + * provide a higher transfer rate with the depot, while smaller magazines + * reduce memory consumption. Magazine resizing is an expensive operation; + * it should not be done frequently. + * + * Changes to the magazine size are serialized by the kmem_taskq lock. + * + * Note: at present this only grows the magazine size. It might be useful + * to allow shrinkage too. 
+ */ +static void +kmem_cache_magazine_resize(kmem_cache_t *cp) +{ + kmem_magtype_t *mtp = cp->cache_magtype; + + ASSERT(taskq_member(kmem_taskq, curthread)); + + if (cp->cache_chunksize < mtp->mt_maxbuf) { + kmem_cache_magazine_purge(cp); + mutex_enter(&cp->cache_depot_lock); + cp->cache_magtype = ++mtp; + cp->cache_depot_contention_prev = + cp->cache_depot_contention + INT_MAX; + mutex_exit(&cp->cache_depot_lock); + kmem_cache_magazine_enable(cp); + } +} + +/* + * Rescale a cache's hash table, so that the table size is roughly the + * cache size. We want the average lookup time to be extremely small. + */ +static void +kmem_hash_rescale(kmem_cache_t *cp) +{ + kmem_bufctl_t **old_table, **new_table, *bcp; + size_t old_size, new_size, h; + + ASSERT(taskq_member(kmem_taskq, curthread)); + + new_size = MAX(KMEM_HASH_INITIAL, + 1 << (highbit(3 * cp->cache_buftotal + 4) - 2)); + old_size = cp->cache_hash_mask + 1; + + if ((old_size >> 1) <= new_size && new_size <= (old_size << 1)) + return; + + new_table = vmem_alloc(kmem_hash_arena, new_size * sizeof (void *), + VM_NOSLEEP); + if (new_table == NULL) + return; + bzero(new_table, new_size * sizeof (void *)); + + mutex_enter(&cp->cache_lock); + + old_size = cp->cache_hash_mask + 1; + old_table = cp->cache_hash_table; + + cp->cache_hash_mask = new_size - 1; + cp->cache_hash_table = new_table; + cp->cache_rescale++; + + for (h = 0; h < old_size; h++) { + bcp = old_table[h]; + while (bcp != NULL) { + void *addr = bcp->bc_addr; + kmem_bufctl_t *next_bcp = bcp->bc_next; + kmem_bufctl_t **hash_bucket = KMEM_HASH(cp, addr); + bcp->bc_next = *hash_bucket; + *hash_bucket = bcp; + bcp = next_bcp; + } + } + + mutex_exit(&cp->cache_lock); + + vmem_free(kmem_hash_arena, old_table, old_size * sizeof (void *)); +} + +/* + * Perform periodic maintenance on a cache: hash rescaling, depot working-set + * update, magazine resizing, and slab consolidation. + */ +static void +kmem_cache_update(kmem_cache_t *cp) +{ + int need_hash_rescale = 0; + int need_magazine_resize = 0; + + /* + * If the cache has become much larger or smaller than its hash table, + * fire off a request to rescale the hash table. + */ + mutex_enter(&cp->cache_lock); + + if ((cp->cache_flags & KMF_HASH) && + (cp->cache_buftotal > (cp->cache_hash_mask << 1) || + (cp->cache_buftotal < (cp->cache_hash_mask >> 1) && + cp->cache_hash_mask > KMEM_HASH_INITIAL))) + need_hash_rescale = 1; + + mutex_exit(&cp->cache_lock); + + /* + * Update the depot working set statistics. + */ + kmem_depot_ws_update(cp); + + /* + * If there's a lot of contention in the depot, + * increase the magazine size. 
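kmem_hash_rescale() above resizes the hash table to a power of two near three times the buffer count, and kmem_cache_update() only requests a rescale once the table is off by more than a factor of two. A standalone sketch of the size computation (editorial, not part of the patch), with a local stand-in for the kernel's highbit() and a hypothetical initial size of 64 buckets:

#include <stdio.h>

#define	EX_HASH_INITIAL	64	/* hypothetical initial bucket count */

static int
highbit_ex(unsigned long v)	/* 1-based index of the highest set bit */
{
	int h = 0;

	while (v != 0) {
		h++;
		v >>= 1;
	}
	return (h);
}

static unsigned long
hash_target_size(unsigned long buftotal)
{
	unsigned long grown = 1UL << (highbit_ex(3 * buftotal + 4) - 2);

	return (grown > EX_HASH_INITIAL ? grown : EX_HASH_INITIAL);
}

int
main(void)
{
	printf("%lu buffers -> %lu buckets\n", 10000UL,
	    hash_target_size(10000UL));
	return (0);
}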
+ */ + mutex_enter(&cp->cache_depot_lock); + + if (cp->cache_chunksize < cp->cache_magtype->mt_maxbuf && + (int)(cp->cache_depot_contention - + cp->cache_depot_contention_prev) > kmem_depot_contention) + need_magazine_resize = 1; + + cp->cache_depot_contention_prev = cp->cache_depot_contention; + + mutex_exit(&cp->cache_depot_lock); + + if (need_hash_rescale) + (void) taskq_dispatch(kmem_taskq, + (task_func_t *)kmem_hash_rescale, cp, TQ_NOSLEEP); + + if (need_magazine_resize) + (void) taskq_dispatch(kmem_taskq, + (task_func_t *)kmem_cache_magazine_resize, + cp, TQ_NOSLEEP); + + // smd : the following if is only true for the dnode cache + if (cp->cache_defrag != NULL) + (void) taskq_dispatch(kmem_taskq, + (task_func_t *)kmem_cache_scan, cp, TQ_NOSLEEP); + +#ifdef DEBUG + else { + // for every other cache, duplicate some of the logic from + // kmem_cache_scan() below + // run reap occasionally even if there is plenty of memory + uint16_t debug_rand; + + (void) random_get_bytes((uint8_t *)&debug_rand, 2); + if (!kmem_move_noreap && + ((debug_rand % kmem_mtb_reap) == 0)) { + /* + * no mutex above, so no need to give it up as + * in kmem_cache_scan() + */ + } + } +#endif + +} + +static void kmem_update(void *); + +static void +kmem_update_timeout(void *dummy) +{ + (void) bsd_timeout(kmem_update, dummy, &kmem_reap_interval); +} + +static void +kmem_update(void *dummy) +{ + kmem_cache_applyall(kmem_cache_update, NULL, TQ_NOSLEEP); + + /* + * We use taskq_dispatch() to reschedule the timeout so that + * kmem_update() becomes self-throttling: it won't schedule + * new tasks until all previous tasks have completed. + */ + if (!taskq_dispatch(kmem_taskq, kmem_update_timeout, dummy, TQ_NOSLEEP)) + kmem_update_timeout(NULL); + +} + +static int +kmem_cache_kstat_update(kstat_t *ksp, int rw) +{ + struct kmem_cache_kstat *kmcp = &kmem_cache_kstat; + kmem_cache_t *cp = ksp->ks_private; + uint64_t cpu_buf_avail; + uint64_t buf_avail = 0; + int cpu_seqid; + long reap; + + if (rw == KSTAT_WRITE) + return (EACCES); + + mutex_enter(&cp->cache_lock); + + kmcp->kmc_alloc_fail.value.ui64 = cp->cache_alloc_fail; + kmcp->kmc_alloc.value.ui64 = cp->cache_slab_alloc; + kmcp->kmc_free.value.ui64 = cp->cache_slab_free; + kmcp->kmc_slab_alloc.value.ui64 = cp->cache_slab_alloc; + kmcp->kmc_slab_free.value.ui64 = cp->cache_slab_free; + kmcp->kmc_no_vba_success.value.ui64 = cp->no_vba_success; + kmcp->kmc_no_vba_fail.value.ui64 = cp->no_vba_fail; + kmcp->kmc_arc_no_grow_set.value.ui64 = cp->arc_no_grow_set; + kmcp->kmc_arc_no_grow.value.ui64 = cp->arc_no_grow; + + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) { + kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid]; + + mutex_enter(&ccp->cc_lock); + + cpu_buf_avail = 0; + if (ccp->cc_rounds > 0) + cpu_buf_avail += ccp->cc_rounds; + if (ccp->cc_prounds > 0) + cpu_buf_avail += ccp->cc_prounds; + + kmcp->kmc_alloc.value.ui64 += ccp->cc_alloc; + kmcp->kmc_free.value.ui64 += ccp->cc_free; + buf_avail += cpu_buf_avail; + + mutex_exit(&ccp->cc_lock); + } + + mutex_enter(&cp->cache_depot_lock); + + kmcp->kmc_depot_alloc.value.ui64 = cp->cache_full.ml_alloc; + kmcp->kmc_depot_free.value.ui64 = cp->cache_empty.ml_alloc; + kmcp->kmc_depot_contention.value.ui64 = cp->cache_depot_contention; + kmcp->kmc_full_magazines.value.ui64 = cp->cache_full.ml_total; + kmcp->kmc_empty_magazines.value.ui64 = cp->cache_empty.ml_total; + kmcp->kmc_magazine_size.value.ui64 = + (cp->cache_flags & KMF_NOMAGAZINE) ? 
+ 0 : cp->cache_magtype->mt_magsize; + + kmcp->kmc_alloc.value.ui64 += cp->cache_full.ml_alloc; + kmcp->kmc_free.value.ui64 += cp->cache_empty.ml_alloc; + buf_avail += cp->cache_full.ml_total * cp->cache_magtype->mt_magsize; + + reap = MIN(cp->cache_full.ml_reaplimit, cp->cache_full.ml_min); + reap = MIN(reap, cp->cache_full.ml_total); + + mutex_exit(&cp->cache_depot_lock); + + kmcp->kmc_buf_size.value.ui64 = cp->cache_bufsize; + kmcp->kmc_align.value.ui64 = cp->cache_align; + kmcp->kmc_chunk_size.value.ui64 = cp->cache_chunksize; + kmcp->kmc_slab_size.value.ui64 = cp->cache_slabsize; + kmcp->kmc_buf_constructed.value.ui64 = buf_avail; + buf_avail += cp->cache_bufslab; + kmcp->kmc_buf_avail.value.ui64 = buf_avail; + kmcp->kmc_buf_inuse.value.ui64 = cp->cache_buftotal - buf_avail; + kmcp->kmc_buf_total.value.ui64 = cp->cache_buftotal; + kmcp->kmc_buf_max.value.ui64 = cp->cache_bufmax; + kmcp->kmc_slab_create.value.ui64 = cp->cache_slab_create; + kmcp->kmc_slab_destroy.value.ui64 = cp->cache_slab_destroy; + kmcp->kmc_hash_size.value.ui64 = (cp->cache_flags & KMF_HASH) ? + cp->cache_hash_mask + 1 : 0; + kmcp->kmc_hash_lookup_depth.value.ui64 = cp->cache_lookup_depth; + kmcp->kmc_hash_rescale.value.ui64 = cp->cache_rescale; + kmcp->kmc_vmem_source.value.ui64 = cp->cache_arena->vm_id; + kmcp->kmc_reap.value.ui64 = cp->cache_reap; + + if (cp->cache_defrag == NULL) { + kmcp->kmc_move_callbacks.value.ui64 = 0; + kmcp->kmc_move_yes.value.ui64 = 0; + kmcp->kmc_move_no.value.ui64 = 0; + kmcp->kmc_move_later.value.ui64 = 0; + kmcp->kmc_move_dont_need.value.ui64 = 0; + kmcp->kmc_move_dont_know.value.ui64 = 0; + kmcp->kmc_move_hunt_found.value.ui64 = 0; + kmcp->kmc_move_slabs_freed.value.ui64 = 0; + kmcp->kmc_defrag.value.ui64 = 0; + kmcp->kmc_scan.value.ui64 = 0; + kmcp->kmc_move_reclaimable.value.ui64 = 0; + } else { + int64_t reclaimable; + + kmem_defrag_t *kd = cp->cache_defrag; + kmcp->kmc_move_callbacks.value.ui64 = kd->kmd_callbacks; + kmcp->kmc_move_yes.value.ui64 = kd->kmd_yes; + kmcp->kmc_move_no.value.ui64 = kd->kmd_no; + kmcp->kmc_move_later.value.ui64 = kd->kmd_later; + kmcp->kmc_move_dont_need.value.ui64 = kd->kmd_dont_need; + kmcp->kmc_move_dont_know.value.ui64 = kd->kmd_dont_know; + kmcp->kmc_move_hunt_found.value.ui64 = 0; + kmcp->kmc_move_slabs_freed.value.ui64 = kd->kmd_slabs_freed; + kmcp->kmc_defrag.value.ui64 = kd->kmd_defrags; + kmcp->kmc_scan.value.ui64 = kd->kmd_scans; + + reclaimable = cp->cache_bufslab - (cp->cache_maxchunks - 1); + reclaimable = MAX(reclaimable, 0); + reclaimable += ((uint64_t)reap * cp->cache_magtype->mt_magsize); + kmcp->kmc_move_reclaimable.value.ui64 = reclaimable; + } + + mutex_exit(&cp->cache_lock); + return (0); +} + +/* + * Return a named statistic about a particular cache. + * This shouldn't be called very often, so it's currently designed for + * simplicity (leverages existing kstat support) rather than efficiency. 
+ */
+uint64_t
+kmem_cache_stat(kmem_cache_t *cp, char *name)
+{
+	int i;
+	kstat_t *ksp = cp->cache_kstat;
+	kstat_named_t *knp = (kstat_named_t *)&kmem_cache_kstat;
+	uint64_t value = 0;
+
+	if (ksp != NULL) {
+		mutex_enter(&kmem_cache_kstat_lock);
+		(void) kmem_cache_kstat_update(ksp, KSTAT_READ);
+		for (i = 0; i < ksp->ks_ndata; i++) {
+			if (strcmp(knp[i].name, name) == 0) {
+				value = knp[i].value.ui64;
+				break;
+			}
+		}
+		mutex_exit(&kmem_cache_kstat_lock);
+	}
+	return (value);
+}
+
+// TRUE if we have more than a critical minimum of memory
+// used in arc_memory_throttle; if FALSE, we throttle
+static inline bool
+spl_minimal_physmem_p_logic()
+{
+	// do we have enough memory to avoid throttling?
+	if (vm_page_free_wanted > 0)
+		return (false);
+	if (vm_page_free_count < (vm_page_free_min + 512))
+		// 512 pages above 3500 (normal vm_page_free_min)
+		// 2MiB above 13 MiB
+		return (false);
+	return (true);
+}
+
+int32_t
+spl_minimal_physmem_p(void)
+{
+
+	// arc will throttle if we are paging, otherwise
+	// we want a small bit of pressure here so that we can compete
+	// a little with the xnu buffer cache
+
+	return (spl_free > -1024LL);
+}
+
+/*
+ * Return the maximum amount of memory that is (in theory) allocatable
+ * from the heap. This may be used as an estimate only since there
+ * is no guarantee this space will still be available when an allocation
+ * request is made, nor that the space may be allocated in one big request
+ * due to kernel heap fragmentation.
+ */
+size_t
+kmem_maxavail(void)
+{
+#ifndef APPLE
+	// spgcnt_t pmem = availrmem - tune.t_minarmem;
+	// spgcnt_t vmem = btop(vmem_size(heap_arena, VMEM_FREE));
+	//
+	// return ((size_t)ptob(MAX(MIN(pmem, vmem), 0)));
+#endif
+	return (physmem * PAGE_SIZE);
+}
+
+/*
+ * Indicate whether memory-intensive kmem debugging is enabled.
+ */
+int
+kmem_debugging(void)
+{
+	return (kmem_flags & (KMF_AUDIT | KMF_REDZONE));
+}
+
+/* binning function, sorts finely at the two extremes */
+#define	KMEM_PARTIAL_SLAB_WEIGHT(sp, binshift)		\
+((((sp)->slab_refcnt <= (binshift)) ||		\
+(((sp)->slab_chunks - (sp)->slab_refcnt) <= (binshift)))	\
+? -(sp)->slab_refcnt					\
+: -((binshift) + ((sp)->slab_refcnt >> (binshift))))
+
+/*
+ * Minimizing the number of partial slabs on the freelist minimizes
+ * fragmentation (the ratio of unused buffers held by the slab layer). There are
+ * two ways to get a slab off of the freelist: 1) free all the buffers on the
+ * slab, and 2) allocate all the buffers on the slab. It follows that we want
+ * the most-used slabs at the front of the list where they have the best chance
+ * of being completely allocated, and the least-used slabs at a safe distance
+ * from the front to improve the odds that the few remaining buffers will all be
+ * freed before another allocation can tie up the slab. For that reason a slab
+ * with a higher slab_refcnt sorts less than a slab with a lower
+ * slab_refcnt.
+ *
+ * However, if a slab has at least one buffer that is deemed unfreeable, we
+ * would rather have that slab at the front of the list regardless of
+ * slab_refcnt, since even one unfreeable buffer makes the entire slab
+ * unfreeable. If the client returns KMEM_CBRC_NO in response to a cache_move()
+ * callback, the slab is marked unfreeable for as long as it remains on the
+ * freelist.
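KMEM_PARTIAL_SLAB_WEIGHT() above gives nearly-empty and nearly-full slabs fine-grained weights while lumping middling slabs into coarse bins; lower (more negative) weights sort toward the front of the partial-slab list, which is how mostly-allocated slabs end up first. A standalone sketch evaluating the same expression on a few example slabs (editorial; the struct and values are hypothetical):

#include <stdio.h>

struct exslab { int slab_refcnt; int slab_chunks; };

#define	EX_WEIGHT(sp, binshift)					\
	((((sp)->slab_refcnt <= (binshift)) ||			\
	(((sp)->slab_chunks - (sp)->slab_refcnt) <= (binshift)))	\
	? -(sp)->slab_refcnt					\
	: -((binshift) + ((sp)->slab_refcnt >> (binshift))))

int
main(void)
{
	struct exslab slabs[] = { { 1, 64 }, { 30, 64 }, { 40, 64 }, { 62, 64 } };
	int binshift = 3;

	/* the 62/64 slab gets weight -62 and sorts to the very front */
	for (int i = 0; i < 4; i++)
		printf("refcnt %2d/%d -> weight %d\n", slabs[i].slab_refcnt,
		    slabs[i].slab_chunks, EX_WEIGHT(&slabs[i], binshift));
	return (0);
}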
+ */ +static int +kmem_partial_slab_cmp(const void *pp0, const void *pp1) +{ + const kmem_cache_t *cp; + const kmem_slab_t *s0 = pp0; + const kmem_slab_t *s1 = pp1; + int w0, w1; + size_t binshift; + + ASSERT(KMEM_SLAB_IS_PARTIAL(s0)); + ASSERT(KMEM_SLAB_IS_PARTIAL(s1)); + ASSERT(s0->slab_cache == s1->slab_cache); + cp = s1->slab_cache; + ASSERT(MUTEX_HELD((struct kmutex *)&cp->cache_lock)); + binshift = cp->cache_partial_binshift; + + /* weight of first slab */ + w0 = KMEM_PARTIAL_SLAB_WEIGHT(s0, binshift); + if (s0->slab_flags & KMEM_SLAB_NOMOVE) { + w0 -= cp->cache_maxchunks; + } + + /* weight of second slab */ + w1 = KMEM_PARTIAL_SLAB_WEIGHT(s1, binshift); + if (s1->slab_flags & KMEM_SLAB_NOMOVE) { + w1 -= cp->cache_maxchunks; + } + + if (w0 < w1) + return (-1); + if (w0 > w1) + return (1); + + // compare slab age if available + hrtime_t c0 = s0->slab_create_time, c1 = s1->slab_create_time; + if (c0 != 0 && c1 != 0 && c0 != c1) { + // higher time is newer; newer sorts before older + if (c0 < c1) // c0 is older than c1 + return (1); // so c0 sorts after c1 + if (c0 > c1) + return (-1); + } + + /* compare pointer values */ + if ((uintptr_t)s0 < (uintptr_t)s1) + return (-1); + if ((uintptr_t)s0 > (uintptr_t)s1) + return (1); + + return (0); +} + +/* + * It must be valid to call the destructor (if any) on a newly created object. + * That is, the constructor (if any) must leave the object in a valid state for + * the destructor. + */ +kmem_cache_t * +kmem_cache_create( + char *name, /* descriptive name for this cache */ + size_t bufsize, /* size of the objects it manages */ + size_t align, /* required object alignment */ + int (*constructor)(void *, void *, int), /* object constructor */ + void (*destructor)(void *, void *), /* object destructor */ + void (*reclaim)(void *), /* memory reclaim callback */ + void *private, /* pass-thru arg for constr/destr/reclaim */ + vmem_t *vmp, /* vmem source for slab allocation */ + int cflags) /* cache creation flags */ +{ + int cpu_seqid; + size_t chunksize; + kmem_cache_t *cp; + kmem_magtype_t *mtp; + size_t csize = KMEM_CACHE_SIZE(max_ncpus); + +#ifdef DEBUG + /* + * Cache names should conform to the rules for valid C identifiers + */ + if (!strident_valid(name)) { + cmn_err(CE_CONT, + "kmem_cache_create: '%s' is an invalid cache name\n" + "cache names must conform to the rules for " + "C identifiers\n", name); + } +#endif /* DEBUG */ + + if (vmp == NULL) + vmp = kmem_default_arena; + + /* + * If this kmem cache has an identifier vmem arena as its source, mark + * it such to allow kmem_reap_idspace(). + */ + ASSERT(!(cflags & KMC_IDENTIFIER)); /* consumer should not set this */ + if (vmp->vm_cflags & VMC_IDENTIFIER) + cflags |= KMC_IDENTIFIER; + + /* + * Get a kmem_cache structure. We arrange that cp->cache_cpu[] + * is aligned on a KMEM_CPU_CACHE_SIZE boundary to prevent + * false sharing of per-CPU data. + */ + cp = vmem_xalloc(kmem_cache_arena, csize, + KMEM_CPU_CACHE_SIZE, + P2NPHASE(csize, KMEM_CPU_CACHE_SIZE), + 0, NULL, NULL, VM_SLEEP); + bzero(cp, csize); + list_link_init(&cp->cache_link); + + if (align == 0) + align = KMEM_ALIGN; + + /* + * If we're not at least KMEM_ALIGN aligned, we can't use free + * memory to hold bufctl information (because we can't safely + * perform word loads and stores on it). 
+ */ + if (align < KMEM_ALIGN) + cflags |= KMC_NOTOUCH; + + if ((align & (align - 1)) != 0 || align > vmp->vm_quantum) + panic("kmem_cache_create: bad alignment %lu", align); + + mutex_enter(&kmem_flags_lock); + if (kmem_flags & KMF_RANDOMIZE) + kmem_flags = (((kmem_flags | ~KMF_RANDOM) + 1) & KMF_RANDOM) | + KMF_RANDOMIZE; + cp->cache_flags = (kmem_flags | cflags) & KMF_DEBUG; + mutex_exit(&kmem_flags_lock); + + /* + * Make sure all the various flags are reasonable. + */ + ASSERT(!(cflags & KMC_NOHASH) || !(cflags & KMC_NOTOUCH)); + + if (cp->cache_flags & KMF_LITE) { + if (bufsize >= kmem_lite_minsize && + align <= kmem_lite_maxalign && + P2PHASE(bufsize, kmem_lite_maxalign) != 0) { + cp->cache_flags |= KMF_BUFTAG; + cp->cache_flags &= ~(KMF_AUDIT | KMF_FIREWALL); + } else { + cp->cache_flags &= ~KMF_DEBUG; + } + } + + if (cp->cache_flags & KMF_DEADBEEF) + cp->cache_flags |= KMF_REDZONE; + + if ((cflags & KMC_QCACHE) && (cp->cache_flags & KMF_AUDIT)) + cp->cache_flags |= KMF_NOMAGAZINE; + + if (cflags & KMC_NODEBUG) + cp->cache_flags &= ~KMF_DEBUG; + + if (cflags & KMC_NOTOUCH) + cp->cache_flags &= ~KMF_TOUCH; + + if (cflags & KMC_PREFILL) + cp->cache_flags |= KMF_PREFILL; + + if (cflags & KMC_NOHASH) + cp->cache_flags &= ~(KMF_AUDIT | KMF_FIREWALL); + + if (cflags & KMC_NOMAGAZINE) + cp->cache_flags |= KMF_NOMAGAZINE; + + if ((cp->cache_flags & KMF_AUDIT) && !(cflags & KMC_NOTOUCH)) + cp->cache_flags |= KMF_REDZONE; + + if (!(cp->cache_flags & KMF_AUDIT)) + cp->cache_flags &= ~KMF_CONTENTS; + + if ((cp->cache_flags & KMF_BUFTAG) && bufsize >= kmem_minfirewall && + !(cp->cache_flags & KMF_LITE) && !(cflags & KMC_NOHASH)) + cp->cache_flags |= KMF_FIREWALL; + + if (vmp != kmem_default_arena || kmem_firewall_arena == NULL) + cp->cache_flags &= ~KMF_FIREWALL; + + if (cp->cache_flags & KMF_FIREWALL) { + cp->cache_flags &= ~KMF_BUFTAG; + cp->cache_flags |= KMF_NOMAGAZINE; + ASSERT(vmp == kmem_default_arena); + vmp = kmem_firewall_arena; + } + + /* + * Set cache properties. + */ + (void) strncpy(cp->cache_name, name, KMEM_CACHE_NAMELEN); + strident_canon(cp->cache_name, KMEM_CACHE_NAMELEN + 1); + cp->cache_bufsize = bufsize; + cp->cache_align = align; + cp->cache_constructor = constructor; + cp->cache_destructor = destructor; + cp->cache_reclaim = reclaim; + cp->cache_private = private; + cp->cache_arena = vmp; + cp->cache_cflags = cflags; + + /* + * Determine the chunk size. + */ + chunksize = bufsize; + + if (align >= KMEM_ALIGN) { + chunksize = P2ROUNDUP(chunksize, KMEM_ALIGN); + cp->cache_bufctl = chunksize - KMEM_ALIGN; + } + + if (cp->cache_flags & KMF_BUFTAG) { + cp->cache_bufctl = chunksize; + cp->cache_buftag = chunksize; + if (cp->cache_flags & KMF_LITE) + chunksize += KMEM_BUFTAG_LITE_SIZE(kmem_lite_count); + else + chunksize += sizeof (kmem_buftag_t); + } + + if (cp->cache_flags & KMF_DEADBEEF) { + cp->cache_verify = MIN(cp->cache_buftag, kmem_maxverify); + if (cp->cache_flags & KMF_LITE) + cp->cache_verify = sizeof (uint64_t); + } + + cp->cache_contents = MIN(cp->cache_bufctl, kmem_content_maxsave); + + cp->cache_chunksize = chunksize = P2ROUNDUP(chunksize, align); + + /* + * Now that we know the chunk size, determine the optimal slab size. 
+ */ + + size_t vquantum = vmp->vm_quantum; + + if ((cflags & KMC_ARENA_SLAB) == KMC_ARENA_SLAB) { + VERIFY3U((vmp->vm_cflags & VMC_NO_QCACHE), ==, VMC_NO_QCACHE); + VERIFY3U(vmp->vm_min_import, >, 0); + VERIFY3U(vmp->vm_min_import, >=, (2 * vmp->vm_quantum)); + VERIFY(ISP2(vmp->vm_min_import)); + vquantum = vmp->vm_min_import >> 1; + } + + if (vmp == kmem_firewall_arena) { + cp->cache_slabsize = P2ROUNDUP(chunksize, vquantum); + cp->cache_mincolor = cp->cache_slabsize - chunksize; + cp->cache_maxcolor = cp->cache_mincolor; + cp->cache_flags |= KMF_HASH; + ASSERT(!(cp->cache_flags & KMF_BUFTAG)); + } else if ((cflags & KMC_NOHASH) || (!(cflags & KMC_NOTOUCH) && + !(cp->cache_flags & KMF_AUDIT) && + chunksize < vquantum / + KMEM_VOID_FRACTION)) { + cp->cache_slabsize = vquantum; + cp->cache_mincolor = 0; + cp->cache_maxcolor = + (cp->cache_slabsize - sizeof (kmem_slab_t)) % chunksize; + ASSERT(chunksize + sizeof (kmem_slab_t) <= cp->cache_slabsize); + ASSERT(!(cp->cache_flags & KMF_AUDIT)); + } else { + size_t chunks, bestfit, waste, slabsize; + size_t minwaste = LONG_MAX; + + for (chunks = 1; chunks <= KMEM_VOID_FRACTION; chunks++) { + slabsize = P2ROUNDUP(chunksize * chunks, + vquantum); + chunks = slabsize / chunksize; + waste = (slabsize % chunksize) / chunks; + if (waste < minwaste) { + minwaste = waste; + bestfit = slabsize; + } + } + if (cflags & KMC_QCACHE) + bestfit = VMEM_QCACHE_SLABSIZE(vmp->vm_qcache_max); + cp->cache_slabsize = bestfit; + cp->cache_mincolor = 0; + cp->cache_maxcolor = bestfit % chunksize; + cp->cache_flags |= KMF_HASH; + } + + cp->cache_maxchunks = (cp->cache_slabsize / cp->cache_chunksize); + cp->cache_partial_binshift = highbit(cp->cache_maxchunks / 16) + 1; + + /* + * Disallowing prefill when either the DEBUG or HASH flag is set or when + * there is a constructor avoids some tricky issues with debug setup + * that may be revisited later. We cannot allow prefill in a + * metadata cache because of potential recursion. + */ + if (vmp == kmem_msb_arena || + cp->cache_flags & (KMF_HASH | KMF_BUFTAG) || + cp->cache_constructor != NULL) + cp->cache_flags &= ~KMF_PREFILL; + + if (cp->cache_flags & KMF_HASH) { + ASSERT(!(cflags & KMC_NOHASH)); + cp->cache_bufctl_cache = (cp->cache_flags & KMF_AUDIT) ? + kmem_bufctl_audit_cache : kmem_bufctl_cache; + } + + if (cp->cache_maxcolor >= vquantum) + cp->cache_maxcolor = vquantum - 1; + + cp->cache_color = cp->cache_mincolor; + + /* + * Initialize the rest of the slab layer. + */ + mutex_init(&cp->cache_lock, NULL, MUTEX_DEFAULT, NULL); + + avl_create(&cp->cache_partial_slabs, kmem_partial_slab_cmp, + sizeof (kmem_slab_t), offsetof(kmem_slab_t, slab_link)); + /* LINTED: E_TRUE_LOGICAL_EXPR */ + ASSERT(sizeof (list_node_t) <= sizeof (avl_node_t)); + /* reuse partial slab AVL linkage for complete slab list linkage */ + list_create(&cp->cache_complete_slabs, + sizeof (kmem_slab_t), offsetof(kmem_slab_t, slab_link)); + + if (cp->cache_flags & KMF_HASH) { + cp->cache_hash_table = vmem_alloc(kmem_hash_arena, + KMEM_HASH_INITIAL * sizeof (void *), + VM_SLEEP); + bzero(cp->cache_hash_table, + KMEM_HASH_INITIAL * sizeof (void *)); + cp->cache_hash_mask = KMEM_HASH_INITIAL - 1; + cp->cache_hash_shift = highbit((ulong_t)chunksize) - 1; + } + + /* + * Initialize the depot. + */ + mutex_init(&cp->cache_depot_lock, NULL, MUTEX_DEFAULT, NULL); + + for (mtp = kmem_magtype; chunksize <= mtp->mt_minbuf; mtp++) + continue; + + cp->cache_magtype = mtp; + + /* + * Initialize the CPU layer. 
+ */ + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) { + kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid]; + mutex_init(&ccp->cc_lock, NULL, MUTEX_DEFAULT, NULL); // XNU + ccp->cc_flags = cp->cache_flags; + ccp->cc_rounds = -1; + ccp->cc_prounds = -1; + } + + /* + * Create the cache's kstats. + */ + if ((cp->cache_kstat = kstat_create("unix", 0, cp->cache_name, + "kmem_cache", KSTAT_TYPE_NAMED, + sizeof (kmem_cache_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL)) != NULL) { + cp->cache_kstat->ks_data = &kmem_cache_kstat; + cp->cache_kstat->ks_update = kmem_cache_kstat_update; + cp->cache_kstat->ks_private = cp; + cp->cache_kstat->ks_lock = &kmem_cache_kstat_lock; + kstat_install(cp->cache_kstat); + } + + /* + * Add the cache to the global list. This makes it visible + * to kmem_update(), so the cache must be ready for business. + */ + mutex_enter(&kmem_cache_lock); + list_insert_tail(&kmem_caches, cp); + mutex_exit(&kmem_cache_lock); + + if (kmem_ready) + kmem_cache_magazine_enable(cp); + + return (cp); +} + +static int +kmem_move_cmp(const void *buf, const void *p) +{ + const kmem_move_t *kmm = p; + uintptr_t v1 = (uintptr_t)buf; + uintptr_t v2 = (uintptr_t)kmm->kmm_from_buf; + return (v1 < v2 ? -1 : (v1 > v2 ? 1 : 0)); +} + +static void +kmem_reset_reclaim_threshold(kmem_defrag_t *kmd) +{ + kmd->kmd_reclaim_numer = 1; +} + +/* + * Initially, when choosing candidate slabs for buffers to move, we want to be + * very selective and take only slabs that are less than + * (1 / KMEM_VOID_FRACTION) allocated. If we have difficulty finding candidate + * slabs, then we raise the allocation ceiling incrementally. The reclaim + * threshold is reset to (1 / KMEM_VOID_FRACTION) as soon as the cache is no + * longer fragmented. + */ +static void +kmem_adjust_reclaim_threshold(kmem_defrag_t *kmd, int direction) +{ + if (direction > 0) { + /* make it easier to find a candidate slab */ + if (kmd->kmd_reclaim_numer < (KMEM_VOID_FRACTION - 1)) { + kmd->kmd_reclaim_numer++; + } + } else { + /* be more selective */ + if (kmd->kmd_reclaim_numer > 1) { + kmd->kmd_reclaim_numer--; + } + } +} + +uint64_t +spl_kmem_cache_inuse(kmem_cache_t *cache) +{ + return (cache->cache_buftotal); +} + +uint64_t +spl_kmem_cache_entry_size(kmem_cache_t *cache) +{ + return (cache->cache_bufsize); +} + +void +kmem_cache_set_move(kmem_cache_t *cp, + kmem_cbrc_t (*move)(void *, void *, size_t, void *)) +{ + kmem_defrag_t *defrag; + + ASSERT(move != NULL); + /* + * The consolidator does not support NOTOUCH caches because kmem cannot + * initialize their slabs with the 0xbaddcafe memory pattern, which sets + * a low order bit usable by clients to distinguish uninitialized memory + * from known objects (see kmem_slab_create). + */ + ASSERT(!(cp->cache_cflags & KMC_NOTOUCH)); + ASSERT(!(cp->cache_cflags & KMC_IDENTIFIER)); + + /* + * We should not be holding anyone's cache lock when calling + * kmem_cache_alloc(), so allocate in all cases before acquiring the + * lock. 
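For orientation, this is what a typical client of the interface defined above looks like: create a cache for a fixed-size object with a constructor and destructor, then allocate and free through it. The example object and function names are hypothetical; only the kmem_cache_* calls mirror the API in this file, and the sketch assumes the SPL headers that declare them:

typedef struct examplenode {
	uint64_t	en_id;
	void		*en_data;
} examplenode_t;

static kmem_cache_t *examplenode_cache;

static int
examplenode_cons(void *buf, void *private, int kmflag)
{
	examplenode_t *en = buf;

	en->en_id = 0;
	en->en_data = NULL;
	return (0);
}

static void
examplenode_dest(void *buf, void *private)
{
	/* nothing to tear down in this sketch */
}

void
examplenode_init(void)
{
	examplenode_cache = kmem_cache_create("examplenode_cache",
	    sizeof (examplenode_t), 0, examplenode_cons, examplenode_dest,
	    NULL, NULL, NULL, 0);
}

void
examplenode_demo(void)
{
	examplenode_t *en = kmem_cache_alloc(examplenode_cache, KM_SLEEP);

	en->en_id = 42;
	kmem_cache_free(examplenode_cache, en);
}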
+ */ + defrag = kmem_cache_alloc(kmem_defrag_cache, KM_SLEEP); + + mutex_enter(&cp->cache_lock); + + if (KMEM_IS_MOVABLE(cp)) { + if (cp->cache_move == NULL) { + ASSERT(cp->cache_slab_alloc == 0); + + cp->cache_defrag = defrag; + defrag = NULL; /* nothing to free */ + bzero(cp->cache_defrag, sizeof (kmem_defrag_t)); + avl_create(&cp->cache_defrag->kmd_moves_pending, + kmem_move_cmp, sizeof (kmem_move_t), + offsetof(kmem_move_t, kmm_entry)); + /* LINTED: E_TRUE_LOGICAL_EXPR */ + ASSERT(sizeof (list_node_t) <= sizeof (avl_node_t)); + /* reuse the slab's AVL linkage for deadlist linkage */ + list_create(&cp->cache_defrag->kmd_deadlist, + sizeof (kmem_slab_t), + offsetof(kmem_slab_t, slab_link)); + kmem_reset_reclaim_threshold(cp->cache_defrag); + } + cp->cache_move = move; + } + + mutex_exit(&cp->cache_lock); + + if (defrag != NULL) { + kmem_cache_free(kmem_defrag_cache, defrag); /* unused */ + } +} + +void +kmem_qcache_destroy() +{ + kmem_cache_t *cp; + kmem_cache_t *cache_to_destroy = NULL; + + do { + cache_to_destroy = NULL; + mutex_enter(&kmem_cache_lock); + for (cp = list_head(&kmem_caches); cp != NULL; + cp = list_next(&kmem_caches, cp)) { + if (cp->cache_cflags & KMC_QCACHE) { + cache_to_destroy = cp; + break; + } + } + mutex_exit(&kmem_cache_lock); + + if (cache_to_destroy) { + kmem_cache_destroy(cache_to_destroy); + } + } while (cache_to_destroy); +} + +void +kmem_cache_destroy(kmem_cache_t *cp) +{ + int cpu_seqid; + + /* + * Remove the cache from the global cache list so that no one else + * can schedule tasks on its behalf, wait for any pending tasks to + * complete, purge the cache, and then destroy it. + */ + mutex_enter(&kmem_cache_lock); + list_remove(&kmem_caches, cp); + mutex_exit(&kmem_cache_lock); + + if (kmem_taskq != NULL) + taskq_wait(kmem_taskq); + + if (kmem_move_taskq != NULL && cp->cache_defrag != NULL) + taskq_wait(kmem_move_taskq); + + kmem_cache_magazine_purge(cp); + + mutex_enter(&cp->cache_lock); + + if (cp->cache_buftotal != 0) + cmn_err(CE_WARN, "kmem_cache_destroy: '%s' (%p) not empty", + cp->cache_name, (void *)cp); + if (cp->cache_defrag != NULL) { + avl_destroy(&cp->cache_defrag->kmd_moves_pending); + list_destroy(&cp->cache_defrag->kmd_deadlist); + kmem_cache_free(kmem_defrag_cache, cp->cache_defrag); + cp->cache_defrag = NULL; + } + /* + * The cache is now dead. There should be no further activity. We + * enforce this by setting land mines in the constructor, destructor, + * reclaim, and move routines that induce a kernel text fault if + * invoked. 
+ */ + cp->cache_constructor = (int (*)(void *, void *, int))1; + cp->cache_destructor = (void (*)(void *, void *))2; + cp->cache_reclaim = (void (*)(void *))3; + cp->cache_move = (kmem_cbrc_t (*)(void *, void *, size_t, void *))4; + mutex_exit(&cp->cache_lock); + + kstat_delete(cp->cache_kstat); + + if (cp->cache_hash_table != NULL) + vmem_free(kmem_hash_arena, cp->cache_hash_table, + (cp->cache_hash_mask + 1) * sizeof (void *)); + + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) + mutex_destroy(&cp->cache_cpu[cpu_seqid].cc_lock); + + mutex_destroy(&cp->cache_depot_lock); + mutex_destroy(&cp->cache_lock); + + vmem_free(kmem_cache_arena, cp, KMEM_CACHE_SIZE(max_ncpus)); +} + +static void +kmem_alloc_caches_create(const int *array, size_t count, + kmem_cache_t **alloc_table, size_t maxbuf, + uint_t shift) +{ + char name[KMEM_CACHE_NAMELEN + 1]; + size_t table_unit = (1 << shift); /* range of one alloc_table entry */ + size_t size = table_unit; + int i; + + for (i = 0; i < count; i++) { + size_t cache_size = array[i]; + size_t align = KMEM_ALIGN; + kmem_cache_t *cp; + + /* if the table has an entry for maxbuf, we're done */ + if (size > maxbuf) + break; + + /* cache size must be a multiple of the table unit */ + ASSERT(P2PHASE(cache_size, table_unit) == 0); + + /* + * If they allocate a multiple of the coherency granularity, + * they get a coherency-granularity-aligned address. + */ + if (IS_P2ALIGNED(cache_size, 64)) + align = 64; + if (IS_P2ALIGNED(cache_size, PAGESIZE)) + align = PAGESIZE; + (void) snprintf(name, sizeof (name), + "kmem_alloc_%lu", cache_size); + cp = kmem_cache_create(name, cache_size, align, + NULL, NULL, NULL, NULL, NULL, KMC_KMEM_ALLOC | KMF_HASH); + + while (size <= cache_size) { + alloc_table[(size - 1) >> shift] = cp; + size += table_unit; + } + } + + ASSERT(size > maxbuf); /* i.e. maxbuf <= max(cache_size) */ +} + +static void +kmem_alloc_caches_destroy() +{ + kmem_cache_t *cache_to_destroy = NULL; + kmem_cache_t *cp = NULL; + + do { + cache_to_destroy = NULL; + + // Locate the first cache that has the KMC_KMEM_ALLOC flag. + mutex_enter(&kmem_cache_lock); + + for (cp = list_head(&kmem_caches); cp != NULL; + cp = list_next(&kmem_caches, cp)) { + if (cp->cache_cflags & KMC_KMEM_ALLOC) { + cache_to_destroy = cp; + break; + } + } + + mutex_exit(&kmem_cache_lock); + + // Destroy the cache + if (cache_to_destroy) { + kmem_cache_destroy(cache_to_destroy); + } + + } while (cache_to_destroy); +} + +static void +kmem_destroy_cache_by_name(const char *substr) +{ + kmem_cache_t *cache_to_destroy = NULL; + kmem_cache_t *cp = NULL; + + do { + cache_to_destroy = NULL; + + // Locate the first cache that has the KMC_KMEM_ALLOC flag. 
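The table filled by kmem_alloc_caches_create() above is what makes small-allocation lookups O(1): every size in (0, maxbuf] indexes to the smallest cache at least that large via (size - 1) >> shift. A small standalone model of the fill and lookup, assuming a shift of 3 (the 8-byte granularity a KMEM_ALIGN of 8 implies); table contents and names here are illustrative.

#include <stdio.h>
#include <stddef.h>

#define SHIFT	3			/* assumed 8-byte table unit */
#define UNIT	(1u << SHIFT)
#define MAXBUF	64

static const int cache_sizes[] = { 8, 16, 24, 32, 48, 64 };
static int alloc_table[MAXBUF >> SHIFT];	/* (size-1)>>SHIFT -> cache size */

int
main(void)
{
	size_t size = UNIT;

	/* fill: the same walk as kmem_alloc_caches_create() */
	for (size_t i = 0; i < sizeof (cache_sizes) / sizeof (int); i++) {
		while (size <= (size_t)cache_sizes[i]) {
			alloc_table[(size - 1) >> SHIFT] = cache_sizes[i];
			size += UNIT;
		}
	}

	/* lookup: a 20-byte request lands in the 24-byte cache */
	size_t req = 20;
	printf("kmem_alloc(%zu) -> %d-byte cache\n",
	    req, alloc_table[(req - 1) >> SHIFT]);
	return (0);
}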
+ mutex_enter(&kmem_cache_lock); + + for (cp = list_head(&kmem_caches); cp != NULL; + cp = list_next(&kmem_caches, cp)) { + if (kmem_strstr(cp->cache_name, substr)) { + cache_to_destroy = cp; + break; + } + } + + mutex_exit(&kmem_cache_lock); + + // Destroy the cache + if (cache_to_destroy) { + kmem_cache_destroy(cache_to_destroy); + } + + } while (cache_to_destroy); +} + +static void +kmem_cache_init(int pass, int use_large_pages) +{ + int i; + size_t maxbuf; + kmem_magtype_t *mtp; + + for (i = 0; i < sizeof (kmem_magtype) / sizeof (*mtp); i++) { + char name[KMEM_CACHE_NAMELEN + 1]; + + mtp = &kmem_magtype[i]; + (void) snprintf(name, KMEM_CACHE_NAMELEN, "%s%d", + KMEM_MAGAZINE_PREFIX, + mtp->mt_magsize); + mtp->mt_cache = kmem_cache_create( + name, + (mtp->mt_magsize + 1) * sizeof (void *), + mtp->mt_align, NULL, NULL, NULL, NULL, + kmem_msb_arena, KMC_NOHASH); + } + + kmem_slab_cache = kmem_cache_create("kmem_slab_cache", + sizeof (kmem_slab_t), 0, NULL, NULL, + NULL, NULL, + kmem_msb_arena, KMC_NOHASH); + + kmem_bufctl_cache = kmem_cache_create("kmem_bufctl_cache", + sizeof (kmem_bufctl_t), 0, + NULL, NULL, NULL, NULL, + kmem_msb_arena, KMC_NOHASH); + + kmem_bufctl_audit_cache = kmem_cache_create("kmem_bufctl_audit_cache", + sizeof (kmem_bufctl_audit_t), + 0, NULL, NULL, NULL, NULL, + kmem_msb_arena, KMC_NOHASH); + + if (pass == 2) { + kmem_va_arena = vmem_create(KMEM_VA_PREFIX, + NULL, 0, PAGESIZE, + vmem_alloc, vmem_free, heap_arena, + 2 * PAGESIZE, VM_SLEEP); + + kmem_default_arena = vmem_create("kmem_default", + NULL, 0, PAGESIZE, + vmem_alloc, vmem_free, kmem_va_arena, + 0, VMC_DUMPSAFE | VM_SLEEP); + + /* Figure out what our maximum cache size is */ + maxbuf = kmem_max_cached; + if (maxbuf <= KMEM_MAXBUF) { + maxbuf = 0; + kmem_max_cached = KMEM_MAXBUF; + } else { + size_t size = 0; + size_t max = + sizeof (kmem_big_alloc_sizes) / sizeof (int); + /* + * Round maxbuf up to an existing cache size. If maxbuf + * is larger than the largest cache, we truncate it to + * the largest cache's size. + */ + for (i = 0; i < max; i++) { + size = kmem_big_alloc_sizes[i]; + if (maxbuf <= size) + break; + } + kmem_max_cached = maxbuf = size; + } + + /* + * The big alloc table may not be completely overwritten, so + * we clear out any stale cache pointers from the first pass. + */ + bzero(kmem_big_alloc_table, sizeof (kmem_big_alloc_table)); + } else { + /* + * During the first pass, the kmem_alloc_* caches + * are treated as metadata. 
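To make the rounding of kmem_max_cached in kmem_cache_init() above concrete: the loop walks kmem_big_alloc_sizes and keeps the first entry at or above the requested maximum, falling back to the largest entry when the request is bigger than anything in the table. A worked standalone sketch with made-up table values:

#include <stdio.h>
#include <stddef.h>

/* hypothetical stand-in for kmem_big_alloc_sizes[] */
static const int big_sizes[] = { 65536, 98304, 131072, 262144 };

static size_t
round_to_cache(size_t maxbuf)
{
	size_t size = 0;

	for (size_t i = 0; i < sizeof (big_sizes) / sizeof (int); i++) {
		size = big_sizes[i];
		if (maxbuf <= size)
			break;
	}
	return (size);	/* the largest entry if maxbuf exceeds the table */
}

int
main(void)
{
	printf("%zu\n", round_to_cache(100 * 1024));	/* -> 131072 */
	printf("%zu\n", round_to_cache(512 * 1024));	/* -> 262144 (clamped) */
	return (0);
}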
+ */ + kmem_default_arena = kmem_msb_arena; + maxbuf = KMEM_BIG_MAXBUF_32BIT; + } + + /* + * Set up the default caches to back kmem_alloc() + */ + kmem_alloc_caches_create( + kmem_alloc_sizes, sizeof (kmem_alloc_sizes) / sizeof (int), + kmem_alloc_table, KMEM_MAXBUF, KMEM_ALIGN_SHIFT); + + kmem_alloc_caches_create( + kmem_big_alloc_sizes, sizeof (kmem_big_alloc_sizes) / sizeof (int), + kmem_big_alloc_table, maxbuf, KMEM_BIG_SHIFT); + + kmem_big_alloc_table_max = maxbuf >> KMEM_BIG_SHIFT; +} + +struct free_slab { + vmem_t *vmp; + size_t slabsize; + void *slab; + list_node_t next; +}; + +static list_t freelist; + + +void +kmem_cache_build_slablist(kmem_cache_t *cp) +{ + int cpu_seqid; + + vmem_t *vmp = cp->cache_arena; + kmem_slab_t *sp; + struct free_slab *fs; + + for (sp = list_head(&cp->cache_complete_slabs); sp != NULL; + sp = list_next(&cp->cache_complete_slabs, sp)) { + + MALLOC(fs, struct free_slab *, sizeof (struct free_slab), + M_TEMP, M_WAITOK); + fs->vmp = vmp; + fs->slabsize = cp->cache_slabsize; + fs->slab = (void *)P2ALIGN((uintptr_t)sp->slab_base, + vmp->vm_quantum); + list_link_init(&fs->next); + list_insert_tail(&freelist, fs); + } + + for (sp = avl_first(&cp->cache_partial_slabs); sp != NULL; + sp = AVL_NEXT(&cp->cache_partial_slabs, sp)) { + + MALLOC(fs, struct free_slab *, sizeof (struct free_slab), + M_TEMP, M_WAITOK); + fs->vmp = vmp; + fs->slabsize = cp->cache_slabsize; + fs->slab = (void *)P2ALIGN((uintptr_t)sp->slab_base, + vmp->vm_quantum); + list_link_init(&fs->next); + list_insert_tail(&freelist, fs); + } + + + kstat_delete(cp->cache_kstat); + + if (cp->cache_hash_table != NULL) + vmem_free(kmem_hash_arena, cp->cache_hash_table, + (cp->cache_hash_mask + 1) * sizeof (void *)); + + for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) + mutex_destroy(&cp->cache_cpu[cpu_seqid].cc_lock); + + mutex_destroy(&cp->cache_depot_lock); + mutex_destroy(&cp->cache_lock); + + vmem_free(kmem_cache_arena, cp, KMEM_CACHE_SIZE(max_ncpus)); +} + + +static void +kmem_cache_fini() +{ + kmem_cache_t *cp; + int i; + struct free_slab *fs; + + list_create(&freelist, sizeof (struct free_slab), + offsetof(struct free_slab, next)); + + mutex_enter(&kmem_cache_lock); + + while ((cp = list_head(&kmem_caches))) { + list_remove(&kmem_caches, cp); + mutex_exit(&kmem_cache_lock); + kmem_cache_build_slablist(cp); + mutex_enter(&kmem_cache_lock); + } + + mutex_exit(&kmem_cache_lock); + + i = 0; + while ((fs = list_head(&freelist))) { + i++; + list_remove(&freelist, fs); + vmem_free(fs->vmp, fs->slab, fs->slabsize); + FREE(fs, M_TEMP); + + } + printf("SPL: Released %u slabs\n", i); + list_destroy(&freelist); +} + + +// this is intended to substitute for kmem_avail() in arc.c +int64_t +spl_free_wrapper(void) +{ + return (spl_free); +} + +// this is intended to substitute for kmem_avail() in arc.c +// when arc_reclaim_thread() calls spl_free_set_pressure(0); +int64_t +spl_free_manual_pressure_wrapper(void) +{ + return (spl_free_manual_pressure); +} + +uint64_t +spl_free_last_pressure_wrapper(void) +{ + return (spl_free_last_pressure); +} + +int64_t +spl_free_set_and_wait_pressure(int64_t new_p, boolean_t fast, + clock_t check_interval) +{ + + int64_t snapshot_pressure = 0; + + if (new_p <= 0) + return (0); + + spl_free_fast_pressure = fast; + + if (spl_free_manual_pressure >= 0) + spl_free_manual_pressure += new_p; + else + spl_free_manual_pressure = new_p; + + // wait for another thread to reset pressure + const uint64_t start = zfs_lbolt(); + const uint64_t end_by = start + (hz*60); + const 
uint64_t double_at = start + (hz/2); + const uint64_t double_again_at = start + hz; + bool doubled = false, doubled_again = false; + uint64_t now; + + spl_free_last_pressure = start; + + for (; spl_free_manual_pressure != 0; ) { + // has another thread set spl_free_manual_pressure? + if (spl_free_manual_pressure < new_p) + spl_free_manual_pressure = new_p; + snapshot_pressure = spl_free_manual_pressure; + mutex_enter(&spl_free_thread_lock); + cv_timedwait_hires(&spl_free_thread_cv, + &spl_free_thread_lock, check_interval, 0, 0); + mutex_exit(&spl_free_thread_lock); + now = zfs_lbolt(); + if (now > end_by) { + printf("%s: ERROR: timed out after one minute!\n", + __func__); + break; + } else if (now > double_again_at && !doubled_again) { + doubled_again = true; + new_p *= 2; + } else if (now > double_at) { + doubled = true; + new_p *= 2; + } + } + return (snapshot_pressure); +} + +// routinely called by arc_reclaim_thread() with new_p == 0 +void +spl_free_set_pressure(int64_t new_p) +{ + if (new_p > spl_free_manual_pressure || new_p <= 0) + spl_free_manual_pressure = new_p; + if (new_p == 0) { + spl_free_fast_pressure = FALSE; + // wake up both spl_free_thread() to recalculate spl_free + // and any spl_free_set_and_wait_pressure() threads + cv_broadcast(&spl_free_thread_cv); + } + spl_free_last_pressure = zfs_lbolt(); +} + +void +spl_free_set_pressure_both(int64_t new_p, boolean_t fast) +{ + spl_free_fast_pressure = fast; + if (new_p > spl_free_manual_pressure || new_p <= 0) + spl_free_manual_pressure = new_p; + spl_free_last_pressure = zfs_lbolt(); +} + +void spl_free_maybe_reap(void); + +void +spl_free_set_emergency_pressure(int64_t new_p) +{ + spl_free_fast_pressure = TRUE; + if (new_p > spl_free_manual_pressure || new_p <= 0) + spl_free_manual_pressure = new_p; + spl_free_maybe_reap(); + spl_free_last_pressure = zfs_lbolt(); +} + +void +spl_free_set_emergency_pressure_additive(int64_t new_p) +{ + spl_free_fast_pressure = TRUE; + spl_free_manual_pressure += new_p; + spl_free_last_pressure = zfs_lbolt(); +} + +void +spl_free_set_pressure_additive(int64_t new_p) +{ + spl_free_manual_pressure += new_p; + spl_free_last_pressure = zfs_lbolt(); +} + +boolean_t +spl_free_fast_pressure_wrapper() +{ + return (spl_free_fast_pressure); +} + +void +spl_free_set_fast_pressure(boolean_t state) +{ + spl_free_fast_pressure = state; + spl_free_last_pressure = zfs_lbolt(); +} + +void +spl_free_reap_caches(void) +{ + // note: this may take some time + static hrtime_t last_reap = 0; + const hrtime_t reap_after = SEC2NSEC(60); + const hrtime_t curtime = gethrtime(); + + if (curtime - last_reap < reap_after) + return; + + vmem_qcache_reap(zio_arena_parent); + kmem_reap(); + vmem_qcache_reap(kmem_va_arena); +} + +void +spl_free_maybe_reap(void) +{ + static _Atomic uint64_t last_reap = 0; + const uint64_t lockout_time = 60 * hz; + + uint64_t now = zfs_lbolt(); + if (now > last_reap + lockout_time) { + last_reap = now; + spl_free_maybe_reap_flag = true; + } +} + +boolean_t +spl_maybe_send_large_pressure(uint64_t now, uint64_t minutes, boolean_t full) +{ + static volatile _Atomic uint64_t spl_last_large_pressure = 0; + const uint64_t interval_ticks = minutes * 60ULL * (uint64_t)hz; + + if (spl_last_large_pressure + interval_ticks > now) + return (false); + + spl_last_large_pressure = now; + + const int64_t sixteenth_physmem = (int64_t)real_total_memory / 16LL; + const int64_t sixtyfourth_physmem = sixteenth_physmem / 4LL; + int64_t howmuch = sixteenth_physmem; + + if (full == false) + howmuch = 
sixtyfourth_physmem; + + + printf("SPL: %s: %lld bytes at time %llu\n", + __func__, howmuch, now); + + spl_free_set_emergency_pressure(howmuch); + + return (true); +} + +static void +spl_free_thread() +{ + callb_cpr_t cpr; + uint64_t last_update = zfs_lbolt(); + int64_t last_spl_free; + double ema_new = 0; + double ema_old = 0; + double alpha; + + CALLB_CPR_INIT(&cpr, &spl_free_thread_lock, callb_generic_cpr, FTAG); + + spl_free = (int64_t)PAGESIZE * + (int64_t)(vm_page_free_count - vm_page_free_min); + + mutex_enter(&spl_free_thread_lock); + + printf("SPL: beginning spl_free_thread() loop, spl_free == %lld\n", + spl_free); + + uint64_t recent_lowmem = 0; + uint64_t last_disequilibrium = 0; + + while (!spl_free_thread_exit) { + mutex_exit(&spl_free_thread_lock); + boolean_t lowmem = false; + boolean_t emergency_lowmem = false; + int64_t base; + int64_t new_spl_free = 0LL; + + spl_stats.spl_free_wake_count.value.ui64++; + + if (spl_free_maybe_reap_flag == true) { + spl_free_maybe_reap_flag = false; + spl_free_reap_caches(); + } + + uint64_t time_now = zfs_lbolt(); + uint64_t time_now_seconds = 0; + if (time_now > hz) + time_now_seconds = time_now / hz; + + last_spl_free = spl_free; + + new_spl_free = 0LL; + + /* + * if there is pressure that has not yet reached + * arc_reclaim_thread() then start with a negative + * new_spl_free + */ + if (spl_free_manual_pressure > 0) { + int64_t old_pressure = spl_free_manual_pressure; + new_spl_free -= old_pressure * 2LL; + lowmem = true; + if (spl_free_fast_pressure) { + emergency_lowmem = true; + new_spl_free -= old_pressure * 4LL; + } + } + + /* + * can we allocate at least a 64 MiB segment + * from spl_heap_arena? this probes the reserve + * and also the largest imported spans, which + * vmem_alloc can fragment if needed. + */ + boolean_t reserve_low = false; + extern vmem_t *spl_heap_arena; + const uint64_t sixtyfour = 64ULL*1024ULL*1024ULL; + const uint64_t rvallones = (sixtyfour << 1ULL) - 1ULL; + const uint64_t rvmask = ~rvallones; + uint64_t rvfreebits = spl_heap_arena->vm_freemap; + + if ((rvfreebits & rvmask) == 0) { + reserve_low = true; + } else { + new_spl_free += (int64_t)sixtyfour; + } + + // do we have lots of memory in the spl_heap_arena ? + + boolean_t early_lots_free = false; + const uint64_t onetwentyeight = 128ULL*1024ULL*1024ULL; + const uint64_t sixteen = 16ULL*1024ULL*1024ULL; + if (!reserve_low) { + early_lots_free = true; + } else if (vmem_size_semi_atomic(spl_heap_arena, + VMEM_FREE) > onetwentyeight) { + early_lots_free = true; + new_spl_free += (int64_t)sixteen; + } + + // do we have lots of memory in the bucket_arenas ? 
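The effect of the manual-pressure branch in spl_free_thread() above is easiest to see with numbers: a pending manual pressure P starts the pass at -2P, and at -6P when fast pressure is also set, so ARC sees a firmly negative spl_free until the pressure is consumed. A tiny worked model (names illustrative):

#include <stdio.h>
#include <stdint.h>

/* model of the opening bias applied by spl_free_thread() each pass */
static int64_t
pressure_bias(int64_t pending, int fast)
{
	int64_t new_spl_free = 0;

	if (pending > 0) {
		new_spl_free -= pending * 2;		/* always */
		if (fast)
			new_spl_free -= pending * 4;	/* emergency extra */
	}
	return (new_spl_free);
}

int
main(void)
{
	int64_t p = 16LL * 1024 * 1024;		/* 16 MiB of manual pressure */

	printf("%lld\n", (long long)pressure_bias(p, 0));	/* -32 MiB */
	printf("%lld\n", (long long)pressure_bias(p, 1));	/* -96 MiB */
	return (0);
}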
+ + extern int64_t vmem_buckets_size(int); // non-locking + int64_t buckets_free = vmem_buckets_size(VMEM_FREE); + if ((uint64_t)buckets_free != spl_buckets_mem_free) + spl_buckets_mem_free = (uint64_t)buckets_free; + + if (buckets_free >= 512LL*1024LL*1024LL) { + early_lots_free = true; + new_spl_free += (int64_t)sixteen; + } + if (buckets_free >= 1024LL*1024LL*1024LL) { + reserve_low = false; + new_spl_free += (int64_t)sixteen; + } + + /* + * if we have neither alloced or freed in + * several minutes, then we do not need to + * shrink back if there is a momentary transient + * memory spike (i.e., one that lasts less than a second) + */ + boolean_t memory_equilibrium = false; + const uint64_t five_minutes = 300ULL; + const uint64_t one_minute = 60ULL; + uint64_t last_xat_alloc_seconds = spl_xat_lastalloc; + uint64_t last_xat_free_seconds = spl_xat_lastfree; + + if (last_xat_alloc_seconds + five_minutes > time_now_seconds && + last_xat_free_seconds + five_minutes > time_now_seconds) { + if (last_disequilibrium + one_minute > + time_now_seconds) { + memory_equilibrium = true; + last_disequilibrium = 0; + } + } else { + last_disequilibrium = time_now_seconds; + } + + boolean_t just_alloced = false; + if (last_xat_alloc_seconds + 1 > time_now_seconds) + just_alloced = true; + + /* + * this is a sign of a period of time of low system + * memory, however XNU's generation of this variable + * is not very predictable, but generally it should be + * taken seriously when it's positive (it is often falsely 0) + */ + if ((vm_page_free_wanted > 0 && reserve_low && + !early_lots_free && !memory_equilibrium && + !just_alloced) || vm_page_free_wanted >= 1024) { + int64_t bminus = (int64_t)vm_page_free_wanted * + (int64_t)PAGESIZE * -16LL; + if (bminus > -16LL*1024LL*1024LL) + bminus = -16LL*1024LL*1024LL; + new_spl_free += bminus; + lowmem = true; + emergency_lowmem = true; + // atomic swaps to set these variables used in arc.c + int64_t previous_highest_pressure = 0; + int64_t new_p = -bminus; + previous_highest_pressure = spl_free_manual_pressure; + if (new_p > previous_highest_pressure || new_p <= 0) { + boolean_t fast = FALSE; + if (vm_page_free_wanted > vm_page_free_min / 8) + fast = TRUE; + spl_free_set_pressure_both(-16LL * new_spl_free, + fast); + } + last_disequilibrium = time_now_seconds; + } else if (vm_page_free_wanted > 0) { + int64_t bytes_wanted = (int64_t)vm_page_free_wanted * + (int64_t)PAGESIZE; + new_spl_free -= bytes_wanted; + if (reserve_low && !early_lots_free) { + lowmem = true; + if (recent_lowmem == 0) { + recent_lowmem = time_now; + } + if (!memory_equilibrium) { + last_disequilibrium = time_now_seconds; + } + } + } + + /* + * these variables are reliably maintained by XNU + * if vm_page_free_count > vm_page_free_min, then XNU + * is scanning pages and we may want to try to free some memory + */ + int64_t above_min_free_pages = (int64_t)vm_page_free_count - + (int64_t)vm_page_free_min; + int64_t above_min_free_bytes = (int64_t)PAGESIZE * + above_min_free_pages; + + /* + * vm_page_free_min normally 3500, page free target + * normally 4000 but not exported so we are not scanning + * if we are 500 pages above vm_page_free_min. 
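Worked numbers for the vm_page_free_wanted branch above, assuming 4 KiB pages: xnu asking for 1024 pages (4 MiB) becomes a 64 MiB charge against new_spl_free, while shortfalls under 256 pages are still charged the 16 MiB floor the clamp enforces. A standalone sketch of just that arithmetic:

#include <stdio.h>
#include <stdint.h>

#define PAGE	4096LL			/* assumed page size */
#define MIB	(1024LL * 1024LL)

/* model of the bminus computation in spl_free_thread() */
static int64_t
wanted_penalty(int64_t wanted_pages)
{
	int64_t bminus = wanted_pages * PAGE * -16LL;

	if (bminus > -16LL * MIB)	/* never charge less than 16 MiB */
		bminus = -16LL * MIB;
	return (bminus);
}

int
main(void)
{
	printf("%lld MiB\n", (long long)(wanted_penalty(100) / MIB));	/* -16 */
	printf("%lld MiB\n", (long long)(wanted_penalty(1024) / MIB));	/* -64 */
	return (0);
}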
even if + * we're scanning we may have plenty of space in the + * reserve arena, in which case we should not react too strongly + */ + + if (above_min_free_bytes < (int64_t)PAGESIZE * 500LL && + reserve_low && !early_lots_free && !memory_equilibrium) { + // trigger a reap below + lowmem = true; + } + + extern volatile unsigned int vm_page_speculative_count; + if ((above_min_free_bytes < 0LL && reserve_low && + !early_lots_free && !memory_equilibrium && !just_alloced) || + above_min_free_bytes <= -4LL*1024LL*1024LL) { + int64_t new_p = -1LL * above_min_free_bytes; + boolean_t fast = FALSE; + emergency_lowmem = true; + lowmem = true; + recent_lowmem = time_now; + last_disequilibrium = time_now_seconds; + int64_t spec_bytes = (int64_t)vm_page_speculative_count + * (int64_t)PAGESIZE; + if (vm_page_free_wanted > 0 || new_p > spec_bytes) { + // force a stronger reaction from ARC if we are + // also low on speculative pages (xnu prefetched + // file blocks with no clients yet) + fast = TRUE; + } + spl_free_set_pressure_both(new_p, fast); + } else if (above_min_free_bytes < 0LL && !early_lots_free) { + lowmem = true; + if (recent_lowmem == 0) + recent_lowmem = time_now; + if (!memory_equilibrium) + last_disequilibrium = time_now_seconds; + } + + new_spl_free += above_min_free_bytes; + + /* + * If we have already detected a memory shortage + * and we have not reaped in a while (a short while + * for emergency_lowmem), then do a kmem_reap() now. + * See http://comments.gmane.org/gmane.os.illumos.devel/22552 + * (notably Richard Elling's "A kernel module can call + * kmem_reap() whenever it wishes and some modules, + * like zfs, do so." If we reap, stop processing spl_free + * on this pass, to let the reaps (and arc, if pressure + * has been set above) do their job for a few milliseconds. + */ + if (emergency_lowmem || lowmem) { + static uint64_t last_reap = 0; + uint64_t now = time_now; + uint64_t elapsed = 60*hz; + if (emergency_lowmem) + elapsed = 15*hz; // min.freq. kmem_reap_interval + if (now - last_reap > elapsed) { + last_reap = now; + /* + * spl_free_reap_caches() calls functions + * that will acquire locks and can take a while + * so set spl_free to a small positive value + * to stop arc shrinking too much during this + * period when we expect to be freeing up + * arc-usable memory, but low enough that + * arc_no_grow likely will be set. + */ + const int64_t two_spamax = 32LL * 1024LL * + 1024LL; + if (spl_free < two_spamax) + spl_free = two_spamax; // atomic! + spl_free_reap_caches(); + // we do not have any lock now, so we can jump + // to just before the thread-suspending code + goto justwait; + } + } + + /* + * a number or exceptions to reverse the lowmem + * / emergency_lowmem states if we have recently reaped. + * we also take the strong reaction sting out of + * the set pressure by turning off spl_free_fast_pressure, + * since that automatically provokes an arc shrink + * and arc reap. 
+ */ + + if (!reserve_low || early_lots_free || memory_equilibrium || + just_alloced) { + lowmem = false; + emergency_lowmem = false; + spl_free_fast_pressure = FALSE; + } + + if (vm_page_speculative_count > 0) { + /* + * speculative memory can be squeezed a bit; it is + * file blocks that have been prefetched by xnu but + * are not (yet) in use by any consumer + */ + if (vm_page_speculative_count / 4 + vm_page_free_count > + vm_page_free_min) { + emergency_lowmem = false; + spl_free_fast_pressure = FALSE; + } + if (vm_page_speculative_count / 2 + vm_page_free_count > + vm_page_free_min) { + lowmem = false; + spl_free_fast_pressure = FALSE; + } + } + + /* + * Stay in a low memory condition for several seconds + * after we first detect that we are in it, giving the + * system (arc, xnu and userland) time to adapt + */ + if (!lowmem && recent_lowmem > 0) { + if (recent_lowmem + 4*hz < time_now) + lowmem = true; + else + recent_lowmem = 0; + } + + /* + * if we are in a lowmem "hangover", cure it with + * pressure, then wait for the pressure to take + * effect in arc.c code. triggered when we have had + * at least one lowmem in the previous few seconds + * -- possibly two (one that causes a reap, one + * that falls through to the 4 second hold above). + */ + if (recent_lowmem == time_now && early_lots_free && + reserve_low) { + /* + * we can't grab 64 MiB as a single segment, + * but otherwise have ample memory brought in from xnu, + * but recently we had lowmem... and still have lowmem. + * cure this condition with a dose of pressure. + */ + if (above_min_free_bytes < 0) { + int64_t old_p = spl_free_manual_pressure; + if (old_p <= -above_min_free_bytes) { + recent_lowmem = 0; + spl_free_manual_pressure = + -above_min_free_bytes; + goto justwait; + } + } + } + + base = new_spl_free; + + // adjust for available memory in spl_heap_arena + // cf arc_available_memory() + if (!emergency_lowmem) { + extern vmem_t *spl_default_arena; + int64_t heap_free = (int64_t)vmem_size_semi_atomic( + spl_heap_arena, VMEM_FREE); + // grabbed buckets_free up above; we are OK with + // change to it in the meanwhile, + // it'll get an update on the next run. + int64_t combined_free = heap_free + buckets_free; + + if (combined_free != 0) { + const int64_t mb = 1024*1024; + if (!lowmem && above_min_free_bytes > + (int64_t)PAGESIZE * 10000LL) { + if (above_min_free_bytes < 64LL * mb) + new_spl_free += combined_free / + 16; + else if (above_min_free_bytes < + 128LL * mb) + new_spl_free += combined_free / + 8; + else if (above_min_free_bytes < + 256LL * mb) + new_spl_free += combined_free / + 4; + else + new_spl_free += combined_free / + 2; + } else { + new_spl_free -= 16LL * mb; + } + } + + // memory footprint has gotten really big, + // decrease spl_free substantially + int64_t total_mem_used = (int64_t) + segkmem_total_mem_allocated; + if ((segkmem_total_mem_allocated * 100LL / + real_total_memory) > 70) { + new_spl_free -= total_mem_used / 64; + } else if ((segkmem_total_mem_allocated * 100LL / + real_total_memory) > 75) { + new_spl_free -= total_mem_used / 32; + lowmem = true; + } + } + + // Adjust in the face of a large ARC. + // We don't treat (zfs) metadata and non-metadata + // differently here, and leave policy with respect + // to the relative value of each up to arc.c. + // O3X arc.c does not (yet) take these arena sizes into + // account like Illumos's does. 
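The heap/bucket credit computed above scales with how comfortable xnu itself is: the same combined free space is credited at 1/16, 1/8, 1/4 or 1/2 depending on how far vm_page_free_count sits above vm_page_free_min, and turns into a flat 16 MiB charge when pages are tight. A worked standalone model with the thresholds copied from the code (4 KiB pages assumed, names illustrative):

#include <stdio.h>
#include <stdint.h>

#define MIB (1024LL * 1024LL)

/* model of the credit for free spl_heap_arena + bucket space */
static int64_t
heap_credit(int64_t combined_free, int64_t above_min_free_bytes, int lowmem)
{
	if (combined_free == 0)
		return (0);
	if (!lowmem && above_min_free_bytes > 4096LL * 10000LL) {
		if (above_min_free_bytes < 64LL * MIB)
			return (combined_free / 16);
		else if (above_min_free_bytes < 128LL * MIB)
			return (combined_free / 8);
		else if (above_min_free_bytes < 256LL * MIB)
			return (combined_free / 4);
		else
			return (combined_free / 2);
	}
	return (-16LL * MIB);	/* tight on pages: charge instead of credit */
}

int
main(void)
{
	int64_t free_space = 256LL * MIB;

	/* 100 MiB above the free-page floor: credit 1/8 of 256 MiB = 32 MiB */
	printf("%lld MiB\n",
	    (long long)(heap_credit(free_space, 100LL * MIB, 0) / MIB));
	return (0);
}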
+ uint64_t zio_size = vmem_size_semi_atomic(zio_arena_parent, + VMEM_ALLOC | VMEM_FREE); + // wrap this in a basic block for lexical scope SSA convenience + if (zio_size > 0) { + static uint64_t zio_last_too_big = 0; + static int64_t imposed_cap = 75; + const uint64_t seconds_of_lower_cap = 10*hz; + uint64_t now = time_now; + uint32_t zio_pct = (uint32_t)(zio_size * 100ULL / + real_total_memory); + // if not hungry for memory, shrink towards a + // 75% total memory cap on zfs_file_data + if (!lowmem && !emergency_lowmem && zio_pct > 75 && + (now > zio_last_too_big + seconds_of_lower_cap)) { + new_spl_free -= zio_size / 64; + zio_last_too_big = now; + imposed_cap = 75; + } else if (lowmem || emergency_lowmem) { + // shrink towards stricter caps if we are hungry + // for memory + const uint32_t lowmem_cap = 25; + const uint32_t emergency_lowmem_cap = 5; + // we don't want the lowest cap to be so low + // that we will not make any use of the fixed + // size reserve + if (lowmem && zio_pct > lowmem_cap) { + new_spl_free -= zio_size / 32; + zio_last_too_big = now; + imposed_cap = lowmem_cap; + } + if (emergency_lowmem && zio_pct > + emergency_lowmem_cap) { + new_spl_free -= zio_size / 8; + zio_last_too_big = now; + imposed_cap = emergency_lowmem_cap; + } + } + if (zio_last_too_big != now && + now < zio_last_too_big + seconds_of_lower_cap && + zio_pct > imposed_cap) { + new_spl_free -= zio_size / 64; + } + } + + // try to get 1/64 of spl_heap_arena freed up + if (emergency_lowmem && new_spl_free >= 0LL) { + extern vmem_t *spl_root_arena; + uint64_t root_size = vmem_size_semi_atomic( + spl_heap_arena, VMEM_ALLOC | VMEM_FREE); + uint64_t root_free = vmem_size_semi_atomic( + spl_heap_arena, VMEM_FREE); + int64_t difference = root_size - root_free; + int64_t target = root_size / 64; + if (difference < target) { + new_spl_free -= target; + } + // and we should definitely not be returning + // positive now + if (new_spl_free >= 0LL) + new_spl_free = -1024LL; + } + + double delta = (double)new_spl_free - (double)last_spl_free; + + boolean_t spl_free_is_negative = false; + + if (new_spl_free < 0LL) { + spl_stats.spl_spl_free_negative_count.value.ui64++; + spl_free_is_negative = true; + } + + // NOW set spl_free from calculated new_spl_free + spl_free = new_spl_free; + // the direct equivalent of : + // __c11_atomic_store(&spl_free, new_spl_free, + // __ATOMIC_SEQ_CST); + + /* + * Because we're already negative, arc is likely to have + * been signalled already. We can rely on the _maybe_ in + * spl-vmem.c:xnu_alloc_throttled() [XAT] to try to give + * arc a kick with greater probability. However, if we've + * gone negative several times, and have not tried a full + * kick in a long time, do so now; if the full kick is + * refused because there has been a kick too few minutes + * ago, try a gentler kick. We do this outside the lock, + * as spl_maybe_send_large_pressure may need to take a + * mutex, and we forbid further mutex entry when + * spl_free_lock is held. 
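For scale on the "kick" described in the comment above: spl_maybe_send_large_pressure(), defined earlier, asks for 1/16 of physical memory on a full kick and 1/64 on a gentle one, and rate-limits itself by the caller-supplied interval. A worked arithmetic sketch, assuming a 16 GiB machine:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* stand-in for real_total_memory; assumed 16 GiB */
	int64_t total = 16LL * 1024 * 1024 * 1024;
	int64_t full = total / 16;	/* sixteenth_physmem */
	int64_t gentle = full / 4;	/* sixtyfourth_physmem */

	printf("full kick:   %lld MiB\n", (long long)(full / (1024 * 1024)));
	printf("gentle kick: %lld MiB\n", (long long)(gentle / (1024 * 1024)));
	return (0);
}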
+ */ + + if (spl_free_is_negative) { + static volatile _Atomic uint32_t + negatives_since_last_kick = 0; + + if (negatives_since_last_kick++ > 8) { + if (spl_maybe_send_large_pressure(time_now, 360, + true) || + spl_maybe_send_large_pressure(time_now, 60, + false)) { + negatives_since_last_kick = 0; + } + } + } + + if (lowmem) + recent_lowmem = time_now; + + // maintain an exponential moving average for the ema kstat + if (last_update > hz) + alpha = 1.0; + else { + double td_tick = (double)(time_now - last_update); + alpha = td_tick / (double)(hz*50.0); // roughly 0.02 + } + + ema_new = (alpha * delta) + (1.0 - alpha)*ema_old; + spl_free_delta_ema = ema_new; + ema_old = ema_new; + + justwait: + mutex_enter(&spl_free_thread_lock); + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait_hires(&spl_free_thread_cv, + &spl_free_thread_lock, MSEC2NSEC(10), 0, 0); + CALLB_CPR_SAFE_END(&cpr, &spl_free_thread_lock); + } + spl_free_thread_exit = FALSE; + printf("SPL: spl_free_thread_exit set to FALSE " \ + "and exiting: cv_broadcasting\n"); + spl_free_manual_pressure = 0; + cv_broadcast(&spl_free_thread_cv); + CALLB_CPR_EXIT(&cpr); + printf("SPL: %s thread_exit\n", __func__); + thread_exit(); +} + + +static int +spl_kstat_update(kstat_t *ksp, int rw) +{ + spl_stats_t *ks = ksp->ks_data; + + if (rw == KSTAT_WRITE) { + + if (ks->spl_spl_free_manual_pressure.value.i64 != + spl_free_manual_pressure) { + spl_free_set_pressure( + ks->spl_spl_free_manual_pressure.value.i64 * 1024 * + 1024); + if (ks->spl_spl_free_manual_pressure.value.i64 > 0) { + spl_free_reap_caches(); + } + } + + if (ks->spl_spl_free_fast_pressure.value.i64 != + spl_free_fast_pressure) { + if (spl_free_wrapper() != 0) { + spl_free_set_fast_pressure(TRUE); + } + } + + if (ks->spl_bucket_tunable_large_span.value.ui64 != + spl_bucket_tunable_large_span) { + spl_set_bucket_tunable_large_span( + ks->spl_bucket_tunable_large_span.value.ui64); + } + + if (ks->spl_bucket_tunable_small_span.value.ui64 != + spl_bucket_tunable_small_span) { + spl_set_bucket_tunable_small_span( + ks->spl_bucket_tunable_small_span.value.ui64); + } + + if (ks->spl_frag_max_walk.value.ui64 != spl_frag_max_walk) { + spl_frag_max_walk = ks->spl_frag_max_walk.value.ui64; + } + + if (ks->kmem_free_to_slab_when_fragmented.value.ui64 != + kmem_free_to_slab_when_fragmented) { + kmem_free_to_slab_when_fragmented = + ks->kmem_free_to_slab_when_fragmented.value.ui64; + } + + } else { + ks->spl_os_alloc.value.ui64 = segkmem_total_mem_allocated; + ks->spl_active_threads.value.ui64 = zfs_threads; + ks->spl_active_mutex.value.ui64 = zfs_active_mutex; + ks->spl_active_rwlock.value.ui64 = zfs_active_rwlock; + ks->spl_active_tsd.value.ui64 = spl_tsd_size(); + ks->spl_spl_free.value.i64 = spl_free; + ks->spl_spl_free_manual_pressure.value.i64 = + spl_free_manual_pressure; + ks->spl_spl_free_fast_pressure.value.i64 = + spl_free_fast_pressure; + ks->spl_spl_free_delta_ema.value.i64 = spl_free_delta_ema; + ks->spl_osif_malloc_success.value.ui64 = + stat_osif_malloc_success; + ks->spl_osif_malloc_bytes.value.ui64 = stat_osif_malloc_bytes; + ks->spl_osif_free.value.ui64 = stat_osif_free; + ks->spl_osif_free_bytes.value.ui64 = stat_osif_free_bytes; + ks->spl_bucket_non_pow2_allocs.value.ui64 = + spl_bucket_non_pow2_allocs; + + ks->spl_vmem_unconditional_allocs.value.ui64 = + spl_vmem_unconditional_allocs; + ks->spl_vmem_unconditional_alloc_bytes.value.ui64 = + spl_vmem_unconditional_alloc_bytes; + ks->spl_vmem_conditional_allocs.value.ui64 = + spl_vmem_conditional_allocs; + 
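Stepping back to the moving-average bookkeeping at the end of spl_free_thread() above: the per-pass change in spl_free is folded into an exponential moving average, ema_new = alpha * delta + (1 - alpha) * ema_old, so the exported spl_free_delta_ema kstat tracks trend rather than per-pass noise. A standalone model using the "roughly 0.02" alpha mentioned in the code's comment (the exact alpha depends on hz and the pass interval):

#include <stdio.h>

int
main(void)
{
	const double alpha = 0.02;	/* assumed smoothing factor */
	double ema = 0.0;
	/* spl_free deltas over a few passes: one large spike, then quiet */
	double deltas[] = { 0, 0, 100.0, 0, 0, 0 };

	for (int i = 0; i < 6; i++) {
		ema = (alpha * deltas[i]) + (1.0 - alpha) * ema;
		printf("pass %d: ema = %.3f\n", i, ema);
	}
	/* the spike decays gradually instead of whipsawing the kstat */
	return (0);
}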
ks->spl_vmem_conditional_alloc_bytes.value.ui64 = + spl_vmem_conditional_alloc_bytes; + ks->spl_vmem_conditional_alloc_deny.value.ui64 = + spl_vmem_conditional_alloc_deny; + ks->spl_vmem_conditional_alloc_deny_bytes.value.ui64 = + spl_vmem_conditional_alloc_deny_bytes; + + ks->spl_xat_success.value.ui64 = spl_xat_success; + ks->spl_xat_late_success.value.ui64 = spl_xat_late_success; + ks->spl_xat_late_success_nosleep.value.ui64 = + spl_xat_late_success_nosleep; + ks->spl_xat_pressured.value.ui64 = spl_xat_pressured; + ks->spl_xat_bailed.value.ui64 = spl_xat_bailed; + ks->spl_xat_bailed_contended.value.ui64 = + spl_xat_bailed_contended; + ks->spl_xat_lastalloc.value.ui64 = spl_xat_lastalloc; + ks->spl_xat_lastfree.value.ui64 = spl_xat_lastfree; + ks->spl_xat_forced.value.ui64 = spl_xat_forced; + ks->spl_xat_sleep.value.ui64 = spl_xat_sleep; + ks->spl_xat_late_deny.value.ui64 = spl_xat_late_deny; + ks->spl_xat_no_waiters.value.ui64 = spl_xat_no_waiters; + ks->spl_xft_wait.value.ui64 = spl_xft_wait; + + ks->spl_vba_parent_memory_appeared.value.ui64 = + spl_vba_parent_memory_appeared; + ks->spl_vba_parent_memory_blocked.value.ui64 = + spl_vba_parent_memory_blocked; + ks->spl_vba_hiprio_blocked.value.ui64 = spl_vba_hiprio_blocked; + ks->spl_vba_cv_timeout.value.ui64 = spl_vba_cv_timeout; + ks->spl_vba_loop_timeout.value.ui64 = spl_vba_loop_timeout; + ks->spl_vba_cv_timeout_blocked.value.ui64 = + spl_vba_cv_timeout_blocked; + ks->spl_vba_loop_timeout_blocked.value.ui64 = + spl_vba_loop_timeout_blocked; + ks->spl_vba_sleep.value.ui64 = spl_vba_sleep; + ks->spl_vba_loop_entries.value.ui64 = spl_vba_loop_entries; + + ks->spl_bucket_tunable_large_span.value.ui64 = + spl_bucket_tunable_large_span; + ks->spl_bucket_tunable_small_span.value.ui64 = + spl_bucket_tunable_small_span; + + ks->spl_buckets_mem_free.value.ui64 = spl_buckets_mem_free; + ks->spl_arc_no_grow_bits.value.ui64 = spl_arc_no_grow_bits; + ks->spl_arc_no_grow_count.value.ui64 = spl_arc_no_grow_count; + + ks->spl_frag_max_walk.value.ui64 = spl_frag_max_walk; + ks->spl_frag_walked_out.value.ui64 = spl_frag_walked_out; + ks->spl_frag_walk_cnt.value.ui64 = spl_frag_walk_cnt; + + ks->spl_arc_reclaim_avoided.value.ui64 = + spl_arc_reclaim_avoided; + + ks->kmem_free_to_slab_when_fragmented.value.ui64 = + kmem_free_to_slab_when_fragmented; + } + + return (0); +} + +void +spl_kmem_init(uint64_t xtotal_memory) +{ + int old_kmem_flags = kmem_flags; + int use_large_pages = 0; + size_t maxverify, minfirewall; + + printf("SPL: KMEM starting. Total memory %llu\n", xtotal_memory); + + // Initialise the kstat lock + mutex_init(&kmem_cache_lock, "kmem_cache_lock", MUTEX_DEFAULT, NULL); + mutex_init(&kmem_flags_lock, "kmem_flags_lock", MUTEX_DEFAULT, NULL); + mutex_init(&kmem_cache_kstat_lock, "kmem_kstat_lock", MUTEX_DEFAULT, + NULL); + + spl_kstat_init(); + + + /* + * Small-memory systems (< 24 MB) can't handle kmem_flags overhead. + */ + if (physmem < btop(24 << 20) && !(old_kmem_flags & KMF_STICKY)) + kmem_flags = 0; + + /* + * Don't do firewalled allocations if the heap is less than 1TB + * (i.e. on a 32-bit kernel) + * The resulting VM_NEXTFIT allocations would create too much + * fragmentation in a small heap. 
+ */ + maxverify = minfirewall = PAGESIZE / 2; + + + /* LINTED */ + ASSERT(sizeof (kmem_cpu_cache_t) == KMEM_CPU_CACHE_SIZE); + + list_create(&kmem_caches, sizeof (kmem_cache_t), + offsetof(kmem_cache_t, cache_link)); + + kernelheap_init(); + + kmem_metadata_arena = vmem_create("kmem_metadata", NULL, 0, PAGESIZE, + vmem_alloc, vmem_free, heap_arena, 8 * PAGESIZE, + VM_SLEEP | VMC_NO_QCACHE); + + kmem_msb_arena = vmem_create("kmem_msb", NULL, 0, + PAGESIZE, vmem_alloc, vmem_free, kmem_metadata_arena, 0, + VMC_DUMPSAFE | VM_SLEEP); + + kmem_cache_arena = vmem_create("kmem_cache", NULL, 0, KMEM_ALIGN, + vmem_alloc, vmem_free, kmem_metadata_arena, 0, VM_SLEEP); + + kmem_hash_arena = vmem_create("kmem_hash", NULL, 0, KMEM_ALIGN, + vmem_alloc, vmem_free, kmem_metadata_arena, 0, VM_SLEEP); + + kmem_log_arena = vmem_create("kmem_log", NULL, 0, KMEM_ALIGN, + vmem_alloc, vmem_free, kmem_metadata_arena, 0, VM_SLEEP); + + /* temporary oversize arena for mod_read_system_file */ + kmem_oversize_arena = vmem_create("kmem_oversize", NULL, 0, PAGESIZE, + vmem_alloc, vmem_free, heap_arena, 0, VM_SLEEP); + + // statically declared above kmem_reap_interval = 15 * hz; + + /* + * Read /etc/system. This is a chicken-and-egg problem because + * kmem_flags may be set in /etc/system, but mod_read_system_file() + * needs to use the allocator. The simplest solution is to create + * all the standard kmem caches, read /etc/system, destroy all the + * caches we just created, and then create them all again in light + * of the (possibly) new kmem_flags and other kmem tunables. + */ + + if (old_kmem_flags & KMF_STICKY) + kmem_flags = old_kmem_flags; + + if (!(kmem_flags & KMF_AUDIT)) + vmem_seg_size = offsetof(vmem_seg_t, vs_thread); + + if (kmem_maxverify == 0) + kmem_maxverify = maxverify; + + if (kmem_minfirewall == 0) + kmem_minfirewall = minfirewall; + + /* + * give segkmem a chance to figure out if we are using large pages + * for the kernel heap + */ + // use_large_pages = segkmem_lpsetup(); + use_large_pages = 0; + + /* + * To protect against corruption, we keep the actual number of callers + * KMF_LITE records seperate from the tunable. We arbitrarily clamp + * to 16, since the overhead for small buffers quickly gets out of + * hand. + * + * The real limit would depend on the needs of the largest KMC_NOHASH + * cache. + */ + kmem_lite_count = MIN(MAX(0, kmem_lite_pcs), 16); + kmem_lite_pcs = kmem_lite_count; + + kmem_cache_init(2, use_large_pages); + + if (kmem_flags & (KMF_AUDIT | KMF_RANDOMIZE)) { + if (kmem_transaction_log_size == 0) + kmem_transaction_log_size = MIN(kmem_maxavail() / 50ULL, + PAGESIZE<<4); + kmem_transaction_log = kmem_log_init(kmem_transaction_log_size); + } + + if (kmem_flags & (KMF_CONTENTS | KMF_RANDOMIZE)) { + if (kmem_content_log_size == 0) + kmem_content_log_size = MIN(kmem_maxavail() / 50ULL, + PAGESIZE<<4); + kmem_content_log = kmem_log_init(kmem_content_log_size); + } + + kmem_failure_log = kmem_log_init(kmem_failure_log_size); + + kmem_slab_log = kmem_log_init(kmem_slab_log_size); + + spl_tsd_init(); + spl_rwlock_init(); + spl_taskq_init(); + + /* + * Warn about invalid or dangerous values of kmem_flags. + * Always warn about unsupported values. + */ + if (((kmem_flags & ~(KMF_AUDIT | KMF_DEADBEEF | KMF_REDZONE | + KMF_CONTENTS | KMF_LITE)) != 0) || + ((kmem_flags & KMF_LITE) && kmem_flags != KMF_LITE)) + cmn_err(CE_WARN, "kmem_flags set to unsupported value 0x%x. 
" + "See the Solaris Tunable Parameters Reference Manual.", + kmem_flags); + +#ifdef DEBUG + if ((kmem_flags & KMF_DEBUG) == 0) + cmn_err(CE_NOTE, "kmem debugging disabled."); +#else + /* + * For non-debug kernels, the only "normal" flags are 0, KMF_LITE, + * KMF_REDZONE, and KMF_CONTENTS (the last because it is only enabled + * if KMF_AUDIT is set). We should warn the user about the performance + * penalty of KMF_AUDIT or KMF_DEADBEEF if they are set and KMF_LITE + * isn't set (since that disables AUDIT). + */ + if (!(kmem_flags & KMF_LITE) && + (kmem_flags & (KMF_AUDIT | KMF_DEADBEEF)) != 0) + cmn_err(CE_WARN, "High-overhead kmem debugging features " + "enabled (kmem_flags = 0x%x). Performance degradation " + "and large memory overhead possible. See the Solaris " + "Tunable Parameters Reference Manual.", kmem_flags); +#endif /* not DEBUG */ + + segkmem_zio_init(); + + kmem_cache_applyall(kmem_cache_magazine_enable, NULL, TQ_SLEEP); + + kmem_ready = 1; + + // Install spl kstats + spl_ksp = kstat_create("spl", 0, "spl_misc", "misc", KSTAT_TYPE_NAMED, + sizeof (spl_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE); + + if (spl_ksp != NULL) { + spl_ksp->ks_data = &spl_stats; + spl_ksp->ks_update = spl_kstat_update; + kstat_install(spl_ksp); + } +} + +void +spl_kmem_fini(void) +{ + + kmem_cache_applyall(kmem_cache_magazine_disable, NULL, TQ_SLEEP); + + kstat_delete(spl_ksp); + + kmem_log_fini(kmem_slab_log); + kmem_log_fini(kmem_failure_log); + + if (kmem_flags & (KMF_CONTENTS | KMF_RANDOMIZE)) { + if (kmem_content_log_size == 0) + kmem_content_log_size = kmem_maxavail() / 50; + kmem_log_fini(kmem_content_log); + } + + if (kmem_flags & (KMF_AUDIT | KMF_RANDOMIZE)) { + if (kmem_transaction_log_size == 0) + kmem_transaction_log_size = kmem_maxavail() / 50; + kmem_log_fini(kmem_transaction_log); + } + + // Destroy all the "general allocation" caches + kmem_alloc_caches_destroy(); + + // Destroy the VA associated caches + kmem_destroy_cache_by_name(KMEM_VA_PREFIX); + + kmem_qcache_destroy(); + // Destroy metadata caches + kmem_cache_destroy(kmem_bufctl_cache); + kmem_cache_destroy(kmem_bufctl_audit_cache); + kmem_cache_destroy(kmem_slab_cache); // Dont think this one + + // Some caches cannot be destroyed as + // they mutually reference each other. + // So we explicitly pull them apart piece-by-piece. + kmem_cache_fini(); + + segkmem_zio_fini(); + + // Now destroy the vmem arenas used by kmem. + vmem_destroy(kmem_default_arena); + vmem_destroy(kmem_va_arena); + vmem_destroy(kmem_oversize_arena); + vmem_destroy(kmem_log_arena); + vmem_destroy(kmem_hash_arena); + vmem_destroy(kmem_cache_arena); + vmem_destroy(kmem_msb_arena); + vmem_destroy(kmem_metadata_arena); + + kernelheap_fini(); + + list_destroy(&kmem_caches); + + mutex_destroy(&kmem_cache_kstat_lock); + mutex_destroy(&kmem_flags_lock); + mutex_destroy(&kmem_cache_lock); +} + +static void +kmem_move_init(void) +{ + kmem_defrag_cache = kmem_cache_create("kmem_defrag_cache", + sizeof (kmem_defrag_t), 0, NULL, NULL, NULL, NULL, + kmem_msb_arena, KMC_NOHASH); + kmem_move_cache = kmem_cache_create("kmem_move_cache", + sizeof (kmem_move_t), 0, NULL, NULL, NULL, NULL, + kmem_msb_arena, KMC_NOHASH); + + /* + * kmem guarantees that move callbacks are sequential and that even + * across multiple caches no two moves ever execute simultaneously. + * Move callbacks are processed on a separate taskq so that client code + * does not interfere with internal maintenance tasks. 
+ */ + kmem_move_taskq = taskq_create("kmem_move_taskq", 1, + minclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE); +} + +void +kmem_move_fini(void) +{ + + taskq_wait(kmem_move_taskq); + taskq_destroy(kmem_move_taskq); + kmem_move_taskq = 0; + + kmem_cache_destroy(kmem_move_cache); + kmem_cache_destroy(kmem_defrag_cache); + +} + +void +spl_kmem_thread_init(void) +{ + kmem_move_init(); + + // Initialize the spl_free locks + mutex_init(&spl_free_thread_lock, "spl_free_thead_lock", MUTEX_DEFAULT, + NULL); + + kmem_taskq = taskq_create("kmem_taskq", 1, minclsyspri, + 300, INT_MAX, TASKQ_PREPOPULATE); + + spl_free_thread_exit = FALSE; + (void) cv_init(&spl_free_thread_cv, NULL, CV_DEFAULT, NULL); + (void) thread_create(NULL, 0, spl_free_thread, 0, 0, 0, 0, 92); +} + +void +spl_kmem_thread_fini(void) +{ + shutting_down = 1; + + mutex_enter(&spl_free_thread_lock); + spl_free_thread_exit = TRUE; + while (spl_free_thread_exit) { + cv_signal(&spl_free_thread_cv); + cv_wait(&spl_free_thread_cv, &spl_free_thread_lock); + } + mutex_exit(&spl_free_thread_lock); + cv_destroy(&spl_free_thread_cv); + mutex_destroy(&spl_free_thread_lock); + + bsd_untimeout(kmem_update, 0); + bsd_untimeout(kmem_reap_timeout, &kmem_reaping); + bsd_untimeout(kmem_reap_timeout, &kmem_reaping_idspace); + + taskq_wait(kmem_taskq); + + taskq_destroy(kmem_taskq); + kmem_taskq = 0; + + kmem_move_fini(); + +} + +void +spl_kmem_mp_init(void) +{ + kmem_update_timeout(NULL); +} + +/* + * Return the slab of the allocated buffer, or NULL if the buffer is not + * allocated. This function may be called with a known slab address to determine + * whether or not the buffer is allocated, or with a NULL slab address to obtain + * an allocated buffer's slab. + */ +static kmem_slab_t * +kmem_slab_allocated(kmem_cache_t *cp, kmem_slab_t *sp, void *buf) +{ + kmem_bufctl_t *bcp, *bufbcp; + + ASSERT(MUTEX_HELD(&cp->cache_lock)); + ASSERT(sp == NULL || KMEM_SLAB_MEMBER(sp, buf)); + + if (cp->cache_flags & KMF_HASH) { + for (bcp = *KMEM_HASH(cp, buf); + (bcp != NULL) && (bcp->bc_addr != buf); + bcp = bcp->bc_next) { + continue; + } + ASSERT(sp != NULL && bcp != NULL ? sp == bcp->bc_slab : 1); + return (bcp == NULL ? NULL : bcp->bc_slab); + } + + if (sp == NULL) { + sp = KMEM_SLAB(cp, buf); + } + bufbcp = KMEM_BUFCTL(cp, buf); + for (bcp = sp->slab_head; + (bcp != NULL) && (bcp != bufbcp); + bcp = bcp->bc_next) { + continue; + } + return (bcp == NULL ? sp : NULL); +} + +static boolean_t +kmem_slab_is_reclaimable(kmem_cache_t *cp, kmem_slab_t *sp, int flags) +{ + long refcnt = sp->slab_refcnt; + + ASSERT(cp->cache_defrag != NULL); + + /* + * For code coverage we want to be able to move an object within the + * same slab (the only partial slab) even if allocating the destination + * buffer resulted in a completely allocated slab. + */ + if (flags & KMM_DEBUG) { + return ((flags & KMM_DESPERATE) || + ((sp->slab_flags & KMEM_SLAB_NOMOVE) == 0)); + } + + /* If we're desperate, we don't care if the client said NO. */ + if (flags & KMM_DESPERATE) { + return (refcnt < sp->slab_chunks); /* any partial */ + } + + if (sp->slab_flags & KMEM_SLAB_NOMOVE) { + return (B_FALSE); + } + + if ((refcnt == 1) || kmem_move_any_partial) { + return (refcnt < sp->slab_chunks); + } + + /* + * The reclaim threshold is adjusted at each kmem_cache_scan() so that + * slabs with a progressively higher percentage of used buffers can be + * reclaimed until the cache as a whole is no longer fragmented. 
+ * + * sp->slab_refcnt kmd_reclaim_numer + * --------------- < ------------------ + * sp->slab_chunks KMEM_VOID_FRACTION + */ + return ((refcnt * KMEM_VOID_FRACTION) < + (sp->slab_chunks * cp->cache_defrag->kmd_reclaim_numer)); +} + +/* + * May be called from the kmem_move_taskq, from kmem_cache_move_notify_task(), + * or when the buffer is freed. + */ +static void +kmem_slab_move_yes(kmem_cache_t *cp, kmem_slab_t *sp, void *from_buf) +{ + ASSERT(MUTEX_HELD(&cp->cache_lock)); + ASSERT(KMEM_SLAB_MEMBER(sp, from_buf)); + + if (!KMEM_SLAB_IS_PARTIAL(sp)) { + return; + } + + if (sp->slab_flags & KMEM_SLAB_NOMOVE) { + if (KMEM_SLAB_OFFSET(sp, from_buf) == sp->slab_stuck_offset) { + avl_remove(&cp->cache_partial_slabs, sp); + sp->slab_flags &= ~KMEM_SLAB_NOMOVE; + sp->slab_stuck_offset = (uint32_t)-1; + avl_add(&cp->cache_partial_slabs, sp); + } + } else { + sp->slab_later_count = 0; + sp->slab_stuck_offset = (uint32_t)-1; + } +} + +static void +kmem_slab_move_no(kmem_cache_t *cp, kmem_slab_t *sp, void *from_buf) +{ + ASSERT(taskq_member(kmem_move_taskq, curthread)); + ASSERT(MUTEX_HELD(&cp->cache_lock)); + ASSERT(KMEM_SLAB_MEMBER(sp, from_buf)); + + if (!KMEM_SLAB_IS_PARTIAL(sp)) { + return; + } + + avl_remove(&cp->cache_partial_slabs, sp); + sp->slab_later_count = 0; + sp->slab_flags |= KMEM_SLAB_NOMOVE; + sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp, from_buf); + avl_add(&cp->cache_partial_slabs, sp); +} + +static void kmem_move_end(kmem_cache_t *, kmem_move_t *); + +/* + * The move callback takes two buffer addresses, the buffer to be moved, and a + * newly allocated and constructed buffer selected by kmem as the destination. + * It also takes the size of the buffer and an optional user argument specified + * at cache creation time. kmem guarantees that the buffer to be moved has not + * been unmapped by the virtual memory subsystem. Beyond that, it cannot + * guarantee the present whereabouts of the buffer to be moved, so it is up to + * the client to safely determine whether or not it is still using the buffer. + * The client must not free either of the buffers passed to the move callback, + * since kmem wants to free them directly to the slab layer. The client response + * tells kmem which of the two buffers to free: + * + * YES kmem frees the old buffer (the move was successful) + * NO kmem frees the new buffer, marks the slab of the old buffer + * non-reclaimable to avoid bothering the client again + * LATER kmem frees the new buffer, increments slab_later_count + * DONT_KNOW kmem frees the new buffer + * DONT_NEED kmem frees both the old buffer and the new buffer + * + * The pending callback argument now being processed contains both of the + * buffers (old and new) passed to the move callback function, the slab of the + * old buffer, and flags related to the move request, such as whether or not the + * system was desperate for memory. + * + * Slabs are not freed while there is a pending callback, but instead are kept + * on a deadlist, which is drained after the last callback completes. This means + * that slabs are safe to access until kmem_move_end(), no matter how many of + * their buffers have been freed. Once slab_refcnt reaches zero, it stays at + * zero for as long as the slab remains on the deadlist and until the slab is + * freed. 
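To make the move-callback contract above concrete, here is a rough sketch of what a client callback can look like. It is illustrative only: the client type, its magic value, the trylock-based validity test, and the local cbrc_t mirror are all hypothetical; real clients registered via kmem_cache_set_move() have their own rules for deciding whether the old buffer is still theirs, and must not free either buffer themselves.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* local mirror of the kmem_cbrc_t responses, for illustration only */
typedef enum {
	CBRC_YES, CBRC_NO, CBRC_LATER, CBRC_DONT_NEED, CBRC_DONT_KNOW
} cbrc_t;

/* hypothetical client object cached in a movable kmem cache */
typedef struct my_obj {
	int		mo_magic;	/* lets us recognize our own buffers */
	pthread_mutex_t	mo_lock;	/* only ever trylock'd in the callback */
	char		mo_data[48];
} my_obj_t;

#define	MY_MAGIC	0x6d6f766d

static cbrc_t
my_obj_move(void *old, void *new, size_t size, void *arg)
{
	my_obj_t *o = old, *n = new;

	(void) size;
	(void) arg;
	if (o->mo_magic != MY_MAGIC)
		return (CBRC_DONT_KNOW);	/* probably free; let kmem decide */
	if (pthread_mutex_trylock(&o->mo_lock) != 0)
		return (CBRC_LATER);		/* in use right now; ask again */
	memcpy(n->mo_data, o->mo_data, sizeof (n->mo_data));
	n->mo_magic = MY_MAGIC;
	pthread_mutex_init(&n->mo_lock, NULL);
	o->mo_magic = 0;			/* old copy is now dead */
	pthread_mutex_unlock(&o->mo_lock);
	return (CBRC_YES);			/* kmem frees the old buffer */
}

int
main(void)
{
	my_obj_t a = { .mo_magic = MY_MAGIC }, b = { 0 };

	pthread_mutex_init(&a.mo_lock, NULL);
	strcpy(a.mo_data, "payload");
	printf("move -> %d, new data \"%s\"\n",
	    my_obj_move(&a, &b, sizeof (a), NULL), b.mo_data);
	return (0);
}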
+ */ +static void +kmem_move_buffer(kmem_move_t *callback) +{ + kmem_cbrc_t response; + kmem_slab_t *sp = callback->kmm_from_slab; + kmem_cache_t *cp = sp->slab_cache; + boolean_t free_on_slab; + + ASSERT(taskq_member(kmem_move_taskq, curthread)); + ASSERT(MUTEX_NOT_HELD(&cp->cache_lock)); + ASSERT(KMEM_SLAB_MEMBER(sp, callback->kmm_from_buf)); + + /* + * The number of allocated buffers on the slab may have changed since we + * last checked the slab's reclaimability (when the pending move was + * enqueued), or the client may have responded NO when asked to move + * another buffer on the same slab. + */ + if (!kmem_slab_is_reclaimable(cp, sp, callback->kmm_flags)) { + kmem_slab_free(cp, callback->kmm_to_buf); + kmem_move_end(cp, callback); + return; + } + + /* + * Checking the slab layer is easy, so we might as well do that here + * in case we can avoid bothering the client. + */ + mutex_enter(&cp->cache_lock); + free_on_slab = (kmem_slab_allocated(cp, sp, + callback->kmm_from_buf) == NULL); + mutex_exit(&cp->cache_lock); + + if (free_on_slab) { + kmem_slab_free(cp, callback->kmm_to_buf); + kmem_move_end(cp, callback); + return; + } + + if (cp->cache_flags & KMF_BUFTAG) { + /* + * Make kmem_cache_alloc_debug() apply the constructor for us. + */ + if (kmem_cache_alloc_debug(cp, callback->kmm_to_buf, + KM_NOSLEEP, 1, caller()) != 0) { + kmem_move_end(cp, callback); + return; + } + } else if (cp->cache_constructor != NULL && + cp->cache_constructor(callback->kmm_to_buf, cp->cache_private, + KM_NOSLEEP) != 0) { + atomic_inc_64(&cp->cache_alloc_fail); + kmem_slab_free(cp, callback->kmm_to_buf); + kmem_move_end(cp, callback); + return; + } + + cp->cache_defrag->kmd_callbacks++; + cp->cache_defrag->kmd_thread = spl_current_thread(); + cp->cache_defrag->kmd_from_buf = callback->kmm_from_buf; + cp->cache_defrag->kmd_to_buf = callback->kmm_to_buf; + DTRACE_PROBE2(kmem__move__start, kmem_cache_t *, cp, kmem_move_t *, + callback); + + response = cp->cache_move(callback->kmm_from_buf, + callback->kmm_to_buf, cp->cache_bufsize, cp->cache_private); + + DTRACE_PROBE3(kmem__move__end, kmem_cache_t *, cp, kmem_move_t *, + callback, kmem_cbrc_t, response); + cp->cache_defrag->kmd_thread = NULL; + cp->cache_defrag->kmd_from_buf = NULL; + cp->cache_defrag->kmd_to_buf = NULL; + + if (response == KMEM_CBRC_YES) { + cp->cache_defrag->kmd_yes++; + kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE); + /* slab safe to access until kmem_move_end() */ + if (sp->slab_refcnt == 0) + cp->cache_defrag->kmd_slabs_freed++; + mutex_enter(&cp->cache_lock); + kmem_slab_move_yes(cp, sp, callback->kmm_from_buf); + mutex_exit(&cp->cache_lock); + kmem_move_end(cp, callback); + return; + } + + switch (response) { + case KMEM_CBRC_NO: + cp->cache_defrag->kmd_no++; + mutex_enter(&cp->cache_lock); + kmem_slab_move_no(cp, sp, callback->kmm_from_buf); + mutex_exit(&cp->cache_lock); + break; + case KMEM_CBRC_LATER: + cp->cache_defrag->kmd_later++; + mutex_enter(&cp->cache_lock); + if (!KMEM_SLAB_IS_PARTIAL(sp)) { + mutex_exit(&cp->cache_lock); + break; + } + + if (++sp->slab_later_count >= KMEM_DISBELIEF) { + kmem_slab_move_no(cp, sp, + callback->kmm_from_buf); + } else if (!(sp->slab_flags & KMEM_SLAB_NOMOVE)) { + sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp, + callback->kmm_from_buf); + } + mutex_exit(&cp->cache_lock); + break; + case KMEM_CBRC_DONT_NEED: + cp->cache_defrag->kmd_dont_need++; + kmem_slab_free_constructed(cp, callback->kmm_from_buf, + B_FALSE); + if (sp->slab_refcnt == 0) + cp->cache_defrag->kmd_slabs_freed++; 
+ mutex_enter(&cp->cache_lock); + kmem_slab_move_yes(cp, sp, callback->kmm_from_buf); + mutex_exit(&cp->cache_lock); + break; + case KMEM_CBRC_DONT_KNOW: + /* + * If we don't know if we can move this buffer or not, + * we'll just assume that we can't: if the buffer is + * in fact free, then it is sitting in one of the + * per-CPU magazines or in a full magazine in the depot + * layer. Either way, because defrag is induced in the + * same logic that reaps a cache, it's likely that full + * magazines will be returned to the system soon + * (thereby accomplishing what we're trying to + * accomplish here: return those magazines to their + * slabs). Given this, any work that we might do now to + * locate a buffer in a magazine is wasted (and + * expensive!) work; we bump a counter in this case and + * otherwise assume that we can't move it. + */ + cp->cache_defrag->kmd_dont_know++; + break; + default: + panic("'%s' (%p) unexpected move callback " + "response %d\n", cp->cache_name, (void *)cp, + response); + } + + kmem_slab_free_constructed(cp, callback->kmm_to_buf, B_FALSE); + kmem_move_end(cp, callback); +} + +/* Return B_FALSE if there is insufficient memory for the move request. */ +static boolean_t +kmem_move_begin(kmem_cache_t *cp, kmem_slab_t *sp, void *buf, int flags) +{ + void *to_buf; + avl_index_t index; + kmem_move_t *callback, *pending; + ulong_t n; + + ASSERT(taskq_member(kmem_taskq, curthread)); + ASSERT(MUTEX_NOT_HELD(&cp->cache_lock)); + ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING); + + callback = kmem_cache_alloc(kmem_move_cache, KM_NOSLEEP); + + if (callback == NULL) + return (B_FALSE); + + callback->kmm_from_slab = sp; + callback->kmm_from_buf = buf; + callback->kmm_flags = flags; + + mutex_enter(&cp->cache_lock); + + n = avl_numnodes(&cp->cache_partial_slabs); + if ((n == 0) || ((n == 1) && !(flags & KMM_DEBUG))) { + mutex_exit(&cp->cache_lock); + kmem_cache_free(kmem_move_cache, callback); + return (B_TRUE); /* there is no need for the move request */ + } + + pending = avl_find(&cp->cache_defrag->kmd_moves_pending, buf, &index); + if (pending != NULL) { + /* + * If the move is already pending and we're desperate now, + * update the move flags. + */ + if (flags & KMM_DESPERATE) { + pending->kmm_flags |= KMM_DESPERATE; + } + mutex_exit(&cp->cache_lock); + kmem_cache_free(kmem_move_cache, callback); + return (B_TRUE); + } + + to_buf = kmem_slab_alloc_impl(cp, avl_first(&cp->cache_partial_slabs), + B_FALSE); + callback->kmm_to_buf = to_buf; + avl_insert(&cp->cache_defrag->kmd_moves_pending, callback, index); + + mutex_exit(&cp->cache_lock); + + if (!taskq_dispatch(kmem_move_taskq, (task_func_t *)kmem_move_buffer, + callback, TQ_NOSLEEP)) { + mutex_enter(&cp->cache_lock); + avl_remove(&cp->cache_defrag->kmd_moves_pending, callback); + mutex_exit(&cp->cache_lock); + kmem_slab_free(cp, to_buf); + kmem_cache_free(kmem_move_cache, callback); + return (B_FALSE); + } + + return (B_TRUE); +} + +static void +kmem_move_end(kmem_cache_t *cp, kmem_move_t *callback) +{ + avl_index_t index; + + ASSERT(cp->cache_defrag != NULL); + ASSERT(taskq_member(kmem_move_taskq, curthread)); + ASSERT(MUTEX_NOT_HELD(&cp->cache_lock)); + + mutex_enter(&cp->cache_lock); + VERIFY(avl_find(&cp->cache_defrag->kmd_moves_pending, + callback->kmm_from_buf, &index) != NULL); + avl_remove(&cp->cache_defrag->kmd_moves_pending, callback); + if (avl_is_empty(&cp->cache_defrag->kmd_moves_pending)) { + list_t *deadlist = &cp->cache_defrag->kmd_deadlist; + kmem_slab_t *sp; + + /* + * The last pending move completed. 
Release all slabs + * from the front of the dead list except for any slab + * at the tail that needs to be released from the context + * of kmem_move_buffers(). kmem deferred unmapping the + * buffers on these slabs in order to guarantee that + * buffers passed to the move callback have been touched + * only by kmem or by the client itself. + */ + while ((sp = list_remove_head(deadlist)) != NULL) { + if (sp->slab_flags & KMEM_SLAB_MOVE_PENDING) { + list_insert_tail(deadlist, sp); + break; + } + cp->cache_defrag->kmd_deadcount--; + cp->cache_slab_destroy++; + mutex_exit(&cp->cache_lock); + kmem_slab_destroy(cp, sp); + mutex_enter(&cp->cache_lock); + } + } + mutex_exit(&cp->cache_lock); + kmem_cache_free(kmem_move_cache, callback); +} + +/* + * Move buffers from least used slabs first by scanning backwards from the end + * of the partial slab list. Scan at most max_scan candidate slabs and move + * buffers from at most max_slabs slabs (0 for all partial slabs in both cases). + * If desperate to reclaim memory, move buffers from any partial slab, otherwise + * skip slabs with a ratio of allocated buffers at or above the current + * threshold. Return the number of unskipped slabs (at most max_slabs, -1 if the + * scan is aborted) so that the caller can adjust the reclaimability threshold + * depending on how many reclaimable slabs it finds. + * + * kmem_move_buffers() drops and reacquires cache_lock every time it issues a + * move request, since it is not valid for kmem_move_begin() to call + * kmem_cache_alloc() or taskq_dispatch() with cache_lock held. + */ +static int +kmem_move_buffers(kmem_cache_t *cp, size_t max_scan, size_t max_slabs, + int flags) +{ + kmem_slab_t *sp; + void *buf; + int i, j; /* slab index, buffer index */ + int s; /* reclaimable slabs */ + int b; /* allocated (movable) buffers on reclaimable slab */ + boolean_t success; + int refcnt; + int nomove; + + ASSERT(taskq_member(kmem_taskq, curthread)); + ASSERT(MUTEX_HELD(&cp->cache_lock)); + ASSERT(kmem_move_cache != NULL); + ASSERT(cp->cache_move != NULL && cp->cache_defrag != NULL); + ASSERT((flags & KMM_DEBUG) ? !avl_is_empty(&cp->cache_partial_slabs) : + avl_numnodes(&cp->cache_partial_slabs) > 1); + + if (kmem_move_blocked) { + return (0); + } + + if (kmem_move_fulltilt) { + flags |= KMM_DESPERATE; + } + + if (max_scan == 0 || (flags & KMM_DESPERATE)) { + /* + * Scan as many slabs as needed to find the desired number of + * candidate slabs. + */ + max_scan = (size_t)-1; + } + + if (max_slabs == 0 || (flags & KMM_DESPERATE)) { + /* Find as many candidate slabs as possible. */ + max_slabs = (size_t)-1; + } + + sp = avl_last(&cp->cache_partial_slabs); + ASSERT(KMEM_SLAB_IS_PARTIAL(sp)); + for (i = 0, s = 0; (i < max_scan) && (s < max_slabs) && (sp != NULL) && + ((sp != avl_first(&cp->cache_partial_slabs)) || + (flags & KMM_DEBUG)); + sp = AVL_PREV(&cp->cache_partial_slabs, sp), i++) { + + if (!kmem_slab_is_reclaimable(cp, sp, flags)) { + continue; + } + s++; + + /* Look for allocated buffers to move. */ + for (j = 0, b = 0, buf = sp->slab_base; + (j < sp->slab_chunks) && (b < sp->slab_refcnt); + buf = (((char *)buf) + cp->cache_chunksize), j++) { + + if (kmem_slab_allocated(cp, sp, buf) == NULL) { + continue; + } + + b++; + + /* + * Prevent the slab from being destroyed while we drop + * cache_lock and while the pending move is not yet + * registered. 
Flag the pending move while + * kmd_moves_pending may still be empty, since we can't + * yet rely on a non-zero pending move count to prevent + * the slab from being destroyed. + */ + ASSERT(!(sp->slab_flags & KMEM_SLAB_MOVE_PENDING)); + sp->slab_flags |= KMEM_SLAB_MOVE_PENDING; + /* + * Recheck refcnt and nomove after reacquiring the lock, + * since these control the order of partial slabs, and + * we want to know if we can pick up the scan where we + * left off. + */ + refcnt = sp->slab_refcnt; + nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); + mutex_exit(&cp->cache_lock); + + success = kmem_move_begin(cp, sp, buf, flags); + + /* + * Now, before the lock is reacquired, kmem could + * process all pending move requests and purge the + * deadlist, so that upon reacquiring the lock, sp has + * been remapped. Or, the client may free all the + * objects on the slab while the pending moves are still + * on the taskq. Therefore, the KMEM_SLAB_MOVE_PENDING + * flag causes the slab to be put at the end of the + * deadlist and prevents it from being destroyed, since + * we plan to destroy it here after reacquiring the + * lock. + */ + mutex_enter(&cp->cache_lock); + ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING); + sp->slab_flags &= ~KMEM_SLAB_MOVE_PENDING; + + if (sp->slab_refcnt == 0) { + list_t *deadlist = + &cp->cache_defrag->kmd_deadlist; + list_remove(deadlist, sp); + + if (!avl_is_empty( + &cp->cache_defrag->kmd_moves_pending)) { + /* + * A pending move makes it unsafe to + * destroy the slab, because even though + * the move is no longer needed, the + * context where that is determined + * requires the slab to exist. + * Fortunately, a pending move also + * means we don't need to destroy the + * slab here, since it will get + * destroyed along with any other slabs + * on the deadlist after the last + * pending move completes. + */ + list_insert_head(deadlist, sp); + return (-1); + } + + /* + * Destroy the slab now if it was completely + * freed while we dropped cache_lock and there + * are no pending moves. Since slab_refcnt + * cannot change once it reaches zero, no new + * pending moves from that slab are possible. + */ + cp->cache_defrag->kmd_deadcount--; + cp->cache_slab_destroy++; + mutex_exit(&cp->cache_lock); + kmem_slab_destroy(cp, sp); + mutex_enter(&cp->cache_lock); + /* + * Since we can't pick up the scan where we left + * off, abort the scan and say nothing about the + * number of reclaimable slabs. + */ + return (-1); + } + + if (!success) { + /* + * Abort the scan if there is not enough memory + * for the request and say nothing about the + * number of reclaimable slabs. + */ + return (-1); + } + + /* + * The slab's position changed while the lock was + * dropped, so we don't know where we are in the + * sequence any more. + */ + if (sp->slab_refcnt != refcnt) { + /* + * If this is a KMM_DEBUG move, the slab_refcnt + * may have changed because we allocated a + * destination buffer on the same slab. In that + * case, we're not interested in counting it. + */ + return (-1); + } + if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove) + return (-1); + + /* + * Generating a move request allocates a destination + * buffer from the slab layer, bumping the first partial + * slab if it is completely allocated. If the current + * slab becomes the first partial slab as a result, we + * can't continue to scan backwards. 
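+ * (The first partial slab is where kmem_move_begin() allocates the
+ * destination buffer, so once the backward scan reaches it there is
+ * nothing left to consolidate into.)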
+ * + * If this is a KMM_DEBUG move and we allocated the + * destination buffer from the last partial slab, then + * the buffer we're moving is on the same slab and our + * slab_refcnt has changed, causing us to return before + * reaching here if there are no partial slabs left. + */ + ASSERT(!avl_is_empty(&cp->cache_partial_slabs)); + if (sp == avl_first(&cp->cache_partial_slabs)) { + /* + * We're not interested in a second KMM_DEBUG + * move. + */ + goto end_scan; + } + } + } +end_scan: + + return (s); +} + +typedef struct kmem_move_notify_args { + kmem_cache_t *kmna_cache; + void *kmna_buf; +} kmem_move_notify_args_t; + +static void +kmem_cache_move_notify_task(void *arg) +{ + kmem_move_notify_args_t *args = arg; + kmem_cache_t *cp = args->kmna_cache; + void *buf = args->kmna_buf; + kmem_slab_t *sp; + + ASSERT(taskq_member(kmem_taskq, curthread)); + ASSERT(list_link_active(&cp->cache_link)); + + zfs_kmem_free(args, sizeof (kmem_move_notify_args_t)); + mutex_enter(&cp->cache_lock); + sp = kmem_slab_allocated(cp, NULL, buf); + + /* Ignore the notification if the buffer is no longer allocated. */ + if (sp == NULL) { + mutex_exit(&cp->cache_lock); + return; + } + + /* Ignore the notification if there's no reason to move the buffer. */ + if (avl_numnodes(&cp->cache_partial_slabs) > 1) { + /* + * So far the notification is not ignored. Ignore the + * notification if the slab is not marked by an earlier refusal + * to move a buffer. + */ + if (!(sp->slab_flags & KMEM_SLAB_NOMOVE) && + (sp->slab_later_count == 0)) { + mutex_exit(&cp->cache_lock); + return; + } + + kmem_slab_move_yes(cp, sp, buf); + ASSERT(!(sp->slab_flags & KMEM_SLAB_MOVE_PENDING)); + sp->slab_flags |= KMEM_SLAB_MOVE_PENDING; + mutex_exit(&cp->cache_lock); + /* see kmem_move_buffers() about dropping the lock */ + (void) kmem_move_begin(cp, sp, buf, KMM_NOTIFY); + mutex_enter(&cp->cache_lock); + ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING); + sp->slab_flags &= ~KMEM_SLAB_MOVE_PENDING; + if (sp->slab_refcnt == 0) { + list_t *deadlist = &cp->cache_defrag->kmd_deadlist; + list_remove(deadlist, sp); + + if (!avl_is_empty( + &cp->cache_defrag->kmd_moves_pending)) { + list_insert_head(deadlist, sp); + mutex_exit(&cp->cache_lock); + return; + } + + cp->cache_defrag->kmd_deadcount--; + cp->cache_slab_destroy++; + mutex_exit(&cp->cache_lock); + kmem_slab_destroy(cp, sp); + return; + } + } else { + kmem_slab_move_yes(cp, sp, buf); + } + mutex_exit(&cp->cache_lock); +} + +void +kmem_cache_move_notify(kmem_cache_t *cp, void *buf) +{ + kmem_move_notify_args_t *args; + + args = zfs_kmem_alloc(sizeof (kmem_move_notify_args_t), KM_NOSLEEP); + if (args != NULL) { + args->kmna_cache = cp; + args->kmna_buf = buf; + if (!taskq_dispatch(kmem_taskq, + (task_func_t *)kmem_cache_move_notify_task, args, + TQ_NOSLEEP)) + zfs_kmem_free(args, sizeof (kmem_move_notify_args_t)); + } +} + +static void +kmem_cache_defrag(kmem_cache_t *cp) +{ + size_t n; + + ASSERT(cp->cache_defrag != NULL); + + mutex_enter(&cp->cache_lock); + n = avl_numnodes(&cp->cache_partial_slabs); + if (n > 1) { + /* kmem_move_buffers() drops and reacquires cache_lock */ + cp->cache_defrag->kmd_defrags++; + (void) kmem_move_buffers(cp, n, 0, KMM_DESPERATE); + } + mutex_exit(&cp->cache_lock); +} + +/* Is this cache above the fragmentation threshold? 
*/ +static boolean_t +kmem_cache_frag_threshold(kmem_cache_t *cp, uint64_t nfree) +{ + /* + * nfree kmem_frag_numer + * ------------------ > --------------- + * cp->cache_buftotal kmem_frag_denom + */ + return ((nfree * kmem_frag_denom) > + (cp->cache_buftotal * kmem_frag_numer)); +} + +static boolean_t +kmem_cache_is_fragmented(kmem_cache_t *cp, boolean_t *doreap) +{ + boolean_t fragmented; + uint64_t nfree; + + ASSERT(MUTEX_HELD(&cp->cache_lock)); + *doreap = B_FALSE; + + if (kmem_move_fulltilt) { + if (avl_numnodes(&cp->cache_partial_slabs) > 1) { + return (B_TRUE); + } + } else { + if ((cp->cache_complete_slab_count + avl_numnodes( + &cp->cache_partial_slabs)) < kmem_frag_minslabs) { + return (B_FALSE); + } + } + + nfree = cp->cache_bufslab; + fragmented = ((avl_numnodes(&cp->cache_partial_slabs) > 1) && + kmem_cache_frag_threshold(cp, nfree)); + + /* + * Free buffers in the magazine layer appear allocated from the point of + * view of the slab layer. We want to know if the slab layer would + * appear fragmented if we included free buffers from magazines that + * have fallen out of the working set. + */ + if (!fragmented) { + long reap; + + mutex_enter(&cp->cache_depot_lock); + reap = MIN(cp->cache_full.ml_reaplimit, cp->cache_full.ml_min); + reap = MIN(reap, cp->cache_full.ml_total); + mutex_exit(&cp->cache_depot_lock); + + nfree += ((uint64_t)reap * cp->cache_magtype->mt_magsize); + if (kmem_cache_frag_threshold(cp, nfree)) { + *doreap = B_TRUE; + } + } + + return (fragmented); +} + +/* Called periodically from kmem_taskq */ +static void +kmem_cache_scan(kmem_cache_t *cp) +{ + boolean_t reap = B_FALSE; + kmem_defrag_t *kmd; + + ASSERT(taskq_member(kmem_taskq, curthread)); + + mutex_enter(&cp->cache_lock); + + kmd = cp->cache_defrag; + if (kmd->kmd_consolidate > 0) { + kmd->kmd_consolidate--; + mutex_exit(&cp->cache_lock); + kmem_cache_reap(cp); + return; + } + + if (kmem_cache_is_fragmented(cp, &reap)) { + size_t slabs_found; + + /* + * Consolidate reclaimable slabs from the end of the partial + * slab list (scan at most kmem_reclaim_scan_range slabs to find + * reclaimable slabs). Keep track of how many candidate slabs we + * looked for and how many we actually found so we can adjust + * the definition of a candidate slab if we're having trouble + * finding them. + * + * kmem_move_buffers() drops and reacquires cache_lock. + */ + kmd->kmd_scans++; + slabs_found = kmem_move_buffers(cp, kmem_reclaim_scan_range, + kmem_reclaim_max_slabs, 0); + kmd->kmd_slabs_sought += kmem_reclaim_max_slabs; + kmd->kmd_slabs_found += slabs_found; + + if (++kmd->kmd_tries >= kmem_reclaim_scan_range) { + kmd->kmd_tries = 0; + + /* + * If we had difficulty finding candidate slabs in + * previous scans, adjust the threshold so that + * candidates are easier to find. + */ + if (kmd->kmd_slabs_found == kmd->kmd_slabs_sought) { + kmem_adjust_reclaim_threshold(kmd, -1); + } else if ((kmd->kmd_slabs_found * 2) < + kmd->kmd_slabs_sought) { + kmem_adjust_reclaim_threshold(kmd, 1); + } + kmd->kmd_slabs_sought = 0; + kmd->kmd_slabs_found = 0; + } + } else { + kmem_reset_reclaim_threshold(cp->cache_defrag); +#ifdef DEBUG + if (!avl_is_empty(&cp->cache_partial_slabs)) { + /* + * In a debug kernel we want the consolidator to + * run occasionally even when there is plenty of + * memory. 
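+ * A 16-bit random draw below decides what happens: unless
+ * kmem_move_noreap is set, roughly one scan in kmem_mtb_reap takes
+ * the early-return reap path, and roughly one in kmem_mtb_move
+ * forces a single-slab KMM_DEBUG move.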
+ */ + uint16_t debug_rand; + + /* + * smd: note that this only gets called for the + * dnode cache because only the dnode cache has + * kmem_cache_set_move() applied to it + * brendon says move is voluntary and "tricky" + * the reason this is not called is because the source + * is kmem_cache_update(), that only calls this + * function (kmem_cache_scan()) + * if there is a move/defrag (same thing) associated + * with it so hoist some of this code up to to + * kmem_cache_update + */ + + (void) random_get_bytes((uint8_t *)&debug_rand, 2); + if (!kmem_move_noreap && + ((debug_rand % kmem_mtb_reap) == 0)) { + mutex_exit(&cp->cache_lock); + kmem_mtb_reap_count++; + return; + } else if ((debug_rand % kmem_mtb_move) == 0) { + kmd->kmd_scans++; + (void) kmem_move_buffers(cp, + kmem_reclaim_scan_range, 1, KMM_DEBUG); + } + } +#endif /* DEBUG */ + } + + mutex_exit(&cp->cache_lock); + +} + +// =============================================================== +// Status +// =============================================================== + + +size_t +kmem_size(void) +{ + return (total_memory); // smd +} + +// this is used in arc_reclaim_needed. if 1, reclaim is needed. +// returning 1 has the effect of throttling ARC, so be careful. +int +spl_vm_pool_low(void) +{ + bool m = spl_minimal_physmem_p_logic(); + + if (m) + return (0); + else + return (1); +} + +// =============================================================== +// String handling +// =============================================================== + +char * +kmem_strdup(const char *str) +{ + char *buf; + int len; + len = strlen(str) + 1; + buf = kmem_alloc(len, KM_SLEEP); + strlcpy(buf, str, len); + return (buf); +} + +void +kmem_strfree(char *str) +{ + zfs_kmem_free(str, strlen(str) + 1); +} + +char * +kvasprintf(const char *fmt, va_list ap) +{ + unsigned int len; + char *p; + va_list aq; + + va_copy(aq, ap); + len = vsnprintf(NULL, 0, fmt, aq); + va_end(aq); + p = zfs_kmem_alloc(len+1, KM_SLEEP); + if (!p) + return (NULL); + + vsnprintf(p, len+1, fmt, ap); + + return (p); +} + +char * +kmem_vasprintf(const char *fmt, va_list ap) +{ + va_list aq; + char *ptr; + + do { + va_copy(aq, ap); + ptr = kvasprintf(fmt, aq); + va_end(aq); + } while (ptr == NULL); + + return (ptr); +} + +char * +kmem_asprintf(const char *fmt, ...) 
+{ + va_list ap; + char *ptr; + + do { + va_start(ap, fmt); + ptr = kvasprintf(fmt, ap); + va_end(ap); + } while (ptr == NULL); + + return (ptr); +} + +char * +kmem_strstr(const char *in, const char *str) +{ + char c; + size_t len; + + c = *str++; + if (!c) + return ((char *)in); // Trivial empty string case + + len = strlen(str); + do { + char sc; + + do { + sc = *in++; + if (!sc) + return ((char *)0); + } while (sc != c); + } while (strncmp(in, str, len) != 0); + + return ((char *)(in - 1)); +} + + +// suppress timer and related logic for this kmem cache can live here +// three new per-kmem-cache stats: counters: non-vba-success non-vba-fail; +// flag: arc_no_grow +// from zfs/include/sys/spa.h + +#define SPA_MINBLOCKSHIFT 9 +#define SPA_MAXBLOCKSHIFT 24 +#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) +#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) + +typedef struct { + _Atomic(kmem_cache_t *)cp_metadata; + _Atomic(kmem_cache_t *)cp_filedata; + uint16_t pointed_to; + _Atomic int64_t suppress_count; + _Atomic uint64_t last_bumped; +} ksupp_t; + +typedef struct { + ksupp_t *ks_entry; +} iksupp_t; + +ksupp_t ksvec[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT] = + { { NULL, NULL, false, 0, 0 } }; +iksupp_t iksvec[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT] = + { { NULL } }; + +static bool spl_zio_no_grow_inited = false; + +/* + * Test that cp is in ks->cp_metadata or ks->cp_filedata; if so just return + * otherwise, choose the first (and possibly second) NULL + * and try to set it to cp. + * If successful, return. otherwise, sanity check that + * nobody has set ks->cp_metadata or ks->cp_filedata to cp already, and + * that ks->cp_metadata != ks->cp_filedata. + */ + +static void +ks_set_cp(ksupp_t *ks, kmem_cache_t *cp, const size_t cachenum) +{ + + ASSERT(cp != NULL); + ASSERT(ks != NULL); + + if (ks->cp_metadata == cp || ks->cp_filedata == cp) + return; + + const uint64_t b = cachenum; + + bool cp_is_metadata = false; + + vmem_t *vmp = cp->cache_arena; + + ASSERT(vmp == zio_metadata_arena || vmp == zio_arena); + + if (vmp == zio_metadata_arena) + cp_is_metadata = true; + + if (cp_is_metadata) { + for (uint32_t i = 0; ; i++) { + if (i >= 1000000) { + panic("SPL: %s: iterated out trying to set " + "ks->cp_metadata (%s)\n", __func__, + cp->cache_name); + } + kmem_cache_t *expected = NULL; + if (__c11_atomic_compare_exchange_strong( + &ks->cp_metadata, &expected, cp, + __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) { + printf("SPL: %s: set iskvec[%llu].ks->" + "cp_metadata (%s) OK\n", __func__, + b, cp->cache_name); + return; + } else if (ks->cp_metadata == cp) { + return; + } else if (ks->cp_metadata == NULL) { + continue; + } else { + panic("%s: CAS failed for iksvec[%llu]." 
+ "ks->cp_metadata: %s wanted %s set\n", + __func__, b, cp->cache_name, + ks->cp_metadata->cache_name); + } + } + } else { + for (int32_t j = 0; ; j++) { + if (j >= 1000000) { + panic("SPL: %s: iterated out trying to set " + "ks->cp_filedata (%s)\n", __func__, + cp->cache_name); + } + kmem_cache_t *expected = NULL; + if (__c11_atomic_compare_exchange_strong( + &ks->cp_filedata, &expected, cp, + __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) { + printf("SPL: %s: set iskvec[%llu].ks->" + "cp_filedata (%s) OK\n", __func__, + b, cp->cache_name); + return; + } else if (ks->cp_filedata == cp) { + return; + } else if (ks->cp_filedata == NULL) { + continue; + } else { + panic("%s: CAS failed for iksvec[%llu].ks->" + "cp_metadata: %s wanted %s set\n", + __func__, b, cp->cache_name, + ks->cp_filedata->cache_name); + } + } + } +} + +void +spl_zio_no_grow_init(void) +{ + // this is the logic from zio.c:zio_init() + + ASSERT(spl_zio_no_grow_inited == false); + + size_t c = 0; + + for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { + size_t size = (c+1) << SPA_MINBLOCKSHIFT; + size_t p2 = size; + size_t align = 0; + + while (!ISP2(p2)) + p2 &= p2 - 1; + + if (size <= 4 * SPA_MINBLOCKSIZE) { + align = SPA_MINBLOCKSIZE; + } else if (size <= 128 * 1024 && IS_P2ALIGNED(size, p2 >> 4)) { + align = MIN(p2 >> 4, PAGESIZE); + } else if (IS_P2ALIGNED(size, p2 >> 3)) { + align = MIN(p2 >> 3, PAGESIZE); + } + + if (align != 0) { + iksvec[c].ks_entry = &ksvec[c]; + iksvec[c].ks_entry->pointed_to++; + } + } + + while (--c != 0) { + ASSERT(iksvec[c].ks_entry != NULL); + ASSERT(iksvec[c].ks_entry->pointed_to > 0); + if (iksvec[c - 1].ks_entry == NULL) { + iksvec[c - 1].ks_entry = iksvec[c].ks_entry; + iksvec[c - 1].ks_entry->pointed_to++; + } + } + + spl_zio_no_grow_inited = true; + + printf("SPL: %s done.\n", __func__); +} + +static void +spl_zio_no_grow_clear() +{ + for (size_t c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { + ksupp_t *ks = iksvec[c].ks_entry; + ks->cp_metadata = NULL; + ks->cp_filedata = NULL; + ks->pointed_to = false; + ks->suppress_count = 0; + ks->last_bumped = 0; + iksvec[c].ks_entry = NULL; + } +} + +void +spl_zio_no_grow_fini(void) +{ + // zio_fini() is at its end, so the kmem_caches are gone, + // consequently this is safe. 
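+ // Reset the inited flag, wipe every ksupp_t and iksvec entry, and
+ // rebuild the mapping so a later zio_init() starts from a clean
+ // suppression table.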
+ spl_zio_no_grow_inited = false; + spl_zio_no_grow_clear(); + spl_zio_no_grow_init(); +} + +static void +spl_zio_set_no_grow(const size_t size, kmem_cache_t *cp, const size_t cachenum) +{ + ASSERT(spl_zio_no_grow_inited == true); + ASSERT(iksvec[cachenum].ks_entry != NULL); + + ksupp_t *ks = iksvec[cachenum].ks_entry; + + // maybe update size->cp mapping vector + + ks_set_cp(ks, cp, cachenum); + + if (ks->cp_metadata != cp && ks->cp_filedata != cp) { + panic("ks_cp_set bad for %s", cp->cache_name); + } + + // suppress the bucket for two allocations (is _Atomic) + ks->suppress_count += 2; + ks->last_bumped = zfs_lbolt(); +} + +bool +spl_zio_is_suppressed(const size_t size, const uint64_t now, + const boolean_t buf_is_metadata, kmem_cache_t **zp) +{ + + ASSERT(spl_zio_no_grow_inited == true); + + const size_t cachenum = (size - 1) >> SPA_MINBLOCKSHIFT; + + VERIFY3U(cachenum, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + ksupp_t *ks = iksvec[cachenum].ks_entry; + + if (ks == NULL) { + return (false); + } else if (ks->pointed_to < 1) { + ASSERT(ks->pointed_to > 0); // throw an assertion + printf("SPL: %s: ERROR: iksvec[%llu].ks_entry->pointed_to " + "== %u for size %llu\n", __func__, (uint64_t)cachenum, + ks->pointed_to, (uint64_t)size); + return (false); + } else if (ks->suppress_count == 0) { + return (false); + } else { + const uint64_t two_minutes = 120 * hz; + if (ks->last_bumped + two_minutes >= now) { + ks->suppress_count = 0; + ks->last_bumped = now; + return (false); + } else { + ks->suppress_count--; + } + if (buf_is_metadata) { + if (ks->cp_metadata == NULL) { + ks_set_cp(ks, zp[cachenum], cachenum); + if (ks->cp_metadata != NULL) { + atomic_inc_64( + &ks->cp_metadata->arc_no_grow); + } else { + printf("WARNING: %s: ks_set_cp->" + "metadata == NULL after " + "ks_set_cp !size = %lu\n", + __func__, size); + } + } else { + atomic_inc_64(&ks->cp_metadata->arc_no_grow); + } + } else { + if (ks->cp_filedata == NULL) { + ks_set_cp(ks, zp[cachenum], cachenum); + if (ks->cp_filedata != NULL) { + atomic_inc_64( + &ks->cp_filedata->arc_no_grow); + } else { + printf("WARNING: %s: " + "ks_set_cp->filedata == NULL " + "after ks_set_cp !" + "size = %lu\n", + __func__, size); + } + } else { + atomic_inc_64(&ks->cp_filedata->arc_no_grow); + } + + } + return (true); + } +} + + +/* + * spl_zio_kmem_cache_alloc(): try to get an allocation without descending + * to the bucket layer, and if that fails, set a flag for spl_arc_no_grow() + * then perform the allocation normally. + */ + +void * +spl_zio_kmem_cache_alloc(kmem_cache_t *cp, int kmflag, size_t size, + size_t cachenum) +{ + // called by: + // spl_zio_kmem_cache_alloc(zio_buf_cache[size], kmflag, size, cachenum) + // spl_zio_kmem_cache_alloc(zio_data_buf_cache[size], kmflag, size, + // cachenum) + // those are e.g. + // kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPAMINBLOCKSHIFT] + // and are indexed as size_t cachenum = (size - 1) >> SPA_MIN~BLOCKSHIFT + // VERIFY3U(cachenum, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + // try to get memory from no lower than the bucket_heap + void *m = kmem_cache_alloc(cp, kmflag | KM_NO_VBA | KM_NOSLEEP); + + if (m != NULL) { + atomic_inc_64(&cp->no_vba_success); + return (m); + } + + atomic_inc_64(&cp->no_vba_fail); + + // we will have to go below the bucket_heap to a bucket arena. 
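+ // (the KM_NO_VBA | KM_NOSLEEP attempt above stops at the bucket_heap,
+ // which is why it can fail even while a lower bucket arena could
+ // still satisfy the request)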
+ // if the bucket arena cannot obviously satisfy the allocation, + // and xnu is tight for memory, then we turn on the no_grow suppression + + extern vmem_t *spl_vmem_bucket_arena_by_size(size_t); + extern uint64_t vmem_xnu_useful_bytes_free(void); + extern int vmem_canalloc_atomic(vmem_t *, size_t); + + vmem_t *bvmp = spl_vmem_bucket_arena_by_size(size); + + if (! vmem_canalloc_atomic(bvmp, size) && + vmem_xnu_useful_bytes_free() < 16ULL*1024ULL*1024ULL) { + spl_zio_set_no_grow(size, cp, cachenum); + atomic_inc_64(&cp->arc_no_grow_set); + } + + // perform the allocation as requested + void *n = kmem_cache_alloc(cp, kmflag); + + return (n); +} + +/* + * return true if the reclaim thread should be awakened + * because we do not have enough memory on hand + */ +boolean_t +spl_arc_reclaim_needed(const size_t bytes, kmem_cache_t **zp) +{ + + /* + * fast path: + * if our argument is 0, then do the equivalent of + * if (arc_available_memory() < 0) return (B_TRUE); + * which is traditional arc.c appraoch + * so we can arc_reclaim_needed() -> spl_arc_reclaim_needed(0) + * if we desire. + */ + if (bytes == 0 && spl_free < 0) { + return (B_TRUE); + } + + // copy some code from zio_buf_alloc() + size_t c = (bytes - 1) >> SPA_MINBLOCKSHIFT; + VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + + // if there is free memory in the kmem cache slab layer + // then we do not have to reclaim + + if (zp[c]->cache_bufslab > 1) { + if (spl_free < 0) + atomic_inc_64(&spl_arc_reclaim_avoided); + return (B_FALSE); + } + + extern uint64_t vmem_xnu_useful_bytes_free(void); + const uint64_t min_threshold = 64ULL*1024ULL*1024ULL; + const uint64_t pm_pct = real_total_memory >> 8; + const uint64_t high_threshold = MAX(min_threshold, (uint64_t)pm_pct); + const uint64_t low_threshold = bytes; + + const uint64_t f = vmem_xnu_useful_bytes_free(); + + if (f <= low_threshold) { + return (B_TRUE); + } else if (f > high_threshold) { + if (spl_free < 0) + atomic_inc_64(&spl_arc_reclaim_avoided); + return (B_FALSE); + } + + if (spl_free < 0) { + return (B_TRUE); + } else { + return (B_FALSE); + } +} + +/* small auxiliary function since we do not export struct kmem_cache to zfs */ +size_t +kmem_cache_bufsize(kmem_cache_t *cp) +{ + return (cp->cache_bufsize); +} + +/* + * check that we would not have KMERR_BADCACHE error in the event + * we did kmem_cache_free(cp, buf) in a DEBUG setting + * + * returns: NULL if the buf is not found in any cache + * cparg if the buf is found in cparg + * a pointer to the cache the buf is found in, if not cparg + */ + +kmem_cache_t * +kmem_cache_buf_in_cache(kmem_cache_t *cparg, void *bufarg) +{ + kmem_cache_t *cp = cparg; + kmem_slab_t *sp; + void *buf = bufarg; + + sp = kmem_findslab(cp, buf); + if (sp == NULL) { + for (cp = list_tail(&kmem_caches); cp != NULL; + cp = list_prev(&kmem_caches, cp)) { + if ((sp = kmem_findslab(cp, buf)) != NULL) + break; + } + } + + if (sp == NULL) { + printf("SPL: %s: KMERR_BADADDR orig cache = %s\n", + __func__, cparg->cache_name); + return (NULL); + } + + if (cp == NULL) { + printf("SPL: %s: ERROR cp == NULL; cparg == %s", + __func__, cparg->cache_name); + return (NULL); + } + + if (cp != cparg) { + printf("SPL: %s: KMERR_BADCACHE arg cache = %s but found " + "in %s instead\n", + __func__, cparg->cache_name, cp->cache_name); + return (cp); + } + + ASSERT(cp == cparg); + + return (cp); +} diff --git a/module/os/macos/spl/spl-kstat.c b/module/os/macos/spl/spl-kstat.c new file mode 100644 index 0000000000..11a478c375 --- /dev/null +++ 
b/module/os/macos/spl/spl-kstat.c @@ -0,0 +1,721 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * Copyright (C) 2014 Brendon Humphrey + * + */ + +/* + * Provides an implementation of kstat that is backed by OSX sysctls. + */ + +#include +#include +#include +#include +#include +#include + +/* + * We need to get dynamically allocated memory from the kernel allocator + * (Our needs are small, we wont blow the zone_map). + */ +extern void *kalloc(vm_size_t size); +extern void kfree(void *data, vm_size_t size); + +/* + * Statically declared toplevel OID that all kstats + * will hang off. + */ +struct sysctl_oid_list sysctl__kstat_children; +SYSCTL_DECL(_kstat); +SYSCTL_NODE(, OID_AUTO, kstat, CTLFLAG_RW, 0, "kstat tree"); + +/* + * Sysctl node tree structure. + * + * These are wired into the OSX sysctl structure + * and also stored a list/tree/whatever for easy + * location and destruction at shutdown time. + */ +typedef struct sysctl_tree_node { + char tn_kstat_name[KSTAT_STRLEN]; + struct sysctl_oid_list tn_children; + struct sysctl_oid tn_oid; + struct sysctl_tree_node *tn_next; +} sysctl_tree_node_t; + +/* + * Each named kstats consists of one or more named + * fields which are implemented as OIDs parented + * off the kstat OID. + * + * To implement the kstat interface, we need to be able + * to call the update() function on the kstat to + * allow the owner to populate the kstat values from + * internal data. + * + * To do this we need the address of the kstat_named_t + * which contains the data value, and the owning kstat_t. + * + * OIDs allow a single void* user argument, so we will + * use a structure that contains both values and + * point to that. + */ +typedef struct sysctl_leaf { + kstat_t *l_ksp; + kstat_named_t *l_named; + struct sysctl_oid l_oid; /* kstats are backed w/sysctl */ + char l_name[64]; /* Name of the related sysctl */ + int l_oid_registered; /* !0 = registered */ +} sysctl_leaf_t; + +/* + * Extended kstat structure -- for internal use only. 
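+ * The embedded kstat_t must remain the first member: callers are handed
+ * the kstat_t pointer, and kstat_install()/kstat_delete() cast it back
+ * to the enclosing ekstat_t.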
+ */ +typedef struct ekstat { + kstat_t e_ks; /* the kstat itself */ + size_t e_size; /* total allocation size */ + kthread_t *e_owner; /* thread holding this kstat */ + kcondvar_t e_cv; /* wait for owner == NULL */ + /* contains the named values from the kstat */ + struct sysctl_oid_list e_children; + struct sysctl_oid e_oid; /* the kstat is itself an OID */ + /* array of OIDs that implement the children */ + sysctl_leaf_t *e_vals; + uint64_t e_num_vals; /* size of e_vals array */ +} ekstat_t; + +struct sysctl_tree_node *tree_nodes = 0; +struct sysctl_oid *e_sysctl = 0; + +static void +kstat_set_string(char *dst, const char *src) +{ + bzero(dst, KSTAT_STRLEN); + (void) strlcpy(dst, src, KSTAT_STRLEN); +} + +static struct sysctl_oid * +get_oid_with_name(struct sysctl_oid_list *list, char *name) +{ + struct sysctl_oid *oidp; + + SLIST_FOREACH(oidp, list, oid_link) { + if (strcmp(name, oidp->oid_name) == 0) { + return (oidp); + } + } + + return (0); +} + +static void +init_oid_tree_node(struct sysctl_oid_list *parent, char *name, + sysctl_tree_node_t *node) +{ + strlcpy(node->tn_kstat_name, name, KSTAT_STRLEN); + + node->tn_oid.oid_parent = parent; + node->tn_oid.oid_link.sle_next = 0; + node->tn_oid.oid_number = OID_AUTO; + node->tn_oid.oid_arg2 = 0; + node->tn_oid.oid_name = &node->tn_kstat_name[0]; + node->tn_oid.oid_descr = ""; + node->tn_oid.oid_version = SYSCTL_OID_VERSION; + node->tn_oid.oid_refcnt = 0; + node->tn_oid.oid_handler = 0; + node->tn_oid.oid_kind = CTLTYPE_NODE|CTLFLAG_RW|CTLFLAG_OID2; + node->tn_oid.oid_fmt = "N"; + node->tn_oid.oid_arg1 = (void*)(&node->tn_children); + + sysctl_register_oid(&node->tn_oid); + + node->tn_next = tree_nodes; + tree_nodes = node; +} + +static struct sysctl_oid_list * +get_kstat_parent(struct sysctl_oid_list *root, char *module_name, + char *class_name) +{ + struct sysctl_oid *the_module = 0; + struct sysctl_oid *the_class = 0; + sysctl_tree_node_t *new_node = 0; + struct sysctl_oid_list *container = root; + + /* + * Locate/create the module + */ + the_module = get_oid_with_name(root, module_name); + + if (!the_module) { + new_node = kalloc(sizeof (sysctl_tree_node_t)); + bzero(new_node, sizeof (sysctl_tree_node_t)); + init_oid_tree_node(root, module_name, new_node); + the_module = &new_node->tn_oid; + } + + /* + * Locate/create the class + */ + container = the_module->oid_arg1; + the_class = get_oid_with_name(container, class_name); + + if (!the_class) { + new_node = kalloc(sizeof (sysctl_tree_node_t)); + bzero(new_node, sizeof (sysctl_tree_node_t)); + init_oid_tree_node(container, class_name, new_node); + the_class = &new_node->tn_oid; + } + + container = the_class->oid_arg1; + return (container); +} + +static int +kstat_handle_i64 SYSCTL_HANDLER_ARGS +{ + int error = 0; + sysctl_leaf_t *params = (sysctl_leaf_t *)(arg1); + kstat_named_t *named = params->l_named; + kstat_t *ksp = params->l_ksp; + kmutex_t *lock = ksp->ks_lock; + int lock_needs_release = 0; + + if (lock && !MUTEX_NOT_HELD(lock)) { + mutex_enter(lock); + lock_needs_release = 1; + } + + if (!error && req->newptr) { + /* + * Write request - first read add current values for the kstat + * (remember that is sysctl is likely only one of many + * values that make up the kstat). 
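+ * In other words: refresh the kstat via ks_update(KSTAT_READ),
+ * overwrite just this one field from the user buffer, then push the
+ * whole kstat back with ks_update(KSTAT_WRITE).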
+ */ + if (ksp->ks_update) { + ksp->ks_update(ksp, KSTAT_READ); + } + + /* Copy the new value from user space */ + (void) copyin(req->newptr, &named->value.i64, + sizeof (named->value.i64)); + + /* and invoke the update operation */ + if (ksp->ks_update) { + error = ksp->ks_update(ksp, KSTAT_WRITE); + } + } else { + /* + * Read request + */ + if (ksp->ks_update) { + ksp->ks_update(ksp, KSTAT_READ); + } + error = SYSCTL_OUT(req, &named->value.i64, sizeof (int64_t)); + } + + if (lock_needs_release) { + mutex_exit(lock); + } + + return (error); +} + +static int +kstat_handle_ui64 SYSCTL_HANDLER_ARGS +{ + int error = 0; + sysctl_leaf_t *params = (sysctl_leaf_t *)(arg1); + kstat_named_t *named = params->l_named; + kstat_t *ksp = params->l_ksp; + kmutex_t *lock = ksp->ks_lock; + int lock_needs_release = 0; + + if (lock && !MUTEX_NOT_HELD(lock)) { + mutex_enter(lock); + lock_needs_release = 1; + } + + if (!error && req->newptr) { + /* + * Write request - first read add current values for the kstat + * (remember that is sysctl is likely only one of many + * values that make up the kstat). + */ + if (ksp->ks_update) { + ksp->ks_update(ksp, KSTAT_READ); + } + + /* Copy the new value from user space */ + (void) copyin(req->newptr, &named->value.ui64, + sizeof (named->value.ui64)); + + /* and invoke the update operation */ + if (ksp->ks_update) { + error = ksp->ks_update(ksp, KSTAT_WRITE); + } + } else { + /* + * Read request + */ + if (ksp->ks_update) { + ksp->ks_update(ksp, KSTAT_READ); + } + error = SYSCTL_OUT(req, &named->value.ui64, sizeof (uint64_t)); + } + + if (lock_needs_release) { + mutex_exit(lock); + } + + return (error); +} + +static int +kstat_handle_string SYSCTL_HANDLER_ARGS +{ + int error = 0; + sysctl_leaf_t *params = (sysctl_leaf_t *)(arg1); + kstat_named_t *named = params->l_named; + kstat_t *ksp = params->l_ksp; + kmutex_t *lock = ksp->ks_lock; + int lock_needs_release = 0; + + if (lock && !MUTEX_NOT_HELD(lock)) { + mutex_enter(lock); + lock_needs_release = 1; + } + if (!error && req->newptr) { + /* + * Write request - first read add current values for the kstat + * (remember that is sysctl is likely only one of many + * values that make up the kstat). + */ + if (ksp->ks_update) { + ksp->ks_update(ksp, KSTAT_READ); + } + + /* Copy the new value from user space */ + /* + * This should use copyinstr when copying in string from + * userland Fix this before attempting to use type STRING + * with kstat + */ + named->value.string.addr.ptr = (char *)(req->newptr); + named->value.string.len = strlen((char *)(req->newptr))+1; + + /* and invoke the update operation */ + if (ksp->ks_update) { + error = ksp->ks_update(ksp, KSTAT_WRITE); + } + } else { + /* + * Read request + */ + if (ksp->ks_update) { + ksp->ks_update(ksp, KSTAT_READ); + } + error = SYSCTL_OUT(req, named->value.string.addr.ptr, + named->value.string.len); + } + + if (lock_needs_release) { + mutex_exit(lock); + } + + return (error); +} + +kstat_t * +kstat_create(char *ks_module, int ks_instance, char *ks_name, char *ks_class, + uchar_t ks_type, ulong_t ks_ndata, uchar_t ks_flags) +{ + kstat_t *ksp = 0; + ekstat_t *e = 0; + size_t size = 0; + + /* + * Allocate memory for the new kstat header. 
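+ * The total size is remembered in e_size so that kstat_delete() can
+ * kfree() exactly what is kalloc()ed here.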
+ */ + size = sizeof (ekstat_t); + e = (ekstat_t *)kalloc(size); + bzero(e, size); + if (e == NULL) { + cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): " + "insufficient kernel memory", + ks_module, ks_instance, ks_name); + return (NULL); + } + e->e_size = size; + + cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL); + + /* + * Initialize as many fields as we can. The caller may reset + * ks_lock, ks_update, ks_private, and ks_snapshot as necessary. + * Creators of virtual kstats may also reset ks_data. It is + * also up to the caller to initialize the kstat data section, + * if necessary. All initialization must be complete before + * calling kstat_install(). + */ + ksp = &e->e_ks; + + ksp->ks_crtime = gethrtime(); + kstat_set_string(ksp->ks_module, ks_module); + ksp->ks_instance = ks_instance; + kstat_set_string(ksp->ks_name, ks_name); + ksp->ks_type = ks_type; + kstat_set_string(ksp->ks_class, ks_class); + ksp->ks_flags = ks_flags | KSTAT_FLAG_INVALID; + ksp->ks_ndata = ks_ndata; + ksp->ks_snaptime = ksp->ks_crtime; + ksp->ks_lock = 0; + + /* + * Initialise the sysctl that represents this kstat + */ + e->e_children.slh_first = 0; + + e->e_oid.oid_parent = get_kstat_parent(&sysctl__kstat_children, + ksp->ks_module, ksp->ks_class); + e->e_oid.oid_link.sle_next = 0; + e->e_oid.oid_number = OID_AUTO; + e->e_oid.oid_arg2 = 0; + e->e_oid.oid_name = ksp->ks_name; + e->e_oid.oid_descr = ""; + e->e_oid.oid_version = SYSCTL_OID_VERSION; + e->e_oid.oid_refcnt = 0; + e->e_oid.oid_handler = 0; + e->e_oid.oid_kind = CTLTYPE_NODE|CTLFLAG_RW|CTLFLAG_OID2; + e->e_oid.oid_fmt = "N"; + e->e_oid.oid_arg1 = (void*)(&e->e_children); + + /* If VIRTUAL we allocate memory to store data */ + if (ks_flags & KSTAT_FLAG_VIRTUAL) + ksp->ks_data = NULL; + else + ksp->ks_data = (void *)kmem_zalloc( + sizeof (kstat_named_t) * ks_ndata, KM_SLEEP); + + + sysctl_register_oid(&e->e_oid); + + return (ksp); +} + +void +kstat_install(kstat_t *ksp) +{ + ekstat_t *e = (ekstat_t *)ksp; + kstat_named_t *named_base = 0; + sysctl_leaf_t *vals_base = 0; + sysctl_leaf_t *params = 0; + int oid_permissions = CTLFLAG_RD; + + if (ksp->ks_type == KSTAT_TYPE_NAMED) { + + if (ksp->ks_flags & KSTAT_FLAG_WRITABLE) { + oid_permissions |= CTLFLAG_RW; + } + + // Create the leaf node OID objects + e->e_vals = (sysctl_leaf_t *)kalloc(ksp->ks_ndata * + sizeof (sysctl_leaf_t)); + bzero(e->e_vals, ksp->ks_ndata * sizeof (sysctl_leaf_t)); + e->e_num_vals = ksp->ks_ndata; + + named_base = (kstat_named_t *)(ksp->ks_data); + vals_base = e->e_vals; + + for (int i = 0; i < ksp->ks_ndata; i++) { + int oid_valid = 1; + + kstat_named_t *named = &named_base[i]; + sysctl_leaf_t *val = &vals_base[i]; + + // Perform basic initialisation of the sysctl. + // + // The sysctl: kstat.... + snprintf(val->l_name, KSTAT_STRLEN, "%s", named->name); + + val->l_oid.oid_parent = &e->e_children; + val->l_oid.oid_link.sle_next = 0; + val->l_oid.oid_number = OID_AUTO; + val->l_oid.oid_arg2 = 0; + val->l_oid.oid_name = val->l_name; + val->l_oid.oid_descr = ""; + val->l_oid.oid_version = SYSCTL_OID_VERSION; + val->l_oid.oid_refcnt = 0; + + // Based on the kstat type flags, provide location + // of data item and associated type and handler + // flags to the sysctl. 
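+ // The 64-bit and string types need the kstat's ks_update hook, so
+ // each gets its own kalloc'd sysctl_leaf_t and a kstat_handle_*
+ // handler; the 32-bit and long types simply point the stock
+ // sysctl_handle_int/long handlers at the value in place.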
+ switch (named->data_type) { + case KSTAT_DATA_INT64: + params = (sysctl_leaf_t *)kalloc( + sizeof (sysctl_leaf_t)); + params->l_named = named; + params->l_ksp = ksp; + + val->l_oid.oid_handler = + kstat_handle_i64; + val->l_oid.oid_kind = CTLTYPE_QUAD | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "Q"; + val->l_oid.oid_arg1 = (void*)params; + params = 0; + break; + case KSTAT_DATA_UINT64: + params = (sysctl_leaf_t *)kalloc( + sizeof (sysctl_leaf_t)); + params->l_named = named; + params->l_ksp = ksp; + + val->l_oid.oid_handler = + kstat_handle_ui64; + val->l_oid.oid_kind = CTLTYPE_QUAD | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "Q"; + val->l_oid.oid_arg1 = (void*)params; + break; + case KSTAT_DATA_INT32: + val->l_oid.oid_handler = + sysctl_handle_int; + val->l_oid.oid_kind = CTLTYPE_INT | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "I"; + val->l_oid.oid_arg1 = &named->value.i32; + break; + case KSTAT_DATA_UINT32: + val->l_oid.oid_handler = + sysctl_handle_int; + val->l_oid.oid_kind = CTLTYPE_INT | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "IU"; + val->l_oid.oid_arg1 = + &named->value.ui32; + break; + case KSTAT_DATA_LONG: + val->l_oid.oid_handler = + sysctl_handle_long; + val->l_oid.oid_kind = CTLTYPE_INT | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "L"; + val->l_oid.oid_arg1 = &named->value.l; + break; + case KSTAT_DATA_ULONG: + val->l_oid.oid_handler = + sysctl_handle_long; + val->l_oid.oid_kind = CTLTYPE_INT | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "L"; + val->l_oid.oid_arg1 = &named->value.ul; + break; + case KSTAT_DATA_STRING: + params = (sysctl_leaf_t *)kalloc( + sizeof (sysctl_leaf_t)); + params->l_named = named; + params->l_ksp = ksp; + val->l_oid.oid_handler = + kstat_handle_string; + val->l_oid.oid_kind = CTLTYPE_STRING | + oid_permissions | CTLFLAG_OID2; + val->l_oid.oid_fmt = "S"; + val->l_oid.oid_arg1 = (void*)params; + break; + + case KSTAT_DATA_CHAR: + default: + oid_valid = 0; + break; + } + + /* + * Finally publish the OID, provided that there were + * no issues initialising it. 
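+ * KSTAT_DATA_CHAR and any unrecognised data type leave oid_valid
+ * at 0, so those fields are skipped rather than registered.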
+ */ + if (oid_valid) { + sysctl_register_oid(&val->l_oid); + val->l_oid_registered = 1; + } else { + val->l_oid_registered = 0; + } + } + } + + ksp->ks_flags &= ~KSTAT_FLAG_INVALID; +} + +static void +remove_child_sysctls(ekstat_t *e) +{ + kstat_t *ksp = &e->e_ks; + kstat_named_t *named_base = (kstat_named_t *)(ksp->ks_data); + sysctl_leaf_t *vals_base = e->e_vals; + + for (int i = 0; i < ksp->ks_ndata; i++) { + if (vals_base[i].l_oid_registered) { + sysctl_unregister_oid(&vals_base[i].l_oid); + vals_base[i].l_oid_registered = 0; + } + + if (named_base[i].data_type == KSTAT_DATA_INT64 || + named_base[i].data_type == KSTAT_DATA_UINT64 || + named_base[i].data_type == KSTAT_DATA_STRING) { + + sysctl_leaf_t *leaf = (sysctl_leaf_t *) + vals_base[i].l_oid.oid_arg1; + kfree(leaf, sizeof (sysctl_leaf_t)); + } + } +} + +void +kstat_delete(kstat_t *ksp) +{ + ekstat_t *e = (ekstat_t *)ksp; + kmutex_t *lock = ksp->ks_lock; + int lock_needs_release = 0; + + // destroy the sysctl + if (ksp->ks_type == KSTAT_TYPE_NAMED) { + + if (lock && MUTEX_NOT_HELD(lock)) { + mutex_enter(lock); + lock_needs_release = 1; + } + + remove_child_sysctls(e); + + if (lock_needs_release) { + mutex_exit(lock); + } + } + + sysctl_unregister_oid(&e->e_oid); + + if (e->e_vals) { + kfree(e->e_vals, sizeof (sysctl_leaf_t) * e->e_num_vals); + } + + if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) + kmem_free(ksp->ks_data, sizeof (kstat_named_t) * ksp->ks_ndata); + + cv_destroy(&e->e_cv); + kfree(e, e->e_size); +} + +void +kstat_named_setstr(kstat_named_t *knp, const char *src) +{ + if (knp->data_type != KSTAT_DATA_STRING) + panic("kstat_named_setstr('%p', '%p'): " + "named kstat is not of type KSTAT_DATA_STRING", + (void *)knp, (void *)src); + + KSTAT_NAMED_STR_PTR(knp) = (char *)src; + if (src != NULL) + KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1; + else + KSTAT_NAMED_STR_BUFLEN(knp) = 0; +} + +void +kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type) +{ + kstat_set_string(knp->name, name); + knp->data_type = data_type; + + if (data_type == KSTAT_DATA_STRING) + kstat_named_setstr(knp, NULL); +} + + +void +kstat_waitq_enter(kstat_io_t *kiop) +{ +} + +void +kstat_waitq_exit(kstat_io_t *kiop) +{ +} + +void +kstat_runq_enter(kstat_io_t *kiop) +{ +} + +void +kstat_runq_exit(kstat_io_t *kiop) +{ +} + +void +__kstat_set_raw_ops(kstat_t *ksp, + int (*headers)(char *buf, size_t size), + int (*data)(char *buf, size_t size, void *data), + void *(*addr)(kstat_t *ksp, loff_t index)) +{ +} + +void +spl_kstat_init() +{ + /* + * Create the kstat root OID + */ + sysctl_register_oid(&sysctl__kstat); +} + +void +spl_kstat_fini() +{ + /* + * Destroy the kstat module/class/name tree + * + * Done in two passes, first unregisters all + * of the oids, second releases all the memory. + */ + + sysctl_tree_node_t *iter = tree_nodes; + while (iter) { + sysctl_tree_node_t *tn = iter; + iter = tn->tn_next; + sysctl_unregister_oid(&tn->tn_oid); + } + + while (tree_nodes) { + sysctl_tree_node_t *tn = tree_nodes; + tree_nodes = tn->tn_next; + kfree(tn, sizeof (sysctl_tree_node_t)); + } + + /* + * Destroy the root oid + */ + sysctl_unregister_oid(&sysctl__kstat); +} diff --git a/module/os/macos/spl/spl-list.c b/module/os/macos/spl/spl-list.c new file mode 100644 index 0000000000..ede7a29c42 --- /dev/null +++ b/module/os/macos/spl/spl-list.c @@ -0,0 +1,197 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Generic doubly-linked list implementation + */ + +#include +#include +#include +#include + + +#define list_insert_after_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_prev = node; \ + lnew->list_next = node->list_next; \ + node->list_next->list_prev = lnew; \ + node->list_next = lnew; \ +} + +#define list_insert_before_node(list, node, object) { \ + list_node_t *lnew = list_d2l(list, object); \ + lnew->list_next = node; \ + lnew->list_prev = node->list_prev; \ + node->list_prev->list_next = lnew; \ + node->list_prev = lnew; \ +} + +void +list_create(list_t *list, size_t size, size_t offset) +{ + ASSERT(list); + ASSERT(size > 0); + ASSERT(size >= offset + sizeof (list_node_t)); + + list->list_size = size; + list->list_offset = offset; + list->list_head.list_next = list->list_head.list_prev = + &list->list_head; +} + +void +list_destroy(list_t *list) +{ + list_node_t *node = &list->list_head; + + ASSERT(list); + ASSERT(list->list_head.list_next == node); + ASSERT(list->list_head.list_prev == node); + + node->list_next = node->list_prev = NULL; +} + +void +list_insert_after(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_head(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_after_node(list, lold, nobject); + } +} + +void +list_insert_before(list_t *list, void *object, void *nobject) +{ + if (object == NULL) { + list_insert_tail(list, nobject); + } else { + list_node_t *lold = list_d2l(list, object); + list_insert_before_node(list, lold, nobject); + } +} + +void +list_insert_head(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_after_node(list, lold, object); +} + +void +list_insert_tail(list_t *list, void *object) +{ + list_node_t *lold = &list->list_head; + list_insert_before_node(list, lold, object); +} + +void +list_remove(list_t *list, void *object) +{ + list_node_t *lold = list_d2l(list, object); + ASSERT(!list_empty(list)); + ASSERT(lold->list_next != NULL); + lold->list_prev->list_next = lold->list_next; + lold->list_next->list_prev = lold->list_prev; + lold->list_next = lold->list_prev = NULL; +} + + +void * +list_head(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_next)); +} + +void * +list_tail(list_t *list) +{ + if (list_empty(list)) + return (NULL); + return (list_object(list, list->list_head.list_prev)); +} + +void * +list_next(list_t *list, void *object) +{ + list_node_t *node = list_d2l(list, object); + + if (node->list_next != &list->list_head) + return (list_object(list, node->list_next)); + + return (NULL); +} + +void * +list_prev(list_t *list, void *object) +{ + list_node_t 
*node = list_d2l(list, object); + + if (node->list_prev != &list->list_head) + return (list_object(list, node->list_prev)); + + return (NULL); +} + +/* + * Insert src list after dst list. Empty src list thereafter. + */ +void +list_move_tail(list_t *dst, list_t *src) +{ + list_node_t *dstnode = &dst->list_head; + list_node_t *srcnode = &src->list_head; + + ASSERT(dst->list_size == src->list_size); + ASSERT(dst->list_offset == src->list_offset); + + if (list_empty(src)) + return; + + dstnode->list_prev->list_next = srcnode->list_next; + srcnode->list_next->list_prev = dstnode->list_prev; + dstnode->list_prev = srcnode->list_prev; + srcnode->list_prev->list_next = dstnode; + + /* empty src list */ + srcnode->list_next = srcnode->list_prev = srcnode; +} + +int +list_link_active(list_node_t *link) +{ + return (link->list_next != NULL); +} + +int +list_is_empty(list_t *list) +{ + return (list_empty(list)); +} diff --git a/module/os/macos/spl/spl-mutex.c b/module/os/macos/spl/spl-mutex.c new file mode 100644 index 0000000000..b7b47f42c4 --- /dev/null +++ b/module/os/macos/spl/spl-mutex.c @@ -0,0 +1,415 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013,2020 Jorgen Lundman + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Not defined in headers +extern boolean_t lck_mtx_try_lock(lck_mtx_t *lck); + + +static lck_attr_t *zfs_lock_attr = NULL; +static lck_grp_attr_t *zfs_group_attr = NULL; + +static lck_grp_t *zfs_mutex_group = NULL; + +uint64_t zfs_active_mutex = 0; + +#ifdef SPL_DEBUG_MUTEX +#include +static list_t mutex_list; +static kmutex_t mutex_list_mutex; + + +struct leak { + list_node_t mutex_leak_node; + +#define SPL_DEBUG_MUTEX_MAXCHAR 32 + char location_file[SPL_DEBUG_MUTEX_MAXCHAR]; + char location_function[SPL_DEBUG_MUTEX_MAXCHAR]; + uint64_t location_line; + void *mp; + + uint64_t wdlist_locktime; // time lock was taken + char wdlist_file[32]; // storing holder + uint64_t wdlist_line; +}; + +static int wdlist_exit = 0; + +void +spl_wdlist_settime(void *mpleak, uint64_t value) +{ + struct leak *leak = (struct leak *)mpleak; + if (!leak) + return; + leak->wdlist_locktime = value; +} + +inline static void +spl_wdlist_check(void *ignored) +{ + struct leak *mp; + printf("SPL: Mutex watchdog is alive\n"); + + while (!wdlist_exit) { + delay(hz * SPL_MUTEX_WATCHDOG_SLEEP); + uint64_t noe = gethrestime_sec(); + lck_mtx_lock((lck_mtx_t *)&mutex_list_mutex.m_lock); + for (mp = list_head(&mutex_list); + mp; + mp = list_next(&mutex_list, mp)) { + uint64_t locktime = mp->wdlist_locktime; + if ((locktime > 0) && (noe > locktime) && + noe - locktime >= SPL_MUTEX_WATCHDOG_TIMEOUT) { + printf("SPL: mutex 
(%p) held for %llus by " + "'%s':%llu\n", mp, noe - + mp->wdlist_locktime, mp->wdlist_file, + mp->wdlist_line); + } // if old + } // for all + lck_mtx_unlock((lck_mtx_t *)&mutex_list_mutex.m_lock); + } // while not exit + + printf("SPL: watchdog thread exit\n"); + wdlist_exit = 2; + thread_exit(); +} +#endif + + +int +spl_mutex_subsystem_init(void) +{ + zfs_lock_attr = lck_attr_alloc_init(); + zfs_group_attr = lck_grp_attr_alloc_init(); + zfs_mutex_group = lck_grp_alloc_init("zfs-mutex", zfs_group_attr); + +#ifdef SPL_DEBUG_MUTEX + { + unsigned char mutex[128]; + int i; + + memset(mutex, 0xAF, sizeof (mutex)); + lck_mtx_init((lck_mtx_t *)&mutex[0], zfs_mutex_group, + zfs_lock_attr); + for (i = sizeof (mutex) -1; i >= 0; i--) + if (mutex[i] != 0xAF) + break; + + printf("SPL: mutex size is %u\n", i+1); + + } + + list_create(&mutex_list, sizeof (struct leak), + offsetof(struct leak, mutex_leak_node)); + lck_mtx_init((lck_mtx_t *)&mutex_list_mutex.m_lock, zfs_mutex_group, + zfs_lock_attr); + mutex_list_mutex.m_initialised = MUTEX_INIT; + + (void) thread_create(NULL, 0, spl_wdlist_check, 0, 0, 0, 0, 92); +#endif + return (0); +} + + + +void +spl_mutex_subsystem_fini(void) +{ +#ifdef SPL_DEBUG_MUTEX + uint64_t total = 0; + printf("Dumping leaked mutex allocations...\n"); + + wdlist_exit = 1; + + mutex_enter(&mutex_list_mutex); + while (1) { + struct leak *leak, *runner; + uint32_t found; + + leak = list_head(&mutex_list); + + if (leak) { + list_remove(&mutex_list, leak); + } + if (!leak) + break; + + // Run through list and count up how many times this leak is + // found, removing entries as we go. + for (found = 1, runner = list_head(&mutex_list); + runner; + runner = runner ? list_next(&mutex_list, runner) : + list_head(&mutex_list)) { + + if (strcmp(leak->location_file, runner->location_file) + == 0 && strcmp(leak->location_function, + runner->location_function) == 0 && + leak->location_line == runner->location_line) { + // Same place + found++; + list_remove(&mutex_list, runner); + FREE(runner, M_TEMP); + runner = NULL; + } // if same + + } // for all nodes + + printf(" mutex %p : %s %s %llu : # leaks: %u\n", + leak->mp, + leak->location_file, + leak->location_function, + leak->location_line, + found); + + FREE(leak, M_TEMP); + total += found; + + } + mutex_exit(&mutex_list_mutex); + printf("Dumped %llu leaked allocations. 
Wait for watchdog " + "to exit..\n", total); + + while (wdlist_exit != 2) + delay(hz>>4); + + lck_mtx_destroy((lck_mtx_t *)&mutex_list_mutex.m_lock, zfs_mutex_group); + list_destroy(&mutex_list); +#endif + + lck_attr_free(zfs_lock_attr); + zfs_lock_attr = NULL; + + lck_grp_attr_free(zfs_group_attr); + zfs_group_attr = NULL; + + lck_grp_free(zfs_mutex_group); + zfs_mutex_group = NULL; +} + + + +#ifdef SPL_DEBUG_MUTEX +void +spl_mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc, + const char *file, const char *fn, int line) +#else +void +spl_mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc) +#endif +{ + ASSERT(type != MUTEX_SPIN); + ASSERT(ibc == NULL); + +#ifdef SPL_DEBUG_MUTEX + VERIFY3U(mp->m_initialised, !=, MUTEX_INIT); +#endif + + lck_mtx_init((lck_mtx_t *)&mp->m_lock, zfs_mutex_group, zfs_lock_attr); + mp->m_owner = NULL; + + atomic_inc_64(&zfs_active_mutex); + +#ifdef SPL_DEBUG_MUTEX + mp->m_initialised = MUTEX_INIT; + + struct leak *leak; + + MALLOC(leak, struct leak *, + sizeof (struct leak), M_TEMP, M_WAITOK); + + if (leak) { + bzero(leak, sizeof (struct leak)); + strlcpy(leak->location_file, file, SPL_DEBUG_MUTEX_MAXCHAR); + strlcpy(leak->location_function, fn, SPL_DEBUG_MUTEX_MAXCHAR); + leak->location_line = line; + leak->mp = mp; + + mutex_enter(&mutex_list_mutex); + list_link_init(&leak->mutex_leak_node); + list_insert_tail(&mutex_list, leak); + mp->leak = leak; + mutex_exit(&mutex_list_mutex); + } + leak->wdlist_locktime = 0; + leak->wdlist_file[0] = 0; + leak->wdlist_line = 0; +#endif +} + +void +spl_mutex_destroy(kmutex_t *mp) +{ + if (!mp) + return; + +#ifdef SPL_DEBUG_MUTEX + VERIFY3U(mp->m_initialised, ==, MUTEX_INIT); +#endif + + if (mp->m_owner != 0) + panic("SPL: releasing held mutex"); + + lck_mtx_destroy((lck_mtx_t *)&mp->m_lock, zfs_mutex_group); + + atomic_dec_64(&zfs_active_mutex); + +#ifdef SPL_DEBUG_MUTEX + mp->m_initialised = MUTEX_DESTROYED; + + if (mp->leak) { + struct leak *leak = (struct leak *)mp->leak; + mutex_enter(&mutex_list_mutex); + list_remove(&mutex_list, leak); + mp->leak = NULL; + mutex_exit(&mutex_list_mutex); + FREE(leak, M_TEMP); + } +#endif +} + + + +#ifdef SPL_DEBUG_MUTEX +void +spl_mutex_enter(kmutex_t *mp, char *file, int line) +#else +void +spl_mutex_enter(kmutex_t *mp) +#endif +{ +#ifdef SPL_DEBUG_MUTEX + VERIFY3U(mp->m_initialised, ==, MUTEX_INIT); +#endif + + if (mp->m_owner == current_thread()) + panic("mutex_enter: locking against myself!"); + +#ifdef DEBUG + if (*((uint64_t *)mp) == 0xdeadbeefdeadbeef) { + panic("SPL: mutex_enter"); + } +#endif + + lck_mtx_lock((lck_mtx_t *)&mp->m_lock); + mp->m_owner = current_thread(); + +#ifdef SPL_DEBUG_MUTEX + if (mp->leak) { + struct leak *leak = (struct leak *)mp->leak; + leak->wdlist_locktime = gethrestime_sec(); + strlcpy(leak->wdlist_file, file, sizeof (leak->wdlist_file)); + leak->wdlist_line = line; + } +#endif + +} + +void +spl_mutex_exit(kmutex_t *mp) +{ +#ifdef DEBUG + if (*((uint64_t *)mp) == 0xdeadbeefdeadbeef) { + panic("SPL: mutex_exit"); + } +#endif + +#ifdef SPL_DEBUG_MUTEX + VERIFY3U(mp->m_initialised, ==, MUTEX_INIT); +#endif + +#ifdef SPL_DEBUG_MUTEX + if (mp->leak) { + struct leak *leak = (struct leak *)mp->leak; + uint64_t locktime = leak->wdlist_locktime; + uint64_t noe = gethrestime_sec(); + if ((locktime > 0) && (noe > locktime) && + noe - locktime >= SPL_MUTEX_WATCHDOG_TIMEOUT) { + printf("SPL: mutex (%p) finally released after %llus " + "by '%s':%llu\n", leak, noe - leak->wdlist_locktime, + leak->wdlist_file, leak->wdlist_line); + } 
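+ // Clear the watchdog bookkeeping so spl_wdlist_check() no longer
+ // treats this mutex as held.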
+ leak->wdlist_locktime = 0; + leak->wdlist_file[0] = 0; + leak->wdlist_line = 0; + } +#endif + mp->m_owner = NULL; + lck_mtx_unlock((lck_mtx_t *)&mp->m_lock); +} + + +int +spl_mutex_tryenter(kmutex_t *mp) +{ + int held; + +#ifdef SPL_DEBUG_MUTEX + VERIFY3U(mp->m_initialised, ==, MUTEX_INIT); +#endif + + if (mp->m_owner == current_thread()) + panic("mutex_tryenter: locking against myself!"); + + held = lck_mtx_try_lock((lck_mtx_t *)&mp->m_lock); + if (held) { + mp->m_owner = current_thread(); + +#ifdef SPL_DEBUG_MUTEX + if (mp->leak) { + struct leak *leak = (struct leak *)mp->leak; + leak->wdlist_locktime = gethrestime_sec(); + strlcpy(leak->wdlist_file, "tryenter", + sizeof (leak->wdlist_file)); + leak->wdlist_line = 123; + } +#endif + + } + return (held); +} + +int +spl_mutex_owned(kmutex_t *mp) +{ + return (mp->m_owner == current_thread()); +} + +struct kthread * +spl_mutex_owner(kmutex_t *mp) +{ + return (mp->m_owner); +} diff --git a/module/os/macos/spl/spl-osx.c b/module/os/macos/spl/spl-osx.c new file mode 100644 index 0000000000..b65cb66e4d --- /dev/null +++ b/module/os/macos/spl/spl-osx.c @@ -0,0 +1,488 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _task_user_ +#include + +#include +#include + +extern int system_inshutdown; + +static utsname_t utsname_static = { { 0 } }; + +unsigned int max_ncpus = 0; +uint64_t total_memory = 0; +uint64_t real_total_memory = 0; + +// Size in bytes of the memory allocated in seg_kmem +extern uint64_t segkmem_total_mem_allocated; + +extern char hostname[MAXHOSTNAMELEN]; + +utsname_t * +utsname(void) +{ + return (&utsname_static); +} + +/* + * Solaris delay is in ticks (hz) and Darwin uses microsecs + * 1 HZ is 10 milliseconds + */ +void +osx_delay(int ticks) +{ + if (ticks < 2) { + // IODelay spins and takes microseconds as an argument + // don't spend more than 10msec spinning. 
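+ // e.g. ticks == 1 -> IODelay(10000), i.e. a 10 msec busy-wait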
+ IODelay(ticks * 10000); + return; + } + + // ticks are 10 msec units + int64_t ticks_to_go = (int64_t)ticks * 10LL; + // zfs_lbolt() is in 10 mec units + int64_t start_tick = (int64_t)zfs_lbolt(); + int64_t end_tick = start_tick + (int64_t)ticks_to_go; + + do { + IOSleep(ticks_to_go); + int64_t cur_tick = (int64_t)zfs_lbolt(); + ticks_to_go = (end_tick - cur_tick); + } while (ticks_to_go > 0); + +} + + +uint32_t +zone_get_hostid(void *zone) +{ + size_t len; + uint32_t myhostid = 0; + + len = sizeof (myhostid); + sysctlbyname("kern.hostid", &myhostid, &len, NULL, 0); + return (myhostid); +} + +extern void *(*__ihook_malloc)(size_t size); +extern void (*__ihook_free)(void *); + +const char * +spl_panicstr(void) +{ + return (NULL); +} + +int +spl_system_inshutdown(void) +{ + return (system_inshutdown); +} + +#include +typedef struct mach_header_64 kernel_mach_header_t; +#include +typedef struct nlist_64 kernel_nlist_t; + +typedef struct segment_command_64 kernel_segment_command_t; + +typedef struct _loaded_kext_summary { + char name[KMOD_MAX_NAME]; + uuid_t uuid; + uint64_t address; + uint64_t size; + uint64_t version; + uint32_t loadTag; + uint32_t flags; + uint64_t reference_list; +} OSKextLoadedKextSummary; + +typedef struct _loaded_kext_summary_header { + uint32_t version; + uint32_t entry_size; + uint32_t numSummaries; + uint32_t reserved; /* explicit alignment for gdb */ + OSKextLoadedKextSummary summaries[0]; +} OSKextLoadedKextSummaryHeader; + +extern OSKextLoadedKextSummaryHeader * gLoadedKextSummaries; + +typedef struct _cframe_t { + struct _cframe_t *prev; + uintptr_t caller; +#if PRINT_ARGS_FROM_STACK_FRAME + unsigned args[0]; +#endif +} cframe_t; + +extern kernel_mach_header_t _mh_execute_header; + +extern kmod_info_t *kmod; /* the list of modules */ + +extern addr64_t kvtophys(vm_offset_t va); + +static int +panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search, + const char *module_name) +{ + kernel_nlist_t *sym = NULL; + struct load_command *cmd; + kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; + struct symtab_command *orig_st = NULL; + unsigned int i; + char *strings, *bestsym = NULL; + vm_address_t bestaddr = 0, diff, curdiff; + + /* + * Assume that if it's loaded and linked into the kernel, + * it's a valid Mach-O + */ + cmd = (struct load_command *)&mh[1]; + for (i = 0; i < mh->ncmds; i++) { + if (cmd->cmd == LC_SEGMENT_64) { + kernel_segment_command_t *orig_sg = + (kernel_segment_command_t *)cmd; + + if (strncmp(SEG_TEXT, orig_sg->segname, + sizeof (orig_sg->segname)) == 0) + orig_ts = orig_sg; + else if (strncmp(SEG_LINKEDIT, orig_sg->segname, + sizeof (orig_sg->segname)) == 0) + orig_le = orig_sg; + /* pre-Lion i386 kexts have a single unnamed segment */ + else if (strncmp("", orig_sg->segname, + sizeof (orig_sg->segname)) == 0) + orig_ts = orig_sg; + } else if (cmd->cmd == LC_SYMTAB) + orig_st = (struct symtab_command *)cmd; + + cmd = (struct load_command *)((uintptr_t)cmd + cmd->cmdsize); + } + + if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) + return (0); + + if ((search < orig_ts->vmaddr) || + (search >= orig_ts->vmaddr + orig_ts->vmsize)) { + /* search out of range for this mach header */ + return (0); + } + + sym = (kernel_nlist_t *)(uintptr_t)(orig_le->vmaddr + + orig_st->symoff - orig_le->fileoff); + strings = (char *)(uintptr_t)(orig_le->vmaddr + + orig_st->stroff - orig_le->fileoff); + diff = search; + + for (i = 0; i < orig_st->nsyms; i++) { + if (sym[i].n_type & N_STAB) continue; + + if (sym[i].n_value <= 
search) { + curdiff = search - (vm_address_t)sym[i].n_value; + if (curdiff < diff) { + diff = curdiff; + bestaddr = sym[i].n_value; + bestsym = strings + sym[i].n_un.n_strx; + } + } + } + + if (bestsym != NULL) { + if (diff != 0) { + printf("%s : %s + 0x%lx", module_name, bestsym, + (unsigned long)diff); + } else { + printf("%s : %s", module_name, bestsym); + } + return (1); + } + return (0); +} + + +static void +panic_print_kmod_symbol_name(vm_address_t search) +{ + uint_t i; + + if (gLoadedKextSummaries == NULL) + return; + for (i = 0; i < gLoadedKextSummaries->numSummaries; ++i) { + OSKextLoadedKextSummary *summary = + gLoadedKextSummaries->summaries + i; + + if ((search >= summary->address) && + (search < (summary->address + summary->size))) { + kernel_mach_header_t *header = + (kernel_mach_header_t *)(uintptr_t)summary->address; + if (panic_print_macho_symbol_name(header, search, + summary->name) == 0) { + printf("%s + %llu", summary->name, + (unsigned long)search - summary->address); + } + break; + } + } +} + + +static void +panic_print_symbol_name(vm_address_t search) +{ + /* try searching in the kernel */ + if (panic_print_macho_symbol_name(&_mh_execute_header, + search, "mach_kernel") == 0) { + /* that failed, now try to search for the right kext */ + panic_print_kmod_symbol_name(search); + } +} + + +void +spl_backtrace(char *thesignal) +{ + void *stackptr; + + printf("SPL: backtrace \"%s\"\n", thesignal); + +#if defined(__i386__) + __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); +#elif defined(__x86_64__) + __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); +#endif + + int frame_index; + int nframes = 16; + cframe_t *frame = (cframe_t *)stackptr; + + for (frame_index = 0; frame_index < nframes; frame_index++) { + vm_offset_t curframep = (vm_offset_t)frame; + if (!curframep) + break; + if (curframep & 0x3) { + printf("SPL: Unaligned frame\n"); + break; + } + if (!kvtophys(curframep) || + !kvtophys(curframep + sizeof (cframe_t) - 1)) { + printf("SPL: No mapping exists for frame pointer\n"); + break; + } + printf("SPL: %p : 0x%lx ", frame, frame->caller); + panic_print_symbol_name((vm_address_t)frame->caller); + printf("\n"); + frame = frame->prev; + } +} + +int +getpcstack(uintptr_t *pcstack, int pcstack_limit) +{ + int depth = 0; + void *stackptr; + +#if defined(__i386__) + __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); +#elif defined(__x86_64__) + __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); +#endif + + int frame_index; + int nframes = pcstack_limit; + cframe_t *frame = (cframe_t *)stackptr; + + for (frame_index = 0; frame_index < nframes; frame_index++) { + vm_offset_t curframep = (vm_offset_t)frame; + if (!curframep) + break; + if (curframep & 0x3) { + break; + } + if (!kvtophys(curframep) || + !kvtophys(curframep + sizeof (cframe_t) - 1)) { + break; + } + pcstack[depth++] = frame->caller; + frame = frame->prev; + } + + return (depth); +} + +void +print_symbol(uintptr_t symbol) +{ + printf("SPL: "); + panic_print_symbol_name((vm_address_t)(symbol)); + printf("\n"); +} + +int +ddi_copyin(const void *from, void *to, size_t len, int flags) +{ + int ret = 0; + + /* Fake ioctl() issued by kernel, 'from' is a kernel address */ + if (flags & FKIOCTL) + bcopy(from, to, len); + else + ret = copyin((user_addr_t)from, (void *)to, len); + + return (ret); +} + +int +ddi_copyout(const void *from, void *to, size_t len, int flags) +{ + int ret = 0; + + /* Fake ioctl() issued by kernel, 'from' is a kernel address */ + if (flags & FKIOCTL) { + bcopy(from, to, len); + } 
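+ /* Not a kernel-internal ioctl: 'to' is a user address, hand it to copyout(). */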
else { + ret = copyout(from, (user_addr_t)to, len); + } + + return (ret); +} + +/* + * Technically, this call does not exist in illumos, but we use it for + * consistency. + */ +int +ddi_copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) +{ + int ret; + size_t local_done; + +#undef copyinstr + ret = copyinstr((user_addr_t)uaddr, kaddr, len, &local_done); + if (done != NULL) + *done = local_done; + return (ret); +} + +kern_return_t +spl_start(kmod_info_t *ki, void *d) +{ + printf("SPL: loading\n"); + + int ncpus; + size_t len = sizeof (ncpus); + + /* + * Boot load time is excessively early, so we have to wait + * until certain subsystems are available. Surely there is + * a more elegant way to do this wait? + */ + + while (current_proc() == NULL) { + printf("SPL: waiting for kernel init...\n"); + delay(hz>>1); + } + + while (1) { + len = sizeof (total_memory); + sysctlbyname("hw.memsize", &total_memory, &len, NULL, 0); + if (total_memory != 0) break; + + printf("SPL: waiting for sysctl...\n"); + delay(hz>>1); + } + + sysctlbyname("hw.logicalcpu_max", &max_ncpus, &len, NULL, 0); + if (!max_ncpus) max_ncpus = 1; + + /* + * Setting the total memory to physmem * 50% here, since kmem is + * not in charge of all memory and we need to leave some room for + * the OS X allocator. We internally add pressure if we step over it + */ + real_total_memory = total_memory; + total_memory = total_memory * 50ULL / 100ULL; + physmem = total_memory / PAGE_SIZE; + + len = sizeof (utsname_static.sysname); + sysctlbyname("kern.ostype", &utsname_static.sysname, &len, NULL, 0); + + /* + * For some reason, (CTLFLAG_KERN is not set) looking up hostname + * returns 1. So we set it to uuid just to give it *something*. + * As it happens, ZFS sets the nodename on init. + */ + len = sizeof (utsname_static.nodename); + sysctlbyname("kern.uuid", &utsname_static.nodename, &len, NULL, 0); + + len = sizeof (utsname_static.release); + sysctlbyname("kern.osrelease", &utsname_static.release, &len, NULL, 0); + + len = sizeof (utsname_static.version); + sysctlbyname("kern.version", &utsname_static.version, &len, NULL, 0); + + strlcpy(utsname_static.nodename, hostname, + sizeof (utsname_static.nodename)); + + spl_mutex_subsystem_init(); + spl_kmem_init(total_memory); + spl_vnode_init(); + spl_kmem_thread_init(); + spl_kmem_mp_init(); + + return (KERN_SUCCESS); +} + +kern_return_t +spl_stop(kmod_info_t *ki, void *d) +{ + spl_kmem_thread_fini(); + spl_vnode_fini(); + spl_taskq_fini(); + spl_rwlock_fini(); + spl_tsd_fini(); + spl_kmem_fini(); + spl_kstat_fini(); + spl_mutex_subsystem_fini(); + + return (KERN_SUCCESS); +} diff --git a/module/os/macos/spl/spl-policy.c b/module/os/macos/spl/spl-policy.c new file mode 100644 index 0000000000..1b63dd9b57 --- /dev/null +++ b/module/os/macos/spl/spl-policy.c @@ -0,0 +1,178 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +#include +#include +#include + +int +spl_priv_check_cred(kauth_cred_t cred, int priv, __unused int flags) +{ + int error; + + if (kauth_cred_getuid(cred) == 0) { + error = 0; + goto out; + } + + /* + * The default is deny, so if no policies have granted it, reject + * with a privilege error here. + */ + error = EPERM; +out: + return (error); +} + +int +secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp) +{ + return (spl_priv_check_cred((kauth_cred_t)cr, PRIV_VFS_UNMOUNT, 0)); +} + +int +secpolicy_nfs(const cred_t *cr) +{ + return (spl_priv_check_cred((kauth_cred_t)cr, PRIV_NFS_DAEMON, 0)); +} + +int +secpolicy_sys_config(const cred_t *cr, boolean_t checkonly) +{ + return (spl_priv_check_cred((kauth_cred_t)cr, PRIV_ZFS_POOL_CONFIG, 0)); +} + +int +secpolicy_zfs(const cred_t *cr) +{ + return (spl_priv_check_cred((kauth_cred_t)cr, PRIV_VFS_MOUNT, 0)); +} + +int +secpolicy_zinject(const cred_t *cr) +{ + return (spl_priv_check_cred((kauth_cred_t)cr, PRIV_ZFS_INJECT, 0)); +} + +int +secpolicy_vnode_any_access(const cred_t *cr, vnode_t *vp, uid_t owner) +{ + // FIXME + return (0); +} + +int +secpolicy_vnode_access2(const cred_t *cr, vnode_t *vp, uid_t owner, + mode_t curmode, mode_t wantmode) +{ + // FIXME + return (0); +} + +int +secpolicy_vnode_setattr(cred_t *cr, struct vnode *vp, vattr_t *vap, + const vattr_t *ovap, int flags, + int unlocked_access(void *, int, cred_t *), + void *node) +{ + // FIXME + return (0); +} + +int +secpolicy_vnode_stky_modify(const cred_t *cred) +{ + return (EPERM); +} + +int +secpolicy_setid_setsticky_clear(vnode_t *vp, vattr_t *vap, const vattr_t *ovap, + cred_t *cr) +{ + // FIXME + return (0); +} + +int +secpolicy_vnode_remove(struct vnode *vp, const cred_t *cr) +{ + return (0); +} + +int +secpolicy_vnode_create_gid(const cred_t *cred) +{ + return (0); +} + +int +secpolicy_vnode_setids_setgids(struct vnode *vp, const cred_t *cr, + gid_t gid) +{ + return (0); +} + +int +secpolicy_vnode_setdac(struct vnode *vp, const cred_t *cr, uid_t u) +{ + return (0); +} + +int +secpolicy_vnode_chown(struct vnode *vp, const cred_t *cr, uid_t u) +{ + return (0); +} + +int +secpolicy_vnode_setid_retain(const cred_t *cr, int fal) +{ + return (0); +} + +int +secpolicy_xvattr(vattr_t *vap, uid_t uid, const cred_t *cr, mode_t mod) +{ + return (0); +} + +int +secpolicy_setid_clear(vattr_t *vap, const cred_t *cr) +{ + return (0); +} + +int +secpolicy_basic_link(const cred_t *cr) +{ + return (0); +} + +int +secpolicy_fs_mount_clearopts(const cred_t *cr, struct mount *mp) +{ + return (0); +} + +int +secpolicy_fs_mount(const cred_t *cr, struct vnode *vp, struct mount *mp) +{ + return (spl_priv_check_cred((kauth_cred_t)cr, PRIV_VFS_MOUNT, 0)); +} diff --git a/module/os/macos/spl/spl-proc.c b/module/os/macos/spl/spl-proc.c new file mode 100644 index 0000000000..9c90559f0f --- /dev/null +++ b/module/os/macos/spl/spl-proc.c @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include + +struct proc { + void *nothing; +}; + +struct proc p0 = {0}; diff --git a/module/os/macos/spl/spl-proc_list.c b/module/os/macos/spl/spl-proc_list.c new file mode 100644 index 0000000000..4228f31339 --- /dev/null +++ b/module/os/macos/spl/spl-proc_list.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include + +void +seq_printf(struct seq_file *m, const char *fmt, ...) +{ +} + +void +procfs_list_install(const char *module, + const char *name, + mode_t mode, + procfs_list_t *procfs_list, + int (*show)(struct seq_file *f, void *p), + int (*show_header)(struct seq_file *f), + int (*clear)(procfs_list_t *procfs_list), + size_t procfs_list_node_off) +{ + mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&procfs_list->pl_list, + procfs_list_node_off + sizeof (procfs_list_node_t), + procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); + procfs_list->pl_next_id = 1; + procfs_list->pl_node_offset = procfs_list_node_off; +} + +void +procfs_list_uninstall(procfs_list_t *procfs_list) +{ +} + +void +procfs_list_destroy(procfs_list_t *procfs_list) +{ + ASSERT(list_is_empty(&procfs_list->pl_list)); + list_destroy(&procfs_list->pl_list); + mutex_destroy(&procfs_list->pl_lock); +} + +#define NODE_ID(procfs_list, obj) \ + (((procfs_list_node_t *)(((char *)obj) + \ + (procfs_list)->pl_node_offset))->pln_id) + +void +procfs_list_add(procfs_list_t *procfs_list, void *p) +{ + ASSERT(MUTEX_HELD(&procfs_list->pl_lock)); + NODE_ID(procfs_list, p) = procfs_list->pl_next_id++; + list_insert_tail(&procfs_list->pl_list, p); +} diff --git a/module/os/macos/spl/spl-processor.c b/module/os/macos/spl/spl-processor.c new file mode 100644 index 0000000000..6587aa7aee --- /dev/null +++ b/module/os/macos/spl/spl-processor.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#include +#include + +extern int cpu_number(void); + +uint32_t +getcpuid() +{ + return ((uint32_t)cpu_number()); +} + +uint64_t +spl_cpuid_features(void) +{ + i386_cpu_info_t *info; + + info = cpuid_info(); + return (info->cpuid_features); +} + +uint64_t +spl_cpuid_leaf7_features(void) +{ + i386_cpu_info_t *info; + + info = cpuid_info(); + return (info->cpuid_leaf7_features); +} diff --git a/module/os/macos/spl/spl-rwlock.c b/module/os/macos/spl/spl-rwlock.c new file mode 100644 index 0000000000..22fc260080 --- /dev/null +++ b/module/os/macos/spl/spl-rwlock.c @@ -0,0 +1,397 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +static lck_attr_t *zfs_rwlock_attr = NULL; +static lck_grp_attr_t *zfs_rwlock_group_attr = NULL; +static lck_grp_t *zfs_rwlock_group = NULL; + +uint64_t zfs_active_rwlock = 0; + +#ifdef SPL_DEBUG_RWLOCK +#include +static list_t rwlock_list; +static kmutex_t rwlock_list_mutex; +struct leak { + list_node_t rwlock_leak_node; + +#define SPL_DEBUG_RWLOCK_MAXCHAR 32 + char location_file[SPL_DEBUG_RWLOCK_MAXCHAR]; + char location_function[SPL_DEBUG_RWLOCK_MAXCHAR]; + uint64_t location_line; + void *mp; + + uint64_t wdlist_locktime; // time lock was taken + char wdlist_file[32]; // storing holder + uint64_t wdlist_line; +}; + +#endif + +/* + * We run rwlock with DEBUG on for now, as it protects against + * uninitialised access etc, and almost no cost. 
+ */ +#ifndef DEBUG +#define DEBUG +#endif + +#ifdef DEBUG +int +rw_isinit(krwlock_t *rwlp) +{ + if (rwlp->rw_pad != 0x012345678) + return (0); + return (1); +} +#endif + + +#ifdef SPL_DEBUG_RWLOCK +void +rw_initx(krwlock_t *rwlp, char *name, krw_type_t type, __unused void *arg, + const char *file, const char *fn, int line) +#else +void +rw_init(krwlock_t *rwlp, char *name, krw_type_t type, __unused void *arg) +#endif +{ + ASSERT(type != RW_DRIVER); + +#ifdef DEBUG + VERIFY3U(rwlp->rw_pad, !=, 0x012345678); +#endif + + lck_rw_init((lck_rw_t *)&rwlp->rw_lock[0], + zfs_rwlock_group, zfs_rwlock_attr); + rwlp->rw_owner = NULL; + rwlp->rw_readers = 0; +#ifdef DEBUG + rwlp->rw_pad = 0x012345678; +#endif + atomic_inc_64(&zfs_active_rwlock); + +#ifdef SPL_DEBUG_RWLOCK + struct leak *leak; + + MALLOC(leak, struct leak *, + sizeof (struct leak), M_TEMP, M_WAITOK); + + if (leak) { + bzero(leak, sizeof (struct leak)); + strlcpy(leak->location_file, file, SPL_DEBUG_RWLOCK_MAXCHAR); + strlcpy(leak->location_function, fn, SPL_DEBUG_RWLOCK_MAXCHAR); + leak->location_line = line; + leak->mp = rwlp; + + mutex_enter(&rwlock_list_mutex); + list_link_init(&leak->rwlock_leak_node); + list_insert_tail(&rwlock_list, leak); + rwlp->leak = leak; + mutex_exit(&rwlock_list_mutex); + } + leak->wdlist_locktime = 0; + leak->wdlist_file[0] = 0; + leak->wdlist_line = 0; +#endif +} + +void +rw_destroy(krwlock_t *rwlp) +{ +#ifdef DEBUG + VERIFY3U(rwlp->rw_pad, ==, 0x012345678); +#endif + + lck_rw_destroy((lck_rw_t *)&rwlp->rw_lock[0], zfs_rwlock_group); +#ifdef DEBUG + rwlp->rw_pad = 0x99; +#endif + atomic_dec_64(&zfs_active_rwlock); + ASSERT(rwlp->rw_owner == NULL); + ASSERT(rwlp->rw_readers == 0); + +#ifdef SPL_DEBUG_RWLOCK + if (rwlp->leak) { + struct leak *leak = (struct leak *)rwlp->leak; + mutex_enter(&rwlock_list_mutex); + list_remove(&rwlock_list, leak); + rwlp->leak = NULL; + mutex_exit(&rwlock_list_mutex); + FREE(leak, M_TEMP); + } +#endif +} + +void +rw_enter(krwlock_t *rwlp, krw_t rw) +{ +#ifdef DEBUG + if (rwlp->rw_pad != 0x012345678) + panic("rwlock %p not initialised\n", rwlp); +#endif + + if (rw == RW_READER) { + lck_rw_lock_shared((lck_rw_t *)&rwlp->rw_lock[0]); + atomic_inc_32((volatile uint32_t *)&rwlp->rw_readers); + ASSERT(rwlp->rw_owner == 0); + } else { + if (rwlp->rw_owner == current_thread()) + panic("rw_enter: locking against myself!"); + lck_rw_lock_exclusive((lck_rw_t *)&rwlp->rw_lock[0]); + ASSERT(rwlp->rw_owner == 0); + ASSERT(rwlp->rw_readers == 0); + rwlp->rw_owner = current_thread(); + } +} + +/* + * kernel private from osfmk/kern/locks.h + */ +extern boolean_t lck_rw_try_lock(lck_rw_t *lck, lck_rw_type_t lck_rw_type); + +int +rw_tryenter(krwlock_t *rwlp, krw_t rw) +{ + int held = 0; + +#ifdef DEBUG + if (rwlp->rw_pad != 0x012345678) + panic("rwlock %p not initialised\n", rwlp); +#endif + + if (rw == RW_READER) { + held = lck_rw_try_lock((lck_rw_t *)&rwlp->rw_lock[0], + LCK_RW_TYPE_SHARED); + if (held) + atomic_inc_32((volatile uint32_t *)&rwlp->rw_readers); + } else { + if (rwlp->rw_owner == current_thread()) + panic("rw_tryenter: locking against myself!"); + held = lck_rw_try_lock((lck_rw_t *)&rwlp->rw_lock[0], + LCK_RW_TYPE_EXCLUSIVE); + if (held) + rwlp->rw_owner = current_thread(); + } + + return (held); +} + +/* + * It appears a difference between Darwin's + * lck_rw_lock_shared_to_exclusive() and Solaris's rw_tryupgrade() and + * FreeBSD's sx_try_upgrade() is that on failure to upgrade, the prior + * held shared/reader lock is lost on Darwin, but retained on + * Solaris/FreeBSD. 
We could re-acquire the lock in this situation, + * but it enters a possibility of blocking, when tryupgrade is meant + * to be non-blocking. + * Also note that XNU's lck_rw_lock_shared_to_exclusive() is always + * blocking (when waiting on readers), which means we can not use it. + */ +int +rw_tryupgrade(krwlock_t *rwlp) +{ + int held = 0; + + if (rwlp->rw_owner == current_thread()) + panic("rw_enter: locking against myself!"); + + /* More readers than us? give up */ + if (rwlp->rw_readers != 1) + return (0); + + /* + * It is ON. We need to drop our READER lock, and try to + * grab the WRITER as quickly as possible. + */ + atomic_dec_32((volatile uint32_t *)&rwlp->rw_readers); + lck_rw_unlock_shared((lck_rw_t *)&rwlp->rw_lock[0]); + + /* Grab the WRITER lock */ + held = lck_rw_try_lock((lck_rw_t *)&rwlp->rw_lock[0], + LCK_RW_TYPE_EXCLUSIVE); + + if (held) { + /* Looks like we won */ + rwlp->rw_owner = current_thread(); + ASSERT(rwlp->rw_readers == 0); + return (1); + } + + /* + * The worst has happened, we failed to grab WRITE lock, either + * due to another WRITER lock, or, some READER came along. + * IllumOS implementation returns with the READER lock again + * so we need to grab it. + */ + rw_enter(rwlp, RW_READER); + return (0); + +} + +void +rw_exit(krwlock_t *rwlp) +{ + if (rwlp->rw_owner == current_thread()) { + rwlp->rw_owner = NULL; + ASSERT(rwlp->rw_readers == 0); + lck_rw_unlock_exclusive((lck_rw_t *)&rwlp->rw_lock[0]); + } else { + atomic_dec_32((volatile uint32_t *)&rwlp->rw_readers); + ASSERT(rwlp->rw_owner == 0); + lck_rw_unlock_shared((lck_rw_t *)&rwlp->rw_lock[0]); + } +} + + +int +rw_lock_held(krwlock_t *rwlp) +{ + /* + * ### not sure about this one ### + */ + return (rwlp->rw_owner == current_thread() || rwlp->rw_readers > 0); +} + +int +rw_write_held(krwlock_t *rwlp) +{ + return (rwlp->rw_owner == current_thread()); +} + +void +rw_downgrade(krwlock_t *rwlp) +{ + if (rwlp->rw_owner != current_thread()) + panic("SPL: rw_downgrade not WRITE lock held\n"); + rwlp->rw_owner = NULL; + lck_rw_lock_exclusive_to_shared((lck_rw_t *)&rwlp->rw_lock[0]); + atomic_inc_32((volatile uint32_t *)&rwlp->rw_readers); +} + +int +spl_rwlock_init(void) +{ + zfs_rwlock_attr = lck_attr_alloc_init(); + zfs_rwlock_group_attr = lck_grp_attr_alloc_init(); + zfs_rwlock_group = lck_grp_alloc_init("zfs-rwlock", + zfs_rwlock_group_attr); + +#ifdef SPL_DEBUG_RWLOCK + list_create(&rwlock_list, sizeof (struct leak), + offsetof(struct leak, rwlock_leak_node)); + lck_mtx_init((lck_mtx_t *)&rwlock_list_mutex.m_lock, + zfs_rwlock_group, zfs_rwlock_attr); +#endif + + return (0); +} + +void +spl_rwlock_fini(void) +{ + +#ifdef SPL_DEBUG_RWLOCK + uint64_t total = 0; + printf("Dumping leaked rwlock allocations...\n"); + + mutex_enter(&rwlock_list_mutex); + while (1) { + struct leak *leak, *runner; + uint32_t found; + + leak = list_head(&rwlock_list); + + if (leak) { + list_remove(&rwlock_list, leak); + } + if (!leak) break; + + // Run through list and count up how many times this leak is + // found, removing entries as we go. + for (found = 1, runner = list_head(&rwlock_list); + runner; + runner = runner ? 
list_next(&rwlock_list, runner) : + list_head(&rwlock_list)) { + + if (strcmp(leak->location_file, runner->location_file) + == 0 && + strcmp(leak->location_function, + runner->location_function) == 0 && + leak->location_line == runner->location_line) { + // Same place + found++; + list_remove(&rwlock_list, runner); + FREE(runner, M_TEMP); + runner = NULL; + } // if same + + } // for all nodes + + printf(" rwlock %p : %s %s %llu : # leaks: %u\n", + leak->mp, + leak->location_file, + leak->location_function, + leak->location_line, + found); + + FREE(leak, M_TEMP); + total += found; + + } + mutex_exit(&rwlock_list_mutex); + printf("Dumped %llu leaked allocations.\n", total); + + lck_mtx_destroy((lck_mtx_t *)&rwlock_list_mutex.m_lock, + zfs_rwlock_group); + list_destroy(&rwlock_list); +#endif + + lck_grp_free(zfs_rwlock_group); + zfs_rwlock_group = NULL; + + lck_grp_attr_free(zfs_rwlock_group_attr); + zfs_rwlock_group_attr = NULL; + + lck_attr_free(zfs_rwlock_attr); + zfs_rwlock_attr = NULL; + + ASSERT3U(zfs_active_rwlock, ==, 0); +} diff --git a/module/os/macos/spl/spl-seg_kmem.c b/module/os/macos/spl/spl-seg_kmem.c new file mode 100644 index 0000000000..f2b36280f3 --- /dev/null +++ b/module/os/macos/spl/spl-seg_kmem.c @@ -0,0 +1,289 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#include +#include +// ugly: smd +#ifdef kmem_free +#undef kmem_free +#endif + +#include +#include +#include + +#include + +/* + * seg_kmem is the primary kernel memory segment driver. It + * maps the kernel heap [kernelheap, ekernelheap), module text, + * and all memory which was allocated before the VM was initialized + * into kas. + * + * Pages which belong to seg_kmem are hashed into &kvp vnode at + * an offset equal to (u_offset_t)virt_addr, and have p_lckcnt >= 1. + * They must never be paged out since segkmem_fault() is a no-op to + * prevent recursive faults. + * + * Currently, seg_kmem pages are sharelocked (p_sharelock == 1) on + * __x86 and are unlocked (p_sharelock == 0) on __sparc. Once __x86 + * supports relocation the #ifdef kludges can be removed. + * + * seg_kmem pages may be subject to relocation by page_relocate(), + * provided that the HAT supports it; if this is so, segkmem_reloc + * will be set to a nonzero value. All boot time allocated memory as + * well as static memory is considered off limits to relocation. + * Pages are "relocatable" if p_state does not have P_NORELOC set, so + * we request P_NORELOC pages for memory that isn't safe to relocate. + * + * The kernel heap is logically divided up into four pieces: + * + * heap32_arena is for allocations that require 32-bit absolute + * virtual addresses (e.g. 
code that uses 32-bit pointers/offsets). + * + * heap_core is for allocations that require 2GB *relative* + * offsets; in other words all memory from heap_core is within + * 2GB of all other memory from the same arena. This is a requirement + * of the addressing modes of some processors in supervisor code. + * + * heap_arena is the general heap arena. + * + * static_arena is the static memory arena. Allocations from it + * are not subject to relocation so it is safe to use the memory + * physical address as well as the virtual address (e.g. the VA to + * PA translations are static). Caches may import from static_arena; + * all other static memory allocations should use static_alloc_arena. + * + * On some platforms which have limited virtual address space, seg_kmem + * may share [kernelheap, ekernelheap) with seg_kp; if this is so, + * segkp_bitmap is non-NULL, and each bit represents a page of virtual + * address space which is actually seg_kp mapped. + */ + +/* + * Rough stubbed Port for XNU. + * + * Copyright (c) 2014 Brendon Humphrey (brendon.humphrey@mac.com) + */ + + +#ifdef _KERNEL +#define XNU_KERNEL_PRIVATE +#include +extern vm_map_t kernel_map; + +/* + * These extern prototypes has to be carefully checked against XNU source + * in case Apple changes them. They are not defined in the "allowed" parts + * of the kernel.framework + */ +typedef uint8_t vm_tag_t; + +/* + * Tag we use to identify memory we have allocated + * + * (VM_KERN_MEMORY_KEXT - mach_vm_statistics.h) + */ +#define SPL_TAG 6 + +/* + * In kernel lowlevel form of malloc. + */ +extern kern_return_t kernel_memory_allocate(vm_map_t map, void **addrp, + vm_size_t size, vm_offset_t mask, int flags, vm_tag_t tag); + +/* + * Free memory + */ +extern void kmem_free(vm_map_t map, void *addr, vm_size_t size); + +#endif /* _KERNEL */ + +typedef int page_t; + +void *segkmem_alloc(vmem_t *vmp, size_t size, int vmflag); +void segkmem_free(vmem_t *vmp, void *inaddr, size_t size); + +/* Total memory held allocated */ +uint64_t segkmem_total_mem_allocated = 0; + +/* primary kernel heap arena */ +vmem_t *heap_arena; + +/* qcaches for zio and abd arenas */ +vmem_t *zio_arena_parent; + +/* arena for allocating file data */ +vmem_t *zio_arena; + +/* and for allocation of zfs metadata */ +vmem_t *zio_metadata_arena; + +#ifdef _KERNEL +extern uint64_t total_memory; +uint64_t stat_osif_malloc_success = 0; +uint64_t stat_osif_free = 0; +uint64_t stat_osif_malloc_bytes = 0; +uint64_t stat_osif_free_bytes = 0; +#endif + +void * +osif_malloc(uint64_t size) +{ +#ifdef _KERNEL + void *tr; + + kern_return_t kr = kernel_memory_allocate(kernel_map, + &tr, size, PAGESIZE, 0, SPL_TAG); + + if (kr == KERN_SUCCESS) { + atomic_inc_64(&stat_osif_malloc_success); + atomic_add_64(&segkmem_total_mem_allocated, size); + atomic_add_64(&stat_osif_malloc_bytes, size); + return (tr); + } else { + // well, this can't really happen, kernel_memory_allocate + // would panic instead + return (NULL); + } +#else + return (malloc(size)); +#endif +} + +void +osif_free(void* buf, uint64_t size) +{ +#ifdef _KERNEL + kmem_free(kernel_map, buf, size); + atomic_inc_64(&stat_osif_free); + atomic_sub_64(&segkmem_total_mem_allocated, size); + atomic_add_64(&stat_osif_free_bytes, size); +#else + free(buf); +#endif /* _KERNEL */ +} + +/* + * Configure vmem, such that the heap arena is fed, + * and drains to the kernel low level allocator. 
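+ *
+ * Roughly: heap_arena -> segkmem_alloc()/segkmem_free() ->
+ * osif_malloc()/osif_free() -> kernel_memory_allocate()/kmem_free() in XNU.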
+ */ +void +kernelheap_init() +{ + heap_arena = vmem_init("heap", NULL, 0, PAGESIZE, segkmem_alloc, + segkmem_free); +} + + +void +kernelheap_fini(void) +{ + vmem_fini(heap_arena); +} + +void * +segkmem_alloc(vmem_t *vmp, size_t size, int maybe_unmasked_vmflag) +{ + return (osif_malloc(size)); +} + +void +segkmem_free(vmem_t *vmp, void *inaddr, size_t size) +{ + osif_free(inaddr, size); + // since this is mainly called by spl_root_arena and free_arena, + // do we really want to wake up a waiter, just because we have + // transferred from one to the other? + // we already have vmem_add_a_gibibyte waking up waiters + // so specializing here seems wasteful + // (originally included in vmem_experiments) + // cv_signal(&vmp->vm_cv); +} + +/* + * OSX does not use separate heaps for the ZIO buffers, + * the ZFS code is structured such that the zio caches will + * fallback to using the kmem_default arena same + * as all the other caches. + */ +// smd: we nevertheless plumb in an arena with heap as parent, so that +// we can track stats and maintain the VM_ / qc settings differently +void +segkmem_zio_init() +{ + // note: from startup.c and vm_machparam: SEGZIOMINSIZE = 512M. + // and SEGZSIOMAXSIZE = 512G; if physmem is between the two, then + // segziosize is (physmem - SEGZIOMAXSIZE) / 2. + + // Illumos does not segregate zio_metadata_arena out of heap, + // almost exclusively for reasons involving panic dump data + // retention. However, parenting zio_metadata_arena to + // spl_root_arena and giving it its own qcaches provides better + // kstat observability *and* noticeably better performance in + // realworld (zfs/dmu) metadata-heavy activity. Additionally, + // the qcaches pester spl_heap_arena only for slabs 256k and bigger, + // and each of the qcache entries (powers of two from PAGESIZE to + // 64k) are *exact-fit* and therefore dramatically reduce internal + // fragmentation and more than pay off for the extra code and (tiny) + // extra data for holding the arenas' segment tables. + + extern vmem_t *spl_heap_arena; + + zio_arena_parent = vmem_create("zfs_qcache", NULL, 0, + PAGESIZE, vmem_alloc, vmem_free, spl_heap_arena, + 16 * 1024, VM_SLEEP | VMC_TIMEFREE); + + ASSERT(zio_arena_parent != NULL); + + zio_arena = vmem_create("zfs_file_data", NULL, 0, + PAGESIZE, vmem_alloc, vmem_free, zio_arena_parent, + 0, VM_SLEEP); + + zio_metadata_arena = vmem_create("zfs_metadata", NULL, 0, + PAGESIZE, vmem_alloc, vmem_free, zio_arena_parent, + 0, VM_SLEEP); + + ASSERT(zio_arena != NULL); + ASSERT(zio_metadata_arena != NULL); + + extern void spl_zio_no_grow_init(void); + spl_zio_no_grow_init(); +} + +void +segkmem_zio_fini(void) +{ + if (zio_arena) { + vmem_destroy(zio_arena); + } + if (zio_metadata_arena) { + vmem_destroy(zio_metadata_arena); + } + if (zio_arena_parent) { + vmem_destroy(zio_arena_parent); + } +} diff --git a/module/os/macos/spl/spl-taskq.c b/module/os/macos/spl/spl-taskq.c new file mode 100644 index 0000000000..7040c66f11 --- /dev/null +++ b/module/os/macos/spl/spl-taskq.c @@ -0,0 +1,2529 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Copyright (C) 2015, 2020 Jorgen Lundman + */ + +/* + * Kernel task queues: general-purpose asynchronous task scheduling. + * + * A common problem in kernel programming is the need to schedule tasks + * to be performed later, by another thread. There are several reasons + * you may want or need to do this: + * + * (1) The task isn't time-critical, but your current code path is. + * + * (2) The task may require grabbing locks that you already hold. + * + * (3) The task may need to block (e.g. to wait for memory), but you + * cannot block in your current context. + * + * (4) Your code path can't complete because of some condition, but you can't + * sleep or fail, so you queue the task for later execution when condition + * disappears. + * + * (5) You just want a simple way to launch multiple tasks in parallel. + * + * Task queues provide such a facility. In its simplest form (used when + * performance is not a critical consideration) a task queue consists of a + * single list of tasks, together with one or more threads to service the + * list. There are some cases when this simple queue is not sufficient: + * + * (1) The task queues are very hot and there is a need to avoid data and lock + * contention over global resources. + * + * (2) Some tasks may depend on other tasks to complete, so they can't be put in + * the same list managed by the same thread. + * + * (3) Some tasks may block for a long time, and this should not block other + * tasks in the queue. + * + * To provide useful service in such cases we define a "dynamic task queue" + * which has an individual thread for each of the tasks. These threads are + * dynamically created as they are needed and destroyed when they are not in + * use. The API for managing task pools is the same as for managing task queues + * with the exception of a taskq creation flag TASKQ_DYNAMIC which tells that + * dynamic task pool behavior is desired. + * + * Dynamic task queues may also place tasks in the normal queue (called "backing + * queue") when task pool runs out of resources. Users of task queues may + * disallow such queued scheduling by specifying TQ_NOQUEUE in the dispatch + * flags. + * + * The backing task queue is also used for scheduling internal tasks needed for + * dynamic task queue maintenance. + * + * INTERFACES ================================================================== + * + * taskq_t *taskq_create(name, nthreads, pri, minalloc, maxall, flags); + * + * Create a taskq with specified properties. + * Possible 'flags': + * + * TASKQ_DYNAMIC: Create task pool for task management. If this flag is + * specified, 'nthreads' specifies the maximum number of threads in + * the task queue. Task execution order for dynamic task queues is + * not predictable. + * + * If this flag is not specified (default case) a + * single-list task queue is created with 'nthreads' threads + * servicing it. 
Entries in this queue are managed by + * taskq_ent_alloc() and taskq_ent_free() which try to keep the + * task population between 'minalloc' and 'maxalloc', but the + * latter limit is only advisory for TQ_SLEEP dispatches and the + * former limit is only advisory for TQ_NOALLOC dispatches. If + * TASKQ_PREPOPULATE is set in 'flags', the taskq will be + * prepopulated with 'minalloc' task structures. + * + * Since non-DYNAMIC taskqs are queues, tasks are guaranteed to be + * executed in the order they are scheduled if nthreads == 1. + * If nthreads > 1, task execution order is not predictable. + * + * TASKQ_PREPOPULATE: Prepopulate task queue with threads. + * Also prepopulate the task queue with 'minalloc' task structures. + * + * TASKQ_THREADS_CPU_PCT: This flag specifies that 'nthreads' should be + * interpreted as a percentage of the # of online CPUs on the + * system. The taskq subsystem will automatically adjust the + * number of threads in the taskq in response to CPU online + * and offline events, to keep the ratio. nthreads must be in + * the range [0,100]. + * + * The calculation used is: + * + * MAX((ncpus_online * percentage)/100, 1) + * + * This flag is not supported for DYNAMIC task queues. + * This flag is not compatible with TASKQ_CPR_SAFE. + * + * TASKQ_CPR_SAFE: This flag specifies that users of the task queue will + * use their own protocol for handling CPR issues. This flag is not + * supported for DYNAMIC task queues. This flag is not compatible + * with TASKQ_THREADS_CPU_PCT. + * + * The 'pri' field specifies the default priority for the threads that + * service all scheduled tasks. + * + * taskq_t *taskq_create_instance(name, instance, nthreads, pri, minalloc, + * maxall, flags); + * + * Like taskq_create(), but takes an instance number (or -1 to indicate + * no instance). + * + * taskq_t *taskq_create_proc(name, nthreads, pri, minalloc, maxall, proc, + * flags); + * + * Like taskq_create(), but creates the taskq threads in the specified + * system process. If proc != &p0, this must be called from a thread + * in that process. + * + * taskq_t *taskq_create_sysdc(name, nthreads, minalloc, maxall, proc, + * dc, flags); + * + * Like taskq_create_proc(), but the taskq threads will use the + * System Duty Cycle (SDC) scheduling class with a duty cycle of dc. + * + * void taskq_destroy(tap): + * + * Waits for any scheduled tasks to complete, then destroys the taskq. + * Caller should guarantee that no new tasks are scheduled in the closing + * taskq. + * + * taskqid_t taskq_dispatch(tq, func, arg, flags): + * + * Dispatches the task "func(arg)" to taskq. The 'flags' indicates whether + * the caller is willing to block for memory. The function returns an + * opaque value which is zero iff dispatch fails. If flags is TQ_NOSLEEP + * or TQ_NOALLOC and the task can't be dispatched, taskq_dispatch() fails + * and returns (taskqid_t)0. + * + * ASSUMES: func != NULL. + * + * Possible flags: + * TQ_NOSLEEP: Do not wait for resources; may fail. + * + * TQ_NOALLOC: Do not allocate memory; may fail. May only be used with + * non-dynamic task queues. + * + * TQ_NOQUEUE: Do not enqueue a task if it can't dispatch it due to + * lack of available resources and fail. If this flag is not + * set, and the task pool is exhausted, the task may be scheduled + * in the backing queue. This flag may ONLY be used with dynamic + * task queues. + * + * NOTE: This flag should always be used when a task queue is used + * for tasks that may depend on each other for completion. 
+ * Enqueueing dependent tasks may create deadlocks. + * + * TQ_SLEEP: May block waiting for resources. May still fail for + * dynamic task queues if TQ_NOQUEUE is also specified, otherwise + * always succeed. + * + * TQ_FRONT: Puts the new task at the front of the queue. Be careful. + * + * NOTE: Dynamic task queues are much more likely to fail in + * taskq_dispatch() (especially if TQ_NOQUEUE was specified), so it + * is important to have backup strategies handling such failures. + * + * void taskq_dispatch_ent(tq, func, arg, flags, tqent) + * + * This is a light-weight form of taskq_dispatch(), that uses a + * preallocated taskq_ent_t structure for scheduling. As a + * result, it does not perform allocations and cannot ever fail. + * Note especially that it cannot be used with TASKQ_DYNAMIC + * taskqs. The memory for the tqent must not be modified or used + * until the function (func) is called. (However, func itself + * may safely modify or free this memory, once it is called.) + * Note that the taskq framework will NOT free this memory. + * + * void taskq_wait(tq): + * + * Waits for all previously scheduled tasks to complete. + * + * NOTE: It does not stop any new task dispatches. + * Do NOT call taskq_wait() from a task: it will cause deadlock. + * + * void taskq_suspend(tq) + * + * Suspend all task execution. Tasks already scheduled for a dynamic task + * queue will still be executed, but all new scheduled tasks will be + * suspended until taskq_resume() is called. + * + * int taskq_suspended(tq) + * + * Returns 1 if taskq is suspended and 0 otherwise. It is intended to + * ASSERT that the task queue is suspended. + * + * void taskq_resume(tq) + * + * Resume task queue execution. + * + * int taskq_member(tq, thread) + * + * Returns 1 if 'thread' belongs to taskq 'tq' and 0 otherwise. The + * intended use is to ASSERT that a given function is called in taskq + * context only. + * + * system_taskq + * + * Global system-wide dynamic task queue for common uses. It may be used by + * any subsystem that needs to schedule tasks and does not need to manage + * its own task queues. It is initialized quite early during system boot. + * + * IMPLEMENTATION ============================================================== + * + * This is schematic representation of the task queue structures. + * + * taskq: + * +-------------+ + * | tq_lock | +---< taskq_ent_free() + * +-------------+ | + * |... | | tqent: tqent: + * +-------------+ | +------------+ +------------+ + * | tq_freelist |-->| tqent_next |--> ... ->| tqent_next | + * +-------------+ +------------+ +------------+ + * |... | | ... | | ... | + * +-------------+ +------------+ +------------+ + * | tq_task | | + * | | +-------------->taskq_ent_alloc() + * +--------------------------------------------------------------------------+ + * | | | tqent tqent | + * | +---------------------+ +--> +------------+ +--> +------------+ | + * | | ... | | | func, arg | | | func, arg | | + * +>+---------------------+ <---|-+ +------------+ <---|-+ +------------+ | + * | tq_taskq.tqent_next | ----+ | | tqent_next | --->+ | | tqent_next |--+ + * +---------------------+ | +------------+ ^ | +------------+ + * +-| tq_task.tqent_prev | +--| tqent_prev | | +--| tqent_prev | ^ + * | +---------------------+ +------------+ | +------------+ | + * | |... | | ... | | | ... | | + * | +---------------------+ +------------+ | +------------+ | + * | ^ | | + * | | | | + * +--------------------------------------+--------------+ TQ_APPEND() -+ + * | | | + * |... 
| taskq_thread()-----+ + * +-------------+ + * | tq_buckets |--+-------> [ NULL ] (for regular task queues) + * +-------------+ | + * | DYNAMIC TASK QUEUES: + * | + * +-> taskq_bucket[nCPU] taskq_bucket_dispatch() + * +-------------------+ ^ + * +--->| tqbucket_lock | | + * | +-------------------+ +--------+ +--------+ + * | | tqbucket_freelist |-->| tqent |-->...| tqent | ^ + * | +-------------------+<--+--------+<--...+--------+ | + * | | ... | | thread | | thread | | + * | +-------------------+ +--------+ +--------+ | + * | +-------------------+ | + * taskq_dispatch()--+--->| tqbucket_lock | TQ_APPEND()------+ + * TQ_HASH() | +-------------------+ +--------+ +--------+ + * | | tqbucket_freelist |-->| tqent |-->...| tqent | + * | +-------------------+<--+--------+<--...+--------+ + * | | ... | | thread | | thread | + * | +-------------------+ +--------+ +--------+ + * +---> ... + * + * + * Task queues use tq_task field to link new entry in the queue. The queue is a + * circular doubly-linked list. Entries are put in the end of the list with + * TQ_APPEND() and processed from the front of the list by taskq_thread() in + * FIFO order. Task queue entries are cached in the free list managed by + * taskq_ent_alloc() and taskq_ent_free() functions. + * + * All threads used by task queues mark t_taskq field of the thread to + * point to the task queue. + * + * Taskq Thread Management ----------------------------------------------------- + * + * Taskq's non-dynamic threads are managed with several variables and flags: + * + * * tq_nthreads - The number of threads in taskq_thread() for the + * taskq. + * + * * tq_active - The number of threads not waiting on a CV in + * taskq_thread(); includes newly created threads + * not yet counted in tq_nthreads. + * + * * tq_nthreads_target + * - The number of threads desired for the taskq. + * + * * tq_flags & TASKQ_CHANGING + * - Indicates that tq_nthreads != tq_nthreads_target. + * + * * tq_flags & TASKQ_THREAD_CREATED + * - Indicates that a thread is being created in the taskq. + * + * During creation, tq_nthreads and tq_active are set to 0, and + * tq_nthreads_target is set to the number of threads desired. The + * TASKQ_CHANGING flag is set, and taskq_thread_create() is called to + * create the first thread. taskq_thread_create() increments tq_active, + * sets TASKQ_THREAD_CREATED, and creates the new thread. + * + * Each thread starts in taskq_thread(), clears the TASKQ_THREAD_CREATED + * flag, and increments tq_nthreads. It stores the new value of + * tq_nthreads as its "thread_id", and stores its thread pointer in the + * tq_threadlist at the (thread_id - 1). We keep the thread_id space + * densely packed by requiring that only the largest thread_id can exit during + * normal adjustment. The exception is during the destruction of the + * taskq; once tq_nthreads_target is set to zero, no new threads will be created + * for the taskq queue, so every thread can exit without any ordering being + * necessary. + * + * Threads will only process work if their thread id is <= tq_nthreads_target. + * + * When TASKQ_CHANGING is set, threads will check the current thread target + * whenever they wake up, and do whatever they can to apply its effects. 
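+ *
+ * For example (illustrative): shrinking a 4-thread taskq to 2 sets
+ * tq_nthreads_target = 2 and TASKQ_CHANGING; the threads with the two
+ * largest thread_ids exit as they wake up (largest first), and the flag
+ * is cleared once tq_nthreads matches the target again.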
+ * + * TASKQ_THREAD_CPU_PCT -------------------------------------------------------- + * + * When a taskq is created with TASKQ_THREAD_CPU_PCT, we store their requested + * percentage in tq_threads_ncpus_pct, start them off with the correct thread + * target, and add them to the taskq_cpupct_list for later adjustment. + * + * We register taskq_cpu_setup() to be called whenever a CPU changes state. It + * walks the list of TASKQ_THREAD_CPU_PCT taskqs, adjusts their nthread_target + * if need be, and wakes up all of the threads to process the change. + * + * Dynamic Task Queues Implementation ------------------------------------------ + * + * For a dynamic task queues there is a 1-to-1 mapping between a thread and + * taskq_ent_structure. Each entry is serviced by its own thread and each thread + * is controlled by a single entry. + * + * Entries are distributed over a set of buckets. To avoid using modulo + * arithmetics the number of buckets is 2^n and is determined as the nearest + * power of two roundown of the number of CPUs in the system. Tunable + * variable 'taskq_maxbuckets' limits the maximum number of buckets. Each entry + * is attached to a bucket for its lifetime and can't migrate to other buckets. + * + * Entries that have scheduled tasks are not placed in any list. The dispatch + * function sets their "func" and "arg" fields and signals the corresponding + * thread to execute the task. Once the thread executes the task it clears the + * "func" field and places an entry on the bucket cache of free entries pointed + * by "tqbucket_freelist" field. ALL entries on the free list should have "func" + * field equal to NULL. The free list is a circular doubly-linked list identical + * in structure to the tq_task list above, but entries are taken from it in LIFO + * order - the last freed entry is the first to be allocated. The + * taskq_bucket_dispatch() function gets the most recently used entry from the + * free list, sets its "func" and "arg" fields and signals a worker thread. + * + * After executing each task a per-entry thread taskq_d_thread() places its + * entry on the bucket free list and goes to a timed sleep. If it wakes up + * without getting new task it removes the entry from the free list and destroys + * itself. The thread sleep time is controlled by a tunable variable + * `taskq_thread_timeout'. + * + * There are various statistics kept in the bucket which allows for later + * analysis of taskq usage patterns. Also, a global copy of taskq creation and + * death statistics is kept in the global taskq data structure. Since thread + * creation and death happen rarely, updating such global data does not present + * a performance problem. + * + * NOTE: Threads are not bound to any CPU and there is absolutely no association + * between the bucket and actual thread CPU, so buckets are used only to + * split resources and reduce resource contention. Having threads attached + * to the CPU denoted by a bucket may reduce number of times the job + * switches between CPUs. + * + * Current algorithm creates a thread whenever a bucket has no free + * entries. It would be nice to know how many threads are in the running + * state and don't create threads if all CPUs are busy with existing + * tasks, but it is unclear how such strategy can be implemented. + * + * Currently buckets are created statically as an array attached to task + * queue. On some system with nCPUs < max_ncpus it may waste system + * memory. 
One solution may be allocation of buckets when they are first + * touched, but it is not clear how useful it is. + * + * SUSPEND/RESUME implementation ----------------------------------------------- + * + * Before executing a task taskq_thread() (executing non-dynamic task + * queues) obtains taskq's thread lock as a reader. The taskq_suspend() + * function gets the same lock as a writer blocking all non-dynamic task + * execution. The taskq_resume() function releases the lock allowing + * taskq_thread to continue execution. + * + * For dynamic task queues, each bucket is marked as TQBUCKET_SUSPEND by + * taskq_suspend() function. After that taskq_bucket_dispatch() always + * fails, so that taskq_dispatch() will either enqueue tasks for a + * suspended backing queue or fail if TQ_NOQUEUE is specified in dispatch + * flags. + * + * NOTE: taskq_suspend() does not immediately block any tasks already + * scheduled for dynamic task queues. It only suspends new tasks + * scheduled after taskq_suspend() was called. + * + * taskq_member() function works by comparing a thread t_taskq pointer with + * the passed thread pointer. + * + * LOCKS and LOCK Hierarchy ---------------------------------------------------- + * + * There are three locks used in task queues: + * + * 1) The taskq_t's tq_lock, protecting global task queue state. + * + * 2) Each per-CPU bucket has a lock for bucket management. + * + * 3) The global taskq_cpupct_lock, which protects the list of + * TASKQ_THREADS_CPU_PCT taskqs. + * + * If both (1) and (2) are needed, tq_lock should be taken *after* the bucket + * lock. + * + * If both (1) and (3) are needed, tq_lock should be taken *after* + * taskq_cpupct_lock. + * + * DEBUG FACILITIES ------------------------------------------------------------ + * + * For DEBUG kernels it is possible to induce random failures to + * taskq_dispatch() function when it is given TQ_NOSLEEP argument. The value of + * taskq_dmtbf and taskq_smtbf tunables control the mean time between induced + * failures for dynamic and static task queues respectively. + * + * Setting TASKQ_STATISTIC to 0 will disable per-bucket statistics. + * + * TUNABLES -------------------------------------------------------------------- + * + * system_taskq_size - Size of the global system_taskq. + * This value is multiplied by nCPUs to determine + * actual size. + * Default value: 64 + * + * taskq_minimum_nthreads_max + * - Minimum size of the thread list for a taskq. + * Useful for testing different thread pool + * sizes by overwriting tq_nthreads_target. + * + * taskq_thread_timeout - Maximum idle time for taskq_d_thread() + * Default value: 5 minutes + * + * taskq_maxbuckets - Maximum number of buckets in any task queue + * Default value: 128 + * + * taskq_search_depth - Maximum # of buckets searched for a free entry + * Default value: 4 + * + * taskq_dmtbf - Mean time between induced dispatch failures + * for dynamic task queues. + * Default value: UINT_MAX (no induced failures) + * + * taskq_smtbf - Mean time between induced dispatch failures + * for static task queues. + * Default value: UINT_MAX (no induced failures) + * + * CONDITIONAL compilation ----------------------------------------------------- + * + * TASKQ_STATISTIC - If set will enable bucket statistic (default). 
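+ *
+ * EXAMPLE ---------------------------------------------------------------------
+ *
+ * An illustrative sketch only (my_func and my_arg are placeholders, and
+ * minclsyspri is assumed to be the usual kernel thread priority):
+ *
+ *	tq = taskq_create("my_tq", 4, minclsyspri, 4, 8, TASKQ_PREPOPULATE);
+ *	id = taskq_dispatch(tq, my_func, my_arg, TQ_SLEEP);
+ *	if (id == 0)
+ *		... dispatch failed ...
+ *	taskq_wait(tq);		(waits for everything dispatched so far)
+ *	taskq_destroy(tq);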
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For throttlefree */ +#include +#include +#include + +static kmem_cache_t *taskq_ent_cache, *taskq_cache; + +static uint_t taskq_tsd; + +/* + * Pseudo instance numbers for taskqs without explicitly provided instance. + */ +static vmem_t *taskq_id_arena; + +/* Global system task queue for common use */ +taskq_t *system_taskq = NULL; +taskq_t *system_delay_taskq = NULL; + +/* + * Maximum number of entries in global system taskq is + * system_taskq_size * max_ncpus + */ +#define SYSTEM_TASKQ_SIZE 64 +int system_taskq_size = SYSTEM_TASKQ_SIZE; + +/* + * Minimum size for tq_nthreads_max; useful for those who want to play around + * with increasing a taskq's tq_nthreads_target. + */ +int taskq_minimum_nthreads_max = 1; + +/* + * We want to ensure that when taskq_create() returns, there is at least + * one thread ready to handle requests. To guarantee this, we have to wait + * for the second thread, since the first one cannot process requests until + * the second thread has been created. + */ +#define TASKQ_CREATE_ACTIVE_THREADS 2 + +/* Maximum percentage allowed for TASKQ_THREADS_CPU_PCT */ +#define TASKQ_CPUPCT_MAX_PERCENT 1000 +int taskq_cpupct_max_percent = TASKQ_CPUPCT_MAX_PERCENT; + +/* + * Dynamic task queue threads that don't get any work within + * taskq_thread_timeout destroy themselves + */ +#define TASKQ_THREAD_TIMEOUT (60 * 5) +int taskq_thread_timeout = TASKQ_THREAD_TIMEOUT; + +#define TASKQ_MAXBUCKETS 128 +int taskq_maxbuckets = TASKQ_MAXBUCKETS; + +/* + * When a bucket has no available entries another buckets are tried. + * taskq_search_depth parameter limits the amount of buckets that we search + * before failing. This is mostly useful in systems with many CPUs where we may + * spend too much time scanning busy buckets. + */ +#define TASKQ_SEARCH_DEPTH 4 +int taskq_search_depth = TASKQ_SEARCH_DEPTH; + +/* + * Hashing function: mix various bits of x. May be pretty much anything. + */ +#define TQ_HASH(x) ((x) ^ ((x) >> 11) ^ ((x) >> 17) ^ ((x) ^ 27)) + +/* + * We do not create any new threads when the system is low on memory and start + * throttling memory allocations. The following macro tries to estimate such + * condition. + */ +#ifdef __APPLE__ +#define ENOUGH_MEMORY() (!spl_vm_pool_low()) +#else +#define ENOUGH_MEMORY() (freemem > throttlefree) +#endif + +/* + * Static functions. + */ +static taskq_t *taskq_create_common(const char *, int, int, pri_t, int, + int, proc_t *, uint_t, uint_t); +static void taskq_thread(void *); +static void taskq_d_thread(taskq_ent_t *); +static void taskq_bucket_extend(void *); +static int taskq_constructor(void *, void *, int); +static void taskq_destructor(void *, void *); +static int taskq_ent_constructor(void *, void *, int); +static void taskq_ent_destructor(void *, void *); +static taskq_ent_t *taskq_ent_alloc(taskq_t *, int); +static void taskq_ent_free(taskq_t *, taskq_ent_t *); +static int taskq_ent_exists(taskq_t *, task_func_t, void *); +static taskq_ent_t *taskq_bucket_dispatch(taskq_bucket_t *, task_func_t, + void *); + +/* + * Task queues kstats. 
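+ *
+ * These are installed by taskq_create_common() below via
+ * kstat_create("unix", instance, tq->tq_name, ...), using class "taskq" for
+ * regular queues and "taskq_d" for dynamic ones.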
+ */ +struct taskq_kstat { + kstat_named_t tq_pid; + kstat_named_t tq_tasks; + kstat_named_t tq_executed; + kstat_named_t tq_maxtasks; + kstat_named_t tq_totaltime; + kstat_named_t tq_nalloc; + kstat_named_t tq_nactive; + kstat_named_t tq_pri; + kstat_named_t tq_nthreads; +} taskq_kstat = { + { "pid", KSTAT_DATA_UINT64 }, + { "tasks", KSTAT_DATA_UINT64 }, + { "executed", KSTAT_DATA_UINT64 }, + { "maxtasks", KSTAT_DATA_UINT64 }, + { "totaltime", KSTAT_DATA_UINT64 }, + { "nactive", KSTAT_DATA_UINT64 }, + { "nalloc", KSTAT_DATA_UINT64 }, + { "priority", KSTAT_DATA_UINT64 }, + { "threads", KSTAT_DATA_UINT64 }, +}; + +struct taskq_d_kstat { + kstat_named_t tqd_pri; + kstat_named_t tqd_btasks; + kstat_named_t tqd_bexecuted; + kstat_named_t tqd_bmaxtasks; + kstat_named_t tqd_bnalloc; + kstat_named_t tqd_bnactive; + kstat_named_t tqd_btotaltime; + kstat_named_t tqd_hits; + kstat_named_t tqd_misses; + kstat_named_t tqd_overflows; + kstat_named_t tqd_tcreates; + kstat_named_t tqd_tdeaths; + kstat_named_t tqd_maxthreads; + kstat_named_t tqd_nomem; + kstat_named_t tqd_disptcreates; + kstat_named_t tqd_totaltime; + kstat_named_t tqd_nalloc; + kstat_named_t tqd_nfree; +} taskq_d_kstat = { + { "priority", KSTAT_DATA_UINT64 }, + { "btasks", KSTAT_DATA_UINT64 }, + { "bexecuted", KSTAT_DATA_UINT64 }, + { "bmaxtasks", KSTAT_DATA_UINT64 }, + { "bnalloc", KSTAT_DATA_UINT64 }, + { "bnactive", KSTAT_DATA_UINT64 }, + { "btotaltime", KSTAT_DATA_UINT64 }, + { "hits", KSTAT_DATA_UINT64 }, + { "misses", KSTAT_DATA_UINT64 }, + { "overflows", KSTAT_DATA_UINT64 }, + { "tcreates", KSTAT_DATA_UINT64 }, + { "tdeaths", KSTAT_DATA_UINT64 }, + { "maxthreads", KSTAT_DATA_UINT64 }, + { "nomem", KSTAT_DATA_UINT64 }, + { "disptcreates", KSTAT_DATA_UINT64 }, + { "totaltime", KSTAT_DATA_UINT64 }, + { "nalloc", KSTAT_DATA_UINT64 }, + { "nfree", KSTAT_DATA_UINT64 }, +}; + +static kmutex_t taskq_kstat_lock; +static kmutex_t taskq_d_kstat_lock; +static int taskq_kstat_update(kstat_t *, int); +static int taskq_d_kstat_update(kstat_t *, int); + +/* + * List of all TASKQ_THREADS_CPU_PCT taskqs. + */ +static list_t taskq_cpupct_list; /* protected by cpu_lock */ + +/* + * Collect per-bucket statistic when TASKQ_STATISTIC is defined. + */ +#define TASKQ_STATISTIC 1 + +#if TASKQ_STATISTIC +#define TQ_STAT(b, x) b->tqbucket_stat.x++ +#else +#define TQ_STAT(b, x) +#endif + +/* + * Random fault injection. + */ +uint_t taskq_random; +uint_t taskq_dmtbf = UINT_MAX; /* mean time between injected failures */ +uint_t taskq_smtbf = UINT_MAX; /* mean time between injected failures */ + +/* + * TQ_NOSLEEP dispatches on dynamic task queues are always allowed to fail. + * + * TQ_NOSLEEP dispatches on static task queues can't arbitrarily fail because + * they could prepopulate the cache and make sure that they do not use more + * then minalloc entries. So, fault injection in this case insures that + * either TASKQ_PREPOPULATE is not set or there are more entries allocated + * than is specified by minalloc. TQ_NOALLOC dispatches are always allowed + * to fail, but for simplicity we treat them identically to TQ_NOSLEEP + * dispatches. 
+ */ +#ifdef DEBUG +#define TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flag) \ + taskq_random = (taskq_random * 2416 + 374441) % 1771875;\ + if ((flag & TQ_NOSLEEP) && \ + taskq_random < 1771875 / taskq_dmtbf) { \ + return (0); \ + } + +#define TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flag) \ + taskq_random = (taskq_random * 2416 + 374441) % 1771875;\ + if ((flag & (TQ_NOSLEEP | TQ_NOALLOC)) && \ + (!(tq->tq_flags & TASKQ_PREPOPULATE) || \ + (tq->tq_nalloc > tq->tq_minalloc)) && \ + (taskq_random < (1771875 / taskq_smtbf))) { \ + mutex_exit(&tq->tq_lock); \ + return (0); \ + } +#else +#define TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flag) +#define TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flag) +#endif + +#define IS_EMPTY(l) (((l).tqent_prev == (l).tqent_next) && \ + ((l).tqent_prev == &(l))) + +/* + * Append `tqe' in the end of the doubly-linked list denoted by l. + */ +#define TQ_APPEND(l, tqe) { \ + tqe->tqent_next = &l; \ + tqe->tqent_prev = l.tqent_prev; \ + tqe->tqent_next->tqent_prev = tqe; \ + tqe->tqent_prev->tqent_next = tqe; \ +} +/* + * Prepend 'tqe' to the beginning of l + */ +#define TQ_PREPEND(l, tqe) { \ + tqe->tqent_next = l.tqent_next; \ + tqe->tqent_prev = &l; \ + tqe->tqent_next->tqent_prev = tqe; \ + tqe->tqent_prev->tqent_next = tqe; \ +} + +/* + * Schedule a task specified by func and arg into the task queue entry tqe. + */ +#define TQ_DO_ENQUEUE(tq, tqe, func, arg, front) { \ + ASSERT(MUTEX_HELD(&tq->tq_lock)); \ + _NOTE(CONSTCOND) \ + if (front) { \ + TQ_PREPEND(tq->tq_task, tqe); \ + } else { \ + TQ_APPEND(tq->tq_task, tqe); \ + } \ + tqe->tqent_func = (func); \ + tqe->tqent_arg = (arg); \ + tq->tq_tasks++; \ + if (tq->tq_tasks - tq->tq_executed > tq->tq_maxtasks) \ + tq->tq_maxtasks = tq->tq_tasks - tq->tq_executed; \ + cv_signal(&tq->tq_dispatch_cv); \ + DTRACE_PROBE2(taskq__enqueue, taskq_t *, tq, taskq_ent_t *, tqe); \ +} + +#define TQ_ENQUEUE(tq, tqe, func, arg) \ + TQ_DO_ENQUEUE(tq, tqe, func, arg, 0) + +#define TQ_ENQUEUE_FRONT(tq, tqe, func, arg) \ + TQ_DO_ENQUEUE(tq, tqe, func, arg, 1) + +/* + * Do-nothing task which may be used to prepopulate thread caches. 
+ */ +/*ARGSUSED*/ +void +nulltask(void *unused) +{ +} + +/*ARGSUSED*/ +static int +taskq_constructor(void *buf, void *cdrarg, int kmflags) +{ + taskq_t *tq = buf; + + bzero(tq, sizeof (taskq_t)); + + mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); + cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_exit_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); + + tq->tq_task.tqent_next = &tq->tq_task; + tq->tq_task.tqent_prev = &tq->tq_task; + + return (0); +} + +/*ARGSUSED*/ +static void +taskq_destructor(void *buf, void *cdrarg) +{ + taskq_t *tq = buf; + + ASSERT(tq->tq_nthreads == 0); + ASSERT(tq->tq_buckets == NULL); + ASSERT(tq->tq_tcreates == 0); + ASSERT(tq->tq_tdeaths == 0); + + mutex_destroy(&tq->tq_lock); + rw_destroy(&tq->tq_threadlock); + cv_destroy(&tq->tq_dispatch_cv); + cv_destroy(&tq->tq_exit_cv); + cv_destroy(&tq->tq_wait_cv); + cv_destroy(&tq->tq_maxalloc_cv); +} + +/*ARGSUSED*/ +static int +taskq_ent_constructor(void *buf, void *cdrarg, int kmflags) +{ + taskq_ent_t *tqe = buf; + + tqe->tqent_thread = NULL; + cv_init(&tqe->tqent_cv, NULL, CV_DEFAULT, NULL); +#ifdef __APPLE__ + /* Simulate TS_STOPPED */ + mutex_init(&tqe->tqent_thread_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&tqe->tqent_thread_cv, NULL, CV_DEFAULT, NULL); +#endif /* __APPLE__ */ + + return (0); +} + +/*ARGSUSED*/ +static void +taskq_ent_destructor(void *buf, void *cdrarg) +{ + taskq_ent_t *tqe = buf; + + ASSERT(tqe->tqent_thread == NULL); + cv_destroy(&tqe->tqent_cv); +#ifdef __APPLE__ + /* See comment in taskq_d_thread(). */ + mutex_destroy(&tqe->tqent_thread_lock); + cv_destroy(&tqe->tqent_thread_cv); +#endif /* __APPLE__ */ +} + +int +spl_taskq_init(void) +{ + tsd_create(&taskq_tsd, NULL); + + taskq_ent_cache = kmem_cache_create("taskq_ent_cache", + sizeof (taskq_ent_t), 0, taskq_ent_constructor, + taskq_ent_destructor, NULL, NULL, NULL, 0); + taskq_cache = kmem_cache_create("taskq_cache", sizeof (taskq_t), + 0, taskq_constructor, taskq_destructor, NULL, NULL, NULL, 0); + taskq_id_arena = vmem_create("taskq_id_arena", + (void *)1, INT32_MAX, 1, NULL, NULL, NULL, 0, + VM_SLEEP | VMC_IDENTIFIER); + + list_create(&taskq_cpupct_list, sizeof (taskq_t), + offsetof(taskq_t, tq_cpupct_link)); + + mutex_init(&taskq_kstat_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&taskq_d_kstat_lock, NULL, MUTEX_DEFAULT, NULL); + + return (0); +} + +void +spl_taskq_fini(void) +{ + mutex_destroy(&taskq_d_kstat_lock); + mutex_destroy(&taskq_kstat_lock); + + if (taskq_cache) { + kmem_cache_destroy(taskq_cache); + taskq_cache = NULL; + } + if (taskq_ent_cache) { + kmem_cache_destroy(taskq_ent_cache); + taskq_ent_cache = NULL; + } + + list_destroy(&taskq_cpupct_list); + + vmem_destroy(taskq_id_arena); + + tsd_destroy(&taskq_tsd); +} + + + + +static void +taskq_update_nthreads(taskq_t *tq, uint_t ncpus) +{ + uint_t newtarget = TASKQ_THREADS_PCT(ncpus, tq->tq_threads_ncpus_pct); + +#ifndef __APPLE__ + ASSERT(MUTEX_HELD(&cpu_lock)); +#endif + ASSERT(MUTEX_HELD(&tq->tq_lock)); + + /* We must be going from non-zero to non-zero; no exiting. 
*/ + ASSERT3U(tq->tq_nthreads_target, !=, 0); + ASSERT3U(newtarget, !=, 0); + + ASSERT3U(newtarget, <=, tq->tq_nthreads_max); + if (newtarget != tq->tq_nthreads_target) { + tq->tq_flags |= TASKQ_CHANGING; + tq->tq_nthreads_target = newtarget; + cv_broadcast(&tq->tq_dispatch_cv); + cv_broadcast(&tq->tq_exit_cv); + } +} + +#ifndef __APPLE__ +/* No dynamic CPU add/remove in XNU, so we can just use static ncpu math */ + +/* called during task queue creation */ +static void +taskq_cpupct_install(taskq_t *tq, cpupart_t *cpup) +{ + ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT); + + mutex_enter(&cpu_lock); + mutex_enter(&tq->tq_lock); + tq->tq_cpupart = cpup->cp_id; + taskq_update_nthreads(tq, cpup->cp_ncpus); + mutex_exit(&tq->tq_lock); + + list_insert_tail(&taskq_cpupct_list, tq); + mutex_exit(&cpu_lock); +} + +static void +taskq_cpupct_remove(taskq_t *tq) +{ + ASSERT(tq->tq_flags & TASKQ_THREADS_CPU_PCT); + + mutex_enter(&cpu_lock); + list_remove(&taskq_cpupct_list, tq); + mutex_exit(&cpu_lock); +} + +/*ARGSUSED*/ +static int +taskq_cpu_setup(cpu_setup_t what, int id, void *arg) +{ + taskq_t *tq; + cpupart_t *cp = cpu[id]->cpu_part; + uint_t ncpus = cp->cp_ncpus; + + ASSERT(MUTEX_HELD(&cpu_lock)); + ASSERT(ncpus > 0); + + switch (what) { + case CPU_OFF: + case CPU_CPUPART_OUT: + /* offlines are called *before* the cpu is offlined. */ + if (ncpus > 1) + ncpus--; + break; + + case CPU_ON: + case CPU_CPUPART_IN: + break; + + default: + return (0); /* doesn't affect cpu count */ + } + + for (tq = list_head(&taskq_cpupct_list); tq != NULL; + tq = list_next(&taskq_cpupct_list, tq)) { + + mutex_enter(&tq->tq_lock); + /* + * If the taskq is part of the cpuset which is changing, + * update its nthreads_target. + */ + if (tq->tq_cpupart == cp->cp_id) { + taskq_update_nthreads(tq, ncpus); + } + mutex_exit(&tq->tq_lock); + } + return (0); +} + +void +taskq_mp_init(void) +{ + mutex_enter(&cpu_lock); + register_cpu_setup_func(taskq_cpu_setup, NULL); + /* + * Make sure we're up to date. At this point in boot, there is only + * one processor set, so we only have to update the current CPU. + */ + (void) taskq_cpu_setup(CPU_ON, CPU->cpu_id, NULL); + mutex_exit(&cpu_lock); +} +#endif /* __APPLE__ */ + + +/* + * Create global system dynamic task queue. + */ +void +system_taskq_init(void) +{ + system_taskq = taskq_create_common("system_taskq", 0, + system_taskq_size * max_ncpus, minclsyspri, 4, 512, &p0, 0, + TASKQ_DYNAMIC | TASKQ_PREPOPULATE); + system_delay_taskq = taskq_create("system_delay_taskq", max_ncpus, + minclsyspri, 0, 0, 0); +} + + +void +system_taskq_fini(void) +{ + if (system_taskq) + taskq_destroy(system_delay_taskq); + if (system_taskq) + taskq_destroy(system_taskq); + system_taskq = NULL; +} + +/* + * taskq_ent_alloc() + * + * Allocates a new taskq_ent_t structure either from the free list or from the + * cache. Returns NULL if it can't be allocated. + * + * Assumes: tq->tq_lock is held. + */ +static taskq_ent_t * +taskq_ent_alloc(taskq_t *tq, int flags) +{ + int kmflags = (flags & TQ_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; + taskq_ent_t *tqe; + clock_t wait_time; + clock_t wait_rv; + + ASSERT(MUTEX_HELD(&tq->tq_lock)); + + /* + * TQ_NOALLOC allocations are allowed to use the freelist, even if + * we are below tq_minalloc. 
+ */ +again: if ((tqe = tq->tq_freelist) != NULL && + ((flags & TQ_NOALLOC) || tq->tq_nalloc >= tq->tq_minalloc)) { + tq->tq_freelist = tqe->tqent_next; + } else { + if (flags & TQ_NOALLOC) + return (NULL); + + if (tq->tq_nalloc >= tq->tq_maxalloc) { + if (kmflags & KM_NOSLEEP) + return (NULL); + + /* + * We don't want to exceed tq_maxalloc, but we can't + * wait for other tasks to complete (and thus free up + * task structures) without risking deadlock with + * the caller. So, we just delay for one second + * to throttle the allocation rate. If we have tasks + * complete before one second timeout expires then + * taskq_ent_free will signal us and we will + * immediately retry the allocation (reap free). + */ + wait_time = ddi_get_lbolt() + hz; + while (tq->tq_freelist == NULL) { + tq->tq_maxalloc_wait++; + wait_rv = cv_timedwait(&tq->tq_maxalloc_cv, + &tq->tq_lock, wait_time); + tq->tq_maxalloc_wait--; + if (wait_rv == -1) + break; + } + if (tq->tq_freelist) + goto again; /* reap freelist */ + + } + mutex_exit(&tq->tq_lock); + + tqe = kmem_cache_alloc(taskq_ent_cache, kmflags); + + mutex_enter(&tq->tq_lock); + if (tqe != NULL) + tq->tq_nalloc++; + } + return (tqe); +} + +/* + * taskq_ent_free() + * + * Free taskq_ent_t structure by either putting it on the free list or freeing + * it to the cache. + * + * Assumes: tq->tq_lock is held. + */ +static void +taskq_ent_free(taskq_t *tq, taskq_ent_t *tqe) +{ + ASSERT(MUTEX_HELD(&tq->tq_lock)); + + if (tq->tq_nalloc <= tq->tq_minalloc) { + tqe->tqent_next = tq->tq_freelist; + tq->tq_freelist = tqe; + } else { + tq->tq_nalloc--; + mutex_exit(&tq->tq_lock); + kmem_cache_free(taskq_ent_cache, tqe); + mutex_enter(&tq->tq_lock); + } + + if (tq->tq_maxalloc_wait) + cv_signal(&tq->tq_maxalloc_cv); +} + +/* + * taskq_ent_exists() + * + * Return 1 if taskq already has entry for calling 'func(arg)'. + * + * Assumes: tq->tq_lock is held. + */ +static int +taskq_ent_exists(taskq_t *tq, task_func_t func, void *arg) +{ + taskq_ent_t *tqe; + + ASSERT(MUTEX_HELD(&tq->tq_lock)); + + for (tqe = tq->tq_task.tqent_next; tqe != &tq->tq_task; + tqe = tqe->tqent_next) + if ((tqe->tqent_func == func) && (tqe->tqent_arg == arg)) + return (1); + return (0); +} + +/* + * Dispatch a task "func(arg)" to a free entry of bucket b. + * + * Assumes: no bucket locks is held. + * + * Returns: a pointer to an entry if dispatch was successful. + * NULL if there are no free entries or if the bucket is suspended. + */ +static taskq_ent_t * +taskq_bucket_dispatch(taskq_bucket_t *b, task_func_t func, void *arg) +{ + taskq_ent_t *tqe; + + ASSERT(MUTEX_NOT_HELD(&b->tqbucket_lock)); + ASSERT(func != NULL); + + mutex_enter(&b->tqbucket_lock); + + ASSERT(b->tqbucket_nfree != 0 || IS_EMPTY(b->tqbucket_freelist)); + ASSERT(b->tqbucket_nfree == 0 || !IS_EMPTY(b->tqbucket_freelist)); + + /* + * Get en entry from the freelist if there is one. + * Schedule task into the entry. 
+ */ + if ((b->tqbucket_nfree != 0) && + !(b->tqbucket_flags & TQBUCKET_SUSPEND)) { + tqe = b->tqbucket_freelist.tqent_prev; + + ASSERT(tqe != &b->tqbucket_freelist); + ASSERT(tqe->tqent_thread != NULL); + + tqe->tqent_prev->tqent_next = tqe->tqent_next; + tqe->tqent_next->tqent_prev = tqe->tqent_prev; + b->tqbucket_nalloc++; + b->tqbucket_nfree--; + tqe->tqent_func = func; + tqe->tqent_arg = arg; + TQ_STAT(b, tqs_hits); + cv_signal(&tqe->tqent_cv); + DTRACE_PROBE2(taskq__d__enqueue, taskq_bucket_t *, b, + taskq_ent_t *, tqe); + } else { + tqe = NULL; + TQ_STAT(b, tqs_misses); + } + mutex_exit(&b->tqbucket_lock); + return (tqe); +} + +/* + * Dispatch a task. + * + * Assumes: func != NULL + * + * Returns: NULL if dispatch failed. + * non-NULL if task dispatched successfully. + * Actual return value is the pointer to taskq entry that was used to + * dispatch a task. This is useful for debugging. + */ +taskqid_t +taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) +{ + taskq_bucket_t *bucket = NULL; /* Which bucket needs extension */ + taskq_ent_t *tqe = NULL; + taskq_ent_t *tqe1; + uint_t bsize; + + ASSERT(tq != NULL); + ASSERT(func != NULL); + + if (!(tq->tq_flags & TASKQ_DYNAMIC)) { + /* + * TQ_NOQUEUE flag can't be used with non-dynamic task queues. + */ + ASSERT(!(flags & TQ_NOQUEUE)); + /* + * Enqueue the task to the underlying queue. + */ + mutex_enter(&tq->tq_lock); + + TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flags); + + if ((tqe = taskq_ent_alloc(tq, flags)) == NULL) { + mutex_exit(&tq->tq_lock); + return (0); + } + /* Make sure we start without any flags */ + tqe->tqent_un.tqent_flags = 0; + + if (flags & TQ_FRONT) { + TQ_ENQUEUE_FRONT(tq, tqe, func, arg); + } else { + TQ_ENQUEUE(tq, tqe, func, arg); + } + mutex_exit(&tq->tq_lock); + return ((taskqid_t)tqe); + } + + /* + * Dynamic taskq dispatching. + */ + ASSERT(!(flags & (TQ_NOALLOC | TQ_FRONT))); + TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flags); + + bsize = tq->tq_nbuckets; + + if (bsize == 1) { + /* + * In a single-CPU case there is only one bucket, so get + * entry directly from there. + */ + if ((tqe = taskq_bucket_dispatch(tq->tq_buckets, func, arg)) + != NULL) + return ((taskqid_t)tqe); /* Fastpath */ + bucket = tq->tq_buckets; + } else { + int loopcount; + taskq_bucket_t *b; + // uintptr_t h = ((uintptr_t)CPU + (uintptr_t)arg) >> 3; + uintptr_t h = ((uintptr_t)(cpu_number()<<3) + + (uintptr_t)arg) >> 3; + + h = TQ_HASH(h); + + /* + * The 'bucket' points to the original bucket that we hit. If we + * can't allocate from it, we search other buckets, but only + * extend this one. + */ + b = &tq->tq_buckets[h & (bsize - 1)]; + ASSERT(b->tqbucket_taskq == tq); /* Sanity check */ + + /* + * Do a quick check before grabbing the lock. If the bucket does + * not have free entries now, chances are very small that it + * will after we take the lock, so we just skip it. + */ + if (b->tqbucket_nfree != 0) { + if ((tqe = taskq_bucket_dispatch(b, func, arg)) != NULL) + return ((taskqid_t)tqe); /* Fastpath */ + } else { + TQ_STAT(b, tqs_misses); + } + + bucket = b; + loopcount = MIN(taskq_search_depth, bsize); + /* + * If bucket dispatch failed, search loopcount number of buckets + * before we give up and fail. 
+ */ + do { + b = &tq->tq_buckets[++h & (bsize - 1)]; + ASSERT(b->tqbucket_taskq == tq); /* Sanity check */ + loopcount--; + + if (b->tqbucket_nfree != 0) { + tqe = taskq_bucket_dispatch(b, func, arg); + } else { + TQ_STAT(b, tqs_misses); + } + } while ((tqe == NULL) && (loopcount > 0)); + } + + /* + * At this point we either scheduled a task and (tqe != NULL) or failed + * (tqe == NULL). Try to recover from fails. + */ + + /* + * For KM_SLEEP dispatches, try to extend the bucket and retry dispatch. + */ + if ((tqe == NULL) && !(flags & TQ_NOSLEEP)) { + /* + * taskq_bucket_extend() may fail to do anything, but this is + * fine - we deal with it later. If the bucket was successfully + * extended, there is a good chance that taskq_bucket_dispatch() + * will get this new entry, unless someone is racing with us and + * stealing the new entry from under our nose. + * taskq_bucket_extend() may sleep. + */ + taskq_bucket_extend(bucket); + TQ_STAT(bucket, tqs_disptcreates); + if ((tqe = taskq_bucket_dispatch(bucket, func, arg)) != NULL) + return ((taskqid_t)tqe); + } + + ASSERT(bucket != NULL); + + /* + * Since there are not enough free entries in the bucket, add a + * taskq entry to extend it in the background using backing queue + * (unless we already have a taskq entry to perform that extension). + */ + mutex_enter(&tq->tq_lock); + if (!taskq_ent_exists(tq, taskq_bucket_extend, bucket)) { + if ((tqe1 = taskq_ent_alloc(tq, TQ_NOSLEEP)) != NULL) { + TQ_ENQUEUE_FRONT(tq, tqe1, taskq_bucket_extend, bucket); + } else { + TQ_STAT(bucket, tqs_nomem); + } + } + + /* + * Dispatch failed and we can't find an entry to schedule a task. + * Revert to the backing queue unless TQ_NOQUEUE was asked. + */ + if ((tqe == NULL) && !(flags & TQ_NOQUEUE)) { + if ((tqe = taskq_ent_alloc(tq, flags)) != NULL) { + TQ_ENQUEUE(tq, tqe, func, arg); + } else { + TQ_STAT(bucket, tqs_nomem); + } + } + mutex_exit(&tq->tq_lock); + + return ((taskqid_t)tqe); +} + +/* + * FIXME, Linux has added the ability to start taskq with a given + * delay. + */ +taskqid_t +taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, + uint_t flags, clock_t expire_time) +{ + clock_t timo; + + /* If it has already expired, just dispatch */ + timo = expire_time - ddi_get_lbolt(); + if (timo <= 0) + return (taskq_dispatch(tq, func, arg, flags)); + + /* Insert delayed code here: */ + return (0); +} + +void +taskq_init_ent(taskq_ent_t *t) +{ + memset(t, 0, sizeof (*t)); +} + +void +taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, + taskq_ent_t *tqe) +{ + ASSERT(func != NULL); + ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC)); + + /* + * Mark it as a prealloc'd task. This is important + * to ensure that we don't free it later. + */ + tqe->tqent_un.tqent_flags |= TQENT_FLAG_PREALLOC; + /* + * Enqueue the task to the underlying queue. + */ + mutex_enter(&tq->tq_lock); + + if (flags & TQ_FRONT) { + TQ_ENQUEUE_FRONT(tq, tqe, func, arg); + } else { + TQ_ENQUEUE(tq, tqe, func, arg); + } + mutex_exit(&tq->tq_lock); +} + +/* + * Allow our caller to ask if there are tasks pending on the queue. + */ +boolean_t +taskq_empty(taskq_t *tq) +{ + boolean_t rv; + + mutex_enter(&tq->tq_lock); + rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0); + mutex_exit(&tq->tq_lock); + + return (rv); +} + +int +taskq_empty_ent(taskq_ent_t *t) +{ + return (IS_EMPTY(*t)); +} + +/* + * Wait for all pending tasks to complete. + * Calling taskq_wait from a task will cause deadlock. 
+ */ +void +taskq_wait(taskq_t *tq) +{ +#ifndef __APPLE__ + ASSERT(tq != curthread->t_taskq); +#endif + + if (tq == NULL) + return; + + mutex_enter(&tq->tq_lock); + while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + mutex_exit(&tq->tq_lock); + + if (tq->tq_flags & TASKQ_DYNAMIC) { + taskq_bucket_t *b = tq->tq_buckets; + int bid = 0; + for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) { + mutex_enter(&b->tqbucket_lock); + while (b->tqbucket_nalloc > 0) + cv_wait(&b->tqbucket_cv, &b->tqbucket_lock); + mutex_exit(&b->tqbucket_lock); + } + } +} + +/* + * ZOL implements taskq_wait_id() that can wait for a specific + * taskq to finish, rather than all active taskqs. Until it is + * implemented, we wait for all to complete. + */ +void +taskq_wait_id(taskq_t *tq, taskqid_t id) +{ + return (taskq_wait(tq)); +} + +/* + * Suspend execution of tasks. + * + * Tasks in the queue part will be suspended immediately upon return from this + * function. Pending tasks in the dynamic part will continue to execute, but all + * new tasks will be suspended. + */ +void +taskq_suspend(taskq_t *tq) +{ + rw_enter(&tq->tq_threadlock, RW_WRITER); + + if (tq->tq_flags & TASKQ_DYNAMIC) { + taskq_bucket_t *b = tq->tq_buckets; + int bid = 0; + for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) { + mutex_enter(&b->tqbucket_lock); + b->tqbucket_flags |= TQBUCKET_SUSPEND; + mutex_exit(&b->tqbucket_lock); + } + } + /* + * Mark task queue as being suspended. Needed for taskq_suspended(). + */ + mutex_enter(&tq->tq_lock); + ASSERT(!(tq->tq_flags & TASKQ_SUSPENDED)); + tq->tq_flags |= TASKQ_SUSPENDED; + mutex_exit(&tq->tq_lock); +} + +/* + * returns: 1 if tq is suspended, 0 otherwise. + */ +int +taskq_suspended(taskq_t *tq) +{ + return ((tq->tq_flags & TASKQ_SUSPENDED) != 0); +} + +/* + * Resume taskq execution. + */ +void +taskq_resume(taskq_t *tq) +{ + ASSERT(RW_WRITE_HELD(&tq->tq_threadlock)); + + if (tq->tq_flags & TASKQ_DYNAMIC) { + taskq_bucket_t *b = tq->tq_buckets; + int bid = 0; + for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) { + mutex_enter(&b->tqbucket_lock); + b->tqbucket_flags &= ~TQBUCKET_SUSPEND; + mutex_exit(&b->tqbucket_lock); + } + } + mutex_enter(&tq->tq_lock); + ASSERT(tq->tq_flags & TASKQ_SUSPENDED); + tq->tq_flags &= ~TASKQ_SUSPENDED; + mutex_exit(&tq->tq_lock); + + rw_exit(&tq->tq_threadlock); +} + +int +taskq_member(taskq_t *tq, kthread_t *thread) +{ + return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, thread)); +} + +taskq_t * +taskq_of_curthread(void) +{ + return (tsd_get(taskq_tsd)); +} + +/* + * Cancel an already dispatched task given the task id. Still pending tasks + * will be immediately canceled, and if the task is active the function will + * block until it completes. Preallocated tasks which are canceled must be + * freed by the caller. + */ +int +taskq_cancel_id(taskq_t *tq, taskqid_t id) +{ + // taskq_t *task = (taskq_t *) id; + + /* So we want to tell task to stop, and wait until it does */ + if (!EMPTY_TASKQ(tq)) + taskq_wait(tq); + + return (0); +} + +/* + * Creates a thread in the taskq. We only allow one outstanding create at + * a time. We drop and reacquire the tq_lock in order to avoid blocking other + * taskq activity while thread_create() or lwp_kernel_create() run. + * + * The first time we're called, we do some additional setup, and do not + * return until there are enough threads to start servicing requests. 
+ */ +static void +taskq_thread_create(taskq_t *tq) +{ + kthread_t *t; + const boolean_t first = (tq->tq_nthreads == 0); + + ASSERT(MUTEX_HELD(&tq->tq_lock)); + ASSERT(tq->tq_flags & TASKQ_CHANGING); + ASSERT(tq->tq_nthreads < tq->tq_nthreads_target); + ASSERT(!(tq->tq_flags & TASKQ_THREAD_CREATED)); + + tq->tq_flags |= TASKQ_THREAD_CREATED; + tq->tq_active++; + mutex_exit(&tq->tq_lock); + + /* + * With TASKQ_DUTY_CYCLE the new thread must have an LWP + * as explained in ../disp/sysdc.c (for the msacct data). + * Otherwise simple kthreads are preferred. + */ + if ((tq->tq_flags & TASKQ_DUTY_CYCLE) != 0) { + /* Enforced in taskq_create_common */ + printf("SPL: taskq_thread_create(TASKQ_DUTY_CYCLE) seen\n"); +#ifndef __APPLE__ + ASSERT3P(tq->tq_proc, !=, &p0); + t = lwp_kernel_create(tq->tq_proc, taskq_thread, tq, TS_RUN, + tq->tq_pri); +#endif + } else { + t = thread_create(NULL, 0, taskq_thread, tq, 0, tq->tq_proc, + TS_RUN, tq->tq_pri); + } + + if (!first) { + mutex_enter(&tq->tq_lock); + return; + } + + /* + * We know the thread cannot go away, since tq cannot be + * destroyed until creation has completed. We can therefore + * safely dereference t. + */ + if (tq->tq_flags & TASKQ_THREADS_CPU_PCT) { +#ifdef __APPLE__ + mutex_enter(&tq->tq_lock); + taskq_update_nthreads(tq, max_ncpus); + mutex_exit(&tq->tq_lock); +#else + taskq_cpupct_install(tq, t->t_cpupart); +#endif + } + mutex_enter(&tq->tq_lock); + + /* Wait until we can service requests. */ + while (tq->tq_nthreads != tq->tq_nthreads_target && + tq->tq_nthreads < TASKQ_CREATE_ACTIVE_THREADS) { + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + } +} + +/* + * Common "sleep taskq thread" function, which handles CPR stuff, as well + * as giving a nice common point for debuggers to find inactive threads. + */ +static clock_t +taskq_thread_wait(taskq_t *tq, kmutex_t *mx, kcondvar_t *cv, + callb_cpr_t *cprinfo, clock_t timeout) +{ + clock_t ret = 0; + + if (!(tq->tq_flags & TASKQ_CPR_SAFE)) { + CALLB_CPR_SAFE_BEGIN(cprinfo); + } + if ((signed long)timeout < 0) + cv_wait(cv, mx); + else + ret = cv_reltimedwait(cv, mx, timeout, TR_CLOCK_TICK); + + if (!(tq->tq_flags & TASKQ_CPR_SAFE)) { + CALLB_CPR_SAFE_END(cprinfo, mx); + } + + return (ret); +} + +/* + * Worker thread for processing task queue. + */ +static void +taskq_thread(void *arg) +{ + int thread_id; + + taskq_t *tq = arg; + taskq_ent_t *tqe; + callb_cpr_t cprinfo; + hrtime_t start, end; + boolean_t freeit; + + CALLB_CPR_INIT(&cprinfo, &tq->tq_lock, callb_generic_cpr, + tq->tq_name); + + tsd_set(taskq_tsd, tq); + mutex_enter(&tq->tq_lock); + thread_id = ++tq->tq_nthreads; + ASSERT(tq->tq_flags & TASKQ_THREAD_CREATED); + ASSERT(tq->tq_flags & TASKQ_CHANGING); + tq->tq_flags &= ~TASKQ_THREAD_CREATED; + + VERIFY3S(thread_id, <=, tq->tq_nthreads_max); + + if (tq->tq_nthreads_max == 1) + tq->tq_thread = (kthread_t *)curthread; + else + tq->tq_threadlist[thread_id - 1] = (kthread_t *)curthread; + + /* Allow taskq_create_common()'s taskq_thread_create() to return. */ + if (tq->tq_nthreads == TASKQ_CREATE_ACTIVE_THREADS) + cv_broadcast(&tq->tq_wait_cv); + + for (;;) { + if (tq->tq_flags & TASKQ_CHANGING) { + /* See if we're no longer needed */ + if (thread_id > tq->tq_nthreads_target) { + /* + * To preserve the one-to-one mapping between + * thread_id and thread, we must exit from + * highest thread ID to least. + * + * However, if everyone is exiting, the order + * doesn't matter, so just exit immediately. 
+ * (this is safe, since you must wait for + * nthreads to reach 0 after setting + * tq_nthreads_target to 0) + */ + if (thread_id == tq->tq_nthreads || + tq->tq_nthreads_target == 0) + break; + + /* Wait for higher thread_ids to exit */ + (void) taskq_thread_wait(tq, &tq->tq_lock, + &tq->tq_exit_cv, &cprinfo, -1); + continue; + } + + /* + * If no thread is starting taskq_thread(), we can + * do some bookkeeping. + */ + if (!(tq->tq_flags & TASKQ_THREAD_CREATED)) { + /* Check if we've reached our target */ + if (tq->tq_nthreads == tq->tq_nthreads_target) { + tq->tq_flags &= ~TASKQ_CHANGING; + cv_broadcast(&tq->tq_wait_cv); + } + /* Check if we need to create a thread */ + if (tq->tq_nthreads < tq->tq_nthreads_target) { + taskq_thread_create(tq); + continue; /* tq_lock was dropped */ + } + } + } + if ((tqe = tq->tq_task.tqent_next) == &tq->tq_task) { + if (--tq->tq_active == 0) + cv_broadcast(&tq->tq_wait_cv); + (void) taskq_thread_wait(tq, &tq->tq_lock, + &tq->tq_dispatch_cv, &cprinfo, -1); + tq->tq_active++; + continue; + } + + tqe->tqent_prev->tqent_next = tqe->tqent_next; + tqe->tqent_next->tqent_prev = tqe->tqent_prev; + mutex_exit(&tq->tq_lock); + + /* + * For prealloc'd tasks, we don't free anything. We + * have to check this now, because once we call the + * function for a prealloc'd taskq, we can't touch the + * tqent any longer (calling the function returns the + * ownershp of the tqent back to caller of + * taskq_dispatch.) + */ + if ((!(tq->tq_flags & TASKQ_DYNAMIC)) && + (tqe->tqent_un.tqent_flags & TQENT_FLAG_PREALLOC)) { + /* clear pointers to assist assertion checks */ + tqe->tqent_next = tqe->tqent_prev = NULL; + freeit = B_FALSE; + } else { + freeit = B_TRUE; + } + + rw_enter(&tq->tq_threadlock, RW_READER); + start = gethrtime(); + DTRACE_PROBE2(taskq__exec__start, taskq_t *, tq, + taskq_ent_t *, tqe); + tqe->tqent_func(tqe->tqent_arg); + DTRACE_PROBE2(taskq__exec__end, taskq_t *, tq, + taskq_ent_t *, tqe); + end = gethrtime(); + rw_exit(&tq->tq_threadlock); + + mutex_enter(&tq->tq_lock); + tq->tq_totaltime += end - start; + tq->tq_executed++; + + if (freeit) + taskq_ent_free(tq, tqe); + } + + if (tq->tq_nthreads_max == 1) + tq->tq_thread = NULL; + else + tq->tq_threadlist[thread_id - 1] = NULL; + + /* We're exiting, and therefore no longer active */ + ASSERT(tq->tq_active > 0); + tq->tq_active--; + + ASSERT(tq->tq_nthreads > 0); + tq->tq_nthreads--; + + /* Wake up anyone waiting for us to exit */ + cv_broadcast(&tq->tq_exit_cv); + if (tq->tq_nthreads == tq->tq_nthreads_target) { + if (!(tq->tq_flags & TASKQ_THREAD_CREATED)) + tq->tq_flags &= ~TASKQ_CHANGING; + + cv_broadcast(&tq->tq_wait_cv); + } + + tsd_set(taskq_tsd, NULL); + + CALLB_CPR_EXIT(&cprinfo); + thread_exit(); + +} + +/* + * Worker per-entry thread for dynamic dispatches. + */ +static void +taskq_d_thread(taskq_ent_t *tqe) +{ + taskq_bucket_t *bucket = tqe->tqent_un.tqent_bucket; + taskq_t *tq = bucket->tqbucket_taskq; + kmutex_t *lock = &bucket->tqbucket_lock; + kcondvar_t *cv = &tqe->tqent_cv; + callb_cpr_t cprinfo; + clock_t w; + + CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, tq->tq_name); + +#ifdef __APPLE__ + /* + * There's no way in Mac OS X KPI to create a thread + * in a suspended state (TS_STOPPED). So instead we + * use tqent_thread as a flag and wait for it to get + * initialized. 
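+ *
+ * taskq_bucket_extend() stores the sentinel 0xCEDEC0DE in tqent_thread
+ * before calling thread_create() and overwrites it with the real thread
+ * pointer once the entry is on the bucket free list, so we simply wait here
+ * until that hand-off has happened.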
+ */ + mutex_enter(&tqe->tqent_thread_lock); + while (tqe->tqent_thread == (kthread_t *)0xCEDEC0DE) + cv_wait(&tqe->tqent_thread_cv, &tqe->tqent_thread_lock); + mutex_exit(&tqe->tqent_thread_lock); +#endif + + mutex_enter(lock); + + for (;;) { + /* + * If a task is scheduled (func != NULL), execute it, otherwise + * sleep, waiting for a job. + */ + if (tqe->tqent_func != NULL) { + hrtime_t start; + hrtime_t end; + + ASSERT(bucket->tqbucket_nalloc > 0); + + /* + * It is possible to free the entry right away before + * actually executing the task so that subsequent + * dispatches may immediately reuse it. But this, + * effectively, creates a two-length queue in the entry + * and may lead to a deadlock if the execution of the + * current task depends on the execution of the next + * scheduled task. So, we keep the entry busy until the + * task is processed. + */ + + mutex_exit(lock); + start = gethrtime(); + DTRACE_PROBE3(taskq__d__exec__start, taskq_t *, tq, + taskq_bucket_t *, bucket, taskq_ent_t *, tqe); + tqe->tqent_func(tqe->tqent_arg); + DTRACE_PROBE3(taskq__d__exec__end, taskq_t *, tq, + taskq_bucket_t *, bucket, taskq_ent_t *, tqe); + end = gethrtime(); + mutex_enter(lock); + bucket->tqbucket_totaltime += end - start; + + /* + * Return the entry to the bucket free list. + */ + tqe->tqent_func = NULL; + TQ_APPEND(bucket->tqbucket_freelist, tqe); + bucket->tqbucket_nalloc--; + bucket->tqbucket_nfree++; + ASSERT(!IS_EMPTY(bucket->tqbucket_freelist)); + /* + * taskq_wait() waits for nalloc to drop to zero on + * tqbucket_cv. + */ + cv_signal(&bucket->tqbucket_cv); + } + + /* + * At this point the entry must be in the bucket free list - + * either because it was there initially or because it just + * finished executing a task and put itself on the free list. + */ + ASSERT(bucket->tqbucket_nfree > 0); + /* + * Go to sleep unless we are closing. + * If a thread is sleeping too long, it dies. + */ + if (! (bucket->tqbucket_flags & TQBUCKET_CLOSE)) { + w = taskq_thread_wait(tq, lock, cv, + &cprinfo, taskq_thread_timeout * hz); + } + + /* + * At this point we may be in two different states: + * + * (1) tqent_func is set which means that a new task is + * dispatched and we need to execute it. + * + * (2) Thread is sleeping for too long or we are closing. In + * both cases destroy the thread and the entry. + */ + + /* If func is NULL we should be on the freelist. */ + ASSERT((tqe->tqent_func != NULL) || + (bucket->tqbucket_nfree > 0)); + /* If func is non-NULL we should be allocated */ + ASSERT((tqe->tqent_func == NULL) || + (bucket->tqbucket_nalloc > 0)); + + /* Check freelist consistency */ + ASSERT((bucket->tqbucket_nfree > 0) || + IS_EMPTY(bucket->tqbucket_freelist)); + ASSERT((bucket->tqbucket_nfree == 0) || + !IS_EMPTY(bucket->tqbucket_freelist)); + + if ((tqe->tqent_func == NULL) && + ((w == -1) || (bucket->tqbucket_flags & TQBUCKET_CLOSE))) { + /* + * This thread is sleeping for too long or we are + * closing - time to die. + * Thread creation/destruction happens rarely, + * so grabbing the lock is not a big performance issue. + * The bucket lock is dropped by CALLB_CPR_EXIT(). + */ + + /* Remove the entry from the free list. 
*/ + tqe->tqent_prev->tqent_next = tqe->tqent_next; + tqe->tqent_next->tqent_prev = tqe->tqent_prev; + ASSERT(bucket->tqbucket_nfree > 0); + bucket->tqbucket_nfree--; + + TQ_STAT(bucket, tqs_tdeaths); + cv_signal(&bucket->tqbucket_cv); + tqe->tqent_thread = NULL; + mutex_enter(&tq->tq_lock); + tq->tq_tdeaths++; + mutex_exit(&tq->tq_lock); + CALLB_CPR_EXIT(&cprinfo); + kmem_cache_free(taskq_ent_cache, tqe); + thread_exit(); + } + } +} + + +/* + * Taskq creation. May sleep for memory. + * Always use automatically generated instances to avoid kstat name space + * collisions. + */ + +taskq_t * +taskq_create(const char *name, int nthreads, pri_t pri, int minalloc, + int maxalloc, uint_t flags) +{ + ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0); + + return (taskq_create_common(name, 0, nthreads, pri, minalloc, + maxalloc, &p0, 0, flags | TASKQ_NOINSTANCE)); +} + +/* + * Create an instance of task queue. It is legal to create task queues with the + * same name and different instances. + * + * taskq_create_instance is used by ddi_taskq_create() where it gets the + * instance from ddi_get_instance(). In some cases the instance is not + * initialized and is set to -1. This case is handled as if no instance was + * passed at all. + */ +taskq_t * +taskq_create_instance(const char *name, int instance, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0); + ASSERT((instance >= 0) || (instance == -1)); + + if (instance < 0) { + flags |= TASKQ_NOINSTANCE; + } + + return (taskq_create_common(name, instance, nthreads, + pri, minalloc, maxalloc, &p0, 0, flags)); +} + +taskq_t * +taskq_create_proc(const char *name, int nthreads, pri_t pri, int minalloc, + int maxalloc, proc_t *proc, uint_t flags) +{ + ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0); +#ifndef __APPLE__ + ASSERT(proc->p_flag & SSYS); +#endif + return (taskq_create_common(name, 0, nthreads, pri, minalloc, + maxalloc, proc, 0, flags | TASKQ_NOINSTANCE)); +} + +taskq_t * +taskq_create_sysdc(const char *name, int nthreads, int minalloc, + int maxalloc, proc_t *proc, uint_t dc, uint_t flags) +{ + ASSERT((flags & ~TASKQ_INTERFACE_FLAGS) == 0); +#ifndef __APPLE__ + ASSERT(proc->p_flag & SSYS); +#endif + return (taskq_create_common(name, 0, nthreads, minclsyspri, minalloc, + maxalloc, proc, dc, flags | TASKQ_NOINSTANCE | TASKQ_DUTY_CYCLE)); +} + +static taskq_t * +taskq_create_common(const char *name, int instance, int nthreads, pri_t pri, + int minalloc, int maxalloc, proc_t *proc, uint_t dc, uint_t flags) +{ + taskq_t *tq = kmem_cache_alloc(taskq_cache, KM_SLEEP); +#ifdef __APPLE__ + uint_t ncpus = max_ncpus; +#else + uint_t ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus); +#endif + uint_t bsize; /* # of buckets - always power of 2 */ + int max_nthreads; + + /* + * We are not allowed to use TASKQ_DYNAMIC with taskq_dispatch_ent() + * but that is done by spa.c - so we will simply mask DYNAMIC out. + */ + flags &= ~TASKQ_DYNAMIC; + + /* + * TASKQ_DYNAMIC, TASKQ_CPR_SAFE and TASKQ_THREADS_CPU_PCT are all + * mutually incompatible. 
+ */ + IMPLY((flags & TASKQ_DYNAMIC), !(flags & TASKQ_CPR_SAFE)); + IMPLY((flags & TASKQ_DYNAMIC), !(flags & TASKQ_THREADS_CPU_PCT)); + IMPLY((flags & TASKQ_CPR_SAFE), !(flags & TASKQ_THREADS_CPU_PCT)); + + /* Cannot have DYNAMIC with DUTY_CYCLE */ + IMPLY((flags & TASKQ_DYNAMIC), !(flags & TASKQ_DUTY_CYCLE)); + + /* Cannot have DUTY_CYCLE with a p0 kernel process */ + IMPLY((flags & TASKQ_DUTY_CYCLE), proc != &p0); + + /* Cannot have DC_BATCH without DUTY_CYCLE */ + ASSERT((flags & (TASKQ_DUTY_CYCLE|TASKQ_DC_BATCH)) != TASKQ_DC_BATCH); + + ASSERT(proc != NULL); + + bsize = 1 << (highbit(ncpus) - 1); + ASSERT(bsize >= 1); + bsize = MIN(bsize, taskq_maxbuckets); + + if (flags & TASKQ_DYNAMIC) { + ASSERT3S(nthreads, >=, 1); + tq->tq_maxsize = nthreads; + + /* For dynamic task queues use just one backup thread */ + nthreads = max_nthreads = 1; + + } else if (flags & TASKQ_THREADS_CPU_PCT) { + uint_t pct; + ASSERT3S(nthreads, >=, 0); + pct = nthreads; + + if (pct > taskq_cpupct_max_percent) + pct = taskq_cpupct_max_percent; + + /* + * If you're using THREADS_CPU_PCT, the process for the + * taskq threads must be curproc. This allows any pset + * binding to be inherited correctly. If proc is &p0, + * we won't be creating LWPs, so new threads will be assigned + * to the default processor set. + */ + /* ASSERT(curproc == proc || proc == &p0); */ + tq->tq_threads_ncpus_pct = pct; + nthreads = 1; /* corrected in taskq_thread_create() */ + max_nthreads = TASKQ_THREADS_PCT(max_ncpus, pct); + + } else { + ASSERT3S(nthreads, >=, 1); + max_nthreads = nthreads; + } + + if (max_nthreads < taskq_minimum_nthreads_max) + max_nthreads = taskq_minimum_nthreads_max; + + /* + * Make sure the name is 0-terminated, and conforms to the rules for + * C indentifiers + */ + (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); + strident_canon(tq->tq_name, TASKQ_NAMELEN + 1); + + tq->tq_flags = flags | TASKQ_CHANGING; + tq->tq_active = 0; + tq->tq_instance = instance; + tq->tq_nthreads_target = nthreads; + tq->tq_nthreads_max = max_nthreads; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_nbuckets = bsize; + tq->tq_proc = proc; + tq->tq_pri = pri; + tq->tq_DC = dc; + list_link_init(&tq->tq_cpupct_link); + + if (max_nthreads > 1) + tq->tq_threadlist = kmem_alloc( + sizeof (kthread_t *) * max_nthreads, KM_SLEEP); + + mutex_enter(&tq->tq_lock); + if (flags & TASKQ_PREPOPULATE) { + while (minalloc-- > 0) + taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP)); + } + + /* + * Before we start creating threads for this taskq, take a + * zone hold so the zone can't go away before taskq_destroy + * makes sure all the taskq threads are gone. This hold is + * similar in purpose to those taken by zthread_create(). + */ +#ifndef __APPLE__ + zone_hold(tq->tq_proc->p_zone); +#endif + /* + * Create the first thread, which will create any other threads + * necessary. taskq_thread_create will not return until we have + * enough threads to be able to process requests. 
+ */ + taskq_thread_create(tq); + mutex_exit(&tq->tq_lock); + + if (flags & TASKQ_DYNAMIC) { + taskq_bucket_t *bucket = kmem_zalloc(sizeof (taskq_bucket_t) * + bsize, KM_SLEEP); + int b_id; + + tq->tq_buckets = bucket; + + /* Initialize each bucket */ + for (b_id = 0; b_id < bsize; b_id++, bucket++) { + mutex_init(&bucket->tqbucket_lock, NULL, MUTEX_DEFAULT, + NULL); + cv_init(&bucket->tqbucket_cv, NULL, CV_DEFAULT, NULL); + bucket->tqbucket_taskq = tq; + bucket->tqbucket_freelist.tqent_next = + bucket->tqbucket_freelist.tqent_prev = + &bucket->tqbucket_freelist; + if (flags & TASKQ_PREPOPULATE) + taskq_bucket_extend(bucket); + } + } + + /* + * Install kstats. + * We have two cases: + * 1) Instance is provided to taskq_create_instance(). In this case it + * should be >= 0 and we use it. + * + * 2) Instance is not provided and is automatically generated + */ + if (flags & TASKQ_NOINSTANCE) { + instance = tq->tq_instance = + (int)(uintptr_t)vmem_alloc(taskq_id_arena, 1, VM_SLEEP); + } + + if (flags & TASKQ_DYNAMIC) { + if ((tq->tq_kstat = kstat_create("unix", instance, + tq->tq_name, "taskq_d", KSTAT_TYPE_NAMED, + sizeof (taskq_d_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL)) != NULL) { + tq->tq_kstat->ks_lock = &taskq_d_kstat_lock; + tq->tq_kstat->ks_data = &taskq_d_kstat; + tq->tq_kstat->ks_update = taskq_d_kstat_update; + tq->tq_kstat->ks_private = tq; + kstat_install(tq->tq_kstat); + } + } else { + if ((tq->tq_kstat = kstat_create("unix", instance, tq->tq_name, + "taskq", KSTAT_TYPE_NAMED, + sizeof (taskq_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL)) != NULL) { + tq->tq_kstat->ks_lock = &taskq_kstat_lock; + tq->tq_kstat->ks_data = &taskq_kstat; + tq->tq_kstat->ks_update = taskq_kstat_update; + tq->tq_kstat->ks_private = tq; + kstat_install(tq->tq_kstat); + } + } + + return (tq); +} + +/* + * taskq_destroy(). + * + * Assumes: by the time taskq_destroy is called no one will use this task queue + * in any way and no one will try to dispatch entries in it. + */ +void +taskq_destroy(taskq_t *tq) +{ + taskq_bucket_t *b = tq->tq_buckets; + int bid = 0; + + ASSERT(! (tq->tq_flags & TASKQ_CPR_SAFE)); + + /* + * Destroy kstats. + */ + if (tq->tq_kstat != NULL) { + kstat_delete(tq->tq_kstat); + tq->tq_kstat = NULL; + } + + /* + * Destroy instance if needed. + */ + if (tq->tq_flags & TASKQ_NOINSTANCE) { + vmem_free(taskq_id_arena, (void *)(uintptr_t)(tq->tq_instance), + 1); + tq->tq_instance = 0; + } + + /* + * Unregister from the cpupct list. + */ +#ifndef __APPLE__ + if (tq->tq_flags & TASKQ_THREADS_CPU_PCT) { + taskq_cpupct_remove(tq); + } +#endif + + /* + * Wait for any pending entries to complete. + */ + taskq_wait(tq); + + mutex_enter(&tq->tq_lock); + ASSERT((tq->tq_task.tqent_next == &tq->tq_task) && + (tq->tq_active == 0)); + + /* notify all the threads that they need to exit */ + tq->tq_nthreads_target = 0; + + tq->tq_flags |= TASKQ_CHANGING; + cv_broadcast(&tq->tq_dispatch_cv); + cv_broadcast(&tq->tq_exit_cv); + + while (tq->tq_nthreads != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + + if (tq->tq_nthreads_max != 1) + kmem_free(tq->tq_threadlist, sizeof (kthread_t *) * + tq->tq_nthreads_max); + + tq->tq_minalloc = 0; + while (tq->tq_nalloc != 0) + taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP)); + + mutex_exit(&tq->tq_lock); + + /* + * Mark each bucket as closing and wakeup all sleeping threads. 
+ */ + for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) { + taskq_ent_t *tqe; + + mutex_enter(&b->tqbucket_lock); + + b->tqbucket_flags |= TQBUCKET_CLOSE; + /* Wakeup all sleeping threads */ + + for (tqe = b->tqbucket_freelist.tqent_next; + tqe != &b->tqbucket_freelist; tqe = tqe->tqent_next) + cv_signal(&tqe->tqent_cv); + + ASSERT(b->tqbucket_nalloc == 0); + + /* + * At this point we waited for all pending jobs to complete (in + * both the task queue and the bucket and no new jobs should + * arrive. Wait for all threads to die. + */ + while (b->tqbucket_nfree > 0) + cv_wait(&b->tqbucket_cv, &b->tqbucket_lock); + mutex_exit(&b->tqbucket_lock); + mutex_destroy(&b->tqbucket_lock); + cv_destroy(&b->tqbucket_cv); + } + + if (tq->tq_buckets != NULL) { + ASSERT(tq->tq_flags & TASKQ_DYNAMIC); + kmem_free(tq->tq_buckets, + sizeof (taskq_bucket_t) * tq->tq_nbuckets); + + /* Cleanup fields before returning tq to the cache */ + tq->tq_buckets = NULL; + tq->tq_tcreates = 0; + tq->tq_tdeaths = 0; + } else { + ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC)); + } + + /* + * Now that all the taskq threads are gone, we can + * drop the zone hold taken in taskq_create_common + */ +#ifndef __APPLE__ + zone_rele(tq->tq_proc->p_zone); +#endif + + tq->tq_threads_ncpus_pct = 0; + tq->tq_totaltime = 0; + tq->tq_tasks = 0; + tq->tq_maxtasks = 0; + tq->tq_executed = 0; + kmem_cache_free(taskq_cache, tq); +} + +/* + * Extend a bucket with a new entry on the free list and attach a worker thread + * to it. + * + * Argument: pointer to the bucket. + * + * This function may quietly fail. It is only used by taskq_dispatch() which + * handles such failures properly. + */ +static void +taskq_bucket_extend(void *arg) +{ + taskq_ent_t *tqe; + taskq_bucket_t *b = (taskq_bucket_t *)arg; + taskq_t *tq = b->tqbucket_taskq; + int nthreads; +#ifdef __APPLE__ + kthread_t *thread; +#endif + + if (! ENOUGH_MEMORY()) { + TQ_STAT(b, tqs_nomem); + return; + } + + mutex_enter(&tq->tq_lock); + + /* + * Observe global taskq limits on the number of threads. + */ + if (tq->tq_tcreates++ - tq->tq_tdeaths > tq->tq_maxsize) { + tq->tq_tcreates--; + mutex_exit(&tq->tq_lock); + return; + } + mutex_exit(&tq->tq_lock); + + tqe = kmem_cache_alloc(taskq_ent_cache, KM_NOSLEEP); + + if (tqe == NULL) { + mutex_enter(&tq->tq_lock); + TQ_STAT(b, tqs_nomem); + tq->tq_tcreates--; + mutex_exit(&tq->tq_lock); + return; + } + + ASSERT(tqe->tqent_thread == NULL); + + tqe->tqent_un.tqent_bucket = b; + +#ifdef __APPLE__ + /* + * There's no way in Mac OS X KPI to create a thread + * in a suspended state (TS_STOPPED). So instead we + * use tqent_thread as a flag and the thread must wait + * for it to be initialized (below). + */ + tqe->tqent_thread = (kthread_t *)0xCEDEC0DE; + thread = thread_create(NULL, 0, (void (*)(void *))taskq_d_thread, + tqe, 0, pp0, TS_RUN, tq->tq_pri); +#else + + /* + * Create a thread in a TS_STOPPED state first. If it is successfully + * created, place the entry on the free list and start the thread. + */ + tqe->tqent_thread = thread_create(NULL, 0, taskq_d_thread, tqe, + 0, tq->tq_proc, TS_STOPPED, tq->tq_pri); +#endif /* __APPLE__ */ + + /* + * Once the entry is ready, link it to the the bucket free list. 
+ */ + mutex_enter(&b->tqbucket_lock); + tqe->tqent_func = NULL; + TQ_APPEND(b->tqbucket_freelist, tqe); + b->tqbucket_nfree++; + TQ_STAT(b, tqs_tcreates); + +#if TASKQ_STATISTIC + nthreads = b->tqbucket_stat.tqs_tcreates - + b->tqbucket_stat.tqs_tdeaths; + b->tqbucket_stat.tqs_maxthreads = MAX(nthreads, + b->tqbucket_stat.tqs_maxthreads); +#endif + + mutex_exit(&b->tqbucket_lock); + /* + * Start the stopped thread. + */ +#ifdef __APPLE__ + mutex_enter(&tqe->tqent_thread_lock); + tqe->tqent_thread = thread; + cv_signal(&tqe->tqent_thread_cv); + mutex_exit(&tqe->tqent_thread_lock); +#else + thread_lock(tqe->tqent_thread); + tqe->tqent_thread->t_taskq = tq; + tqe->tqent_thread->t_schedflag |= TS_ALLSTART; + setrun_locked(tqe->tqent_thread); + thread_unlock(tqe->tqent_thread); +#endif /* __APPLE__ */ +} + +static int +taskq_kstat_update(kstat_t *ksp, int rw) +{ + struct taskq_kstat *tqsp = &taskq_kstat; + taskq_t *tq = ksp->ks_private; + + if (rw == KSTAT_WRITE) + return (EACCES); + +#ifdef __APPLE__ + tqsp->tq_pid.value.ui64 = 0; /* kernel_task'd pid is 0 */ +#else + tqsp->tq_pid.value.ui64 = proc_pid(tq->tq_proc->p_pid); +#endif + tqsp->tq_tasks.value.ui64 = tq->tq_tasks; + tqsp->tq_executed.value.ui64 = tq->tq_executed; + tqsp->tq_maxtasks.value.ui64 = tq->tq_maxtasks; + tqsp->tq_totaltime.value.ui64 = tq->tq_totaltime; + tqsp->tq_nactive.value.ui64 = tq->tq_active; + tqsp->tq_nalloc.value.ui64 = tq->tq_nalloc; + tqsp->tq_pri.value.ui64 = tq->tq_pri; + tqsp->tq_nthreads.value.ui64 = tq->tq_nthreads; + return (0); +} + +static int +taskq_d_kstat_update(kstat_t *ksp, int rw) +{ + struct taskq_d_kstat *tqsp = &taskq_d_kstat; + taskq_t *tq = ksp->ks_private; + taskq_bucket_t *b = tq->tq_buckets; + int bid = 0; + + if (rw == KSTAT_WRITE) + return (EACCES); + + ASSERT(tq->tq_flags & TASKQ_DYNAMIC); + + tqsp->tqd_btasks.value.ui64 = tq->tq_tasks; + tqsp->tqd_bexecuted.value.ui64 = tq->tq_executed; + tqsp->tqd_bmaxtasks.value.ui64 = tq->tq_maxtasks; + tqsp->tqd_bnalloc.value.ui64 = tq->tq_nalloc; + tqsp->tqd_bnactive.value.ui64 = tq->tq_active; + tqsp->tqd_btotaltime.value.ui64 = tq->tq_totaltime; + tqsp->tqd_pri.value.ui64 = tq->tq_pri; + + tqsp->tqd_hits.value.ui64 = 0; + tqsp->tqd_misses.value.ui64 = 0; + tqsp->tqd_overflows.value.ui64 = 0; + tqsp->tqd_tcreates.value.ui64 = 0; + tqsp->tqd_tdeaths.value.ui64 = 0; + tqsp->tqd_maxthreads.value.ui64 = 0; + tqsp->tqd_nomem.value.ui64 = 0; + tqsp->tqd_disptcreates.value.ui64 = 0; + tqsp->tqd_totaltime.value.ui64 = 0; + tqsp->tqd_nalloc.value.ui64 = 0; + tqsp->tqd_nfree.value.ui64 = 0; + + for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) { + tqsp->tqd_hits.value.ui64 += b->tqbucket_stat.tqs_hits; + tqsp->tqd_misses.value.ui64 += b->tqbucket_stat.tqs_misses; + tqsp->tqd_overflows.value.ui64 += b->tqbucket_stat.tqs_overflow; + tqsp->tqd_tcreates.value.ui64 += b->tqbucket_stat.tqs_tcreates; + tqsp->tqd_tdeaths.value.ui64 += b->tqbucket_stat.tqs_tdeaths; + tqsp->tqd_maxthreads.value.ui64 += + b->tqbucket_stat.tqs_maxthreads; + tqsp->tqd_nomem.value.ui64 += b->tqbucket_stat.tqs_nomem; + tqsp->tqd_disptcreates.value.ui64 += + b->tqbucket_stat.tqs_disptcreates; + tqsp->tqd_totaltime.value.ui64 += b->tqbucket_totaltime; + tqsp->tqd_nalloc.value.ui64 += b->tqbucket_nalloc; + tqsp->tqd_nfree.value.ui64 += b->tqbucket_nfree; + } + return (0); +} + +int +EMPTY_TASKQ(taskq_t *tq) +{ +#ifdef _KERNEL + return ((tq)->tq_task.tqent_next == &(tq)->tq_task); +#else + return (tq->tq_task.tqent_next == &tq->tq_task || tq->tq_active == 0); +#endif +} diff --git 
a/module/os/macos/spl/spl-thread.c b/module/os/macos/spl/spl-thread.c new file mode 100644 index 0000000000..886190cba7 --- /dev/null +++ b/module/os/macos/spl/spl-thread.c @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013, 2020 Jorgen Lundman + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +uint64_t zfs_threads = 0; + +kthread_t * +spl_thread_create( + caddr_t stk, + size_t stksize, + void (*proc)(void *), + void *arg, + size_t len, + /* struct proc *pp, */ + int state, +#ifdef SPL_DEBUG_THREAD + char *filename, + int line, +#endif + pri_t pri) +{ + kern_return_t result; + thread_t thread; + +#ifdef SPL_DEBUG_THREAD + printf("Start thread pri %d by '%s':%d\n", pri, + filename, line); +#endif + + result = kernel_thread_start((thread_continue_t)proc, arg, &thread); + + if (result != KERN_SUCCESS) + return (NULL); + + /* Improve the priority when asked to do so */ + if (pri > minclsyspri) { + thread_precedence_policy_data_t policy; + + /* + * kernel priorities (osfmk/kern/sched.h) + * + * 96 Reserved (real-time) + * 95 Kernel mode only + * A + * + + * (16 levels) + * + + * V + * 80 Kernel mode only + * 79 System high priority + * + * spl/include/sys/sysmacros.h + * #define maxclsyspri 89 + * #define minclsyspri 81 BASEPRI_KERNEL + * #define defclsyspri 81 BASEPRI_KERNEL + * + * Calling policy.importance = 10 will create + * a default pri (81) at pri (91). + * + * So asking for pri (85) we do 85-81 = 4. + * + * IllumOS priorities are: + * #define MAXCLSYSPRI 99 + * #define MINCLSYSPRI 60 + */ + + policy.importance = (pri - minclsyspri); + + thread_policy_set(thread, + THREAD_PRECEDENCE_POLICY, + (thread_policy_t)&policy, + THREAD_PRECEDENCE_POLICY_COUNT); + } + + thread_deallocate(thread); + + atomic_inc_64(&zfs_threads); + + return ((kthread_t *)thread); +} + +kthread_t * +spl_current_thread(void) +{ + thread_t cur_thread = current_thread(); + return ((kthread_t *)cur_thread); +} + +void +spl_thread_exit(void) +{ + atomic_dec_64(&zfs_threads); + + tsd_thread_exit(); + (void) thread_terminate(current_thread()); +} + + +/* + * IllumOS has callout.c - place it here until we find a better place + */ +callout_id_t +timeout_generic(int type, void (*func)(void *), void *arg, + hrtime_t expiration, hrtime_t resolution, int flags) +{ + struct timespec ts; + hrt2ts(expiration, &ts); + bsd_timeout(func, arg, &ts); + /* + * bsd_untimeout() requires func and arg to cancel the timeout, so + * pass it back as the callout_id. 
If we one day were to implement + * untimeout_generic() they would pass it back to us + */ + return ((callout_id_t)arg); +} diff --git a/module/os/macos/spl/spl-time.c b/module/os/macos/spl/spl-time.c new file mode 100644 index 0000000000..151691d60b --- /dev/null +++ b/module/os/macos/spl/spl-time.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#include +#include +#include + +/* + * gethrtime() provides high-resolution timestamps with + * machine-dependent origin Hence its primary use is to specify + * intervals. + */ + +static hrtime_t +zfs_abs_to_nano(uint64_t elapsed) +{ + static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 }; + + /* + * If this is the first time we've run, get the timebase. + * We can use denom == 0 to indicate that sTimebaseInfo is + * uninitialised because it makes no sense to have a zero + * denominator in a fraction. + */ + + if (sTimebaseInfo.denom == 0) { + (void) clock_timebase_info(&sTimebaseInfo); + } + + /* + * Convert to nanoseconds. + * return (elapsed * (uint64_t)sTimebaseInfo.numer) / + * (uint64_t)sTimebaseInfo.denom; + * + * Provided the final result is representable in 64 bits the + * following maneuver will deliver that result without intermediate + * overflow. 
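The split is exact for integer division: writing elapsed = eta32*2^32 + eps32 and numer*eta32 = q*denom + r gives (elapsed*numer)/denom = q*2^32 + (r*2^32 + numer*eps32)/denom. A small standalone sketch (not kernel code; the numer/denom pairs are invented, real values come from the timebase) that checks the same arithmetic against a 128-bit reference using the compiler's __uint128_t extension:

/* Standalone check of the split-multiply used by zfs_abs_to_nano(). */
#include <stdint.h>
#include <stdio.h>

static uint64_t
split_scale(uint64_t elapsed, uint32_t numer, uint32_t denom)
{
	uint64_t eta32 = elapsed >> 32;
	uint64_t eps32 = elapsed & 0x00000000ffffffffULL;
	uint64_t mu64 = (uint64_t)numer * eta32;
	uint64_t lambda64 = (uint64_t)numer * eps32;
	uint64_t q32 = mu64 / denom;
	uint64_t r32 = mu64 - (q32 * denom);	/* mu64 % denom */

	return ((q32 << 32) + ((r32 << 32) + lambda64) / denom);
}

int
main(void)
{
	/* Invented numer/denom pairs for illustration only. */
	struct { uint32_t n, d; } tb[] = { { 125, 3 }, { 1, 1 }, { 24, 25 } };
	uint64_t ticks[] = { 0, 1, 123456789ULL, 1ULL << 40, 0xfffffffffULL };

	for (int i = 0; i < 3; i++) {
		for (int j = 0; j < 5; j++) {
			uint64_t got = split_scale(ticks[j], tb[i].n, tb[i].d);
			uint64_t ref = (uint64_t)(((__uint128_t)ticks[j] *
			    tb[i].n) / tb[i].d);
			printf("%llu ticks * %u/%u -> %llu ns (%s)\n",
			    (unsigned long long)ticks[j], tb[i].n, tb[i].d,
			    (unsigned long long)got,
			    got == ref ? "matches" : "MISMATCH");
		}
	}
	return (0);
}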
+ */ + if (sTimebaseInfo.denom == sTimebaseInfo.numer) + return (elapsed); + else if (sTimebaseInfo.denom == 1) + return (elapsed * (uint64_t)sTimebaseInfo.numer); + else { + /* Decompose elapsed = eta32 * 2^32 + eps32: */ + uint64_t eta32 = elapsed >> 32; + uint64_t eps32 = elapsed & 0x00000000ffffffffLL; + + uint32_t numer = sTimebaseInfo.numer; + uint32_t denom = sTimebaseInfo.denom; + + /* Form product of elapsed64 (decomposed) and numer: */ + uint64_t mu64 = numer * eta32; + uint64_t lambda64 = numer * eps32; + + /* Divide the constituents by denom: */ + uint64_t q32 = mu64/denom; + uint64_t r32 = mu64 - (q32 * denom); /* mu64 % denom */ + + return ((q32 << 32) + ((r32 << 32) + lambda64) / denom); + } +} + + +hrtime_t +gethrtime(void) +{ + static uint64_t start = 0; + if (start == 0) + start = mach_absolute_time(); + return (zfs_abs_to_nano(mach_absolute_time() - start)); +} + + +void +gethrestime(struct timespec *ts) +{ + nanotime(ts); +} + +time_t +gethrestime_sec(void) +{ + struct timeval tv; + + microtime(&tv); + return (tv.tv_sec); +} + +void +hrt2ts(hrtime_t hrt, struct timespec *tsp) +{ + uint32_t sec, nsec, tmp; + + tmp = (uint32_t)(hrt >> 30); + sec = tmp - (tmp >> 2); + sec = tmp - (sec >> 5); + sec = tmp + (sec >> 1); + sec = tmp - (sec >> 6) + 7; + sec = tmp - (sec >> 3); + sec = tmp + (sec >> 1); + sec = tmp + (sec >> 3); + sec = tmp + (sec >> 4); + tmp = (sec << 7) - sec - sec - sec; + tmp = (tmp << 7) - tmp - tmp - tmp; + tmp = (tmp << 7) - tmp - tmp - tmp; + nsec = (uint32_t)hrt - (tmp << 9); + while (nsec >= NANOSEC) { + nsec -= NANOSEC; + sec++; + } + tsp->tv_sec = (time_t)sec; + tsp->tv_nsec = nsec; +} diff --git a/module/os/macos/spl/spl-tsd.c b/module/os/macos/spl/spl-tsd.c new file mode 100644 index 0000000000..6ca970a9f9 --- /dev/null +++ b/module/os/macos/spl/spl-tsd.c @@ -0,0 +1,389 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2014 Jorgen Lundman + * + * A thread will call tsd_create(&key, dtor) to allocate a new + * "variable" placement, called a "key". In illumos, this is the index + * into an array of dtors. (If dtor is passed as NULL, TSD internally + * set it to an empty function). So if the dtor array[i] is NULL, it + * is "free" and can be allocated. (returned as *key = i). + * illumos will grow this dtor array with realloc when required. + * Then Any Thread can set a value on this "key index", and this value + * is specific to each thread by calling tsd_set(key, value). + * And can be retrieved with tsd_get(key). + * When tsd_destroy(key) is called, we need to loop through all + * threads different "values", and call the dtor on each one. 
+ * Likewise, we need to know when a thread exists, so we can clean up + * the values (by calling dtor for each one) so we patch into the + * thread_exit() call, to also call tsd_thread_exit(). + * + * In OsX, we build an array of the dtors, and return the key index, + * this is to store the dtor, and know which "key" values are valid. + * Then we build an AVL tree, indexed by , to store + * each thread's value. This allows us to do key access quick. + * On thread_exit, we iterate the dtor array, and for each key + * remove . + * On tsd_destroy(key), we use AVL find nearest with , then + * avl_next as long as key remains the same, to remove each thread value. + * + * Note a key of "0" is considered "invalid" in IllumOS, so we return + * a "1" based index, even though internally it is 0 based. + * + */ + +#include +#include +#include +#include +#include + +/* Initial size of array, and realloc growth size */ +#define TSD_ALLOC_SIZE 10 + +/* array of dtors, allocated in init */ +static dtor_func_t *tsd_dtor_array = NULL; +static uint32_t tsd_dtor_size = 0; +static avl_tree_t tsd_tree; + +struct spl_tsd_node_s +{ + /* The index/key */ + uint_t tsd_key; + thread_t tsd_thread; + + /* The payload */ + void *tsd_value; + + /* Internal mumbo */ + avl_node_t tsd_link_node; +}; +typedef struct spl_tsd_node_s spl_tsd_node_t; + +static kmutex_t spl_tsd_mutex; + +/* + * tsd_set - set thread specific data + * @key: lookup key + * @value: value to set + * + * Caller must prevent racing tsd_create() or tsd_destroy(), protected + * from racing tsd_get() or tsd_set() because it is thread specific. + * This function has been optimized to be fast for the update case. + * When setting the tsd initially it will be slower due to additional + * required locking and potential memory allocations. + * If the value is set to NULL, we also release it. + */ +int +tsd_set(uint_t key, void *value) +{ + spl_tsd_node_t *entry = NULL; + spl_tsd_node_t search; + avl_index_t loc; + uint_t i; + + /* Invalid key values? */ + if ((key < 1) || + (key >= tsd_dtor_size)) { + return (EINVAL); + } + + i = key - 1; + + /* + * First handle the easy case, already has a node/value + * so we just need to find it, update it. + */ + + search.tsd_key = i; + search.tsd_thread = current_thread(); + + mutex_enter(&spl_tsd_mutex); + entry = avl_find(&tsd_tree, &search, &loc); + mutex_exit(&spl_tsd_mutex); + + if (entry) { + + /* If value is set to NULL, release it as well */ + if (value == NULL) { + mutex_enter(&spl_tsd_mutex); + avl_remove(&tsd_tree, entry); + mutex_exit(&spl_tsd_mutex); + kmem_free(entry, sizeof (*entry)); + return (0); + } + entry->tsd_value = value; + return (0); + } + + /* No node, we need to create a new one and insert it. */ + /* But if the value is NULL, then why create one eh? */ + if (value == NULL) + return (0); + + entry = kmem_alloc(sizeof (spl_tsd_node_t), KM_SLEEP); + + entry->tsd_key = i; + entry->tsd_thread = current_thread(); + entry->tsd_value = value; + + mutex_enter(&spl_tsd_mutex); + avl_add(&tsd_tree, entry); + mutex_exit(&spl_tsd_mutex); + + return (0); +} + +/* + * tsd_get - get thread specific data for specified thread + * @key: lookup key + * + * Caller must prevent racing tsd_create() or tsd_destroy(). This + * implementation is designed to be fast and scalable, it does not + * lock the entire table only a single hash bin. + */ +void * +tsd_get_by_thread(uint_t key, thread_t thread) +{ + spl_tsd_node_t *entry = NULL; + spl_tsd_node_t search; + avl_index_t loc; + uint_t i; + + /* Invalid key values? 
*/ + if ((key < 1) || + (key >= tsd_dtor_size)) { + return (NULL); + } + + i = key - 1; + + search.tsd_key = i; + search.tsd_thread = thread; + + mutex_enter(&spl_tsd_mutex); + entry = avl_find(&tsd_tree, &search, &loc); + mutex_exit(&spl_tsd_mutex); + + return (entry ? entry->tsd_value : NULL); +} + +void * +tsd_get(uint_t key) +{ + return (tsd_get_by_thread(key, current_thread())); +} + +static void +tsd_internal_dtor(void *value) +{ +} + +/* + * Create TSD for a pid and fill in key with unique value, remember the dtor + * + * We cheat and create an entry with pid=0, to keep the dtor. + */ +void +tsd_create(uint_t *keyp, dtor_func_t dtor) +{ + uint_t i; + + if (*keyp) + return; + + // Iterate the dtor_array, looking for first NULL + for (i = 0; i < TSD_ALLOC_SIZE; i++) { + if (tsd_dtor_array[i] == NULL) break; + } + + /* Do we need to grow the list? */ + if (i >= tsd_dtor_size) { + printf("SPL: tsd list growing not implemented\n"); + return; + } + + if (dtor == NULL) + dtor = tsd_internal_dtor; + + tsd_dtor_array[i] = dtor; + + *keyp = i + 1; +} + +void +tsd_destroy(uint_t *keyp) +{ + spl_tsd_node_t *entry = NULL, *next = NULL; + spl_tsd_node_t search; + avl_index_t loc; + dtor_func_t dtor = NULL; + uint_t i; + + /* Invalid key values? */ + if ((*keyp < 1) || + (*keyp >= tsd_dtor_size)) { + return; + } + + i = *keyp - 1; + *keyp = 0; + + ASSERT(tsd_dtor_array[i] != NULL); + + dtor = tsd_dtor_array[i]; + tsd_dtor_array[i] = NULL; + + /* + * For each thread; + * if it has a value + * call the dtor + */ + search.tsd_key = i; + search.tsd_thread = NULL; + + mutex_enter(&spl_tsd_mutex); + entry = avl_find(&tsd_tree, &search, &loc); + + /* + * "entry" should really be NULL here, as we searched for the + * NULL thread + */ + if (entry == NULL) + entry = avl_nearest(&tsd_tree, loc, AVL_AFTER); + + /* Now, free node, and go to next, as long as the key matches */ + while (entry && (entry->tsd_key == i)) { + next = AVL_NEXT(&tsd_tree, entry); + + /* If we have a value, call the dtor for this thread */ + if (entry->tsd_value) + dtor(entry->tsd_value); + + avl_remove(&tsd_tree, entry); + + kmem_free(entry, sizeof (*entry)); + + entry = next; + } + + mutex_exit(&spl_tsd_mutex); +} + + + +/* + * A thread is exiting, clear out any tsd values it might have. 
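tsd_thread_exit() below is that cleanup hook. From a consumer's point of view, the interface described in the comments at the top of this file boils down to: tsd_create() hands out a 1-based key and records the destructor, tsd_set()/tsd_get() attach and fetch a per-thread value under that key, and tsd_destroy() (or thread exit) runs the destructor for every thread that still holds a value. A short hypothetical usage sketch, with invented my_* names and a made-up per-thread state struct:

typedef struct my_state {
	uint64_t	ms_calls;	/* hypothetical per-thread counter */
} my_state_t;

static uint_t my_tsd_key = 0;		/* 0 == not yet created */

static void
my_state_dtor(void *value)
{
	/* Runs once per thread still holding a value at destroy/exit. */
	kmem_free(value, sizeof (my_state_t));
}

void
my_module_init(void)
{
	tsd_create(&my_tsd_key, my_state_dtor);	/* key becomes >= 1 */
}

my_state_t *
my_state_get(void)
{
	my_state_t *s = tsd_get(my_tsd_key);

	if (s == NULL) {
		s = kmem_zalloc(sizeof (my_state_t), KM_SLEEP);
		VERIFY0(tsd_set(my_tsd_key, s));  /* binds to current thread */
	}
	return (s);
}

void
my_module_fini(void)
{
	tsd_destroy(&my_tsd_key);	/* dtor fires for every thread's value */
}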
+ */ +void +tsd_thread_exit(void) +{ + spl_tsd_node_t *entry = NULL; + spl_tsd_node_t search; + avl_index_t loc; + int i; + + search.tsd_thread = current_thread(); + + /* For all defined dtor/values */ + for (i = 0; i < tsd_dtor_size; i++) { + + /* If not allocated, skip */ + if (tsd_dtor_array[i] == NULL) continue; + + /* Find out of this thread has a value */ + search.tsd_key = i; + + mutex_enter(&spl_tsd_mutex); + entry = avl_find(&tsd_tree, &search, &loc); + if (entry) avl_remove(&tsd_tree, entry); + mutex_exit(&spl_tsd_mutex); + + if (entry == NULL) continue; + + /* If we have a value, call dtor */ + if (entry->tsd_value) + tsd_dtor_array[i](entry->tsd_value); + + kmem_free(entry, sizeof (*entry)); + } // for all i +} + +static int +tsd_tree_cmp(const void *arg1, const void *arg2) +{ + const spl_tsd_node_t *node1 = arg1; + const spl_tsd_node_t *node2 = arg2; + if (node1->tsd_key > node2->tsd_key) + return (1); + if (node1->tsd_key < node2->tsd_key) + return (-1); + if (node1->tsd_thread > node2->tsd_thread) + return (1); + if (node1->tsd_thread < node2->tsd_thread) + return (-1); + return (0); +} + +int +spl_tsd_init(void) +{ + tsd_dtor_array = kmem_zalloc(sizeof (dtor_func_t) * TSD_ALLOC_SIZE, + KM_SLEEP); + tsd_dtor_size = TSD_ALLOC_SIZE; + + mutex_init(&spl_tsd_mutex, NULL, MUTEX_DEFAULT, NULL); + avl_create(&tsd_tree, tsd_tree_cmp, + sizeof (spl_tsd_node_t), + offsetof(spl_tsd_node_t, tsd_link_node)); + return (0); +} + + +uint64_t +spl_tsd_size(void) +{ + return (avl_numnodes(&tsd_tree)); +} + +void +spl_tsd_fini(void) +{ + spl_tsd_node_t *entry = NULL; + void *cookie = NULL; + + printf("SPL: tsd unloading %llu\n", spl_tsd_size()); + + mutex_enter(&spl_tsd_mutex); + cookie = NULL; + while ((entry = avl_destroy_nodes(&tsd_tree, &cookie))) { + kmem_free(entry, sizeof (*entry)); + } + mutex_exit(&spl_tsd_mutex); + + avl_destroy(&tsd_tree); + mutex_destroy(&spl_tsd_mutex); + + kmem_free(tsd_dtor_array, sizeof (dtor_func_t) * tsd_dtor_size); + tsd_dtor_size = 0; +} diff --git a/module/os/macos/spl/spl-vmem.c b/module/os/macos/spl/spl-vmem.c new file mode 100644 index 0000000000..f54a0b133f --- /dev/null +++ b/module/os/macos/spl/spl-vmem.c @@ -0,0 +1,3935 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2017 Sean Doran + */ + +/* + * Big Theory Statement for the virtual memory allocator. 
+ * + * For a more complete description of the main ideas, see: + * + * Jeff Bonwick and Jonathan Adams, + * + * Magazines and vmem: Extending the Slab Allocator to Many CPUs and + * Arbitrary Resources. + * + * Proceedings of the 2001 Usenix Conference. + * Available as http://www.usenix.org/event/usenix01/bonwick.html + * + * + * 1. General Concepts + * ------------------- + * + * 1.1 Overview + * ------------ + * We divide the kernel address space into a number of logically distinct + * pieces, or *arenas*: text, data, heap, stack, and so on. Within these + * arenas we often subdivide further; for example, we use heap addresses + * not only for the kernel heap (kmem_alloc() space), but also for DVMA, + * bp_mapin(), /dev/kmem, and even some device mappings like the TOD chip. + * The kernel address space, therefore, is most accurately described as + * a tree of arenas in which each node of the tree *imports* some subset + * of its parent. The virtual memory allocator manages these arenas and + * supports their natural hierarchical structure. + * + * 1.2 Arenas + * ---------- + * An arena is nothing more than a set of integers. These integers most + * commonly represent virtual addresses, but in fact they can represent + * anything at all. For example, we could use an arena containing the + * integers minpid through maxpid to allocate process IDs. vmem_create() + * and vmem_destroy() create and destroy vmem arenas. In order to + * differentiate between arenas used for adresses and arenas used for + * identifiers, the VMC_IDENTIFIER flag is passed to vmem_create(). This + * prevents identifier exhaustion from being diagnosed as general memory + * failure. + * + * 1.3 Spans + * --------- + * We represent the integers in an arena as a collection of *spans*, or + * contiguous ranges of integers. For example, the kernel heap consists + * of just one span: [kernelheap, ekernelheap). Spans can be added to an + * arena in two ways: explicitly, by vmem_add(), or implicitly, by + * importing, as described in Section 1.5 below. + * + * 1.4 Segments + * ------------ + * Spans are subdivided into *segments*, each of which is either allocated + * or free. A segment, like a span, is a contiguous range of integers. + * Each allocated segment [addr, addr + size) represents exactly one + * vmem_alloc(size) that returned addr. Free segments represent the space + * between allocated segments. If two free segments are adjacent, we + * coalesce them into one larger segment; that is, if segments [a, b) and + * [b, c) are both free, we merge them into a single segment [a, c). + * The segments within a span are linked together in increasing-address order + * so we can easily determine whether coalescing is possible. + * + * Segments never cross span boundaries. When all segments within + * an imported span become free, we return the span to its source. + * + * 1.5 Imported Memory + * ------------------- + * As mentioned in the overview, some arenas are logical subsets of + * other arenas. For example, kmem_va_arena (a virtual address cache + * that satisfies most kmem_slab_create() requests) is just a subset + * of heap_arena (the kernel heap) that provides caching for the most + * common slab sizes. When kmem_va_arena runs out of virtual memory, + * it *imports* more from the heap; we say that heap_arena is the + * *vmem source* for kmem_va_arena. vmem_create() allows you to + * specify any existing vmem arena as the source for your new arena. 
+ * Topologically, since every arena is a child of at most one source, + * the set of all arenas forms a collection of trees. + * + * 1.6 Constrained Allocations + * --------------------------- + * Some vmem clients are quite picky about the kind of address they want. + * For example, the DVMA code may need an address that is at a particular + * phase with respect to some alignment (to get good cache coloring), or + * that lies within certain limits (the addressable range of a device), + * or that doesn't cross some boundary (a DMA counter restriction) -- + * or all of the above. vmem_xalloc() allows the client to specify any + * or all of these constraints. + * + * 1.7 The Vmem Quantum + * -------------------- + * Every arena has a notion of 'quantum', specified at vmem_create() time, + * that defines the arena's minimum unit of currency. Most commonly the + * quantum is either 1 or PAGESIZE, but any power of 2 is legal. + * All vmem allocations are guaranteed to be quantum-aligned. + * + * 1.8 Quantum Caching + * ------------------- + * A vmem arena may be so hot (frequently used) that the scalability of vmem + * allocation is a significant concern. We address this by allowing the most + * common allocation sizes to be serviced by the kernel memory allocator, + * which provides low-latency per-cpu caching. The qcache_max argument to + * vmem_create() specifies the largest allocation size to cache. + * + * 1.9 Relationship to Kernel Memory Allocator + * ------------------------------------------- + * Every kmem cache has a vmem arena as its slab supplier. The kernel memory + * allocator uses vmem_alloc() and vmem_free() to create and destroy slabs. + * + * + * 2. Implementation + * ----------------- + * + * 2.1 Segment lists and markers + * ----------------------------- + * The segment structure (vmem_seg_t) contains two doubly-linked lists. + * + * The arena list (vs_anext/vs_aprev) links all segments in the arena. + * In addition to the allocated and free segments, the arena contains + * special marker segments at span boundaries. Span markers simplify + * coalescing and importing logic by making it easy to tell both when + * we're at a span boundary (so we don't coalesce across it), and when + * a span is completely free (its neighbors will both be span markers). + * + * Imported spans will have vs_import set. + * + * The next-of-kin list (vs_knext/vs_kprev) links segments of the same type: + * (1) for allocated segments, vs_knext is the hash chain linkage; + * (2) for free segments, vs_knext is the freelist linkage; + * (3) for span marker segments, vs_knext is the next span marker. + * + * 2.2 Allocation hashing + * ---------------------- + * We maintain a hash table of all allocated segments, hashed by address. + * This allows vmem_free() to discover the target segment in constant time. + * vmem_update() periodically resizes hash tables to keep hash chains short. + * + * 2.3 Freelist management + * ----------------------- + * We maintain power-of-2 freelists for free segments, i.e. free segments + * of size >= 2^n reside in vmp->vm_freelist[n]. To ensure constant-time + * allocation, vmem_xalloc() looks not in the first freelist that *might* + * satisfy the allocation, but in the first freelist that *definitely* + * satisfies the allocation (unless VM_BESTFIT is specified, or all larger + * freelists are empty). 
For example, a 1000-byte allocation will be + * satisfied not from the 512..1023-byte freelist, whose members *might* + * contains a 1000-byte segment, but from a 1024-byte or larger freelist, + * the first member of which will *definitely* satisfy the allocation. + * This ensures that vmem_xalloc() works in constant time. + * + * We maintain a bit map to determine quickly which freelists are non-empty. + * vmp->vm_freemap & (1 << n) is non-zero iff vmp->vm_freelist[n] is non-empty. + * + * The different freelists are linked together into one large freelist, + * with the freelist heads serving as markers. Freelist markers simplify + * the maintenance of vm_freemap by making it easy to tell when we're taking + * the last member of a freelist (both of its neighbors will be markers). + * + * 2.4 Vmem Locking + * ---------------- + * For simplicity, all arena state is protected by a per-arena lock. + * For very hot arenas, use quantum caching for scalability. + * + * 2.5 Vmem Population + * ------------------- + * Any internal vmem routine that might need to allocate new segment + * structures must prepare in advance by calling vmem_populate(), which + * will preallocate enough vmem_seg_t's to get is through the entire + * operation without dropping the arena lock. + * + * 2.6 Auditing + * ------------ + * If KMF_AUDIT is set in kmem_flags, we audit vmem allocations as well. + * Since virtual addresses cannot be scribbled on, there is no equivalent + * in vmem to redzone checking, deadbeef, or other kmem debugging features. + * Moreover, we do not audit frees because segment coalescing destroys the + * association between an address and its segment structure. Auditing is + * thus intended primarily to keep track of who's consuming the arena. + * Debugging support could certainly be extended in the future if it proves + * necessary, but we do so much live checking via the allocation hash table + * that even non-DEBUG systems get quite a bit of sanity checking already. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VMEM_INITIAL 21 /* early vmem arenas */ +#define VMEM_SEG_INITIAL 800 + +/* + * Adding a new span to an arena requires two segment structures: one to + * represent the span, and one to represent the free segment it contains. + */ +#define VMEM_SEGS_PER_SPAN_CREATE 2 + +/* + * Allocating a piece of an existing segment requires 0-2 segment structures + * depending on how much of the segment we're allocating. + * + * To allocate the entire segment, no new segment structures are needed; we + * simply move the existing segment structure from the freelist to the + * allocation hash table. + * + * To allocate a piece from the left or right end of the segment, we must + * split the segment into two pieces (allocated part and remainder), so we + * need one new segment structure to represent the remainder. + * + * To allocate from the middle of a segment, we need two new segment strucures + * to represent the remainders on either side of the allocated part. + */ +#define VMEM_SEGS_PER_EXACT_ALLOC 0 +#define VMEM_SEGS_PER_LEFT_ALLOC 1 +#define VMEM_SEGS_PER_RIGHT_ALLOC 1 +#define VMEM_SEGS_PER_MIDDLE_ALLOC 2 + +/* + * vmem_populate() preallocates segment structures for vmem to do its work. + * It must preallocate enough for the worst case, which is when we must import + * a new span and then allocate from the middle of it. 
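To make the freelist selection rule from section 2.3 above concrete, a standalone sketch of just the index arithmetic (it deliberately ignores the vm_freemap non-empty check, VM_BESTFIT, and everything else vmem_xalloc() handles); highbit() here mimics the SPL definition of "highest set bit plus one":

/* Standalone illustration of power-of-2 freelist selection. */
#include <stdint.h>
#include <stdio.h>

static int
highbit(uint64_t i)
{
	int h = 0;

	while (i != 0) {
		h++;
		i >>= 1;
	}
	return (h);
}

int
main(void)
{
	uint64_t sizes[] = { 512, 1000, 1024, 8192 - 512 };

	for (int i = 0; i < 4; i++) {
		uint64_t size = sizes[i];
		int resident = highbit(size) - 1; /* list holding segs this size */
		int definite;			  /* first list that always fits */

		if ((size & (size - 1)) == 0)
			definite = resident;	  /* power of two: same list */
		else
			definite = resident + 1;  /* otherwise one list up */

		printf("size %llu: free segs of this size sit on freelist[%d] "
		    "(%llu..%llu); allocate from freelist[%d] and up\n",
		    (unsigned long long)size, resident,
		    (unsigned long long)(1ULL << resident),
		    (unsigned long long)((1ULL << (resident + 1)) - 1),
		    definite);
	}
	return (0);
}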
+ */ +#define VMEM_SEGS_PER_ALLOC_MAX \ +(VMEM_SEGS_PER_SPAN_CREATE + VMEM_SEGS_PER_MIDDLE_ALLOC) + +/* + * The segment structures themselves are allocated from vmem_seg_arena, so + * we have a recursion problem when vmem_seg_arena needs to populate itself. + * We address this by working out the maximum number of segment structures + * this act will require, and multiplying by the maximum number of threads + * that we'll allow to do it simultaneously. + * + * The worst-case segment consumption to populate vmem_seg_arena is as + * follows (depicted as a stack trace to indicate why events are occurring): + * + * (In order to lower the fragmentation in the heap_arena, we specify a + * minimum import size for the vmem_metadata_arena which is the same size + * as the kmem_va quantum cache allocations. This causes the worst-case + * allocation from the vmem_metadata_arena to be 3 segments.) + * + * vmem_alloc(vmem_seg_arena) -> 2 segs (span create + exact alloc) + * segkmem_alloc(vmem_metadata_arena) + * vmem_alloc(vmem_metadata_arena) -> 3 segs (span create + left alloc) + * vmem_alloc(heap_arena) -> 1 seg (left alloc) + * page_create() + * hat_memload() + * kmem_cache_alloc() + * kmem_slab_create() + * vmem_alloc(hat_memload_arena) -> 2 segs (span create + exact alloc) + * segkmem_alloc(heap_arena) + * vmem_alloc(heap_arena) -> 1 seg (left alloc) + * page_create() + * hat_memload() -> (hat layer won't recurse further) + * + * The worst-case consumption for each arena is 3 segment structures. + * Of course, a 3-seg reserve could easily be blown by multiple threads. + * Therefore, we serialize all allocations from vmem_seg_arena (which is OK + * because they're rare). We cannot allow a non-blocking allocation to get + * tied up behind a blocking allocation, however, so we use separate locks + * for VM_SLEEP and VM_NOSLEEP allocations. Similarly, VM_PUSHPAGE allocations + * must not block behind ordinary VM_SLEEPs. In addition, if the system is + * panicking then we must keep enough resources for panic_thread to do its + * work. Thus we have at most four threads trying to allocate from + * vmem_seg_arena, and each thread consumes at most three segment structures, + * so we must maintain a 12-seg reserve. + */ +#define VMEM_POPULATE_RESERVE 12 + +/* + * vmem_populate() ensures that each arena has VMEM_MINFREE seg structures + * so that it can satisfy the worst-case allocation *and* participate in + * worst-case allocation from vmem_seg_arena. 
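Plugging in the constants above: VMEM_SEGS_PER_ALLOC_MAX = VMEM_SEGS_PER_SPAN_CREATE + VMEM_SEGS_PER_MIDDLE_ALLOC = 2 + 2 = 4, so the VMEM_MINFREE defined just below works out to 12 + 4 = 16 preallocated vmem_seg_t structures per populated arena.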
+ */ +#define VMEM_MINFREE (VMEM_POPULATE_RESERVE + VMEM_SEGS_PER_ALLOC_MAX) + +static vmem_t vmem0[VMEM_INITIAL]; +static vmem_t *vmem_populator[VMEM_INITIAL]; +static uint32_t vmem_id; +static uint32_t vmem_populators; +static vmem_seg_t vmem_seg0[VMEM_SEG_INITIAL]; +static vmem_seg_t *vmem_segfree; +static kmutex_t vmem_list_lock; +static kmutex_t vmem_segfree_lock; +static kmutex_t vmem_sleep_lock; +static kmutex_t vmem_nosleep_lock; +static kmutex_t vmem_pushpage_lock; +static kmutex_t vmem_panic_lock; +static kmutex_t vmem_xnu_alloc_lock; +static vmem_t *vmem_list; +static vmem_t *vmem_metadata_arena; +static vmem_t *vmem_seg_arena; +static vmem_t *vmem_hash_arena; +static vmem_t *vmem_vmem_arena; +vmem_t *spl_default_arena; // The bottom-most arena for SPL +static vmem_t *spl_default_arena_parent; // dummy arena as a placeholder +#define VMEM_BUCKETS 13 +#define VMEM_BUCKET_LOWBIT 12 +#define VMEM_BUCKET_HIBIT 24 +static vmem_t *vmem_bucket_arena[VMEM_BUCKETS]; +vmem_t *spl_heap_arena; +static void *spl_heap_arena_initial_alloc; +static size_t spl_heap_arena_initial_alloc_size = 0; +#define NUMBER_OF_ARENAS_IN_VMEM_INIT 21 +/* vmem_update() every 15 seconds */ +static struct timespec vmem_update_interval = {15, 0}; +uint32_t vmem_mtbf; /* mean time between failures [default: off] */ +size_t vmem_seg_size = sizeof (vmem_seg_t); + +// must match with include/sys/vmem_impl.h +static vmem_kstat_t vmem_kstat_template = { + { "mem_inuse", KSTAT_DATA_UINT64 }, + { "mem_import", KSTAT_DATA_UINT64 }, + { "mem_total", KSTAT_DATA_UINT64 }, + { "vmem_source", KSTAT_DATA_UINT32 }, + { "alloc", KSTAT_DATA_UINT64 }, + { "free", KSTAT_DATA_UINT64 }, + { "wait", KSTAT_DATA_UINT64 }, + { "fail", KSTAT_DATA_UINT64 }, + { "lookup", KSTAT_DATA_UINT64 }, + { "search", KSTAT_DATA_UINT64 }, + { "populate_fail", KSTAT_DATA_UINT64 }, + { "contains", KSTAT_DATA_UINT64 }, + { "contains_search", KSTAT_DATA_UINT64 }, + { "parent_alloc", KSTAT_DATA_UINT64 }, + { "parent_free", KSTAT_DATA_UINT64 }, + { "threads_waiting", KSTAT_DATA_UINT64 }, + { "excess", KSTAT_DATA_UINT64 }, +}; + + +/* + * Insert/delete from arena list (type 'a') or next-of-kin list (type 'k'). 
+ */ +#define VMEM_INSERT(vprev, vsp, type) \ +{ \ +vmem_seg_t *_vnext = (vprev)->vs_##type##next; \ +(vsp)->vs_##type##next = (_vnext); \ +(vsp)->vs_##type##prev = (vprev); \ +(vprev)->vs_##type##next = (vsp); \ +(_vnext)->vs_##type##prev = (vsp); \ +} + +#define VMEM_DELETE(vsp, type) \ +{ \ +vmem_seg_t *_vprev = (vsp)->vs_##type##prev; \ +vmem_seg_t *_vnext = (vsp)->vs_##type##next; \ +(_vprev)->vs_##type##next = (_vnext); \ +(_vnext)->vs_##type##prev = (_vprev); \ +} + +// vmem thread block count +uint64_t spl_vmem_threads_waiting = 0; + +// number of allocations > minalloc +uint64_t spl_bucket_non_pow2_allocs = 0; + +// allocator kstats +uint64_t spl_vmem_unconditional_allocs = 0; +uint64_t spl_vmem_unconditional_alloc_bytes = 0; +uint64_t spl_vmem_conditional_allocs = 0; +uint64_t spl_vmem_conditional_alloc_bytes = 0; +uint64_t spl_vmem_conditional_alloc_deny = 0; +uint64_t spl_vmem_conditional_alloc_deny_bytes = 0; + +// bucket allocator kstat +uint64_t spl_xat_success = 0; +uint64_t spl_xat_late_success = 0; +uint64_t spl_xat_late_success_nosleep = 0; +uint64_t spl_xat_pressured = 0; +uint64_t spl_xat_bailed = 0; +uint64_t spl_xat_bailed_contended = 0; +uint64_t spl_xat_lastalloc = 0; +uint64_t spl_xat_lastfree = 0; +uint64_t spl_xat_forced = 0; +uint64_t spl_xat_sleep = 0; +uint64_t spl_xat_late_deny = 0; +uint64_t spl_xat_no_waiters = 0; +uint64_t spl_xft_wait = 0; + +uint64_t spl_vba_parent_memory_appeared = 0; +uint64_t spl_vba_parent_memory_blocked = 0; +uint64_t spl_vba_hiprio_blocked = 0; +uint64_t spl_vba_cv_timeout = 0; +uint64_t spl_vba_loop_timeout = 0; +uint64_t spl_vba_cv_timeout_blocked = 0; +uint64_t spl_vba_loop_timeout_blocked = 0; +uint64_t spl_vba_sleep = 0; +uint64_t spl_vba_loop_entries = 0; + +// bucket minimum span size tunables +uint64_t spl_bucket_tunable_large_span = 0; +uint64_t spl_bucket_tunable_small_span = 0; + +// for XAT & XATB visibility into VBA queue +static _Atomic uint32_t spl_vba_threads[VMEM_BUCKETS] = { 0 }; +static uint32_t + vmem_bucket_id_to_bucket_number[NUMBER_OF_ARENAS_IN_VMEM_INIT] = { 0 }; +boolean_t spl_arc_no_grow(size_t, boolean_t, kmem_cache_t **); +_Atomic uint64_t spl_arc_no_grow_bits = 0; +uint64_t spl_arc_no_grow_count = 0; + +// compare span ages this many steps from the head of the freelist +uint64_t spl_frag_max_walk = 1000; +uint64_t spl_frag_walked_out = 0; +uint64_t spl_frag_walk_cnt = 0; + +extern void spl_free_set_emergency_pressure(int64_t p); +extern uint64_t segkmem_total_mem_allocated; +extern uint64_t total_memory; + +extern void IOSleep(unsigned milliseconds); + +/* + * Get a vmem_seg_t from the global segfree list. + */ +static vmem_seg_t * +vmem_getseg_global(void) +{ + vmem_seg_t *vsp; + + mutex_enter(&vmem_segfree_lock); + if ((vsp = vmem_segfree) != NULL) + vmem_segfree = vsp->vs_knext; + mutex_exit(&vmem_segfree_lock); + + if (vsp != NULL) + vsp->vs_span_createtime = 0; + + return (vsp); +} + +/* + * Put a vmem_seg_t on the global segfree list. + */ +static void +vmem_putseg_global(vmem_seg_t *vsp) +{ + mutex_enter(&vmem_segfree_lock); + vsp->vs_knext = vmem_segfree; + vmem_segfree = vsp; + mutex_exit(&vmem_segfree_lock); +} + +/* + * Get a vmem_seg_t from vmp's segfree list. + */ +static vmem_seg_t * +vmem_getseg(vmem_t *vmp) +{ + vmem_seg_t *vsp; + + ASSERT(vmp->vm_nsegfree > 0); + + vsp = vmp->vm_segfree; + vmp->vm_segfree = vsp->vs_knext; + vmp->vm_nsegfree--; + + return (vsp); +} + +/* + * Put a vmem_seg_t on vmp's segfree list. 
+ */ +static void +vmem_putseg(vmem_t *vmp, vmem_seg_t *vsp) +{ + vsp->vs_knext = vmp->vm_segfree; + vmp->vm_segfree = vsp; + vmp->vm_nsegfree++; +} + + +/* + * Add vsp to the appropriate freelist, at the appropriate location, + * keeping the freelist sorted by age. + */ + +#define dprintf(...) + +/* + * return true when we continue the for loop in + * vmem_freelist_insert_sort_by_time + */ +static inline bool +flist_sort_compare(bool newfirst, + const vmem_seg_t *vhead, + const vmem_seg_t *nextlist, + vmem_seg_t *p, vmem_seg_t *to_insert) +{ + /* + * vsp is the segment we are inserting into the freelist + * p is a freelist poniter or an element inside a non-empty freelist + * if we return false, then vsp is inserted immedaitely after p, + */ + + // always enter the for loop if we're at the front of a flist + if (p == vhead) + return (true); + + const vmem_seg_t *n = p->vs_knext; + + if (n == nextlist || n == NULL) { + // if we are at the tail of the flist, then + // insert vsp between p and n + return (false); + } + + if (n->vs_import == true && to_insert->vs_import == false) { + /* + * put non-imported segments before imported segments + * no matter what their respective create times are, + * thereby making imported segments more likely "age out" + */ + return (false); // inserts to_insert between p and n + } + + if (newfirst == true) { + if (n->vs_span_createtime < to_insert->vs_span_createtime) { + // n is older than me, so insert me between p and n + return (false); + } + } else { + if (n->vs_span_createtime > to_insert->vs_span_createtime) { + // n is newer than me, so insert me between p and n + return (false); + } + } + // continue iterating + return (true); +} + +static void +vmem_freelist_insert_sort_by_time(vmem_t *vmp, vmem_seg_t *vsp) +{ + ASSERT(vmp->vm_cflags & VMC_TIMEFREE); + ASSERT(vsp->vs_span_createtime > 0); + + const bool newfirst = 0 == (vmp->vm_cflags & VMC_OLDFIRST); + + const uint64_t abs_max_walk_steps = 1ULL << 30ULL; + uint32_t max_walk_steps = (uint32_t)MIN(spl_frag_max_walk, + abs_max_walk_steps); + + vmem_seg_t *vprev; + + ASSERT(*VMEM_HASH(vmp, vsp->vs_start) != vsp); + + /* + * in vmem_create_common() the freelists are arranged: + * freelist[0].vs_kprev = NULL, freelist[VMEM_FREELISTS].vs_knext = NULL + * freelist[1].vs_kprev = freelist[0], freelist[1].vs_knext = + * freelist[2] ... + * from vmem_freelist_insert(): + * VS_SIZE is the segment size (->vs_end - ->vs_start), so say 8k-512 + * highbit is the higest bit set PLUS 1, so in this case would be the + * 16k list. so below, vprev is therefore pointing to the 8k list + * in vmem_alloc, the unconstrained allocation takes, for a 8k-512 + * block: vsp = flist[8k].vs_knext + * and calls vmem_seg_create() which sends any leftovers from vsp + * to vmem_freelist_insert + * + * vmem_freelist_insert would take the seg (as above, 8k-512 size), + * vprev points to the 16k list, and VMEM_INSERT(vprev, vsp, k) + * inserts the segment immediately after + * + * so vmem_seg_create(...8k-512...) pushes to the head of the 8k list, + * and vmem_alloc(...8-512k...) will pull from the head of the 8k list + * + * below we may want to push to the TAIL of the 8k list, which is + * just before flist[16k]. 
+ */ + + vprev = (vmem_seg_t *)&vmp->vm_freelist[highbit(VS_SIZE(vsp)) - 1]; + + int my_listnum = highbit(VS_SIZE(vsp)) - 1; + + ASSERT(my_listnum >= 1); + ASSERT(my_listnum < VMEM_FREELISTS); + + int next_listnum = my_listnum + 1; + + const vmem_seg_t *nextlist = + (vmem_seg_t *)&vmp->vm_freelist[next_listnum]; + + ASSERT(vsp->vs_span_createtime != 0); + if (vsp->vs_span_createtime == 0) { + printf("SPL: %s: WARNING: vsp->vs_span_createtime == 0 (%s)!\n", + __func__, vmp->vm_name); + } + + // continuing our example, starts with p at flist[8k] + // and n at the following freelist entry + + const vmem_seg_t *vhead = vprev; + vmem_seg_t *p = vprev; + vmem_seg_t *n = p->vs_knext; + + // walk from the freelist head looking for + // a segment whose creation time is earlier than + // the segment to be inserted's creation time, + // then insert before that segment. + + for (uint32_t step = 0; + flist_sort_compare(newfirst, vhead, nextlist, p, vsp) == true; + step++) { + // iterating while predecessor pointer p was created + // at a later tick than funcarg vsp. + // + // below we set p to n and update n. + ASSERT(n != NULL); + if (n == nextlist) { + dprintf("SPL: %s: at marker (%s)(steps: %u) " + "p->vs_start, end == %lu, %lu\n", + __func__, vmp->vm_name, step, + (uintptr_t)p->vs_start, (uintptr_t)p->vs_end); + // IOSleep(1); + // the next entry is the next marker (e.g. 16k marker) + break; + } + if (n->vs_start == 0) { + // from vmem_freelist_delete, this is a head + dprintf("SPL: %s: n->vs_start == 0 (%s)(steps: %u) " + "p->vs_start, end == %lu, %lu\n", + __func__, vmp->vm_name, step, + (uintptr_t)p->vs_start, (uintptr_t)p->vs_end); + // IOSleep(1); + break; + } + if (step >= max_walk_steps) { + ASSERT(nextlist->vs_kprev != NULL); + // we have walked far enough. + // put this segment at the tail of the freelist. + if (nextlist->vs_kprev != NULL) { + n = (vmem_seg_t *)nextlist; + p = nextlist->vs_kprev; + } + dprintf("SPL: %s: walked out (%s)\n", __func__, + vmp->vm_name); + // IOSleep(1); + atomic_inc_64(&spl_frag_walked_out); + break; + } + if (n->vs_knext == NULL) { + dprintf("SPL: %s: n->vs_knext == NULL (my_listnum " + "== %d)\n", __func__, my_listnum); + // IOSleep(1); + break; + } + p = n; + n = n->vs_knext; + atomic_inc_64(&spl_frag_walk_cnt); + } + + ASSERT(p != NULL); + + // insert segment between p and n + + vsp->vs_type = VMEM_FREE; + vmp->vm_freemap |= VS_SIZE(vprev); + VMEM_INSERT(p, vsp, k); + + cv_broadcast(&vmp->vm_cv); +} + +/* + * Add vsp to the appropriate freelist. + */ +static void +vmem_freelist_insert(vmem_t *vmp, vmem_seg_t *vsp) +{ + + if (vmp->vm_cflags & VMC_TIMEFREE) { + vmem_freelist_insert_sort_by_time(vmp, vsp); + return; + } + + vmem_seg_t *vprev; + + ASSERT(*VMEM_HASH(vmp, vsp->vs_start) != vsp); + + vprev = (vmem_seg_t *)&vmp->vm_freelist[highbit(VS_SIZE(vsp)) - 1]; + vsp->vs_type = VMEM_FREE; + vmp->vm_freemap |= VS_SIZE(vprev); + VMEM_INSERT(vprev, vsp, k); + + cv_broadcast(&vmp->vm_cv); +} + +/* + * Take vsp from the freelist. + */ +static void +vmem_freelist_delete(vmem_t *vmp, vmem_seg_t *vsp) +{ + ASSERT(*VMEM_HASH(vmp, vsp->vs_start) != vsp); + ASSERT(vsp->vs_type == VMEM_FREE); + + if (vsp->vs_knext->vs_start == 0 && vsp->vs_kprev->vs_start == 0) { + /* + * The segments on both sides of 'vsp' are freelist heads, + * so taking vsp leaves the freelist at vsp->vs_kprev empty. 
+ */ + ASSERT(vmp->vm_freemap & VS_SIZE(vsp->vs_kprev)); + vmp->vm_freemap ^= VS_SIZE(vsp->vs_kprev); + } + VMEM_DELETE(vsp, k); +} + +/* + * Add vsp to the allocated-segment hash table and update kstats. + */ +static void +vmem_hash_insert(vmem_t *vmp, vmem_seg_t *vsp) +{ + vmem_seg_t **bucket; + + vsp->vs_type = VMEM_ALLOC; + bucket = VMEM_HASH(vmp, vsp->vs_start); + vsp->vs_knext = *bucket; + *bucket = vsp; + + if (vmem_seg_size == sizeof (vmem_seg_t)) { + // vsp->vs_depth = (uint8_t)getpcstack(vsp->vs_stack, + // VMEM_STACK_DEPTH); + // vsp->vs_thread = curthread; + vsp->vs_depth = 0; + vsp->vs_thread = 0; + vsp->vs_timestamp = gethrtime(); + } else { + vsp->vs_depth = 0; + } + + vmp->vm_kstat.vk_alloc.value.ui64++; + vmp->vm_kstat.vk_mem_inuse.value.ui64 += VS_SIZE(vsp); +} + +/* + * Remove vsp from the allocated-segment hash table and update kstats. + */ +static vmem_seg_t * +vmem_hash_delete(vmem_t *vmp, uintptr_t addr, size_t size) +{ + vmem_seg_t *vsp, **prev_vspp; + + prev_vspp = VMEM_HASH(vmp, addr); + while ((vsp = *prev_vspp) != NULL) { + if (vsp->vs_start == addr) { + *prev_vspp = vsp->vs_knext; + break; + } + vmp->vm_kstat.vk_lookup.value.ui64++; + prev_vspp = &vsp->vs_knext; + } + + if (vsp == NULL) + panic("vmem_hash_delete(%p, %lx, %lu): bad free " + "(name: %s, addr, size)", + (void *)vmp, addr, size, vmp->vm_name); + if (VS_SIZE(vsp) != size) + panic("vmem_hash_delete(%p, %lx, %lu): (%s) wrong size" + "(expect %lu)", + (void *)vmp, addr, size, vmp->vm_name, VS_SIZE(vsp)); + + vmp->vm_kstat.vk_free.value.ui64++; + vmp->vm_kstat.vk_mem_inuse.value.ui64 -= size; + + return (vsp); +} + +/* + * Create a segment spanning the range [start, end) and add it to the arena. + */ +static vmem_seg_t * +vmem_seg_create(vmem_t *vmp, vmem_seg_t *vprev, uintptr_t start, uintptr_t end) +{ + vmem_seg_t *newseg = vmem_getseg(vmp); + + newseg->vs_start = start; + newseg->vs_end = end; + newseg->vs_type = 0; + newseg->vs_import = 0; + newseg->vs_span_createtime = 0; + + VMEM_INSERT(vprev, newseg, a); + + return (newseg); +} + +/* + * Remove segment vsp from the arena. + */ +static void +vmem_seg_destroy(vmem_t *vmp, vmem_seg_t *vsp) +{ + ASSERT(vsp->vs_type != VMEM_ROTOR); + VMEM_DELETE(vsp, a); + + vmem_putseg(vmp, vsp); +} + +/* + * Add the span [vaddr, vaddr + size) to vmp and update kstats. + */ +static vmem_seg_t * +vmem_span_create(vmem_t *vmp, void *vaddr, size_t size, uint8_t import) +{ + vmem_seg_t *newseg, *span; + uintptr_t start = (uintptr_t)vaddr; + uintptr_t end = start + size; + + ASSERT(MUTEX_HELD(&vmp->vm_lock)); + + if ((start | end) & (vmp->vm_quantum - 1)) + panic("vmem_span_create(%p, %p, %lu): misaligned (%s)", + (void *)vmp, vaddr, size, vmp->vm_name); + + span = vmem_seg_create(vmp, vmp->vm_seg0.vs_aprev, start, end); + span->vs_type = VMEM_SPAN; + span->vs_import = import; + + hrtime_t t = 0; + if (vmp->vm_cflags & VMC_TIMEFREE) { + t = gethrtime(); + } + span->vs_span_createtime = t; + + VMEM_INSERT(vmp->vm_seg0.vs_kprev, span, k); + + newseg = vmem_seg_create(vmp, span, start, end); + newseg->vs_span_createtime = t; + + vmem_freelist_insert(vmp, newseg); + + if (import) + vmp->vm_kstat.vk_mem_import.value.ui64 += size; + vmp->vm_kstat.vk_mem_total.value.ui64 += size; + + return (newseg); +} + +/* + * Remove span vsp from vmp and update kstats. 
+ */ +static void +vmem_span_destroy(vmem_t *vmp, vmem_seg_t *vsp) +{ + vmem_seg_t *span = vsp->vs_aprev; + size_t size = VS_SIZE(vsp); + + ASSERT(MUTEX_HELD(&vmp->vm_lock)); + ASSERT(span->vs_type == VMEM_SPAN); + + if (span->vs_import) + vmp->vm_kstat.vk_mem_import.value.ui64 -= size; + vmp->vm_kstat.vk_mem_total.value.ui64 -= size; + + VMEM_DELETE(span, k); + + vmem_seg_destroy(vmp, vsp); + vmem_seg_destroy(vmp, span); +} + +/* + * Allocate the subrange [addr, addr + size) from segment vsp. + * If there are leftovers on either side, place them on the freelist. + * Returns a pointer to the segment representing [addr, addr + size). + */ +static vmem_seg_t * +vmem_seg_alloc(vmem_t *vmp, vmem_seg_t *vsp, uintptr_t addr, size_t size) +{ + uintptr_t vs_start = vsp->vs_start; + uintptr_t vs_end = vsp->vs_end; + size_t vs_size = vs_end - vs_start; + size_t realsize = P2ROUNDUP(size, vmp->vm_quantum); + uintptr_t addr_end = addr + realsize; + + ASSERT(P2PHASE(vs_start, vmp->vm_quantum) == 0); + ASSERT(P2PHASE(addr, vmp->vm_quantum) == 0); + ASSERT(vsp->vs_type == VMEM_FREE); + ASSERT(addr >= vs_start && addr_end - 1 <= vs_end - 1); + ASSERT(addr - 1 <= addr_end - 1); + + hrtime_t parent_seg_span_createtime = vsp->vs_span_createtime; + + /* + * If we're allocating from the start of the segment, and the + * remainder will be on the same freelist, we can save quite + * a bit of work. + */ + if (P2SAMEHIGHBIT(vs_size, vs_size - realsize) && addr == vs_start) { + ASSERT(highbit(vs_size) == highbit(vs_size - realsize)); + vsp->vs_start = addr_end; + vsp = vmem_seg_create(vmp, vsp->vs_aprev, addr, addr + size); + vsp->vs_span_createtime = parent_seg_span_createtime; + vmem_hash_insert(vmp, vsp); + return (vsp); + } + + vmem_freelist_delete(vmp, vsp); + + if (vs_end != addr_end) { + vmem_seg_t *v = vmem_seg_create(vmp, vsp, addr_end, vs_end); + v->vs_span_createtime = parent_seg_span_createtime; + vmem_freelist_insert(vmp, v); + } + + if (vs_start != addr) { + vmem_seg_t *v = + vmem_seg_create(vmp, vsp->vs_aprev, vs_start, addr); + v->vs_span_createtime = parent_seg_span_createtime; + vmem_freelist_insert(vmp, v); + } + + vsp->vs_start = addr; + vsp->vs_end = addr + size; + + vsp->vs_span_createtime = parent_seg_span_createtime; + + vmem_hash_insert(vmp, vsp); + return (vsp); +} + +/* + * Returns 1 if we are populating, 0 otherwise. + * Call it if we want to prevent recursion from HAT. + */ +int +vmem_is_populator() +{ + return (mutex_owner(&vmem_sleep_lock) == curthread || + mutex_owner(&vmem_nosleep_lock) == curthread || + mutex_owner(&vmem_pushpage_lock) == curthread || + mutex_owner(&vmem_panic_lock) == curthread); +} + +/* + * Populate vmp's segfree list with VMEM_MINFREE vmem_seg_t structures. + */ +static int +vmem_populate(vmem_t *vmp, int vmflag) +{ + char *p; + vmem_seg_t *vsp; + ssize_t nseg; + size_t size; + kmutex_t *lp; + int i; + + while (vmp->vm_nsegfree < VMEM_MINFREE && + (vsp = vmem_getseg_global()) != NULL) + vmem_putseg(vmp, vsp); + + if (vmp->vm_nsegfree >= VMEM_MINFREE) + return (1); + + /* + * If we're already populating, tap the reserve. 
+ */ + if (vmem_is_populator()) { + ASSERT(vmp->vm_cflags & VMC_POPULATOR); + return (1); + } + + mutex_exit(&vmp->vm_lock); + + // if (panic_thread == curthread) + // lp = &vmem_panic_lock; + // else + + if (vmflag & VM_NOSLEEP) + lp = &vmem_nosleep_lock; + else if (vmflag & VM_PUSHPAGE) + lp = &vmem_pushpage_lock; + else + lp = &vmem_sleep_lock; + + mutex_enter(lp); + + nseg = VMEM_MINFREE + vmem_populators * VMEM_POPULATE_RESERVE; + size = P2ROUNDUP(nseg * vmem_seg_size, vmem_seg_arena->vm_quantum); + nseg = size / vmem_seg_size; + + /* + * The following vmem_alloc() may need to populate vmem_seg_arena + * and all the things it imports from. When doing so, it will tap + * each arena's reserve to prevent recursion (see the block comment + * above the definition of VMEM_POPULATE_RESERVE). + */ + p = vmem_alloc(vmem_seg_arena, size, vmflag & VM_KMFLAGS); + if (p == NULL) { + mutex_exit(lp); + mutex_enter(&vmp->vm_lock); + vmp->vm_kstat.vk_populate_fail.value.ui64++; + return (0); + } + + /* + * Restock the arenas that may have been depleted during population. + */ + for (i = 0; i < vmem_populators; i++) { + mutex_enter(&vmem_populator[i]->vm_lock); + while (vmem_populator[i]->vm_nsegfree < VMEM_POPULATE_RESERVE) + vmem_putseg(vmem_populator[i], + (vmem_seg_t *)(p + --nseg * vmem_seg_size)); + mutex_exit(&vmem_populator[i]->vm_lock); + } + + mutex_exit(lp); + mutex_enter(&vmp->vm_lock); + + /* + * Now take our own segments. + */ + ASSERT(nseg >= VMEM_MINFREE); + while (vmp->vm_nsegfree < VMEM_MINFREE) + vmem_putseg(vmp, (vmem_seg_t *)(p + --nseg * vmem_seg_size)); + + /* + * Give the remainder to charity. + */ + while (nseg > 0) + vmem_putseg_global((vmem_seg_t *)(p + --nseg * vmem_seg_size)); + + return (1); +} + +/* + * Advance a walker from its previous position to 'afterme'. + * Note: may drop and reacquire vmp->vm_lock. + */ +static void +vmem_advance(vmem_t *vmp, vmem_seg_t *walker, vmem_seg_t *afterme) +{ + vmem_seg_t *vprev = walker->vs_aprev; + vmem_seg_t *vnext = walker->vs_anext; + vmem_seg_t *vsp = NULL; + + VMEM_DELETE(walker, a); + + if (afterme != NULL) + VMEM_INSERT(afterme, walker, a); + + /* + * The walker segment's presence may have prevented its neighbors + * from coalescing. If so, coalesce them now. + */ + if (vprev->vs_type == VMEM_FREE) { + if (vnext->vs_type == VMEM_FREE) { + ASSERT(vprev->vs_end == vnext->vs_start); + ASSERT(vprev->vs_span_createtime == + vnext->vs_span_createtime); + vmem_freelist_delete(vmp, vnext); + vmem_freelist_delete(vmp, vprev); + vprev->vs_end = vnext->vs_end; + vmem_freelist_insert(vmp, vprev); + vmem_seg_destroy(vmp, vnext); + } + vsp = vprev; + } else if (vnext->vs_type == VMEM_FREE) { + vsp = vnext; + } + + /* + * vsp could represent a complete imported span, + * in which case we must return it to the source. + */ + if (vsp != NULL && vsp->vs_aprev->vs_import && + vmp->vm_source_free != NULL && + vsp->vs_aprev->vs_type == VMEM_SPAN && + vsp->vs_anext->vs_type == VMEM_SPAN) { + void *vaddr = (void *)vsp->vs_start; + size_t size = VS_SIZE(vsp); + ASSERT(size == VS_SIZE(vsp->vs_aprev)); + vmem_freelist_delete(vmp, vsp); + vmem_span_destroy(vmp, vsp); + vmp->vm_kstat.vk_parent_free.value.ui64++; + mutex_exit(&vmp->vm_lock); + vmp->vm_source_free(vmp->vm_source, vaddr, size); + mutex_enter(&vmp->vm_lock); + } +} + +/* + * VM_NEXTFIT allocations deliberately cycle through all virtual addresses + * in an arena, so that we avoid reusing addresses for as long as possible. + * This helps to catch used-after-freed bugs. 
It's also the perfect policy + * for allocating things like process IDs, where we want to cycle through + * all values in order. + */ +static void * +vmem_nextfit_alloc(vmem_t *vmp, size_t size, int vmflag) +{ + vmem_seg_t *vsp, *rotor; + uintptr_t addr; + size_t realsize = P2ROUNDUP(size, vmp->vm_quantum); + size_t vs_size; + + mutex_enter(&vmp->vm_lock); + + if (vmp->vm_nsegfree < VMEM_MINFREE && !vmem_populate(vmp, vmflag)) { + mutex_exit(&vmp->vm_lock); + return (NULL); + } + + /* + * The common case is that the segment right after the rotor is free, + * and large enough that extracting 'size' bytes won't change which + * freelist it's on. In this case we can avoid a *lot* of work. + * Instead of the normal vmem_seg_alloc(), we just advance the start + * address of the victim segment. Instead of moving the rotor, we + * create the new segment structure *behind the rotor*, which has + * the same effect. And finally, we know we don't have to coalesce + * the rotor's neighbors because the new segment lies between them. + */ + rotor = &vmp->vm_rotor; + vsp = rotor->vs_anext; + if (vsp->vs_type == VMEM_FREE && (vs_size = VS_SIZE(vsp)) > realsize && + P2SAMEHIGHBIT(vs_size, vs_size - realsize)) { + ASSERT(highbit(vs_size) == highbit(vs_size - realsize)); + addr = vsp->vs_start; + vsp->vs_start = addr + realsize; + hrtime_t t = vsp->vs_span_createtime; + vmem_hash_insert(vmp, + vmem_seg_create(vmp, rotor->vs_aprev, addr, addr + size)); + vsp->vs_span_createtime = t; + mutex_exit(&vmp->vm_lock); + return ((void *)addr); + } + + /* + * Starting at the rotor, look for a segment large enough to + * satisfy the allocation. + */ + for (;;) { + atomic_inc_64(&vmp->vm_kstat.vk_search.value.ui64); + if (vsp->vs_type == VMEM_FREE && VS_SIZE(vsp) >= size) + break; + vsp = vsp->vs_anext; + if (vsp == rotor) { + /* + * We've come full circle. One possibility is that the + * there's actually enough space, but the rotor itself + * is preventing the allocation from succeeding because + * it's sitting between two free segments. Therefore, + * we advance the rotor and see if that liberates a + * suitable segment. + */ + vmem_advance(vmp, rotor, rotor->vs_anext); + vsp = rotor->vs_aprev; + if (vsp->vs_type == VMEM_FREE && VS_SIZE(vsp) >= size) + break; + /* + * If there's a lower arena we can import from, or it's + * a VM_NOSLEEP allocation, let vmem_xalloc() handle it. + * Otherwise, wait until another thread frees something. + */ + if (vmp->vm_source_alloc != NULL || + (vmflag & VM_NOSLEEP)) { + mutex_exit(&vmp->vm_lock); + return (vmem_xalloc(vmp, size, vmp->vm_quantum, + 0, 0, NULL, NULL, + vmflag & (VM_KMFLAGS | VM_NEXTFIT))); + } + atomic_inc_64(&vmp->vm_kstat.vk_wait.value.ui64); + atomic_inc_64( + &vmp->vm_kstat.vk_threads_waiting.value.ui64); + atomic_inc_64(&spl_vmem_threads_waiting); + if (spl_vmem_threads_waiting > 1) + printf("SPL: %s: waiting for %lu sized alloc " + "after full circle of %s, waiting " + "threads %llu, total threads waiting " + "= %llu.\n", + __func__, size, vmp->vm_name, + vmp->vm_kstat.vk_threads_waiting.value.ui64, + spl_vmem_threads_waiting); + cv_wait(&vmp->vm_cv, &vmp->vm_lock); + atomic_dec_64(&spl_vmem_threads_waiting); + atomic_dec_64( + &vmp->vm_kstat.vk_threads_waiting.value.ui64); + vsp = rotor->vs_anext; + } + } + + /* + * We found a segment. Extract enough space to satisfy the allocation. 
+ */ + addr = vsp->vs_start; + vsp = vmem_seg_alloc(vmp, vsp, addr, size); + ASSERT(vsp->vs_type == VMEM_ALLOC && + vsp->vs_start == addr && vsp->vs_end == addr + size); + + /* + * Advance the rotor to right after the newly-allocated segment. + * That's where the next VM_NEXTFIT allocation will begin searching. + */ + vmem_advance(vmp, rotor, vsp); + mutex_exit(&vmp->vm_lock); + return ((void *)addr); +} + +/* + * Checks if vmp is guaranteed to have a size-byte buffer somewhere on its + * freelist. If size is not a power-of-2, it can return a false-negative. + * + * Used to decide if a newly imported span is superfluous after re-acquiring + * the arena lock. + */ +static int +vmem_canalloc(vmem_t *vmp, size_t size) +{ + int hb; + int flist = 0; + ASSERT(MUTEX_HELD(&vmp->vm_lock)); + + if ((size & (size - 1)) == 0) + flist = lowbit(P2ALIGN(vmp->vm_freemap, size)); + else if ((hb = highbit(size)) < VMEM_FREELISTS) + flist = lowbit(P2ALIGN(vmp->vm_freemap, 1ULL << hb)); + + return (flist); +} + +// Convenience functions for use when gauging +// allocation ability when not holding the lock. +// These are unreliable because vmp->vm_freemap is +// liable to change immediately after being examined. +int +vmem_canalloc_lock(vmem_t *vmp, size_t size) +{ + mutex_enter(&vmp->vm_lock); + int i = vmem_canalloc(vmp, size); + mutex_exit(&vmp->vm_lock); + return (i); +} + +int +vmem_canalloc_atomic(vmem_t *vmp, size_t size) +{ + int hb; + int flist = 0; + + ulong_t freemap = + __c11_atomic_load((_Atomic ulong_t *)&vmp->vm_freemap, + __ATOMIC_SEQ_CST); + + if (ISP2(size)) + flist = lowbit(P2ALIGN(freemap, size)); + else if ((hb = highbit(size)) < VMEM_FREELISTS) + flist = lowbit(P2ALIGN(freemap, 1ULL << hb)); + + return (flist); +} + +static inline uint64_t +spl_vmem_xnu_useful_bytes_free(void) +{ + extern volatile unsigned int vm_page_free_wanted; + extern volatile unsigned int vm_page_free_count; + extern volatile unsigned int vm_page_free_min; + + if (vm_page_free_wanted > 0) + return (0); + + uint64_t bytes_free = (uint64_t)vm_page_free_count * (uint64_t)PAGESIZE; + uint64_t bytes_min = (uint64_t)vm_page_free_min * (uint64_t)PAGESIZE; + + if (bytes_free <= bytes_min) + return (0); + + uint64_t useful_free = bytes_free - bytes_min; + + return (useful_free); +} + +uint64_t +vmem_xnu_useful_bytes_free(void) +{ + return (spl_vmem_xnu_useful_bytes_free()); +} + + +static void * +spl_vmem_malloc_unconditionally_unlocked(size_t size) +{ + extern void *osif_malloc(uint64_t); + atomic_inc_64(&spl_vmem_unconditional_allocs); + atomic_add_64(&spl_vmem_unconditional_alloc_bytes, size); + return (osif_malloc(size)); +} + +static void * +spl_vmem_malloc_unconditionally(size_t size) +{ + mutex_enter(&vmem_xnu_alloc_lock); + void *m = spl_vmem_malloc_unconditionally_unlocked(size); + mutex_exit(&vmem_xnu_alloc_lock); + return (m); +} + +static void * +spl_vmem_malloc_if_no_pressure(size_t size) +{ + // The mutex serializes concurrent callers, providing time for + // the variables in spl_vmem_xnu_useful_bytes_free() to be updated. 
+ mutex_enter(&vmem_xnu_alloc_lock); + if (spl_vmem_xnu_useful_bytes_free() > (MAX(size, 1024ULL*1024ULL))) { + extern void *osif_malloc(uint64_t); + void *p = osif_malloc(size); + if (p != NULL) { + spl_vmem_conditional_allocs++; + spl_vmem_conditional_alloc_bytes += size; + } + mutex_exit(&vmem_xnu_alloc_lock); + return (p); + } else { + spl_vmem_conditional_alloc_deny++; + spl_vmem_conditional_alloc_deny_bytes += size; + mutex_exit(&vmem_xnu_alloc_lock); + return (NULL); + } +} + +/* + * Allocate size bytes at offset phase from an align boundary such that the + * resulting segment [addr, addr + size) is a subset of [minaddr, maxaddr) + * that does not straddle a nocross-aligned boundary. + */ +void * +vmem_xalloc(vmem_t *vmp, size_t size, size_t align_arg, size_t phase, + size_t nocross, void *minaddr, void *maxaddr, int vmflag) +{ + vmem_seg_t *vsp; + vmem_seg_t *vbest = NULL; + uintptr_t addr, taddr, start, end; + uintptr_t align = (align_arg != 0) ? align_arg : vmp->vm_quantum; + void *vaddr, *xvaddr = NULL; + size_t xsize; + int hb, flist, resv; + uint32_t mtbf; + + if ((align | phase | nocross) & (vmp->vm_quantum - 1)) + panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " + "parameters not vm_quantum aligned", + (void *)vmp, size, align_arg, phase, nocross, + minaddr, maxaddr, vmflag); + + if (nocross != 0 && + (align > nocross || P2ROUNDUP(phase + size, align) > nocross)) + panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " + "overconstrained allocation", + (void *)vmp, size, align_arg, phase, nocross, + minaddr, maxaddr, vmflag); + + if (phase >= align || (align & (align - 1)) != 0 || + (nocross & (nocross - 1)) != 0) + panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " + "parameters inconsistent or invalid", + (void *)vmp, size, align_arg, phase, nocross, + minaddr, maxaddr, vmflag); + + if ((mtbf = vmem_mtbf | vmp->vm_mtbf) != 0 && gethrtime() % mtbf == 0 && + (vmflag & (VM_NOSLEEP | VM_PANIC)) == VM_NOSLEEP) + return (NULL); + + mutex_enter(&vmp->vm_lock); + for (;;) { + if (vmp->vm_nsegfree < VMEM_MINFREE && + !vmem_populate(vmp, vmflag)) + break; +do_alloc: + /* + * highbit() returns the highest bit + 1, which is exactly + * what we want: we want to search the first freelist whose + * members are *definitely* large enough to satisfy our + * allocation. However, there are certain cases in which we + * want to look at the next-smallest freelist (which *might* + * be able to satisfy the allocation): + * + * (1) The size is exactly a power of 2, in which case + * the smaller freelist is always big enough; + * + * (2) All other freelists are empty; + * + * (3) We're in the highest possible freelist, which is + * always empty (e.g. the 4GB freelist on 32-bit systems); + * + * (4) We're doing a best-fit or first-fit allocation. + */ + if ((size & (size - 1)) == 0) { + flist = lowbit(P2ALIGN(vmp->vm_freemap, size)); + } else { + hb = highbit(size); + if ((vmp->vm_freemap >> hb) == 0 || + hb == VMEM_FREELISTS || + (vmflag & (VM_BESTFIT | VM_FIRSTFIT))) + hb--; + flist = lowbit(P2ALIGN(vmp->vm_freemap, 1UL << hb)); + } + + for (vbest = NULL, vsp = (flist == 0) ? NULL : + vmp->vm_freelist[flist - 1].vs_knext; + vsp != NULL; vsp = vsp->vs_knext) { + atomic_inc_64(&vmp->vm_kstat.vk_search.value.ui64); + if (vsp->vs_start == 0) { + /* + * We're moving up to a larger freelist, + * so if we've already found a candidate, + * the fit can't possibly get any better. + */ + if (vbest != NULL) + break; + /* + * Find the next non-empty freelist. 
+ */ + flist = lowbit(P2ALIGN(vmp->vm_freemap, + VS_SIZE(vsp))); + if (flist-- == 0) + break; + vsp = (vmem_seg_t *)&vmp->vm_freelist[flist]; + ASSERT(vsp->vs_knext->vs_type == VMEM_FREE); + continue; + } + if (vsp->vs_end - 1 < (uintptr_t)minaddr) + continue; + if (vsp->vs_start > (uintptr_t)maxaddr - 1) + continue; + start = MAX(vsp->vs_start, (uintptr_t)minaddr); + end = MIN(vsp->vs_end - 1, (uintptr_t)maxaddr - 1) + 1; + taddr = P2PHASEUP(start, align, phase); + if (P2BOUNDARY(taddr, size, nocross)) + taddr += + P2ROUNDUP(P2NPHASE(taddr, nocross), align); + if ((taddr - start) + size > end - start || + (vbest != NULL && VS_SIZE(vsp) >= VS_SIZE(vbest))) + continue; + vbest = vsp; + addr = taddr; + if (!(vmflag & VM_BESTFIT) || VS_SIZE(vbest) == size) + break; + } + if (vbest != NULL) + break; + ASSERT(xvaddr == NULL); + if (size == 0) + panic("vmem_xalloc(): size == 0"); + if (vmp->vm_source_alloc != NULL && nocross == 0 && + minaddr == NULL && maxaddr == NULL) { + size_t aneeded, asize; + size_t aquantum = MAX(vmp->vm_quantum, + vmp->vm_source->vm_quantum); + size_t aphase = phase; + if ((align > aquantum) && + !(vmp->vm_cflags & VMC_XALIGN)) { + aphase = (P2PHASE(phase, aquantum) != 0) ? + align - vmp->vm_quantum : align - aquantum; + ASSERT(aphase >= phase); + } + aneeded = MAX(size + aphase, vmp->vm_min_import); + asize = P2ROUNDUP(aneeded, aquantum); + + if (asize < size) { + /* + * The rounding induced overflow; return NULL + * if we are permitted to fail the allocation + * (and explicitly panic if we aren't). + */ + if ((vmflag & VM_NOSLEEP) && + !(vmflag & VM_PANIC)) { + mutex_exit(&vmp->vm_lock); + return (NULL); + } + + panic("vmem_xalloc(): size overflow"); + } + + /* + * Determine how many segment structures we'll consume. + * The calculation must be precise because if we're + * here on behalf of vmem_populate(), we are taking + * segments from a very limited reserve. + */ + if (size == asize && !(vmp->vm_cflags & VMC_XALLOC)) + resv = VMEM_SEGS_PER_SPAN_CREATE + + VMEM_SEGS_PER_EXACT_ALLOC; + else if (phase == 0 && + align <= vmp->vm_source->vm_quantum) + resv = VMEM_SEGS_PER_SPAN_CREATE + + VMEM_SEGS_PER_LEFT_ALLOC; + else + resv = VMEM_SEGS_PER_ALLOC_MAX; + + ASSERT(vmp->vm_nsegfree >= resv); + vmp->vm_nsegfree -= resv; /* reserve our segs */ + mutex_exit(&vmp->vm_lock); + if (vmp->vm_cflags & VMC_XALLOC) { + // size_t oasize = asize; + vaddr = ((vmem_ximport_t *) + vmp->vm_source_alloc)(vmp->vm_source, + &asize, align, vmflag & VM_KMFLAGS); + // ASSERT(asize >= oasize); + ASSERT(P2PHASE(asize, + vmp->vm_source->vm_quantum) == 0); + ASSERT(!(vmp->vm_cflags & VMC_XALIGN) || + IS_P2ALIGNED(vaddr, align)); + } else { + atomic_inc_64( + &vmp->vm_kstat.vk_parent_alloc.value.ui64); + vaddr = vmp->vm_source_alloc(vmp->vm_source, + asize, vmflag & (VM_KMFLAGS | VM_NEXTFIT)); + } + mutex_enter(&vmp->vm_lock); + vmp->vm_nsegfree += resv; /* claim reservation */ + aneeded = size + align - vmp->vm_quantum; + aneeded = P2ROUNDUP(aneeded, vmp->vm_quantum); + if (vaddr != NULL) { + /* + * Since we dropped the vmem lock while + * calling the import function, other + * threads could have imported space + * and made our import unnecessary. In + * order to save space, we return + * excess imports immediately. + */ + // but if there are threads waiting below, + // do not return the excess import, rather + // wake those threads up so they can use it. 
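+			// Concretely (from the two branches below): the span
+			// is handed back to the parent only when nobody is
+			// recorded in vk_threads_waiting and the arena can
+			// already satisfy this request; if threads are
+			// waiting, the excess is kept, counted in vk_excess,
+			// and the arena condvar is broadcast instead.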
+ if (asize > aneeded && + vmp->vm_source_free != NULL && + vmp->vm_kstat.vk_threads_waiting.value.ui64 + == 0 && vmem_canalloc(vmp, aneeded)) { + ASSERT(resv >= + VMEM_SEGS_PER_MIDDLE_ALLOC); + xvaddr = vaddr; + xsize = asize; + goto do_alloc; + } else if ( + vmp->vm_kstat.vk_threads_waiting.value.ui64 + > 0) { + vmp->vm_kstat.vk_excess.value.ui64++; + cv_broadcast(&vmp->vm_cv); + } + vbest = vmem_span_create(vmp, vaddr, asize, 1); + addr = P2PHASEUP(vbest->vs_start, align, phase); + break; + } else if (vmem_canalloc(vmp, aneeded)) { + /* + * Our import failed, but another thread + * added sufficient free memory to the arena + * to satisfy our request. Go back and + * grab it. + */ + ASSERT(resv >= VMEM_SEGS_PER_MIDDLE_ALLOC); + goto do_alloc; + } + } + + /* + * If the requestor chooses to fail the allocation attempt + * rather than reap wait and retry - get out of the loop. + */ + if (vmflag & VM_ABORT) + break; + mutex_exit(&vmp->vm_lock); + +#if 0 + if (vmp->vm_cflags & VMC_IDENTIFIER) + kmem_reap_idspace(); + else + kmem_reap(); +#endif + + mutex_enter(&vmp->vm_lock); + if (vmflag & VM_NOSLEEP) + break; + atomic_inc_64(&vmp->vm_kstat.vk_wait.value.ui64); + atomic_inc_64(&vmp->vm_kstat.vk_threads_waiting.value.ui64); + atomic_inc_64(&spl_vmem_threads_waiting); + if (spl_vmem_threads_waiting > 0) { + printf("SPL: %s: vmem waiting for %lu sized alloc " + "for %s, waiting threads %llu, total threads " + "waiting = %llu\n", + __func__, size, vmp->vm_name, + vmp->vm_kstat.vk_threads_waiting.value.ui64, + spl_vmem_threads_waiting); + extern int64_t spl_free_set_and_wait_pressure(int64_t, + boolean_t, clock_t); + extern int64_t spl_free_manual_pressure_wrapper(void); + mutex_exit(&vmp->vm_lock); + // release other waiting threads + spl_free_set_pressure(0); + int64_t target_pressure = size * + spl_vmem_threads_waiting; + int64_t delivered_pressure = + spl_free_set_and_wait_pressure(target_pressure, + TRUE, USEC2NSEC(500)); + printf("SPL: %s: pressure %lld targeted, %lld " + "delivered\n", __func__, target_pressure, + delivered_pressure); + mutex_enter(&vmp->vm_lock); + } + cv_wait(&vmp->vm_cv, &vmp->vm_lock); + atomic_dec_64(&spl_vmem_threads_waiting); + atomic_dec_64(&vmp->vm_kstat.vk_threads_waiting.value.ui64); + } + if (vbest != NULL) { + ASSERT(vbest->vs_type == VMEM_FREE); + ASSERT(vbest->vs_knext != vbest); + /* re-position to end of buffer */ + if (vmflag & VM_ENDALLOC) { + addr += ((vbest->vs_end - (addr + size)) / align) * + align; + } + (void) vmem_seg_alloc(vmp, vbest, addr, size); + mutex_exit(&vmp->vm_lock); + if (xvaddr) { + atomic_inc_64(&vmp->vm_kstat.vk_parent_free.value.ui64); + vmp->vm_source_free(vmp->vm_source, xvaddr, xsize); + } + ASSERT(P2PHASE(addr, align) == phase); + ASSERT(!P2BOUNDARY(addr, size, nocross)); + ASSERT(addr >= (uintptr_t)minaddr); + ASSERT(addr + size - 1 <= (uintptr_t)maxaddr - 1); + return ((void *)addr); + } + if (0 == (vmflag & VM_NO_VBA)) { + vmp->vm_kstat.vk_fail.value.ui64++; + } + mutex_exit(&vmp->vm_lock); + if (vmflag & VM_PANIC) + panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " + "cannot satisfy mandatory allocation", + (void *)vmp, size, align_arg, phase, nocross, + minaddr, maxaddr, vmflag); + ASSERT(xvaddr == NULL); + return (NULL); +} + +/* + * Free the segment [vaddr, vaddr + size), where vaddr was a constrained + * allocation. vmem_xalloc() and vmem_xfree() must always be paired because + * both routines bypass the quantum caches. 
+ */ +void +vmem_xfree(vmem_t *vmp, void *vaddr, size_t size) +{ + vmem_seg_t *vsp, *vnext, *vprev; + + mutex_enter(&vmp->vm_lock); + + vsp = vmem_hash_delete(vmp, (uintptr_t)vaddr, size); + vsp->vs_end = P2ROUNDUP(vsp->vs_end, vmp->vm_quantum); + + /* + * Attempt to coalesce with the next segment. + */ + vnext = vsp->vs_anext; + if (vnext->vs_type == VMEM_FREE) { + ASSERT(vsp->vs_end == vnext->vs_start); + vmem_freelist_delete(vmp, vnext); + vsp->vs_end = vnext->vs_end; + vmem_seg_destroy(vmp, vnext); + } + + /* + * Attempt to coalesce with the previous segment. + */ + vprev = vsp->vs_aprev; + if (vprev->vs_type == VMEM_FREE) { + ASSERT(vprev->vs_end == vsp->vs_start); + vmem_freelist_delete(vmp, vprev); + vprev->vs_end = vsp->vs_end; + vmem_seg_destroy(vmp, vsp); + vsp = vprev; + } + + /* + * If the entire span is free, return it to the source. + */ + if (vsp->vs_aprev->vs_import && vmp->vm_source_free != NULL && + vsp->vs_aprev->vs_type == VMEM_SPAN && + vsp->vs_anext->vs_type == VMEM_SPAN) { + vaddr = (void *)vsp->vs_start; + size = VS_SIZE(vsp); + ASSERT(size == VS_SIZE(vsp->vs_aprev)); + vmem_span_destroy(vmp, vsp); + vmp->vm_kstat.vk_parent_free.value.ui64++; + mutex_exit(&vmp->vm_lock); + vmp->vm_source_free(vmp->vm_source, vaddr, size); + } else { + vmem_freelist_insert(vmp, vsp); + mutex_exit(&vmp->vm_lock); + } +} + +/* + * Allocate size bytes from arena vmp. Returns the allocated address + * on success, NULL on failure. vmflag specifies VM_SLEEP or VM_NOSLEEP, + * and may also specify best-fit, first-fit, or next-fit allocation policy + * instead of the default instant-fit policy. VM_SLEEP allocations are + * guaranteed to succeed. + */ +void * +vmem_alloc(vmem_t *vmp, size_t size, int vmflag) +{ + vmem_seg_t *vsp; + uintptr_t addr; + int hb; + int flist = 0; + uint32_t mtbf; + + if (size - 1 < vmp->vm_qcache_max) + return (kmem_cache_alloc(vmp->vm_qcache[(size - 1) >> + vmp->vm_qshift], vmflag & VM_KMFLAGS)); + + if ((mtbf = vmem_mtbf | vmp->vm_mtbf) != 0 && gethrtime() % mtbf == 0 && + (vmflag & (VM_NOSLEEP | VM_PANIC)) == VM_NOSLEEP) + return (NULL); + + if (vmflag & VM_NEXTFIT) + return (vmem_nextfit_alloc(vmp, size, vmflag)); + + if (vmflag & (VM_BESTFIT | VM_FIRSTFIT)) + return (vmem_xalloc(vmp, size, vmp->vm_quantum, 0, 0, + NULL, NULL, vmflag)); + if (vmp->vm_cflags & VM_NEXTFIT) + return (vmem_nextfit_alloc(vmp, size, vmflag)); + + /* + * Unconstrained instant-fit allocation from the segment list. + */ + mutex_enter(&vmp->vm_lock); + + if (vmp->vm_nsegfree >= VMEM_MINFREE || vmem_populate(vmp, vmflag)) { + if ((size & (size - 1)) == 0) + flist = lowbit(P2ALIGN(vmp->vm_freemap, size)); + else if ((hb = highbit(size)) < VMEM_FREELISTS) + flist = lowbit(P2ALIGN(vmp->vm_freemap, 1UL << hb)); + } + + if (flist-- == 0) { + mutex_exit(&vmp->vm_lock); + return (vmem_xalloc(vmp, size, vmp->vm_quantum, + 0, 0, NULL, NULL, vmflag)); + } + + ASSERT(size <= (1UL << flist)); + vsp = vmp->vm_freelist[flist].vs_knext; + addr = vsp->vs_start; + if (vmflag & VM_ENDALLOC) { + addr += vsp->vs_end - (addr + size); + } + (void) vmem_seg_alloc(vmp, vsp, addr, size); + mutex_exit(&vmp->vm_lock); + return ((void *)addr); +} + +/* + * Free the segment [vaddr, vaddr + size). + */ +void +vmem_free(vmem_t *vmp, void *vaddr, size_t size) +{ + if (size - 1 < vmp->vm_qcache_max) + kmem_cache_free(vmp->vm_qcache[(size - 1) >> vmp->vm_qshift], + vaddr); + else + vmem_xfree(vmp, vaddr, size); +} + +/* + * Determine whether arena vmp contains the segment [vaddr, vaddr + size). 
+ */ +int +vmem_contains(vmem_t *vmp, void *vaddr, size_t size) +{ + uintptr_t start = (uintptr_t)vaddr; + uintptr_t end = start + size; + vmem_seg_t *vsp; + vmem_seg_t *seg0 = &vmp->vm_seg0; + + mutex_enter(&vmp->vm_lock); + vmp->vm_kstat.vk_contains.value.ui64++; + for (vsp = seg0->vs_knext; vsp != seg0; vsp = vsp->vs_knext) { + vmp->vm_kstat.vk_contains_search.value.ui64++; + ASSERT(vsp->vs_type == VMEM_SPAN); + if (start >= vsp->vs_start && end - 1 <= vsp->vs_end - 1) + break; + } + mutex_exit(&vmp->vm_lock); + return (vsp != seg0); +} + +/* + * Add the span [vaddr, vaddr + size) to arena vmp. + */ +void * +vmem_add(vmem_t *vmp, void *vaddr, size_t size, int vmflag) +{ + if (vaddr == NULL || size == 0) + panic("vmem_add(%p, %p, %lu): bad arguments", + (void *)vmp, vaddr, size); + + ASSERT(!vmem_contains(vmp, vaddr, size)); + + mutex_enter(&vmp->vm_lock); + if (vmem_populate(vmp, vmflag)) + (void) vmem_span_create(vmp, vaddr, size, 0); + else + vaddr = NULL; + mutex_exit(&vmp->vm_lock); + return (vaddr); +} + +/* + * Walk the vmp arena, applying func to each segment matching typemask. + * If VMEM_REENTRANT is specified, the arena lock is dropped across each + * call to func(); otherwise, it is held for the duration of vmem_walk() + * to ensure a consistent snapshot. Note that VMEM_REENTRANT callbacks + * are *not* necessarily consistent, so they may only be used when a hint + * is adequate. + */ +void +vmem_walk(vmem_t *vmp, int typemask, + void (*func)(void *, void *, size_t), void *arg) +{ + vmem_seg_t *vsp; + vmem_seg_t *seg0 = &vmp->vm_seg0; + vmem_seg_t walker; + + if (typemask & VMEM_WALKER) + return; + + bzero(&walker, sizeof (walker)); + walker.vs_type = VMEM_WALKER; + + mutex_enter(&vmp->vm_lock); + VMEM_INSERT(seg0, &walker, a); + for (vsp = seg0->vs_anext; vsp != seg0; vsp = vsp->vs_anext) { + if (vsp->vs_type & typemask) { + void *start = (void *)vsp->vs_start; + size_t size = VS_SIZE(vsp); + if (typemask & VMEM_REENTRANT) { + vmem_advance(vmp, &walker, vsp); + mutex_exit(&vmp->vm_lock); + func(arg, start, size); + mutex_enter(&vmp->vm_lock); + vsp = &walker; + } else { + func(arg, start, size); + } + } + } + vmem_advance(vmp, &walker, NULL); + mutex_exit(&vmp->vm_lock); +} + +/* + * Return the total amount of memory whose type matches typemask. Thus: + * + * typemask VMEM_ALLOC yields total memory allocated (in use). + * typemask VMEM_FREE yields total memory free (available). + * typemask (VMEM_ALLOC | VMEM_FREE) yields total arena size. 
+ */ +size_t +vmem_size(vmem_t *vmp, int typemask) +{ + int64_t size = 0; + + if (typemask & VMEM_ALLOC) + size += (int64_t)vmp->vm_kstat.vk_mem_inuse.value.ui64; + if (typemask & VMEM_FREE) + size += (int64_t)vmp->vm_kstat.vk_mem_total.value.ui64 - + (int64_t)vmp->vm_kstat.vk_mem_inuse.value.ui64; + if (size < 0) + size = 0; + + return ((size_t)size); +} + +size_t +vmem_size_locked(vmem_t *vmp, int typemask) +{ + boolean_t m = (mutex_owner(&vmp->vm_lock) == curthread); + + if (!m) + mutex_enter(&vmp->vm_lock); + size_t s = vmem_size(vmp, typemask); + if (!m) + mutex_exit(&vmp->vm_lock); + return (s); +} + +size_t +vmem_size_semi_atomic(vmem_t *vmp, int typemask) +{ + int64_t size = 0; + uint64_t inuse = 0; + uint64_t total = 0; + + __sync_swap(&total, vmp->vm_kstat.vk_mem_total.value.ui64); + __sync_swap(&inuse, vmp->vm_kstat.vk_mem_inuse.value.ui64); + + int64_t inuse_signed = (int64_t)inuse; + int64_t total_signed = (int64_t)total; + + if (typemask & VMEM_ALLOC) + size += inuse_signed; + if (typemask & VMEM_FREE) + size += total_signed - inuse_signed; + + if (size < 0) + size = 0; + + return ((size_t)size); +} + +size_t +spl_vmem_size(vmem_t *vmp, int typemask) +{ + return (vmem_size_locked(vmp, typemask)); +} + +/* + * Create an arena called name whose initial span is [base, base + size). + * The arena's natural unit of currency is quantum, so vmem_alloc() + * guarantees quantum-aligned results. The arena may import new spans + * by invoking afunc() on source, and may return those spans by invoking + * ffunc() on source. To make small allocations fast and scalable, + * the arena offers high-performance caching for each integer multiple + * of quantum up to qcache_max. + */ +static vmem_t * +vmem_create_common(const char *name, void *base, size_t size, size_t quantum, + void *(*afunc)(vmem_t *, size_t, int), + void (*ffunc)(vmem_t *, void *, size_t), + vmem_t *source, size_t qcache_max, int vmflag) +{ + int i; + size_t nqcache; + vmem_t *vmp, *cur, **vmpp; + vmem_seg_t *vsp; + vmem_freelist_t *vfp; + uint32_t id = atomic_inc_32_nv(&vmem_id); + + if (vmem_vmem_arena != NULL) { + vmp = vmem_alloc(vmem_vmem_arena, sizeof (vmem_t), + vmflag & VM_KMFLAGS); + } else { + ASSERT(id <= VMEM_INITIAL); + vmp = &vmem0[id - 1]; + } + + /* An identifier arena must inherit from another identifier arena */ + ASSERT(source == NULL || ((source->vm_cflags & VMC_IDENTIFIER) == + (vmflag & VMC_IDENTIFIER))); + + if (vmp == NULL) + return (NULL); + bzero(vmp, sizeof (vmem_t)); + + (void) snprintf(vmp->vm_name, VMEM_NAMELEN, "%s", name); + mutex_init(&vmp->vm_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&vmp->vm_cv, NULL, CV_DEFAULT, NULL); + vmp->vm_cflags = vmflag; + vmflag &= VM_KMFLAGS; + + hrtime_t hrnow = gethrtime(); + + vmp->vm_createtime = hrnow; + + vmp->vm_quantum = quantum; + vmp->vm_qshift = highbit(quantum) - 1; + nqcache = MIN(qcache_max >> vmp->vm_qshift, VMEM_NQCACHE_MAX); + + for (i = 0; i <= VMEM_FREELISTS; i++) { + vfp = &vmp->vm_freelist[i]; + vfp->vs_end = 1UL << i; + vfp->vs_knext = (vmem_seg_t *)(vfp + 1); + vfp->vs_kprev = (vmem_seg_t *)(vfp - 1); + } + + vmp->vm_freelist[0].vs_kprev = NULL; + vmp->vm_freelist[VMEM_FREELISTS].vs_knext = NULL; + vmp->vm_freelist[VMEM_FREELISTS].vs_end = 0; + vmp->vm_hash_table = vmp->vm_hash0; + vmp->vm_hash_mask = VMEM_HASH_INITIAL - 1; + vmp->vm_hash_shift = highbit(vmp->vm_hash_mask); + + vsp = &vmp->vm_seg0; + vsp->vs_anext = vsp; + vsp->vs_aprev = vsp; + vsp->vs_knext = vsp; + vsp->vs_kprev = vsp; + vsp->vs_type = VMEM_SPAN; + 
vsp->vs_span_createtime = hrnow; + + vsp = &vmp->vm_rotor; + vsp->vs_type = VMEM_ROTOR; + VMEM_INSERT(&vmp->vm_seg0, vsp, a); + + bcopy(&vmem_kstat_template, &vmp->vm_kstat, sizeof (vmem_kstat_t)); + + vmp->vm_id = id; + if (source != NULL) + vmp->vm_kstat.vk_source_id.value.ui32 = source->vm_id; + vmp->vm_source = source; + vmp->vm_source_alloc = afunc; + vmp->vm_source_free = ffunc; + + /* + * Some arenas (like vmem_metadata and kmem_metadata) cannot + * use quantum caching to lower fragmentation. Instead, we + * increase their imports, giving a similar effect. + */ + if (vmp->vm_cflags & VMC_NO_QCACHE) { + if (qcache_max > VMEM_NQCACHE_MAX && ISP2(qcache_max)) { + vmp->vm_min_import = qcache_max; + } else { + vmp->vm_min_import = + VMEM_QCACHE_SLABSIZE(nqcache << vmp->vm_qshift); + } + nqcache = 0; + } + + if (nqcache != 0) { + ASSERT(!(vmflag & VM_NOSLEEP)); + vmp->vm_qcache_max = nqcache << vmp->vm_qshift; + for (i = 0; i < nqcache; i++) { + char buf[VMEM_NAMELEN + 21]; + (void) snprintf(buf, VMEM_NAMELEN + 20, "%s_%lu", + vmp->vm_name, (i + 1) * quantum); + vmp->vm_qcache[i] = kmem_cache_create(buf, + (i + 1) * quantum, quantum, NULL, NULL, NULL, + NULL, vmp, KMC_QCACHE | KMC_NOTOUCH); + } + } + + if ((vmp->vm_ksp = kstat_create("vmem", vmp->vm_id, vmp->vm_name, + "vmem", KSTAT_TYPE_NAMED, sizeof (vmem_kstat_t) / + sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) != NULL) { + vmp->vm_ksp->ks_data = &vmp->vm_kstat; + kstat_install(vmp->vm_ksp); + } + + mutex_enter(&vmem_list_lock); + vmpp = &vmem_list; + while ((cur = *vmpp) != NULL) + vmpp = &cur->vm_next; + *vmpp = vmp; + mutex_exit(&vmem_list_lock); + + if (vmp->vm_cflags & VMC_POPULATOR) { + ASSERT(vmem_populators < VMEM_INITIAL); + vmem_populator[atomic_inc_32_nv(&vmem_populators) - 1] = vmp; + mutex_enter(&vmp->vm_lock); + (void) vmem_populate(vmp, vmflag | VM_PANIC); + mutex_exit(&vmp->vm_lock); + } + + if ((base || size) && vmem_add(vmp, base, size, vmflag) == NULL) { + vmem_destroy(vmp); + return (NULL); + } + + return (vmp); +} + +vmem_t * +vmem_xcreate(const char *name, void *base, size_t size, size_t quantum, + vmem_ximport_t *afunc, vmem_free_t *ffunc, vmem_t *source, + size_t qcache_max, int vmflag) +{ + ASSERT(!(vmflag & (VMC_POPULATOR | VMC_XALLOC))); + vmflag &= ~(VMC_POPULATOR | VMC_XALLOC); + + return (vmem_create_common(name, base, size, quantum, + (vmem_alloc_t *)afunc, ffunc, source, qcache_max, + vmflag | VMC_XALLOC)); +} + +vmem_t * +vmem_create(const char *name, void *base, size_t size, size_t quantum, + vmem_alloc_t *afunc, vmem_free_t *ffunc, vmem_t *source, + size_t qcache_max, int vmflag) +{ + ASSERT(!(vmflag & (VMC_XALLOC | VMC_XALIGN))); + vmflag &= ~(VMC_XALLOC | VMC_XALIGN); + + return (vmem_create_common(name, base, size, quantum, + afunc, ffunc, source, qcache_max, vmflag)); +} + +/* + * Destroy arena vmp. + */ +void +vmem_destroy(vmem_t *vmp) +{ + vmem_t *cur, **vmpp; + vmem_seg_t *seg0 = &vmp->vm_seg0; + vmem_seg_t *vsp, *anext; + size_t leaked; + + /* + * set vm_nsegfree to zero because vmem_free_span_list + * would have already freed vm_segfree. + */ + vmp->vm_nsegfree = 0; + mutex_enter(&vmem_list_lock); + vmpp = &vmem_list; + while ((cur = *vmpp) != vmp) + vmpp = &cur->vm_next; + *vmpp = vmp->vm_next; + mutex_exit(&vmem_list_lock); + + leaked = vmem_size(vmp, VMEM_ALLOC); + if (leaked != 0) + printf("SPL: vmem_destroy('%s'): leaked %lu %s\n", + vmp->vm_name, leaked, (vmp->vm_cflags & VMC_IDENTIFIER) ? 
+ "identifiers" : "bytes"); + + if (vmp->vm_hash_table != vmp->vm_hash0) + vmem_free(vmem_hash_arena, vmp->vm_hash_table, + (vmp->vm_hash_mask + 1) * sizeof (void *)); + + /* + * Give back the segment structures for anything that's left in the + * arena, e.g. the primary spans and their free segments. + */ + VMEM_DELETE(&vmp->vm_rotor, a); + for (vsp = seg0->vs_anext; vsp != seg0; vsp = anext) { + anext = vsp->vs_anext; + vmem_putseg_global(vsp); + } + + while (vmp->vm_nsegfree > 0) + vmem_putseg_global(vmem_getseg(vmp)); + + kstat_delete(vmp->vm_ksp); + + mutex_destroy(&vmp->vm_lock); + cv_destroy(&vmp->vm_cv); + vmem_free(vmem_vmem_arena, vmp, sizeof (vmem_t)); +} + + +/* + * Destroy arena vmp. + */ +void +vmem_destroy_internal(vmem_t *vmp) +{ + vmem_t *cur, **vmpp; + vmem_seg_t *seg0 = &vmp->vm_seg0; + vmem_seg_t *vsp, *anext; + size_t leaked; + + mutex_enter(&vmem_list_lock); + vmpp = &vmem_list; + while ((cur = *vmpp) != vmp) + vmpp = &cur->vm_next; + *vmpp = vmp->vm_next; + mutex_exit(&vmem_list_lock); + + leaked = vmem_size(vmp, VMEM_ALLOC); + if (leaked != 0) + printf("SPL: vmem_destroy('%s'): leaked %lu %s\n", + vmp->vm_name, leaked, (vmp->vm_cflags & VMC_IDENTIFIER) ? + "identifiers" : "bytes"); + + if (vmp->vm_hash_table != vmp->vm_hash0) + if (vmem_hash_arena != NULL) + vmem_free(vmem_hash_arena, vmp->vm_hash_table, + (vmp->vm_hash_mask + 1) * sizeof (void *)); + + /* + * Give back the segment structures for anything that's left in the + * arena, e.g. the primary spans and their free segments. + */ + VMEM_DELETE(&vmp->vm_rotor, a); + for (vsp = seg0->vs_anext; vsp != seg0; vsp = anext) { + anext = vsp->vs_anext; + vmem_putseg_global(vsp); + } + + while (vmp->vm_nsegfree > 0) + vmem_putseg_global(vmem_getseg(vmp)); + + if (!(vmp->vm_cflags & VMC_IDENTIFIER) && + vmem_size(vmp, VMEM_ALLOC) != 0) + printf("SPL: vmem_destroy('%s'): STILL %lu bytes at " + "kstat_delete() time\n", + vmp->vm_name, vmem_size(vmp, VMEM_ALLOC)); + + kstat_delete(vmp->vm_ksp); + + mutex_destroy(&vmp->vm_lock); + cv_destroy(&vmp->vm_cv); + + // Alas, to free, requires access to "vmem_vmem_arena" the very thing + // we release first. + // vmem_free(vmem_vmem_arena, vmp, sizeof (vmem_t)); +} + +/* + * Only shrink vmem hashtable if it is 1<vm_kstat.vk_alloc.value.ui64 - + vmp->vm_kstat.vk_free.value.ui64); + + new_size = MAX(VMEM_HASH_INITIAL, 1 << (highbit(3 * nseg + 4) - 2)); + old_size = vmp->vm_hash_mask + 1; + + if ((old_size >> vmem_rescale_minshift) <= new_size && + new_size <= (old_size << 1)) + return; + + new_table = vmem_alloc(vmem_hash_arena, new_size * sizeof (void *), + VM_NOSLEEP); + if (new_table == NULL) + return; + bzero(new_table, new_size * sizeof (void *)); + + mutex_enter(&vmp->vm_lock); + + old_size = vmp->vm_hash_mask + 1; + old_table = vmp->vm_hash_table; + + vmp->vm_hash_mask = new_size - 1; + vmp->vm_hash_table = new_table; + vmp->vm_hash_shift = highbit(vmp->vm_hash_mask); + + for (h = 0; h < old_size; h++) { + vsp = old_table[h]; + while (vsp != NULL) { + uintptr_t addr = vsp->vs_start; + vmem_seg_t *next_vsp = vsp->vs_knext; + vmem_seg_t **hash_bucket = VMEM_HASH(vmp, addr); + vsp->vs_knext = *hash_bucket; + *hash_bucket = vsp; + vsp = next_vsp; + } + } + + mutex_exit(&vmp->vm_lock); + + if (old_table != vmp->vm_hash0) + vmem_free(vmem_hash_arena, old_table, + old_size * sizeof (void *)); +} + +/* + * Perform periodic maintenance on all vmem arenas. 
+ */ + +void +vmem_update(void *dummy) +{ + vmem_t *vmp; + + mutex_enter(&vmem_list_lock); + for (vmp = vmem_list; vmp != NULL; vmp = vmp->vm_next) { + /* + * If threads are waiting for resources, wake them up + * periodically so they can issue another kmem_reap() + * to reclaim resources cached by the slab allocator. + */ + cv_broadcast(&vmp->vm_cv); + + /* + * Rescale the hash table to keep the hash chains short. + */ + vmem_hash_rescale(vmp); + } + mutex_exit(&vmem_list_lock); + + (void) bsd_timeout(vmem_update, dummy, &vmem_update_interval); +} + +void +vmem_qcache_reap(vmem_t *vmp) +{ + int i; + + /* + * Reap any quantum caches that may be part of this vmem. + */ + for (i = 0; i < VMEM_NQCACHE_MAX; i++) + if (vmp->vm_qcache[i]) + kmem_cache_reap_now(vmp->vm_qcache[i]); +} + +/* given a size, return the appropriate vmem_bucket_arena[] entry */ + +static inline uint16_t +vmem_bucket_number(size_t size) +{ + // For VMEM_BUCKET_HIBIT == 12, + // vmem_bucket_arena[n] holds allocations from 2^[n+11]+1 to 2^[n+12], + // so for [n] = 0, 2049-4096, for [n]=5 65537-131072, + // for [n]=7 (256k+1)-512k + // set hb: 512k == 19, 256k+1 == 19, 256k == 18, ... + const int hb = highbit(size-1); + + int bucket = hb - VMEM_BUCKET_LOWBIT; + + // very large allocations go into the 16 MiB bucket + if (hb > VMEM_BUCKET_HIBIT) + bucket = VMEM_BUCKET_HIBIT - VMEM_BUCKET_LOWBIT; + + // very small allocations go into the 4 kiB bucket + if (bucket < 0) + bucket = 0; + + return (bucket); +} + +static inline vmem_t * +vmem_bucket_arena_by_size(size_t size) +{ + uint16_t bucket = vmem_bucket_number(size); + + return (vmem_bucket_arena[bucket]); +} + +vmem_t * +spl_vmem_bucket_arena_by_size(size_t size) +{ + return (vmem_bucket_arena_by_size(size)); +} + +static inline void +vmem_bucket_wake_all_waiters(void) +{ + for (int i = VMEM_BUCKET_LOWBIT; i < VMEM_BUCKET_HIBIT; i++) { + const int bucket = i - VMEM_BUCKET_LOWBIT; + vmem_t *bvmp = vmem_bucket_arena[bucket]; + cv_broadcast(&bvmp->vm_cv); + } + cv_broadcast(&spl_heap_arena->vm_cv); +} + +/* + * xnu_alloc_throttled_bail() : spin looking for memory + * + */ + +static inline void * +xnu_alloc_throttled_bail(uint64_t now_ticks, vmem_t *calling_vmp, + size_t size, int vmflags) +{ + // spin looking for memory + const uint64_t bigtarget = MAX(size, 16ULL*1024ULL*1024ULL); + static volatile _Atomic bool alloc_lock = false; + static volatile _Atomic uint64_t force_time = 0; + + uint64_t timeout_ticks = hz / 2; + if (vmflags & VM_PUSHPAGE) + timeout_ticks = hz / 4; + + uint64_t timeout_time = now_ticks + timeout_ticks; + + for (uint32_t suspends = 0, blocked_suspends = 0, + try_no_pressure = 0; /* empty */; /* empty */) { + if (force_time + timeout_ticks > timeout_time) { + // another thread has forced an allocation + // by timing out. push our deadline into the future. + timeout_time = force_time + timeout_ticks; + } + if (alloc_lock) { + blocked_suspends++; + IOSleep(1); + } else if (spl_vmem_xnu_useful_bytes_free() >= bigtarget) { + bool f = false; + // if alloc_lock == f then alloc_lock = true and result + // is true otherwise result is false and f = true + if (!__c11_atomic_compare_exchange_strong(&alloc_lock, + &f, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) { + /* + * avoid (highly unlikely) data race on + * alloc_lock. if alloc_lock has become true + * while we were in the else if expression + * then we effectively optimize away the + * (relaxed) load of alloc_lock (== true) + * into f and continue. 
+ */ + continue; + } + // alloc_lock is now visible as true to all threads + try_no_pressure++; + void *m = spl_vmem_malloc_if_no_pressure(size); + if (m != NULL) { + uint64_t ticks = zfs_lbolt() - now_ticks; + printf("SPL: %s returning %llu bytes after " + "%llu ticks (hz=%u, seconds = %llu), " + "%u suspends, %u blocked, %u tries (%s)\n", + __func__, (uint64_t)size, + ticks, hz, ticks/hz, suspends, + blocked_suspends, try_no_pressure, + calling_vmp->vm_name); + // atomic seq cst, so is published to all + // threads + alloc_lock = false; + return (m); + } else { + alloc_lock = false; + spl_free_set_emergency_pressure(bigtarget); + suspends++; + IOSleep(1); + } + } else if (zfs_lbolt() > timeout_time) { + bool f = false; + if (!__c11_atomic_compare_exchange_strong(&alloc_lock, + &f, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) { + // avoid (highly unlikely) data race on + // alloc_lock as above + continue; + } + void *mp = spl_vmem_malloc_unconditionally(size); + uint64_t now = zfs_lbolt(); + uint64_t ticks = now - now_ticks; + force_time = now; + printf("SPL: %s TIMEOUT %llu bytes after " + "%llu ticks (hz=%u, seconds=%llu), " + "%u suspends, %u blocked, %u tries (%s)\n", + __func__, (uint64_t)size, + ticks, hz, ticks/hz, suspends, + blocked_suspends, try_no_pressure, + calling_vmp->vm_name); + alloc_lock = false; + atomic_inc_64(&spl_xat_forced); + return (mp); + } else { + spl_free_set_emergency_pressure(bigtarget); + suspends++; + IOSleep(1); + } + } +} + +static void * +xnu_alloc_throttled(vmem_t *bvmp, size_t size, int vmflag) +{ + // the caller is one of the bucket arenas. + // null_vmp will be spl_default_arena_parent, which is + // just a placeholder. + + uint64_t now = zfs_lbolt(); + const uint64_t entry_now = now; + + void *m = spl_vmem_malloc_if_no_pressure(size); + + if (m != NULL) { + atomic_inc_64(&spl_xat_success); + spl_xat_lastalloc = gethrtime(); + // wake up waiters on all the arena condvars + // since there is apparently no memory shortage. + vmem_bucket_wake_all_waiters(); + return (m); + } else { + spl_free_set_emergency_pressure((int64_t)size); + } + + if (vmflag & VM_PANIC) { + // force an allocation now to avoid a panic + spl_xat_lastalloc = gethrtime(); + spl_free_set_emergency_pressure(4LL * (int64_t)size); + void *p = spl_vmem_malloc_unconditionally(size); + // p cannot be NULL (unconditional kernel malloc always works + // or panics) + // therefore: success, wake all waiters on alloc|free condvar + // wake up arena waiters to let them know there is memory + // available in the arena; let waiters on other bucket arenas + // continue sleeping. + cv_broadcast(&bvmp->vm_cv); + return (p); + } + + if (vmflag & VM_NOSLEEP) { + spl_free_set_emergency_pressure(MAX(2LL * (int64_t)size, + 16LL*1024LL*1024LL)); + /* cheating a bit, but not really waiting */ + kpreempt(KPREEMPT_SYNC); + void *p = spl_vmem_malloc_if_no_pressure(size); + if (p != NULL) { + atomic_inc_64(&spl_xat_late_success_nosleep); + cv_broadcast(&bvmp->vm_cv); + spl_xat_lastalloc = gethrtime(); + } + // if p == NULL, then there will be an increment in + // the fail kstat + return (p); + } + + /* + * Loop for a while trying to satisfy VM_SLEEP allocations. + * + * If we are able to allocate memory, then return the pointer. + * + * We return NULL if some other thread's activity has caused + * sufficient memory to appear in this arena that we can satisfy + * the allocation. 
+ * + * We call xnu_alloc_throttle_bail() after a few milliseconds of + * waiting; it will either return a pointer to newly allocated + * memory or NULL. We return the result. + * + */ + + const uint32_t bucket_number = + vmem_bucket_id_to_bucket_number[bvmp->vm_id]; + static volatile _Atomic uint32_t waiters = 0; + + waiters++; + + if (waiters == 1UL) + atomic_inc_64(&spl_xat_no_waiters); + + static _Atomic uint32_t max_waiters_seen = 0; + + if (waiters > max_waiters_seen) { + max_waiters_seen = waiters; + printf("SPL: %s: max_waiters_seen increased to %u\n", __func__, + max_waiters_seen); + } + + boolean_t local_xat_pressured = false; + + for (; /* empty */; /* empty */) { + clock_t wait_time = USEC2NSEC(500UL * MAX(waiters, 1UL)); + mutex_enter(&bvmp->vm_lock); + spl_xat_sleep++; + if (local_xat_pressured) { + spl_xat_pressured++; + local_xat_pressured = false; + } + (void) cv_timedwait_hires(&bvmp->vm_cv, &bvmp->vm_lock, + wait_time, 0, 0); + mutex_exit(&bvmp->vm_lock); + now = zfs_lbolt(); + // We may be here because of a broadcast to &vmp->vm_cv, + // causing xnu to schedule all the sleepers in priority-weighted + // FIFO order. Because of the mutex_exit(), the sections below + // here may be entered concurrently. + // spl_vmem_malloc_if_no_pressure does a mutex, so avoid calling + // it unless there is a chance it will succeed. + if (spl_vmem_xnu_useful_bytes_free() > (MAX(size, + 16ULL*1024ULL*1024ULL))) { + void *a = spl_vmem_malloc_if_no_pressure(size); + if (a != NULL) { + atomic_inc_64(&spl_xat_late_success); + spl_xat_lastalloc = gethrtime(); + waiters--; + // Wake up all waiters on the bucket arena + // locks, since the system apparently has + // memory again. + vmem_bucket_wake_all_waiters(); + return (a); + } else { + // Probably vm_page_free_count changed while + // we were in the mutex queue in + // spl_vmem_malloc_if_no_pressure(). There is + // therefore no point in doing the bail-out + // check below, so go back to the top of the + // for loop. + atomic_inc_64(&spl_xat_late_deny); + continue; + } + } + if (now > entry_now + hz / 4 || + spl_vba_threads[bucket_number] > 1UL) { + // If there are other threads waiting for us + // in vba() then when we satisfy this allocation, + // we satisfy more than one thread, so invoke XATB(). + // Otherwise, if we have had no luck for 250 ms, then + // switch to XATB() which is much more aggressive. + if (spl_vba_threads[bucket_number] > 1UL) + atomic_inc_64(&spl_xat_bailed_contended); + atomic_inc_64(&spl_xat_bailed); + static _Atomic uint32_t bailing_threads = 0; + static _Atomic uint32_t max_bailers_seen = 0; + bailing_threads++; + if (bailing_threads > max_bailers_seen) { + max_bailers_seen = bailing_threads; + printf("SPL: %s: max_bailers_seen increased " + "to %u\n", __func__, max_bailers_seen); + } + void *b = + xnu_alloc_throttled_bail(now, bvmp, size, vmflag); + bailing_threads--; + spl_xat_lastalloc = gethrtime(); + // wake up waiters on the arena lock, + // since they now have memory they can use. 
+			cv_broadcast(&bvmp->vm_cv);
+			// open turnstile after having bailed, rather
+			// than before
+			waiters--;
+			return (b);
+		} else if (now - entry_now > 0 &&
+		    ((now - entry_now) % (hz/10))) {
+			spl_free_set_emergency_pressure(MAX(size,
+			    16LL*1024LL*1024LL));
+			local_xat_pressured = true;
+		}
+	}
+}
+
+static void
+xnu_free_throttled(vmem_t *vmp, void *vaddr, size_t size)
+{
+	extern void osif_free(void *, uint64_t);
+
+	// Serialize behind a (short) spin-sleep delay, giving
+	// xnu time to do freelist management and
+	// PT teardowns.
+
+	// In the usual case there is only one thread in this function,
+	// so we proceed waitlessly to osif_free().
+
+	// When there are multiple threads here, we delay the 2nd and later.
+
+	// Explicit race:
+	// The osif_free() is not protected by the vmem_xnu_alloc_lock
+	// mutex; that is just used for implementing the delay. Consequently,
+	// the waiters on the same lock in spl_vmem_malloc_if_no_pressure may
+	// falsely see too small a value for vm_page_free_count. We don't
+	// care, in part because xnu performs poorly when doing
+	// free-then-allocate anyway.
+
+	// a_waiters gauges the loop exit checking and sleep duration;
+	// it is a count of the number of threads trying to do work
+	// in this function.
+	static volatile _Atomic uint32_t a_waiters = 0;
+
+	// is_freeing protects the osif_free() call; see comment below
+	static volatile _Atomic bool is_freeing = false;
+
+	a_waiters++;	// generates "lock incl ..."
+
+	static _Atomic uint32_t max_waiters_seen = 0;
+
+	if (a_waiters > max_waiters_seen) {
+		max_waiters_seen = a_waiters;
+		printf("SPL: %s: max_waiters_seen increased to %u\n",
+		    __func__, max_waiters_seen);
+	}
+
+	for (uint32_t iter = 0; a_waiters > 1UL; iter++) {
+		// there is more than one thread here, so suspend and
+		// sleep for 1 ms
+		atomic_inc_64(&spl_xft_wait);
+		IOSleep(1);
+		// If we are growing old in this loop, then see if
+		// anyone else is still in osif_free. If not,
+		// we can exit.
+		if (iter >= a_waiters) {
+			// if is_freeing == f, then set is_freeing to true with
+			// release semantics (i.e. "push" it to other cores)
+			// then break; otherwise, set f to true relaxedly (i.e.,
+			// optimize it out)
+			bool f = false;
+			if (__c11_atomic_compare_exchange_weak(&is_freeing,
+			    &f, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
+				break;
+			}
+		}
+	}
+	// If there is more than one thread in this function, osif_free() is
+	// protected by is_freeing. Release it after the osif_free()
+	// call has been made and the lastfree bookkeeping has been done.
+	osif_free(vaddr, size);
+	spl_xat_lastfree = gethrtime();
+	is_freeing = false;
+	a_waiters--;
+	kpreempt(KPREEMPT_SYNC);
+	// since we just gave back xnu enough to satisfy an allocation
+	// in at least the smaller buckets, let's wake up anyone in
+	// the cv_wait() in vmem_xalloc([bucket_#], ...)
+	vmem_bucket_wake_all_waiters();
+}
+
+// return true if the bit was unset before the atomic OR, i.e. we took
+// this bucket's bit lock here.
+static inline bool
+vba_atomic_lock_bucket(volatile _Atomic uint16_t *bbap, uint16_t bucket_bit)
+{
+
+	// We use a test-and-set of the appropriate bit
+	// in buckets_busy_allocating; if it was not set,
+	// then break out of the loop.
+	//
+	// This compiles into an orl, cmpxchgw instruction pair.
+	// The return from __c11_atomic_fetch_or() is the
+	// previous value of buckets_busy_allocating.
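+	//
+	// Worked example: with bucket_bit == 0x0010, a previous value of
+	// 0x0013 means the bit was already set, the OR changes nothing and
+	// we report failure; a previous value of 0x0003 means we have just
+	// set the bit and now hold this bucket's bit lock.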
+ + uint16_t prev = + __c11_atomic_fetch_or(bbap, bucket_bit, __ATOMIC_SEQ_CST); + if (prev & bucket_bit) + return (false); // we did not acquire the bit lock here + else + return (true); // we turned the bit from 0 to 1 +} + +static void * +vmem_bucket_alloc(vmem_t *null_vmp, size_t size, const int vmflags) +{ + + if (vmflags & VM_NO_VBA) + return (NULL); + + // caller is spl_heap_arena looking for memory. + // null_vmp will be spl_default_arena_parent, and so + // is just a placeholder. + + vmem_t *calling_arena = spl_heap_arena; + + static volatile _Atomic uint32_t hipriority_allocators = 0; + boolean_t local_hipriority_allocator = false; + + if (0 != (vmflags & (VM_PUSHPAGE | VM_NOSLEEP | VM_PANIC | VM_ABORT))) { + local_hipriority_allocator = true; + hipriority_allocators++; + } + + if (!ISP2(size)) + atomic_inc_64(&spl_bucket_non_pow2_allocs); + + vmem_t *bvmp = vmem_bucket_arena_by_size(size); + + // there are 13 buckets, so use a 16-bit scalar to hold + // a set of bits, where each bit corresponds to an in-progress + // vmem_alloc(bucket, ...) below. + + static volatile _Atomic uint16_t buckets_busy_allocating = 0; + const uint16_t bucket_number = vmem_bucket_number(size); + const uint16_t bucket_bit = (uint16_t)1 << bucket_number; + + spl_vba_threads[bucket_number]++; + + static volatile _Atomic uint32_t waiters = 0; + + // First, if we are VM_SLEEP, check for memory, try some pressure, + // and if that doesn't work, force entry into the loop below. + + bool loop_once = false; + + if ((vmflags & (VM_NOSLEEP | VM_PANIC | VM_ABORT)) == 0 && + ! vmem_canalloc_atomic(bvmp, size)) { + if (spl_vmem_xnu_useful_bytes_free() < (MAX(size, + 16ULL*1024ULL*1024ULL))) { + spl_free_set_emergency_pressure(size); + IOSleep(1); + if (!vmem_canalloc_atomic(bvmp, size) && + (spl_vmem_xnu_useful_bytes_free() < (MAX(size, + 16ULL*1024ULL*1024ULL)))) { + loop_once = true; + } + } + } + + // spin-sleep: if we would need to go to the xnu allocator. + // + // We want to avoid a burst of allocs from bucket_heap's children + // successively hitting a low-memory condition, or alternatively + // each successfully importing memory from xnu when they can share + // a single import. + // + // We also want to take advantage of any memory that becomes available + // in bucket_heap. + // + // If there is more than one thread in this function (~ few percent) + // then the subsequent threads are put into the loop below. They + // can escape the loop if they are [1]non-waiting allocations, or + // [2]if they become the only waiting thread, or + // [3]if the cv_timedwait_hires returns -1 (which represents EWOULDBLOCK + // from msleep() which gets it from _sleep()'s THREAD_TIMED_OUT) + // allocating in the bucket, or [4]if this thread has (rare condition) + // spent a quarter of a second in the loop. 
+ + if (waiters++ > 1 || loop_once) { + atomic_inc_64(&spl_vba_loop_entries); + } + + static _Atomic uint32_t max_waiters_seen = 0; + + if (waiters > max_waiters_seen) { + max_waiters_seen = waiters; + printf("SPL: %s: max_waiters_seen increased to %u\n", __func__, + max_waiters_seen); + } + + // local counters, to be added atomically to global kstat variables + uint64_t local_memory_blocked = 0, local_cv_timeout = 0; + uint64_t local_loop_timeout = 0; + uint64_t local_cv_timeout_blocked = 0, local_loop_timeout_blocked = 0; + uint64_t local_sleep = 0, local_hipriority_blocked = 0; + + const uint64_t loop_ticks = 25; // a tick is 10 msec, so 250 msec + const uint64_t hiprio_loop_ticks = 4; // 40 msec + + for (uint64_t entry_time = zfs_lbolt(), + loop_timeout = entry_time + loop_ticks, + hiprio_timeout = entry_time + hiprio_loop_ticks, timedout = 0; + waiters > 1UL || loop_once; /* empty */) { + loop_once = false; + // non-waiting allocations should proceeed to vmem_alloc() + // immediately + if (vmflags & (VM_NOSLEEP | VM_PANIC | VM_ABORT)) { + break; + } + if (vmem_canalloc_atomic(bvmp, size)) { + // We can probably vmem_alloc(bvmp, size, vmflags). + // At worst case it will give us a NULL and we will + // end up on the vmp's cv_wait. + // + // We can have threads with different bvmp + // taking this exit, and will proceed concurrently. + // + // However, we should protect against a burst of + // callers hitting the same bvmp before the allocation + // results are reflected in + // vmem_canalloc_atomic(bvmp, ...) + if (local_hipriority_allocator == false && + hipriority_allocators > 0) { + // more high priority allocations are wanted, + // so this thread stays here + local_hipriority_blocked++; + } else if (vba_atomic_lock_bucket( + &buckets_busy_allocating, bucket_bit)) { + // we are not being blocked by another allocator + // to the same bucket, or any higher priority + // allocator + atomic_inc_64(&spl_vba_parent_memory_appeared); + break; + // The vmem_alloc() should return extremely + // quickly from an INSTANTFIT allocation that + // canalloc predicts will succeed. + } else { + // another thread is trying to use the free + // memory in the bucket_## arena; there might + // still be free memory there after its + // allocation is completed, and there might be + // excess in the bucket_heap arena, so stick + // around in this loop. + local_memory_blocked++; + cv_broadcast(&bvmp->vm_cv); + } + } + if (timedout > 0) { + if (local_hipriority_allocator == false && + hipriority_allocators > 0) { + local_hipriority_blocked++; + } else if (vba_atomic_lock_bucket( + &buckets_busy_allocating, bucket_bit)) { + if (timedout & 1) + local_cv_timeout++; + if (timedout & 6 || zfs_lbolt() >= loop_timeout) + local_loop_timeout++; + break; + } else { + if (timedout & 1) { + local_cv_timeout_blocked++; + } + if (timedout & 6) { + local_loop_timeout_blocked++; + } else if (zfs_lbolt() > loop_timeout) { + timedout |= 2; + } + // flush the current thread in xat() out of + // xat()'s for() loop and into xat_bail() + cv_broadcast(&bvmp->vm_cv); + } + } + // The bucket is already allocating, or the bucket needs + // more memory to satisfy vmem_allocat(bvmp, size, VM_NOSLEEP), + // or we want to give the bucket some time to acquire more + // memory. 
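+		// As the code below shows, the wait is a cv_timedwait_hires
+		// on the bucket heap's condvar: roughly 30 ms in the common
+		// case, shortened to about 1 ms once we have timed out or
+		// seen the bucket blocked, after which memory is rechecked
+		// at the top of the loop.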
+ // substitute for the vmp arena's cv_wait in vmem_xalloc() + // (vmp is the bucket_heap AKA spl_heap_arena) + mutex_enter(&calling_arena->vm_lock); + local_sleep++; + if (local_sleep >= 1000ULL) { + atomic_add_64(&spl_vba_sleep, local_sleep - 1ULL); + local_sleep = 1ULL; + atomic_add_64(&spl_vba_cv_timeout_blocked, + local_cv_timeout_blocked); + local_cv_timeout_blocked = 0; + atomic_add_64(&spl_vba_loop_timeout_blocked, + local_loop_timeout_blocked); + local_loop_timeout_blocked = 0; + atomic_add_64(&spl_vba_hiprio_blocked, + local_hipriority_blocked); + local_hipriority_blocked = 0; + if (local_memory_blocked > 1ULL) { + atomic_add_64(&spl_vba_parent_memory_blocked, + local_memory_blocked - 1ULL); + local_memory_blocked = 1ULL; + } + } + clock_t wait_time = MSEC2NSEC(30); + if (timedout > 0 || local_memory_blocked > 0) { + wait_time = MSEC2NSEC(1); + } + int ret = cv_timedwait_hires(&calling_arena->vm_cv, + &calling_arena->vm_lock, + wait_time, 0, 0); + // We almost certainly have exited because of a + // signal/broadcast, but maybe just timed out. + // Either way, recheck memory. + mutex_exit(&calling_arena->vm_lock); + if (ret == -1) { + // cv_timedwait_hires timer expired + timedout |= 1; + cv_broadcast(&bvmp->vm_cv); + } else if ((timedout & 2) == 0) { + // we were awakened; check to see if we have been + // in the for loop for a long time + uint64_t n = zfs_lbolt(); + if (n > loop_timeout) { + timedout |= 2; + extern uint64_t real_total_memory; + spl_free_set_emergency_pressure( + real_total_memory / 64LL); + // flush the current thread in xat() out of + // xat()'s for() loop and into xat_bail() + cv_broadcast(&bvmp->vm_cv); + } else if (local_hipriority_allocator && + n > hiprio_timeout && waiters > 1UL) { + timedout |= 4; + } + } + } + + /* + * Turn on the exclusion bit in buckets_busy_allocating, to + * prevent multiple threads from calling vmem_alloc() on the + * same bucket arena concurrently rather than serially. + * + * This principally reduces the liklihood of asking xnu for + * more memory when other memory is or becomes available. + * + * This exclusion only applies to VM_SLEEP allocations; + * others (VM_PANIC, VM_NOSLEEP, VM_ABORT) will go to + * vmem_alloc() concurrently with any other threads. + * + * Since we aren't doing a test-and-set operation like above, + * we can just use |= and &= below and get correct atomic + * results, instead of using: + * + * __c11_atomic_fetch_or(&buckets_busy_allocating, + * bucket_bit, __ATOMIC_SEQ_CST); + * with the &= down below being written as + * __c11_atomic_fetch_and(&buckets_busy_allocating, + * ~bucket_bit, __ATOMIC_SEQ_CST); + * + * and this makes a difference with no optimization either + * compiling the whole file or with __attribute((optnone)) + * in front of the function decl. In particular, the non- + * optimized version that uses the builtin __c11_atomic_fetch_{and,or} + * preserves the C program order in the machine language output, + * inersting cmpxchgws, while all optimized versions, and the + * non-optimized version using the plainly-written version, reorder + * the "orw regr, memory" and "andw register, memory" (these are atomic + * RMW operations in x86-64 when the memory is naturally aligned) so + * that the strong memory model x86-64 promise that later loads see the + * results of earlier stores. 
+ * + * clang+llvm simply are good at optimizing _Atomics and + * the optimized code differs only in line numbers and + * among all three approaches (as plainly written, using + * the __c11_atomic_fetch_{or,and} with sequential consistency, + * or when compiling with at least -O optimization so an + * atomic_or_16(&buckets_busy_allocating) built with GCC intrinsics + * is actually inlined rather than a function call). + * + */ + + // in case we left the loop by being the only waiter, stop the + // next thread arriving from leaving the for loop because + // vmem_canalloc(bvmp, that_thread's_size) is true. + + buckets_busy_allocating |= bucket_bit; + + // update counters + if (local_sleep > 0) + atomic_add_64(&spl_vba_sleep, local_sleep); + if (local_memory_blocked > 0) + atomic_add_64(&spl_vba_parent_memory_blocked, + local_memory_blocked); + if (local_cv_timeout > 0) + atomic_add_64(&spl_vba_cv_timeout, local_cv_timeout); + if (local_cv_timeout_blocked > 0) + atomic_add_64(&spl_vba_cv_timeout_blocked, + local_cv_timeout_blocked); + if (local_loop_timeout > 0) + atomic_add_64(&spl_vba_loop_timeout, local_loop_timeout); + if (local_loop_timeout_blocked > 0) + atomic_add_64(&spl_vba_loop_timeout_blocked, + local_loop_timeout_blocked); + if (local_hipriority_blocked > 0) + atomic_add_64(&spl_vba_hiprio_blocked, + local_hipriority_blocked); + + // There is memory in this bucket, or there are no other waiters, + // or we aren't a VM_SLEEP allocation, or we iterated out of the + // for loop. + // vmem_alloc() and vmem_xalloc() do their own mutex serializing + // on bvmp->vm_lock, so we don't have to here. + // + // vmem_alloc may take some time to return (especially for VM_SLEEP + // allocations where we did not take the vm_canalloc(bvmp...) break out + // of the for loop). Therefore, if we didn't enter the for loop at all + // because waiters was 0 when we entered this function, + // subsequent callers will enter the for loop. + + void *m = vmem_alloc(bvmp, size, vmflags); + + // allow another vmem_canalloc() through for this bucket + // by atomically turning off the appropriate bit + + /* + * Except clang+llvm DTRT because of _Atomic, could be written as: + * __c11_atomic_fetch_and(&buckets_busy_allocating, + * ~bucket_bit, __ATOMIC_SEQ_CST); + * + * On processors with more relaxed memory models, it might be + * more efficient to do so with release semantics here, and + * in the atomic |= above, with acquire semantics in the bit tests, + * but on the other hand it may be hard to do better than clang+llvm. + */ + + buckets_busy_allocating &= ~bucket_bit; + + if (local_hipriority_allocator) + hipriority_allocators--; + + // if we got an allocation, wake up the arena cv waiters + // to let them try to exit the for(;;) loop above and + // exit the cv_wait() in vmem_xalloc(vmp, ...) 
+ + if (m != NULL) { + cv_broadcast(&calling_arena->vm_cv); + } + + waiters--; + spl_vba_threads[bucket_number]--; + return (m); +} + +static void +vmem_bucket_free(vmem_t *null_vmp, void *vaddr, size_t size) +{ + vmem_t *calling_arena = spl_heap_arena; + + vmem_free(vmem_bucket_arena_by_size(size), vaddr, size); + + // wake up arena waiters to let them try an alloc + cv_broadcast(&calling_arena->vm_cv); +} + +static inline int64_t +vmem_bucket_arena_free(uint16_t bucket) +{ + VERIFY(bucket < VMEM_BUCKETS); + return ((int64_t)vmem_size_semi_atomic(vmem_bucket_arena[bucket], + VMEM_FREE)); +} + +static inline int64_t +vmem_bucket_arena_used(int bucket) +{ + VERIFY(bucket < VMEM_BUCKETS); + return ((int64_t)vmem_size_semi_atomic(vmem_bucket_arena[bucket], + VMEM_ALLOC)); +} + + +int64_t +vmem_buckets_size(int typemask) +{ + int64_t total_size = 0; + + for (int i = 0; i < VMEM_BUCKETS; i++) { + int64_t u = vmem_bucket_arena_used(i); + int64_t f = vmem_bucket_arena_free(i); + if (typemask & VMEM_ALLOC) + total_size += u; + if (typemask & VMEM_FREE) + total_size += f; + } + if (total_size < 0) + total_size = 0; + + return ((size_t)total_size); +} + +static uint64_t +spl_validate_bucket_span_size(uint64_t val) +{ + if (!ISP2(val)) { + printf("SPL: %s: WARNING %llu is not a power of two, " + "not changing.\n", __func__, val); + return (0); + } + if (val < 128ULL*1024ULL || val > 16ULL*1024ULL*1024ULL) { + printf("SPL: %s: WARNING %llu is out of range [128k - 16M], " + "not changing.\n", __func__, val); + return (0); + } + return (val); +} + +static inline void +spl_modify_bucket_span_size(int bucket, uint64_t size) +{ + vmem_t *bvmp = vmem_bucket_arena[bucket]; + + mutex_enter(&bvmp->vm_lock); + bvmp->vm_min_import = size; + mutex_exit(&bvmp->vm_lock); +} + +static inline void +spl_modify_bucket_array() +{ + for (int i = VMEM_BUCKET_LOWBIT; i < VMEM_BUCKET_HIBIT; i++) { + // i = 12, bucket = 0, contains allocs from 8192 to 16383 bytes, + // and should never ask xnu for < 16384 bytes, so as to avoid + // asking xnu for a non-power-of-two size. 
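+		// Concretely, the span size applied below is
+		// MAX(spl_bucket_tunable_{small,large}_span,
+		// 2 * bucket_alloc_minimum_size), so a tunable that has been
+		// set too low can never cause a bucket to request a span
+		// from xnu smaller than twice its own power-of-two label.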
+		const int bucket = i - VMEM_BUCKET_LOWBIT;
+		const uint32_t bucket_alloc_minimum_size = 1UL << (uint32_t)i;
+		const uint32_t bucket_parent_alloc_minimum_size =
+		    bucket_alloc_minimum_size * 2UL;
+
+		switch (i) {
+			// see vmem_init() below for details
+		case 16:
+		case 17:
+			spl_modify_bucket_span_size(bucket,
+			    MAX(spl_bucket_tunable_small_span,
+			    bucket_parent_alloc_minimum_size));
+			break;
+		default:
+			spl_modify_bucket_span_size(bucket,
+			    MAX(spl_bucket_tunable_large_span,
+			    bucket_parent_alloc_minimum_size));
+			break;
+		}
+	}
+}
+
+static inline void
+spl_printf_bucket_span_sizes(void)
+{
+	// this doesn't have to be super-exact
+	printf("SPL: %s: ", __func__);
+	for (int i = VMEM_BUCKET_LOWBIT; i < VMEM_BUCKET_HIBIT; i++) {
+		int bnum = i - VMEM_BUCKET_LOWBIT;
+		vmem_t *bvmp = vmem_bucket_arena[bnum];
+		printf("%llu ", (uint64_t)bvmp->vm_min_import);
+	}
+	printf("\n");
+}
+
+static inline void
+spl_set_bucket_spans(uint64_t l, uint64_t s)
+{
+	if (spl_validate_bucket_span_size(l) &&
+	    spl_validate_bucket_span_size(s)) {
+		atomic_swap_64(&spl_bucket_tunable_large_span, l);
+		atomic_swap_64(&spl_bucket_tunable_small_span, s);
+		spl_modify_bucket_array();
+	}
+}
+
+void
+spl_set_bucket_tunable_large_span(uint64_t size)
+{
+	uint64_t s = 0;
+
+	mutex_enter(&vmem_xnu_alloc_lock);
+	atomic_swap_64(&s, spl_bucket_tunable_small_span);
+	spl_set_bucket_spans(size, s);
+	mutex_exit(&vmem_xnu_alloc_lock);
+
+	spl_printf_bucket_span_sizes();
+}
+
+void
+spl_set_bucket_tunable_small_span(uint64_t size)
+{
+	uint64_t l = 0;
+
+	mutex_enter(&vmem_xnu_alloc_lock);
+	atomic_swap_64(&l, spl_bucket_tunable_large_span);
+	spl_set_bucket_spans(l, size);
+	mutex_exit(&vmem_xnu_alloc_lock);
+
+	spl_printf_bucket_span_sizes();
+}
+
+static void *
+spl_vmem_default_alloc(vmem_t *vmp, size_t size, int vmflags)
+{
+	extern void *osif_malloc(uint64_t);
+	return (osif_malloc(size));
+}
+
+static void
+spl_vmem_default_free(vmem_t *vmp, void *vaddr, size_t size)
+{
+	extern void osif_free(void *, uint64_t);
+	osif_free(vaddr, size);
+}
+
+vmem_t *
+vmem_init(const char *heap_name,
+    void *heap_start, size_t heap_size, size_t heap_quantum,
+    void *(*heap_alloc)(vmem_t *, size_t, int),
+    void (*heap_free)(vmem_t *, void *, size_t))
+{
+	uint32_t id;
+	int nseg = VMEM_SEG_INITIAL;
+	vmem_t *heap;
+
+	// XNU mutexes need initialisation
+	mutex_init(&vmem_list_lock, "vmem_list_lock", MUTEX_DEFAULT,
+	    NULL);
+	mutex_init(&vmem_segfree_lock, "vmem_segfree_lock", MUTEX_DEFAULT,
+	    NULL);
+	mutex_init(&vmem_sleep_lock, "vmem_sleep_lock", MUTEX_DEFAULT,
+	    NULL);
+	mutex_init(&vmem_nosleep_lock, "vmem_nosleep_lock", MUTEX_DEFAULT,
+	    NULL);
+	mutex_init(&vmem_pushpage_lock, "vmem_pushpage_lock", MUTEX_DEFAULT,
+	    NULL);
+	mutex_init(&vmem_panic_lock, "vmem_panic_lock", MUTEX_DEFAULT,
+	    NULL);
+	mutex_init(&vmem_xnu_alloc_lock, "vmem_xnu_alloc_lock", MUTEX_DEFAULT,
+	    NULL);
+
+	while (--nseg >= 0)
+		vmem_putseg_global(&vmem_seg0[nseg]);
+
+	/*
+	 * On OSX we ultimately have to use the OS allocator
+	 * as the source and sink of memory as it is allocated
+	 * and freed.
+	 *
+	 * The spl_default_arena_parent is needed in order to provide a
+	 * base arena with an always-NULL afunc and ffunc in order to
+	 * end the searches done by vmem_[x]alloc and vmem_xfree; it
+	 * serves no other purpose; its stats will always be zero.
+	 *
+	 */
+
+	// id 0
+	spl_default_arena_parent = vmem_create("spl_default_arena_parent",
+	    NULL, 0, heap_quantum, NULL, NULL, NULL, 0, VM_SLEEP);
+
+	// illumos/openzfs has a gigantic pile of memory that it can use
+	// for its first arena;
+	// o3x is not so lucky, so we start with this
+	static char initial_default_block[16ULL*1024ULL*1024ULL]
+	    __attribute__((aligned(4096))) = { 0 };
+
+	// The default arena is very low-bandwidth; it supplies the initial
+	// large allocation for the heap arena below, and it serves as the
+	// parent of the vmem_metadata arena. It will typically do only 2
+	// or 3 parent_alloc calls (to spl_vmem_default_alloc) in total.
+
+	spl_default_arena = vmem_create("spl_default_arena", // id 1
+	    initial_default_block, 16ULL*1024ULL*1024ULL,
+	    heap_quantum, spl_vmem_default_alloc, spl_vmem_default_free,
+	    spl_default_arena_parent, 16ULL*1024ULL*1024ULL,
+	    VM_SLEEP | VMC_POPULATOR | VMC_NO_QCACHE);
+
+	VERIFY(spl_default_arena != NULL);
+
+	// The bucket arenas satisfy allocations & frees from the bucket heap
+	// that are dispatched to the bucket whose power-of-two label is the
+	// smallest allocation that vmem_bucket_alloc() will ask for.
+	//
+	// The bucket arenas in turn exchange memory with XNU's allocator/freer
+	// in large spans (~ 1 MiB is stable on all systems but creates bucket
+	// fragmentation)
+	//
+	// Segregating by size constrains internal fragmentation within the
+	// bucket and provides kstat.vmem visibility and span-size policy to
+	// be applied to particular buckets (notably the sources of most
+	// allocations, see the comments below)
+	//
+	// For VMEM_BUCKET_HIBIT == 12,
+	// vmem_bucket_arena[n] holds allocations from 2^[n+11]+1 to 2^[n+12],
+	// so for [n] = 0, 2049-4096, for [n]=5 65537-131072,
+	// for [n]=7 (256k+1)-512k
+	//
+	// so "kstat.vmem.vmem.bucket_1048576" should be read as the bucket
+	// arena containing allocations 1 MiB and smaller, but larger
+	// than 512 kiB.
+
+	// create arenas for the VMEM_BUCKETS, id 2 - id 14
+
+	extern uint64_t real_total_memory;
+	VERIFY3U(real_total_memory, >=, 1024ULL*1024ULL*1024ULL);
+
+	// adjust minimum bucket span size for memory size
+	// see comments in the switch below
+	// large span: 1 MiB and bigger on large-memory (> 32 GiB) systems
+	// small span: 256 kiB and bigger on large-memory systems
+	const uint64_t k = 1024ULL;
+	const uint64_t qm = 256ULL * k;
+	const uint64_t m = 1024ULL * k;
+	const uint64_t big = MAX(real_total_memory / (k * 32ULL), m);
+	const uint64_t small = MAX(real_total_memory / (k * 128ULL), qm);
+	spl_bucket_tunable_large_span = MIN(big, 16ULL * m);
+	spl_bucket_tunable_small_span = small;
+	printf("SPL: %s: real_total_memory %llu, large spans %llu, small "
+	    "spans %llu\n", __func__, real_total_memory,
+	    spl_bucket_tunable_large_span, spl_bucket_tunable_small_span);
+	char *buf = vmem_alloc(spl_default_arena, VMEM_NAMELEN + 21, VM_SLEEP);
+	for (int32_t i = VMEM_BUCKET_LOWBIT; i <= VMEM_BUCKET_HIBIT; i++) {
+		size_t minimum_allocsize = 0;
+		const uint64_t bucket_largest_size = (1ULL << (uint64_t)i);
+		(void) snprintf(buf, VMEM_NAMELEN + 20, "%s_%llu",
+		    "bucket", bucket_largest_size);
+		dprintf("SPL: %s creating arena %s (i == %d)\n", __func__, buf,
+		    i);
+		switch (i) {
+		case 15:
+		case 16:
+			/*
+			 * With the arrival of abd, the 2^15 (== 32768) and 2^16
+			 * buckets are by far the most busy, holding
+			 * respectively the qcache spans of kmem_va (the
+			 * kmem_alloc et al.
heap) and zfs_qcache (notably the + * source for the abd_chunk arena) + * + * The lifetime of early (i.e., after import and mount) + * allocations can be highly variable, leading + * to persisting fragmentation from the first eviction + * after arc has grown large. This can happen if, for + * example, there substantial import and mounting (and + * mds/mdworker and backupd scanning) activity before a + * user logs in and starts demanding memory in userland + * (e.g. by firing up a browser or mail app). + * + * Crucially, this makes it difficult to give back + * memory to xnu without holding the ARC size down for + * long periods of time. + * + * We can mitigate this by exchanging smaller + * amounts of memory with xnu for these buckets. + * There are two downsides: xnu's memory + * freelist will be prone to greater + * fragmentation, which will affect all + * allocation and free activity using xnu's + * allocator including kexts other than our; and + * we are likely to have more waits in the throttled + * alloc function, as more threads are likely to require + * slab importing into the kmem layer and fewer threads + * can be satisfied by a small allocation vs a large + * one. + * + * The import sizes are sysadmin-tunable by setting + * kstat.spl.misc.spl_misc.spl_tunable_small_span + * to a power-of-two number of bytes in zsysctl.conf + * should a sysadmin prefer non-early allocations to + * be larger or smaller depending on system performance + * and workload. + * + * However, a zfs booting system must use the defaults + * here for the earliest allocations, therefore they. + * should be only large enough to protect system + * performance if the sysadmin never changes the tunable + * span sizes. + */ + minimum_allocsize = MAX(spl_bucket_tunable_small_span, + bucket_largest_size * 4); + break; + default: + /* + * These buckets are all relatively low bandwidth and + * with relatively uniform lifespans for most + * allocations (borrowed arc buffers dominate). + * They should be large enough that they do not + * pester xnu. + */ + minimum_allocsize = MAX(spl_bucket_tunable_large_span, + bucket_largest_size * 4); + break; + } + dprintf("SPL: %s setting bucket %d (%d) to size %llu\n", + __func__, i, (int)(1 << i), (uint64_t)minimum_allocsize); + const int bucket_number = i - VMEM_BUCKET_LOWBIT; + vmem_t *b = vmem_create(buf, NULL, 0, heap_quantum, + xnu_alloc_throttled, xnu_free_throttled, + spl_default_arena_parent, minimum_allocsize, + VM_SLEEP | VMC_POPULATOR | VMC_NO_QCACHE | VMC_TIMEFREE); + VERIFY(b != NULL); + b->vm_min_import = minimum_allocsize; + b->vm_source = b; + vmem_bucket_arena[bucket_number] = b; + vmem_bucket_id_to_bucket_number[b->vm_id] = bucket_number; + } + + vmem_free(spl_default_arena, buf, VMEM_NAMELEN + 21); + // spl_heap_arena, the bucket heap, is the primary interface + // to the vmem system + + // all arenas not rooted to vmem_metadata will be rooted to + // spl_heap arena. + + spl_heap_arena = vmem_create("bucket_heap", // id 15 + NULL, 0, heap_quantum, + vmem_bucket_alloc, vmem_bucket_free, spl_default_arena_parent, 0, + VM_SLEEP | VMC_TIMEFREE | VMC_OLDFIRST); + + VERIFY(spl_heap_arena != NULL); + + // add a fixed-sized allocation to spl_heap_arena; this reduces the + // need to talk to the bucket arenas by a substantial margin + // (kstat.vmem.vmem.bucket_heap.{alloc+free} is much greater than + // kstat.vmem.vmem.bucket_heap.parent_{alloc+free}, and improves with + // increasing initial fixed allocation size. 
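A quick worked example of the default span formulas used just above (editorial arithmetic only, reusing the hunk's own expressions): on an 8 GiB machine the large span is MIN(MAX(8 GiB / 32768, 1 MiB), 16 MiB) = 1 MiB and the small span is MAX(8 GiB / 131072, 256 kiB) = 256 kiB; on a 64 GiB machine they come out to 2 MiB and 512 kiB respectively.

#include <stdint.h>
#include <stdio.h>

#define EX_MAX(a, b)    ((a) > (b) ? (a) : (b))
#define EX_MIN(a, b)    ((a) < (b) ? (a) : (b))

int
main(void)
{
    const uint64_t k = 1024ULL, qm = 256ULL * k, m = 1024ULL * k;
    const uint64_t mems[] = { 8ULL << 30, 16ULL << 30, 64ULL << 30 };

    for (int i = 0; i < 3; i++) {
        uint64_t big = EX_MAX(mems[i] / (k * 32ULL), m);
        uint64_t small = EX_MAX(mems[i] / (k * 128ULL), qm);

        printf("%llu GiB: large %llu kiB, small %llu kiB\n",
            (unsigned long long)(mems[i] >> 30),
            (unsigned long long)(EX_MIN(big, 16ULL * m) / k),
            (unsigned long long)(small / k));
    }
    return (0);
}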
+ + const size_t mib = 1024ULL * 1024ULL; + const size_t gib = 1024ULL * mib; + size_t resv_size = 128ULL * mib; + extern uint64_t real_total_memory; + + if (real_total_memory >= 4ULL * gib) + resv_size = 256ULL * mib; + if (real_total_memory >= 8ULL * gib) + resv_size = 512ULL * mib; + if (real_total_memory >= 16ULL * gib) + resv_size = gib; + + printf("SPL: %s adding fixed allocation of %llu to the bucket_heap\n", + __func__, (uint64_t)resv_size); + + spl_heap_arena_initial_alloc = vmem_add(spl_heap_arena, + vmem_alloc(spl_default_arena, resv_size, VM_SLEEP), + resv_size, VM_SLEEP); + + VERIFY(spl_heap_arena_initial_alloc != NULL); + + spl_heap_arena_initial_alloc_size = resv_size; + + // kstat.vmem.vmem.heap : kmem_cache_alloc() and similar calls + // to handle in-memory datastructures other than arc and zio buffers. + + heap = vmem_create(heap_name, // id 16 + NULL, 0, heap_quantum, + vmem_alloc, vmem_free, spl_heap_arena, 0, + VM_SLEEP); + + VERIFY(heap != NULL); + + // Root all the low bandwidth metadata arenas to the default arena. + // The vmem_metadata allocations will all be 32 kiB or larger, + // and the total allocation will generally cap off around 24 MiB. + + vmem_metadata_arena = vmem_create("vmem_metadata", // id 17 + NULL, 0, heap_quantum, vmem_alloc, vmem_free, spl_default_arena, + 8 * PAGESIZE, VM_SLEEP | VMC_POPULATOR | VMC_NO_QCACHE); + + VERIFY(vmem_metadata_arena != NULL); + + vmem_seg_arena = vmem_create("vmem_seg", // id 18 + NULL, 0, heap_quantum, + vmem_alloc, vmem_free, vmem_metadata_arena, 0, + VM_SLEEP | VMC_POPULATOR); + + VERIFY(vmem_seg_arena != NULL); + + vmem_hash_arena = vmem_create("vmem_hash", // id 19 + NULL, 0, 8, + vmem_alloc, vmem_free, vmem_metadata_arena, 0, + VM_SLEEP); + + VERIFY(vmem_hash_arena != NULL); + + vmem_vmem_arena = vmem_create("vmem_vmem", // id 20 + vmem0, sizeof (vmem0), 1, + vmem_alloc, vmem_free, vmem_metadata_arena, 0, + VM_SLEEP); + + VERIFY(vmem_vmem_arena != NULL); + + // 21 (0-based) vmem_create before this line. - macroized + // NUMBER_OF_ARENAS_IN_VMEM_INIT + for (id = 0; id < vmem_id; id++) { + (void) vmem_xalloc(vmem_vmem_arena, sizeof (vmem_t), + 1, 0, 0, &vmem0[id], &vmem0[id + 1], + VM_NOSLEEP | VM_BESTFIT | VM_PANIC); + } + + printf("SPL: starting vmem_update() thread\n"); + vmem_update(NULL); + + return (heap); +} + +struct free_slab { + vmem_t *vmp; + size_t slabsize; + void *slab; + list_node_t next; +}; +static list_t freelist; + +static void vmem_fini_freelist(void *vmp, void *start, size_t size) +{ + struct free_slab *fs; + + MALLOC(fs, struct free_slab *, sizeof (struct free_slab), M_TEMP, + M_WAITOK); + fs->vmp = vmp; + fs->slabsize = size; + fs->slab = start; + list_link_init(&fs->next); + list_insert_tail(&freelist, fs); +} + +void +vmem_free_span_list(void) +{ + int total = 0; + int total_count = 0; + struct free_slab *fs; + int release = 1; + + while ((fs = list_head(&freelist))) { + total_count++; + total += fs->slabsize; + list_remove(&freelist, fs); + for (int id = 0; id < VMEM_INITIAL; id++) { + if (&vmem0[id] == fs->slab) { + release = 0; + break; + } + } + if (release) + fs->vmp->vm_source_free(fs->vmp, fs->slab, + fs->slabsize); + release = 1; + FREE(fs, M_TEMP); + } +} + +static void +vmem_fini_void(void *vmp, void *start, uint32_t size) +{ +} + +void +vmem_fini(vmem_t *heap) +{ + struct free_slab *fs; + uint64_t total; + + bsd_untimeout(vmem_update, NULL); + + printf("SPL: %s: stopped vmem_update. 
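The vmem_fini_freelist()/vmem_free_span_list() pair above implements a collect-then-release pattern: the walk callback may not free spans directly, so it only records them on a list that is drained afterwards (the real code additionally skips spans that point into the static vmem0[] array). A minimal userland sketch of the same pattern, using a plain singly linked list instead of the SPL list_t/MALLOC facilities, follows; all names here are editorial.

#include <stdlib.h>

struct ex_span {
    void           *addr;
    size_t          size;
    struct ex_span *next;
};

static struct ex_span *ex_head;

/* walk callback: only record the span, never free it here */
static void
ex_collect(void *addr, size_t size)
{
    struct ex_span *s = malloc(sizeof (*s));

    s->addr = addr;
    s->size = size;
    s->next = ex_head;
    ex_head = s;
}

/* drained after the walk completes, when it is safe to release spans */
static void
ex_release_all(void (*release)(void *, size_t))
{
    while (ex_head != NULL) {
        struct ex_span *s = ex_head;

        ex_head = s->next;
        release(s->addr, s->size);
        free(s);
    }
}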
Creating list and walking " + "arenas.\n", __func__); + + /* Create a list of slabs to free by walking the list of allocs */ + list_create(&freelist, sizeof (struct free_slab), + offsetof(struct free_slab, next)); + + /* Walk to list of allocations */ + + /* + * walking with VMEM_REENTRANT causes segment consolidation and + * freeing of spans the freelist contains a list of segments that + * are still allocated at the time of the walk; unfortunately the + * lists cannot be exact without complex multiple passes, locking, + * and a more complex vmem_fini_freelist(). + * + * Walking without VMEM_REENTRANT can produce a nearly-exact list + * of unfreed spans, which Illumos would then free directly after + * the list is complete. + * + * Unfortunately in O3X, that lack of exactness can lead to a panic + * caused by attempting to free to xnu memory that we already freed + * to xnu. Fortunately, we can get a sense of what would have been + * destroyed after the (non-reentrant) walking, and we printf that + * at the end of this function. + */ + + // Walk all still-alive arenas from leaves to the root + + vmem_walk(heap, VMEM_ALLOC | VMEM_REENTRANT, vmem_fini_void, heap); + + vmem_walk(heap, VMEM_ALLOC, vmem_fini_freelist, heap); + + vmem_free_span_list(); + dprintf("\nSPL: %s destroying heap\n", __func__); + vmem_destroy(heap); // PARENT: spl_heap_arena + + printf("SPL: %s: walking spl_heap_arena, aka bucket_heap (pass 1)\n", + __func__); + + vmem_walk(spl_heap_arena, VMEM_ALLOC | VMEM_REENTRANT, vmem_fini_void, + spl_heap_arena); + + printf("SPL: %s: calling vmem_xfree(spl_default_arena, ptr, %llu);\n", + __func__, (uint64_t)spl_heap_arena_initial_alloc_size); + + // forcibly remove the initial alloc from spl_heap_arena arena, whether + // or not it is empty. below this point, any activity on + // spl_default_arena other than a non-reentrant(!) walk and a destroy + // is unsafe (UAF or MAF). + // However, all the children of spl_heap_arena should now be destroyed. + + vmem_xfree(spl_default_arena, spl_heap_arena_initial_alloc, + spl_heap_arena_initial_alloc_size); + + printf("SPL: %s: walking spl_heap_arena, aka bucket_heap (pass 2)\n", + __func__); + + vmem_walk(spl_heap_arena, VMEM_ALLOC, vmem_fini_freelist, + spl_heap_arena); + vmem_free_span_list(); + + printf("SPL: %s: walking bucket arenas...\n", __func__); + + for (int i = VMEM_BUCKET_LOWBIT; i <= VMEM_BUCKET_HIBIT; i++) { + const int bucket = i - VMEM_BUCKET_LOWBIT; + vmem_walk(vmem_bucket_arena[bucket], + VMEM_ALLOC | VMEM_REENTRANT, vmem_fini_void, + vmem_bucket_arena[bucket]); + + vmem_walk(vmem_bucket_arena[bucket], VMEM_ALLOC, + vmem_fini_freelist, vmem_bucket_arena[bucket]); + } + vmem_free_span_list(); + + dprintf("SPL: %s destroying spl_bucket_arenas...", __func__); + for (int32_t i = VMEM_BUCKET_LOWBIT; i <= VMEM_BUCKET_HIBIT; i++) { + vmem_t *vmpt = vmem_bucket_arena[i - VMEM_BUCKET_LOWBIT]; + dprintf(" %llu", (1ULL << i)); + vmem_destroy(vmpt); // parent: spl_default_arena_parent + } + dprintf("\n"); + + printf("SPL: %s: walking vmem metadata-related arenas...\n", __func__); + + vmem_walk(vmem_vmem_arena, VMEM_ALLOC | VMEM_REENTRANT, + vmem_fini_void, vmem_vmem_arena); + + vmem_walk(vmem_vmem_arena, VMEM_ALLOC, + vmem_fini_freelist, vmem_vmem_arena); + + vmem_free_span_list(); + + // We should not do VMEM_REENTRANT on vmem_seg_arena or + // vmem_hash_arena or below to avoid causing work in + // vmem_seg_arena and vmem_hash_arena. 
+ + vmem_walk(vmem_seg_arena, VMEM_ALLOC, + vmem_fini_freelist, vmem_seg_arena); + + vmem_free_span_list(); + + vmem_walk(vmem_hash_arena, VMEM_ALLOC, + vmem_fini_freelist, vmem_hash_arena); + vmem_free_span_list(); + + vmem_walk(vmem_metadata_arena, VMEM_ALLOC, + vmem_fini_freelist, vmem_metadata_arena); + + vmem_free_span_list(); + dprintf("SPL: %s walking the root arena (spl_default_arena)...\n", + __func__); + + vmem_walk(spl_default_arena, VMEM_ALLOC, + vmem_fini_freelist, spl_default_arena); + + vmem_free_span_list(); + + dprintf("SPL: %s destroying bucket heap\n", __func__); + // PARENT: spl_default_arena_parent (but depends on buckets) + vmem_destroy(spl_heap_arena); + + // destroying the vmem_vmem arena and any arena afterwards + // requires the use of vmem_destroy_internal(), which does + // not talk to vmem_vmem_arena like vmem_destroy() does. + // dprintf("SPL: %s destroying vmem_vmem_arena\n", __func__); + // vmem_destroy_internal(vmem_vmem_arena); + // parent: vmem_metadata_arena + + // destroying the seg arena means we must no longer + // talk to vmem_populate() + dprintf("SPL: %s destroying vmem_seg_arena\n", __func__); + vmem_destroy(vmem_seg_arena); + + // vmem_hash_arena may be freed-to in vmem_destroy_internal() + // so it should be just before the vmem_metadata_arena. + dprintf("SPL: %s destroying vmem_hash_arena\n", __func__); + vmem_destroy(vmem_hash_arena); // parent: vmem_metadata_arena + vmem_hash_arena = NULL; + + // XXX: if we panic on unload below here due to destroyed mutex, + // vmem_init() will need some reworking (e.g. have + // vmem_metadata_arena talk directly to xnu), or alternatively a + // vmem_destroy_internal_internal() function that does not touch + // vmem_hash_arena will need writing. + + dprintf("SPL: %s destroying vmem_metadata_arena\n", __func__); + vmem_destroy(vmem_metadata_arena); // parent: spl_default_arena + + dprintf("\nSPL: %s destroying spl_default_arena\n", __func__); + vmem_destroy(spl_default_arena); // parent: spl_default_arena_parent + dprintf("\nSPL: %s destroying spl_default_arena_parant\n", __func__); + vmem_destroy(spl_default_arena_parent); + + dprintf("SPL: %s destroying vmem_vmem_arena\n", __func__); + vmem_destroy_internal(vmem_vmem_arena); + + printf("SPL: arenas removed, now try destroying mutexes... "); + + printf("vmem_xnu_alloc_lock "); + mutex_destroy(&vmem_xnu_alloc_lock); + printf("vmem_panic_lock "); + mutex_destroy(&vmem_panic_lock); + printf("vmem_pushpage_lock "); + mutex_destroy(&vmem_pushpage_lock); + printf("vmem_nosleep_lock "); + mutex_destroy(&vmem_nosleep_lock); + printf("vmem_sleep_lock "); + mutex_destroy(&vmem_sleep_lock); + printf("vmem_segfree_lock "); + mutex_destroy(&vmem_segfree_lock); + printf("vmem_list_lock "); + mutex_destroy(&vmem_list_lock); + + printf("\nSPL: %s: walking list of live slabs at time of call to %s\n", + __func__, __func__); + + // annoyingly, some of these should be returned to xnu, but + // we have no idea which have already been freed to xnu, and + // freeing a second time results in a panic. 
+ + /* Now release the list of allocs to built above */ + total = 0; + uint64_t total_count = 0; + while ((fs = list_head(&freelist))) { + total_count++; + total += fs->slabsize; + list_remove(&freelist, fs); + // extern void segkmem_free(vmem_t *, void *, size_t); + // segkmem_free(fs->vmp, fs->slab, fs->slabsize); + FREE(fs, M_TEMP); + } + printf("SPL: WOULD HAVE released %llu bytes (%llu spans) from arenas\n", + total, total_count); + list_destroy(&freelist); + printf("SPL: %s: Brief delay for readability...\n", __func__); + delay(hz); + printf("SPL: %s: done!\n", __func__); +} + +/* + * return true if inuse is much smaller than imported + */ +static inline bool +bucket_fragmented(const uint16_t bn, const uint64_t now) +{ + + // early during uptime, just let buckets grow. + + if (now < 600 * hz) + return (false); + + // if there has been no pressure in the past five minutes, + // then we will just let the bucket grow. + + const uint64_t timeout = 5ULL * 60ULL * hz; + + if (spl_free_last_pressure_wrapper() + timeout < now) + return (false); + + const vmem_t *vmp = vmem_bucket_arena[bn]; + + const int64_t imported = + (int64_t)vmp->vm_kstat.vk_mem_import.value.ui64; + const int64_t inuse = + (int64_t)vmp->vm_kstat.vk_mem_inuse.value.ui64; + const int64_t tiny = 64LL*1024LL*1024LL; + const int64_t small = tiny * 2LL; // 128 M + const int64_t medium = small * 2LL; // 256 + const int64_t large = medium * 2LL; // 512 + const int64_t huge = large * 2LL; // 1 G + const int64_t super_huge = huge * 2LL; // 2 + + const int64_t amount_free = imported - inuse; + + if (amount_free <= tiny || imported <= small) + return (false); + + const int64_t percent_free = (amount_free * 100LL) / imported; + + if (percent_free > 75LL) { + return (true); + } else if (imported <= medium) { + return (percent_free >= 50); + } else if (imported <= large) { + return (percent_free >= 33); + } else if (imported <= huge) { + return (percent_free >= 25); + } else if (imported <= super_huge) { + return (percent_free >= 15); + } else { + return (percent_free >= 10); + } +} + +/* + * return true if the bucket for size is fragmented + */ +static inline bool +spl_arc_no_grow_impl(const uint16_t b, const size_t size, + const boolean_t buf_is_metadata, kmem_cache_t **kc) +{ + static _Atomic uint8_t frag_suppression_counter[VMEM_BUCKETS] = { 0 }; + + const uint64_t now = zfs_lbolt(); + + const bool fragmented = bucket_fragmented(b, now); + + if (fragmented) { + if (size < 32768) { + // Don't suppress small qcached blocks when the + // qcache size (bucket_262144) is fragmented, + // since they will push everything else towards + // the tails of ARC lists without eating up a large + // amount of space themselves. 
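To make the sliding thresholds in bucket_fragmented() concrete, here is a standalone copy of just the percentage logic with two sample points (editorial illustration; it omits the uptime and memory-pressure gates the real function applies first):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
example_fragmented(int64_t imported, int64_t inuse)
{
    const int64_t tiny = 64LL << 20, small = tiny * 2, medium = small * 2;
    const int64_t large = medium * 2, huge = large * 2, super_huge = huge * 2;
    const int64_t amount_free = imported - inuse;

    if (amount_free <= tiny || imported <= small)
        return (false);

    const int64_t percent_free = (amount_free * 100LL) / imported;

    if (percent_free > 75LL)
        return (true);
    if (imported <= medium)
        return (percent_free >= 50);
    if (imported <= large)
        return (percent_free >= 33);
    if (imported <= huge)
        return (percent_free >= 25);
    if (imported <= super_huge)
        return (percent_free >= 15);
    return (percent_free >= 10);
}

int
main(void)
{
    /* 1 GiB imported, 600 MiB in use: ~41% free >= 25% -> fragmented */
    assert(example_fragmented(1LL << 30, 600LL << 20));
    /* 1 GiB imported, 900 MiB in use: ~12% free < 25% -> not fragmented */
    assert(!example_fragmented(1LL << 30, 900LL << 20));
    return (0);
}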
+ return (false); + } + const uint32_t b_bit = (uint32_t)1 << (uint32_t)b; + spl_arc_no_grow_bits |= b_bit; + const uint32_t sup_at_least_every = MIN(b_bit, 255); + const uint32_t sup_at_most_every = MAX(b_bit, 16); + const uint32_t sup_every = MIN(sup_at_least_every, + sup_at_most_every); + if (frag_suppression_counter[b] >= sup_every) { + frag_suppression_counter[b] = 0; + return (true); + } else { + frag_suppression_counter[b]++; + return (false); + } + } else { + const uint32_t b_bit = (uint32_t)1 << (uint32_t)b; + spl_arc_no_grow_bits &= ~b_bit; + } + + extern bool spl_zio_is_suppressed(const size_t, const uint64_t, + const boolean_t, kmem_cache_t **); + + return (spl_zio_is_suppressed(size, now, buf_is_metadata, kc)); +} + +static inline uint16_t +vmem_bucket_number_arc_no_grow(const size_t size) +{ + // qcaching on arc + if (size < 128*1024) + return (vmem_bucket_number(262144)); + else + return (vmem_bucket_number(size)); +} + +boolean_t +spl_arc_no_grow(size_t size, boolean_t buf_is_metadata, kmem_cache_t **zp) +{ + const uint16_t b = vmem_bucket_number_arc_no_grow(size); + + const bool rv = spl_arc_no_grow_impl(b, size, buf_is_metadata, zp); + + if (rv) { + atomic_inc_64(&spl_arc_no_grow_count); + } + + return ((boolean_t)rv); +} diff --git a/module/os/macos/spl/spl-vnode.c b/module/os/macos/spl/spl-vnode.c new file mode 100644 index 0000000000..df69fdf667 --- /dev/null +++ b/module/os/macos/spl/spl-vnode.c @@ -0,0 +1,496 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
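The suppression counter above turns a "fragmented" verdict into a duty cycle rather than a hard answer: with sup_every = MIN(MIN(1 << b, 255), MAX(1 << b, 16)), which works out to MIN(1 << b, 255), a fragmented bucket reports no-grow roughly once in sup_every queries, so higher-numbered buckets signal it less frequently, capped at about one in 255. The following editorial snippet just prints that effective period per bucket index:

#include <stdint.h>
#include <stdio.h>

#define EX_MIN(a, b)    ((a) < (b) ? (a) : (b))
#define EX_MAX(a, b)    ((a) > (b) ? (a) : (b))

int
main(void)
{
    for (uint32_t b = 0; b < 13; b++) {
        uint32_t b_bit = (uint32_t)1 << b;
        uint32_t sup_every = EX_MIN(EX_MIN(b_bit, 255U),
            EX_MAX(b_bit, 16U));

        printf("bucket %2u: no-grow about 1 in %u\n", b, sup_every);
    }
    return (0);
}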
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +int +vn_open(char *pnamep, enum uio_seg seg, int filemode, int createmode, + struct vnode **vpp, enum create crwhy, mode_t umask) +{ + vfs_context_t vctx; + int fmode; + int error; + + fmode = filemode; + if (crwhy) + fmode |= O_CREAT; + // TODO I think this should be 'fmode' instead of 'filemode' + vctx = vfs_context_create((vfs_context_t)0); + error = vnode_open(pnamep, filemode, createmode, 0, vpp, vctx); + (void) vfs_context_rele(vctx); + return (error); +} + +int +vn_openat(char *pnamep, enum uio_seg seg, int filemode, int createmode, + struct vnode **vpp, enum create crwhy, + mode_t umask, struct vnode *startvp) +{ + char *path; + int pathlen = MAXPATHLEN; + int error; + + path = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP); + + error = vn_getpath(startvp, path, &pathlen); + if (error == 0) { + strlcat(path, pnamep, MAXPATHLEN); + error = vn_open(path, seg, filemode, createmode, vpp, crwhy, + umask); + } + + kmem_free(path, MAXPATHLEN); + return (error); +} + +extern errno_t vnode_rename(const char *, const char *, int, vfs_context_t); + +errno_t +vnode_rename(const char *from, const char *to, int flags, vfs_context_t vctx) +{ + /* + * We need proper KPI changes to be able to safely update + * the zpool.cache file. For now, we return EPERM. + */ + return (EPERM); +} + +int +vn_rename(char *from, char *to, enum uio_seg seg) +{ + vfs_context_t vctx; + int error; + + vctx = vfs_context_create((vfs_context_t)0); + + error = vnode_rename(from, to, 0, vctx); + + (void) vfs_context_rele(vctx); + + return (error); +} + +extern errno_t vnode_remove(const char *, int, enum vtype, vfs_context_t); + +errno_t +vnode_remove(const char *name, int flag, enum vtype type, vfs_context_t vctx) +{ + /* + * Now that zed ZFS Event Daemon can handle the rename of zpool.cache + * we will silence this limitation, and look in zed.d/config.sync.sh + */ + return (EPERM); +} + + +int +vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag) +{ + vfs_context_t vctx; + enum vtype type; + int error; + + type = dirflag == RMDIRECTORY ? 
VDIR : VREG; + + vctx = vfs_context_create((vfs_context_t)0); + + error = vnode_remove(fnamep, 0, type, vctx); + + (void) vfs_context_rele(vctx); + + return (error); +} + +int +VOP_SPACE(struct vnode *vp, int cmd, struct flock *fl, int flags, offset_t off, + cred_t *cr, void *ctx) +{ + int error = 0; +#ifdef F_PUNCHHOLE + if (cmd == F_FREESP) { + fpunchhole_t fpht; + fpht.fp_flags = 0; + fpht.fp_offset = fl->l_start; + fpht.fp_length = fl->l_len; + if (vnode_getwithref(vp) == 0) { + error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&fpht, 0, + ctx); + (void) vnode_put(vp); + } + } +#endif + return (error); +} + +int +VOP_CLOSE(struct vnode *vp, int flag, int count, offset_t off, + void *cr, void *k) +{ + vfs_context_t vctx; + int error; + + vctx = vfs_context_create((vfs_context_t)0); + error = vnode_close(vp, flag & FWRITE, vctx); + (void) vfs_context_rele(vctx); + return (error); +} + +int +VOP_FSYNC(struct vnode *vp, int flags, void* unused, void *uused2) +{ + vfs_context_t vctx; + int error; + + vctx = vfs_context_create((vfs_context_t)0); + error = VNOP_FSYNC(vp, (flags == FSYNC), vctx); + (void) vfs_context_rele(vctx); + return (error); +} + +int +VOP_GETATTR(struct vnode *vp, vattr_t *vap, int flags, void *x3, void *x4) +{ + vfs_context_t vctx; + int error; + + vctx = vfs_context_create((vfs_context_t)0); + error = vnode_getattr(vp, vap, vctx); + (void) vfs_context_rele(vctx); + return (error); +} + +errno_t VNOP_LOOKUP(struct vnode *, struct vnode **, + struct componentname *, vfs_context_t); + +errno_t +VOP_LOOKUP(struct vnode *vp, struct vnode **vpp, + struct componentname *cn, vfs_context_t ct) +{ + return (VNOP_LOOKUP(vp, vpp, cn, ct)); +} + +#undef VFS_ROOT + +extern int VFS_ROOT(mount_t, struct vnode **, vfs_context_t); +int +spl_vfs_root(mount_t mount, struct vnode **vp) +{ + return (VFS_ROOT(mount, vp, vfs_context_current())); +} + +void +vfs_mountedfrom(struct mount *vfsp, char *osname) +{ + (void) copystr(osname, vfs_statfs(vfsp)->f_mntfromname, MNAMELEN - 1, + 0); +} + +static kmutex_t spl_getf_lock; +static list_t spl_getf_list; + +int +spl_vnode_init(void) +{ + mutex_init(&spl_getf_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&spl_getf_list, sizeof (struct spl_fileproc), + offsetof(struct spl_fileproc, f_next)); + return (0); +} + +void +spl_vnode_fini(void) +{ + mutex_destroy(&spl_getf_lock); + list_destroy(&spl_getf_list); +} + +#include +struct fileproc; + +extern int fp_drop(struct proc *p, int fd, struct fileproc *fp, int locked); +extern int fp_drop_written(struct proc *p, int fd, struct fileproc *fp, + int locked); +extern int fp_lookup(struct proc *p, int fd, struct fileproc **resultfp, + int locked); +extern int fo_read(struct fileproc *fp, struct uio *uio, int flags, + vfs_context_t ctx); +extern int fo_write(struct fileproc *fp, struct uio *uio, int flags, + vfs_context_t ctx); +extern int file_vnode_withvid(int, struct vnode **, uint32_t *); +extern int file_drop(int); + +/* + * getf(int fd) - hold a lock on a file descriptor, to be released by calling + * releasef(). On OSX we will also look up the vnode of the fd for calls + * to spl_vn_rdwr(). + */ +void * +getf(int fd) +{ + struct fileproc *fp = NULL; + struct spl_fileproc *sfp = NULL; + struct vnode *vp; + uint32_t vid; + + /* + * We keep the "fp" pointer as well, both for unlocking in releasef() + * and used in vn_rdwr(). 
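For context, a typical caller pairs getf()/releasef() around spl_vn_rdwr() as sketched below. This is an editorial illustration against the prototypes defined in this file, not part of the patch; it assumes the SPL headers are in scope and that the usual SPL conveniences kcred and RLIM64_INFINITY are available, and it reduces error handling to a single return code.

/* editorial usage sketch: hold the descriptor, do I/O, release it */
static int
example_read_fd(int fd, void *buf, ssize_t len)
{
    struct spl_fileproc *sfp = getf(fd);
    ssize_t resid = 0;
    int error;

    if (sfp == NULL)
        return (EBADF);

    error = spl_vn_rdwr(UIO_READ, sfp, (caddr_t)buf, len,
        0 /* offset */, UIO_SYSSPACE, 0 /* ioflag */,
        RLIM64_INFINITY, kcred, &resid);

    releasef(fd);
    return (error);
}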
+ */ + + sfp = kmem_alloc(sizeof (*sfp), KM_SLEEP); + if (!sfp) + return (NULL); + + if (fp_lookup(current_proc(), fd, &fp, 0 /* !locked */)) { + kmem_free(sfp, sizeof (*sfp)); + return (NULL); + } + + printf("current_proc %p: fd %d fp %p vp %p\n", current_proc(), + fd, fp, vp); + + sfp->f_vnode = vp; + sfp->f_fd = fd; + sfp->f_offset = 0; + sfp->f_proc = current_proc(); + sfp->f_fp = fp; + + /* Also grab vnode, so we can fish out the minor, for onexit */ + if (!file_vnode_withvid(fd, &vp, &vid)) { + sfp->f_vnode = vp; + if (vnode_vtype(vp) != VDIR) { + sfp->f_file = minor(vnode_specrdev(vp)); + } + file_drop(fd); + } + + mutex_enter(&spl_getf_lock); + list_insert_tail(&spl_getf_list, sfp); + mutex_exit(&spl_getf_lock); + + return (sfp); +} + +struct vnode * +getf_vnode(void *fp) +{ + struct spl_fileproc *sfp = (struct spl_fileproc *)fp; + struct vnode *vp = NULL; + uint32_t vid; + + if (!file_vnode_withvid(sfp->f_fd, &vp, &vid)) { + file_drop(sfp->f_fd); + } + + return (vp); +} + +void +releasef(int fd) +{ + struct spl_fileproc *fp = NULL; + struct proc *p; + + p = current_proc(); + mutex_enter(&spl_getf_lock); + for (fp = list_head(&spl_getf_list); fp != NULL; + fp = list_next(&spl_getf_list, fp)) { + if ((fp->f_proc == p) && fp->f_fd == fd) break; + } + mutex_exit(&spl_getf_lock); + if (!fp) + return; // Not found + + if (fp->f_writes) + fp_drop_written(p, fd, fp->f_fp, 0 /* !locked */); + else + fp_drop(p, fd, fp->f_fp, 0 /* !locked */); + + /* Remove node from the list */ + mutex_enter(&spl_getf_lock); + list_remove(&spl_getf_list, fp); + mutex_exit(&spl_getf_lock); + + /* Free the node */ + kmem_free(fp, sizeof (*fp)); +} + +/* + * getf()/releasef() IO handler. + */ +int spl_vn_rdwr(enum uio_rw rw, struct spl_fileproc *sfp, + caddr_t base, ssize_t len, offset_t offset, enum uio_seg seg, + int ioflag, rlim64_t ulimit, cred_t *cr, ssize_t *residp) +{ + uio_t *auio; + int spacetype; + int error = 0; + vfs_context_t vctx; + + spacetype = UIO_SEG_IS_USER_SPACE(seg) ? UIO_USERSPACE32 : UIO_SYSSPACE; + + vctx = vfs_context_create((vfs_context_t)0); + auio = uio_create(1, 0, spacetype, rw); + uio_reset(auio, offset, spacetype, rw); + uio_addiov(auio, (uint64_t)(uintptr_t)base, len); + + if (rw == UIO_READ) { + error = fo_read(sfp->f_fp, auio, ioflag, vctx); + } else { + error = fo_write(sfp->f_fp, auio, ioflag, vctx); + } + + if (residp) { + *residp = uio_resid(auio); + } else { + if (uio_resid(auio) && error == 0) + error = EIO; + } + + uio_free(auio); + vfs_context_rele(vctx); + + return (error); +} + +/* Regular vnode vn_rdwr */ +int zfs_vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len, + offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit, + cred_t *cr, ssize_t *residp) +{ + uio_t *auio; + int spacetype; + int error = 0; + vfs_context_t vctx; + + spacetype = UIO_SEG_IS_USER_SPACE(seg) ? 
UIO_USERSPACE32 : UIO_SYSSPACE; + + vctx = vfs_context_create((vfs_context_t)0); + auio = uio_create(1, 0, spacetype, rw); + uio_reset(auio, offset, spacetype, rw); + uio_addiov(auio, (uint64_t)(uintptr_t)base, len); + + if (rw == UIO_READ) { + error = VNOP_READ(vp, auio, ioflag, vctx); + } else { + error = VNOP_WRITE(vp, auio, ioflag, vctx); + } + + if (residp) { + *residp = uio_resid(auio); + } else { + if (uio_resid(auio) && error == 0) + error = EIO; + } + + uio_free(auio); + vfs_context_rele(vctx); + + return (error); +} + +void +spl_rele_async(void *arg) +{ + struct vnode *vp = (struct vnode *)arg; + if (vp) vnode_put(vp); +} + +void +vn_rele_async(struct vnode *vp, void *taskq) +{ + VERIFY(taskq_dispatch((taskq_t *)taskq, + (task_func_t *)spl_rele_async, vp, TQ_SLEEP) != 0); +} + +vfs_context_t +spl_vfs_context_kernel(void) +{ + return (vfs_context_kernel()); +} + +#undef build_path +extern int build_path(struct vnode *vp, char *buff, int buflen, int *outlen, + int flags, vfs_context_t ctx); + +int spl_build_path(struct vnode *vp, char *buff, int buflen, int *outlen, + int flags, vfs_context_t ctx) +{ + return (build_path(vp, buff, buflen, outlen, flags, ctx)); +} + +/* + * vnode_notify was moved from KERNEL_PRIVATE to KERNEL in 10.11, but to be + * backward compatible, we keep the wrapper for now. + */ +extern int vnode_notify(struct vnode *, uint32_t, struct vnode_attr *); +int +spl_vnode_notify(struct vnode *vp, uint32_t type, struct vnode_attr *vap) +{ + return (vnode_notify(vp, type, vap)); +} + +extern int vfs_get_notify_attributes(struct vnode_attr *vap); +int +spl_vfs_get_notify_attributes(struct vnode_attr *vap) +{ + return (vfs_get_notify_attributes(vap)); +} + +/* Root directory vnode for the system a.k.a. '/' */ +/* + * Must use vfs_rootvnode() to acquire a reference, and + * vnode_put() to release it + */ + +extern struct vnode *rootvnode; + +struct vnode * +getrootdir(void) +{ + struct vnode *rvnode; + + // Unfortunately, Apple's vfs_rootvnode() fails to check for + // NULL rootvp, and just panics. We aren't technically allowed to + // see rootvp, but in the interest of avoiding a panic... + if (rootvnode == NULL) + return (NULL); + + rvnode = vfs_rootvnode(); + if (rvnode) + vnode_put(rvnode); + return (rvnode); +} diff --git a/module/os/macos/spl/spl-xdr.c b/module/os/macos/spl/spl-xdr.c new file mode 100644 index 0000000000..4eb115017d --- /dev/null +++ b/module/os/macos/spl/spl-xdr.c @@ -0,0 +1,524 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * + * Copyright (C) 2008 MacZFS + * Copyright (C) 2013 Jorgen Lundman + * + */ + +#include +#include +#include +#include +#include +#include + + +/* + * SPL's XDR mem implementation. 
+ * + * This is used by libnvpair to serialize/deserialize the name-value pair data + * structures into byte arrays in a well-defined and portable manner. + * + * These data structures are used by the DMU/ZFS to flexibly manipulate various + * information in memory and later serialize it/deserialize it to disk. + * Examples of usages include the pool configuration, lists of pool and dataset + * properties, etc. + * + * Reference documentation for the XDR representation and XDR operations can be + * found in RFC 1832 and xdr(3), respectively. + * + * === Implementation shortcomings === + * + * It is assumed that the following C types have the following sizes: + * + * char/unsigned char: 1 byte + * short/unsigned short: 2 bytes + * int/unsigned int: 4 bytes + * longlong_t/u_longlong_t: 8 bytes + * + * The C standard allows these types to be larger (and in the case of ints, + * shorter), so if that is the case on some compiler/architecture, the build + * will fail (on purpose). + * + * If someone wants to fix the code to work properly on such environments, then: + * + * 1) Preconditions should be added to xdrmem_enc functions to make sure the + * caller doesn't pass arguments which exceed the expected range. + * 2) Functions which take signed integers should be changed to properly do + * sign extension. + * 3) For ints with less than 32 bits, well.. I suspect you'll have bigger + * problems than this implementation. + * + * It is also assumed that: + * + * 1) Chars have 8 bits. + * 2) We can always do 32-bit-aligned int memory accesses and byte-aligned + * memcpy, memset and memcmp. + * 3) Arrays passed to xdr_array() are packed and the compiler/architecture + * supports element-sized-aligned memory accesses. + * 4) Negative integers are natively stored in two's complement binary + * representation. + * + * No checks are done for the 4 assumptions above, though. + * + * === Caller expectations === + * + * Existing documentation does not describe the semantics of XDR operations very + * well. Therefore, some assumptions about failure semantics will be made and + * will be described below: + * + * 1) If any encoding operation fails (e.g., due to lack of buffer space), the + * the stream should be considered valid only up to the encoding operation + * previous to the one that first failed. However, the stream size as returned + * by xdr_control() cannot be considered to be strictly correct (it may be + * bigger). + * + * Putting it another way, if there is an encoding failure it's undefined + * whether anything is added to the stream in that operation and therefore + * neither xdr_control() nor future encoding operations on the same stream can + * be relied upon to produce correct results. + * + * 2) If a decoding operation fails, it's undefined whether anything will be + * decoded into passed buffers/pointers during that operation, or what the + * values on those buffers will look like. + * + * Future decoding operations on the same stream will also have similar + * undefined behavior. + * + * 3) When the first decoding operation fails it is OK to trust the results of + * previous decoding operations on the same stream, as long as the caller + * expects a failure to be possible (e.g. due to end-of-stream). + * + * However, this is highly discouraged because the caller should know the + * stream size and should be coded to expect any decoding failure to be data + * corruption due to hardware, accidental or even malicious causes, which should + * be handled gracefully in all cases. 
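As a concrete illustration of the stream usage these rules describe, a caller encodes into a flat buffer and later decodes from the same buffer. The sketch below is editorial: it reaches the ops through the x_ops table that this file installs, whereas real consumers normally go through the xdr_*() wrappers in the SPL rpc/xdr.h headers, which are assumed to be in scope here.

/* editorial sketch: round-trip one value through an xdrmem stream */
static int
example_xdr_roundtrip(void)
{
    char buf[16];
    XDR xe, xd;
    unsigned int in = 0xdeadbeef, out = 0;

    xdrmem_create(&xe, buf, sizeof (buf), XDR_ENCODE);
    if (!xe.x_ops->xdr_u_int(&xe, &in))
        return (-1);

    xdrmem_create(&xd, buf, sizeof (buf), XDR_DECODE);
    if (!xd.x_ops->xdr_u_int(&xd, &out))
        return (-1);

    return (out == in ? 0 : -1);
}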
+ * + * In very rare situations where there are strong reasons to believe the data + * can be trusted to be valid and non-tampered with, then the caller may assume + * a decoding failure to be a bug (e.g. due to mismatched data types) and may + * fail non-gracefully. + * + * 4) Non-zero padding bytes will cause the decoding operation to fail. + * + * 5) Zero bytes on string types will also cause the decoding operation to fail. + * + * 6) It is assumed that either the pointer to the stream buffer given by the + * caller is 32-bit aligned or the architecture supports non-32-bit-aligned int + * memory accesses. + * + * 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap. + * + * 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user + * space or MMIO space), the computer may explode. + */ + +static struct xdr_ops xdrmem_encode_ops; +static struct xdr_ops xdrmem_decode_ops; + +void +xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size, + const enum xdr_op op) +{ + switch (op) { + case XDR_ENCODE: + xdrs->x_ops = &xdrmem_encode_ops; + break; + case XDR_DECODE: + xdrs->x_ops = &xdrmem_decode_ops; + break; + default: + printf("SPL: Invalid op value: %d\n", op); + xdrs->x_ops = NULL; /* Let the caller know we failed */ + return; + } + + xdrs->x_op = op; + xdrs->x_addr = addr; + xdrs->x_addr_end = addr + size; + + if (xdrs->x_addr_end < xdrs->x_addr) { + printf("SPL: Overflow while creating xdrmem: %p, %u\n", addr, + size); + xdrs->x_ops = NULL; + } +} +EXPORT_SYMBOL(xdrmem_create); + +static bool_t +xdrmem_control(XDR *xdrs, int req, void *info) +{ + struct xdr_bytesrec *rec = (struct xdr_bytesrec *)info; + + if (req != XDR_GET_BYTES_AVAIL) { + printf("SPL: Called with unknown request: %d\n", req); + return (FALSE); + } + + rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */ + rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr; + + return (TRUE); +} + +static bool_t +xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt) +{ + uint_t size = roundup(cnt, 4); + uint_t pad; + + if (size < cnt) + return (FALSE); /* Integer overflow */ + + if (xdrs->x_addr > xdrs->x_addr_end) + return (FALSE); + + if (xdrs->x_addr_end - xdrs->x_addr < size) + return (FALSE); + + memcpy(xdrs->x_addr, cp, cnt); + + xdrs->x_addr += cnt; + + pad = size - cnt; + if (pad > 0) { + memset(xdrs->x_addr, 0, pad); + xdrs->x_addr += pad; + } + + return (TRUE); +} + +static bool_t +xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt) +{ + static uint32_t zero = 0; + uint_t size = roundup(cnt, 4); + uint_t pad; + + if (size < cnt) + return (FALSE); /* Integer overflow */ + + if (xdrs->x_addr > xdrs->x_addr_end) + return (FALSE); + + if (xdrs->x_addr_end - xdrs->x_addr < size) + return (FALSE); + + memcpy(cp, xdrs->x_addr, cnt); + xdrs->x_addr += cnt; + + pad = size - cnt; + if (pad > 0) { + /* An inverted memchr() would be useful here... 
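The opaque-byte codecs above round every count up to a multiple of four and zero-fill the tail, so a 5-byte field occupies 8 bytes on the wire with 3 zero pad bytes. A small editorial demonstration of that rule in isolation:

#include <stdio.h>
#include <string.h>

/* mirror of the "round up to 4, zero-fill the tail" rule in xdrmem_enc_bytes() */
static size_t
example_xdr_opaque_size(size_t cnt)
{
    return ((cnt + 3) & ~(size_t)3);
}

int
main(void)
{
    unsigned char wire[8];
    const char payload[5] = "abcde";
    size_t size = example_xdr_opaque_size(sizeof (payload));

    memcpy(wire, payload, sizeof (payload));
    memset(wire + sizeof (payload), 0, size - sizeof (payload));
    printf("5 payload bytes occupy %zu wire bytes\n", size); /* 8 */
    return (0);
}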
*/ + if (memcmp(&zero, xdrs->x_addr, pad) != 0) + return (FALSE); + + xdrs->x_addr += pad; + } + + return (TRUE); +} + +static bool_t +xdrmem_enc_uint32(XDR *xdrs, uint32_t val) +{ + if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end) + return (FALSE); + + *((uint32_t *)xdrs->x_addr) = BE_32(val); + + xdrs->x_addr += sizeof (uint32_t); + + return (TRUE); +} + +static bool_t +xdrmem_dec_uint32(XDR *xdrs, uint32_t *val) +{ + if (xdrs->x_addr + sizeof (uint32_t) > xdrs->x_addr_end) + return (FALSE); + + *val = BE_32(*((uint32_t *)xdrs->x_addr)); + + xdrs->x_addr += sizeof (uint32_t); + + return (TRUE); +} + +static bool_t +xdrmem_enc_char(XDR *xdrs, char *cp) +{ + uint32_t val; + + // BUILD_BUG_ON(sizeof(char) != 1); + val = *((unsigned char *) cp); + + return (xdrmem_enc_uint32(xdrs, val)); +} + +static bool_t +xdrmem_dec_char(XDR *xdrs, char *cp) +{ + uint32_t val; + + // BUILD_BUG_ON(sizeof(char) != 1); + + if (!xdrmem_dec_uint32(xdrs, &val)) + return (FALSE); + + /* + * If any of the 3 other bytes are non-zero then val will be greater + * than 0xff and we fail because according to the RFC, this block does + * not have a char encoded in it. + */ + if (val > 0xff) + return (FALSE); + + *((unsigned char *) cp) = val; + + return (TRUE); +} + +static bool_t +xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp) +{ + // BUILD_BUG_ON(sizeof(unsigned short) != 2); + + return (xdrmem_enc_uint32(xdrs, *usp)); +} + +static bool_t +xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp) +{ + uint32_t val; + + // BUILD_BUG_ON(sizeof(unsigned short) != 2); + + if (!xdrmem_dec_uint32(xdrs, &val)) + return (FALSE); + + /* + * Short ints are not in the RFC, but we assume similar logic as in + * xdrmem_dec_char(). + */ + if (val > 0xffff) + return (FALSE); + + *usp = val; + + return (TRUE); +} + +static bool_t +xdrmem_enc_uint(XDR *xdrs, unsigned *up) +{ + // BUILD_BUG_ON(sizeof(unsigned) != 4); + + return (xdrmem_enc_uint32(xdrs, *up)); +} + +static bool_t +xdrmem_dec_uint(XDR *xdrs, unsigned *up) +{ + // BUILD_BUG_ON(sizeof(unsigned) != 4); + + return (xdrmem_dec_uint32(xdrs, (uint32_t *)up)); +} + +static bool_t +xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp) +{ + // BUILD_BUG_ON(sizeof(u_longlong_t) != 8); + + if (!xdrmem_enc_uint32(xdrs, *ullp >> 32)) + return (FALSE); + + return (xdrmem_enc_uint32(xdrs, *ullp & 0xffffffff)); +} + +static bool_t +xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp) +{ + uint32_t low, high; + + // BUILD_BUG_ON(sizeof(u_longlong_t) != 8); + + if (!xdrmem_dec_uint32(xdrs, &high)) + return (FALSE); + if (!xdrmem_dec_uint32(xdrs, &low)) + return (FALSE); + + *ullp = ((u_longlong_t)high << 32) | low; + + return (TRUE); +} + +static bool_t +xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize, + const uint_t elsize, const xdrproc_t elproc) +{ + uint_t i; + caddr_t addr = *arrp; + + if (*sizep > maxsize || *sizep > UINT_MAX / elsize) + return (FALSE); + + if (!xdrmem_enc_uint(xdrs, sizep)) + return (FALSE); + + for (i = 0; i < *sizep; i++) { + if (!elproc(xdrs, addr)) + return (FALSE); + addr += elsize; + } + + return (TRUE); +} + +static bool_t +xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize, + const uint_t elsize, const xdrproc_t elproc) +{ + uint_t i, size; + bool_t alloc = FALSE; + caddr_t addr; + + if (!xdrmem_dec_uint(xdrs, sizep)) + return (FALSE); + + size = *sizep; + + if (size > maxsize || size > UINT_MAX / elsize) + return (FALSE); + + /* + * The Solaris man page says: "If *arrp is NULL when decoding, + * 
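The array codecs above guard the size * elsize multiplication with "size > UINT_MAX / elsize" before allocating or iterating, so a hostile element count cannot wrap the allocation size. The same idiom in isolation (editorial example):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* true if count * elsize would overflow an unsigned int */
static bool
example_mul_overflows(unsigned int count, unsigned int elsize)
{
    return (elsize != 0 && count > UINT_MAX / elsize);
}

int
main(void)
{
    printf("%d\n", example_mul_overflows(0x20000000U, 8U)); /* 1 */
    printf("%d\n", example_mul_overflows(1000U, 8U));       /* 0 */
    return (0);
}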
xdr_array() allocates memory and *arrp points to it". + */ + if (*arrp == NULL) { + // BUILD_BUG_ON(sizeof(uint_t) > sizeof(size_t)); + + *arrp = kmem_alloc(size * elsize, KM_NOSLEEP); + if (*arrp == NULL) + return (FALSE); + + alloc = TRUE; + } + + addr = *arrp; + + for (i = 0; i < size; i++) { + if (!elproc(xdrs, addr)) { + if (alloc) + kmem_free(*arrp, size * elsize); + return (FALSE); + } + addr += elsize; + } + + return (TRUE); +} + +static bool_t +xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize) +{ + size_t slen = strlen(*sp); + uint_t len; + + if (slen > maxsize) + return (FALSE); + + len = slen; + + if (!xdrmem_enc_uint(xdrs, &len)) + return (FALSE); + + return (xdrmem_enc_bytes(xdrs, *sp, len)); +} + +static bool_t +xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize) +{ + uint_t size; + bool_t alloc = FALSE; + + if (!xdrmem_dec_uint(xdrs, &size)) + return (FALSE); + + if (size > maxsize || size > UINT_MAX - 1) + return (FALSE); + + /* + * Solaris man page: "If *sp is NULL when decoding, xdr_string() + * allocates memory and *sp points to it". + */ + if (*sp == NULL) { + // BUILD_BUG_ON(sizeof(uint_t) > sizeof(size_t)); + + *sp = kmem_alloc(size + 1, KM_NOSLEEP); + if (*sp == NULL) + return (FALSE); + + alloc = TRUE; + } + + if (!xdrmem_dec_bytes(xdrs, *sp, size)) + goto fail; + + if (kmemchr(*sp, 0, size) != NULL) + goto fail; + + (*sp)[size] = '\0'; + + return (TRUE); + +fail: + if (alloc) + kmem_free(*sp, size + 1); + + return (FALSE); +} + +static struct xdr_ops xdrmem_encode_ops = { + .xdr_control = xdrmem_control, + .xdr_char = xdrmem_enc_char, + .xdr_u_short = xdrmem_enc_ushort, + .xdr_u_int = xdrmem_enc_uint, + .xdr_u_longlong_t = xdrmem_enc_ulonglong, + .xdr_opaque = xdrmem_enc_bytes, + .xdr_string = xdr_enc_string, + .xdr_array = xdr_enc_array +}; + +static struct xdr_ops xdrmem_decode_ops = { + .xdr_control = xdrmem_control, + .xdr_char = xdrmem_dec_char, + .xdr_u_short = xdrmem_dec_ushort, + .xdr_u_int = xdrmem_dec_uint, + .xdr_u_longlong_t = xdrmem_dec_ulonglong, + .xdr_opaque = xdrmem_dec_bytes, + .xdr_string = xdr_dec_string, + .xdr_array = xdr_dec_array +}; diff --git a/module/os/macos/spl/spl-zlib.c b/module/os/macos/spl/spl-zlib.c new file mode 100644 index 0000000000..5aa92c324a --- /dev/null +++ b/module/os/macos/spl/spl-zlib.c @@ -0,0 +1,199 @@ +/* + * + * zlib.h -- interface of the 'zlib' general purpose compression library + * version 1.2.5, April 19th, 2010 + * + * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ * + * Jean-loup Gailly + * Mark Adler + */ + +#include +#include +#include +#include + +#ifdef DEBUG_SUBSYSTEM +#undef DEBUG_SUBSYSTEM +#endif + +#define DEBUG_SUBSYSTEM SS_ZLIB + +static spl_kmem_cache_t *zlib_workspace_cache; + +/* + * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc + * and vfree for every call. Using a kmem_cache also has the advantage + * that improves the odds that the memory used will be local to this cpu. + * To further improve things it might be wise to create a dedicated per-cpu + * workspace for use. This would take some additional care because we then + * must disable preemption around the critical section, and verify that + * zlib_deflate* and zlib_inflate* never internally call schedule(). + */ +static void * +zlib_workspace_alloc(int flags) +{ + return (kmem_cache_alloc(zlib_workspace_cache, flags & ~(__GFP_FS))); +} + +static void +zlib_workspace_free(void *workspace) +{ + kmem_cache_free(zlib_workspace_cache, workspace); +} + +/* + * Compresses the source buffer into the destination buffer. The level + * parameter has the same meaning as in deflateInit. sourceLen is the byte + * length of the source buffer. Upon entry, destLen is the total size of the + * destination buffer, which must be at least 0.1% larger than sourceLen plus + * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + * + * compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + * memory, Z_BUF_ERROR if there was not enough room in the output buffer, + * Z_STREAM_ERROR if the level parameter is invalid. + */ +int +z_compress_level(void *dest, size_t *destLen, const void *source, + size_t sourceLen, int level) +{ + z_stream stream; + int err; + + stream.next_in = (Byte *)source; + stream.avail_in = (uInt)sourceLen; + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + + if ((size_t)stream.avail_out != *destLen) + return (Z_BUF_ERROR); + + stream.workspace = zlib_workspace_alloc(KM_SLEEP); + if (!stream.workspace) + return (Z_MEM_ERROR); + + err = zlib_deflateInit(&stream, level); + if (err != Z_OK) { + zlib_workspace_free(stream.workspace); + return (err); + } + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + zlib_deflateEnd(&stream); + zlib_workspace_free(stream.workspace); + return (err == Z_OK ? Z_BUF_ERROR : err); + } + *destLen = stream.total_out; + + err = zlib_deflateEnd(&stream); + zlib_workspace_free(stream.workspace); + + return (err); +} +EXPORT_SYMBOL(z_compress_level); + +/* + * Decompresses the source buffer into the destination buffer. sourceLen is + * the byte length of the source buffer. Upon entry, destLen is the total + * size of the destination buffer, which must be large enough to hold the + * entire uncompressed data. (The size of the uncompressed data must have + * been saved previously by the compressor and transmitted to the decompressor + * by some mechanism outside the scope of this compression library.) + * Upon exit, destLen is the actual size of the compressed buffer. + * This function can be used to decompress a whole file at once if the + * input file is mmap'ed. + * + * uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + * enough memory, Z_BUF_ERROR if there was not enough room in the output + * buffer, or Z_DATA_ERROR if the input data was corrupted. 
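Taken together, a caller typically round-trips a buffer through these wrappers as sketched below. This is an editorial illustration against the prototypes defined in this file; Z_OK comes from zlib itself, and the caller must size the compression scratch buffer with the headroom (sourceLen plus ~0.1% plus 12 bytes) that the comment above requires.

/* editorial sketch: compress then decompress using this file's wrappers */
static int
example_zlib_roundtrip(const void *src, size_t srclen,
    void *scratch, size_t scratchlen, void *out, size_t outlen)
{
    size_t clen = scratchlen;   /* in: capacity, out: compressed size */
    size_t dlen = outlen;       /* in: capacity, out: uncompressed size */

    if (z_compress_level(scratch, &clen, src, srclen, 6) != Z_OK)
        return (-1);
    if (z_uncompress(out, &dlen, scratch, clen) != Z_OK)
        return (-1);

    return (dlen == srclen ? 0 : -1);
}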
+ */ +int +z_uncompress(void *dest, size_t *destLen, const void *source, + size_t sourceLen) +{ + z_stream stream; + int err; + + stream.next_in = (Byte *)source; + stream.avail_in = (uInt)sourceLen; + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + + if ((size_t)stream.avail_out != *destLen) + return (Z_BUF_ERROR); + + stream.workspace = zlib_workspace_alloc(KM_SLEEP); + if (!stream.workspace) + return (Z_MEM_ERROR); + + err = zlib_inflateInit(&stream); + if (err != Z_OK) { + zlib_workspace_free(stream.workspace); + return (err); + } + + err = zlib_inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + zlib_inflateEnd(&stream); + zlib_workspace_free(stream.workspace); + + if (err == Z_NEED_DICT || + (err == Z_BUF_ERROR && stream.avail_in == 0)) + return (Z_DATA_ERROR); + + return (err); + } + *destLen = stream.total_out; + + err = zlib_inflateEnd(&stream); + zlib_workspace_free(stream.workspace); + + return (err); +} +EXPORT_SYMBOL(z_uncompress); + +int +spl_zlib_init(void) +{ + int size; + SENTRY; + + size = MAX(spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), + zlib_inflate_workspacesize()); + + zlib_workspace_cache = kmem_cache_create( + "spl_zlib_workspace_cache", + size, 0, NULL, NULL, NULL, NULL, NULL, + KMC_VMEM | KMC_NOEMERGENCY); + if (!zlib_workspace_cache) + SRETURN(1); + + SRETURN(0); +} + +void +spl_zlib_fini(void) +{ + SENTRY; + kmem_cache_destroy(zlib_workspace_cache); + zlib_workspace_cache = NULL; + SEXIT; +} diff --git a/module/os/macos/zfs/.gitignore b/module/os/macos/zfs/.gitignore new file mode 100644 index 0000000000..aaec2f8ea2 --- /dev/null +++ b/module/os/macos/zfs/.gitignore @@ -0,0 +1,2 @@ +zfs +zfs.kext diff --git a/module/os/macos/zfs/Info.plist b/module/os/macos/zfs/Info.plist new file mode 100644 index 0000000000..761b080738 --- /dev/null +++ b/module/os/macos/zfs/Info.plist @@ -0,0 +1,115 @@ + + + + + BuildMachineOSBuild + 14C1514 + CFBundleDevelopmentRegion + English + CFBundleExecutable + zfs + CFBundleIdentifier + net.lundman.zfs + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + zfs + CFBundlePackageType + KEXT + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1.0.0 + DTCompiler + com.apple.compilers.llvm.clang.1_0 + DTPlatformBuild + 6C131e + DTPlatformVersion + GM + DTSDKBuild + 12F37 + DTSDKName + macosx10.8 + DTXcode + 0620 + DTXcodeBuild + 6C131e + IOKitPersonalities + + net.lundman.zfs + + CFBundleIdentifier + net.lundman.zfs + IOClass + net_lundman_zfs_zvol + IOMatchCategory + net_lundman_zfs_zvol + IOMediaIcon + + CFBundleIdentifier + net.lundman.zfs + IOBundleResourceFile + VolumeIcon.icns + + IOProviderClass + IOResources + IOResourceMatch + IOBSD + + net.lundman.zfs.ZFSDatasetProxy + + CFBundleIdentifier + net.lundman.zfs + IOClass + ZFSDatasetProxy + IOProbeScore + 1000 + IOMatchCategory + ZFSPool + IOProviderClass + ZFSPool + + net.lundman.zfs.ZFSDatasetScheme + + CFBundleIdentifier + net.lundman.zfs + IOClass + ZFSDatasetScheme + IOProbeScore + 5000 + IOMatchCategory + IOStorage + IOPropertyMatch + + Whole + + + IOProviderClass + IOMedia + + + NSHumanReadableCopyright + CDDL (ZFS), BSD (FreeBSD), Copyright © 2012-2020 OpenZFS on OS X. All rights reserved. 
+ OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.iokit.IOStorageFamily + 1.6 + com.apple.kpi.bsd + 8.0.0 + com.apple.kpi.iokit + 8.0.0 + com.apple.kpi.libkern + 10.0 + com.apple.kpi.mach + 8.0.0 + com.apple.kpi.unsupported + 8.0.0 + net.lundman.kernel.dependencies + 12.5.0 + + + diff --git a/module/os/macos/zfs/InfoPlist.strings b/module/os/macos/zfs/InfoPlist.strings new file mode 100644 index 0000000000..0c67376eba --- /dev/null +++ b/module/os/macos/zfs/InfoPlist.strings @@ -0,0 +1,5 @@ + + + + + diff --git a/module/os/macos/zfs/Makefile.am b/module/os/macos/zfs/Makefile.am new file mode 100644 index 0000000000..2ff35cb52b --- /dev/null +++ b/module/os/macos/zfs/Makefile.am @@ -0,0 +1,345 @@ + +INFO_PLIST = Info.plist +PLIST_STRING = InfoPlist.strings + +ZFS_META_VERSION = @ZFS_META_VERSION@ +ZFS_DEBUG_STR = @ZFS_DEBUG_STR@ + +zfs_CPPFLAGS = \ + -Wall \ + -nostdinc \ + -mkernel \ + -fno-builtin-printf \ + -D__KERNEL__ \ + -D_KERNEL \ + -DKERNEL \ + -DKERNEL_PRIVATE \ + -DDRIVER_PRIVATE \ + -DNAMEDSTREAMS=1 \ + -DAPPLE \ + -DNeXT \ + -I$(top_srcdir)/include/os/macos/spl \ + -I$(top_srcdir)/include/os/macos/zfs \ + -I$(top_srcdir)/module/icp/include \ + -I$(top_srcdir)/include \ + -I@KERNEL_HEADERS@/Headers \ + -I@KERNEL_HEADERS@/PrivateHeaders + +zfs_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@ + +zfs_CFLAGS = +zfs_CXXFLAGS = + +zfs_LDFLAGS = \ + -Xlinker \ + -kext \ + -nostdlib \ + -lkmodc++ \ + -lkmod \ + -lcc_kext + +zfs_LDADD = \ + $(top_builddir)/module/os/macos/spl/libspl.la + +zfs_LIBS = + +# If we don't set this to nothing, it adds "-lz -liconv" +LIBS = + +bin_PROGRAMS = zfs.kext +noinst_PROGRAMS = zfs + +zfs_kext_SOURCE = + +if TARGET_CPU_X86_64 +zfs_ASM_SOURCES_C = \ + ../../../icp/asm-x86_64/aes/aeskey.c \ + ../../../icp/algs/modes/gcm_pclmulqdq.c \ + ../../../zcommon/zfs_fletcher_intel.c \ + ../../../zcommon/zfs_fletcher_sse.c \ + ../../../zcommon/zfs_fletcher_avx512.c \ + ../../../zfs/vdev_raidz_math_sse2.c \ + ../../../zfs/vdev_raidz_math_ssse3.c \ + ../../../zfs/vdev_raidz_math_avx2.c \ + ../../../zfs/vdev_raidz_math_avx512f.c \ + ../../../zfs/vdev_raidz_math_avx512bw.c +zfs_ASM_SOURCES_AS = \ + ../../../icp/asm-x86_64/os/macos/aes/aes_amd64.S \ + ../../../icp/asm-x86_64/os/macos/aes/aes_aesni.S \ + ../../../icp/asm-x86_64/os/macos/modes/gcm_pclmulqdq.S \ + ../../../icp/asm-x86_64/os/macos/sha1/sha1-x86_64.S \ + ../../../icp/asm-x86_64/os/macos/sha2/sha256_impl.S \ + ../../../icp/asm-x86_64/os/macos/sha2/sha512_impl.S +else +zfs_ASM_SOURCES_C = +zfs_ASM_SOURCES_AS = +endif + +zfs_SOURCES = \ + ../../../zfs/abd.c \ + abd_os.c \ + ../../../zfs/aggsum.c \ + ../../../zfs/arc.c \ + arc_os.c \ + ../../../avl/avl.c \ + ../../../zfs/blkptr.c \ + ../../../zfs/bplist.c \ + ../../../zfs/bpobj.c \ + ../../../zfs/bptree.c \ + ../../../zfs/bqueue.c \ + ../../../zfs/btree.c \ + ../../../zcommon/cityhash.c \ + ../../../zfs/dbuf.c \ + ../../../zfs/dbuf_stats.c \ + ../../../zfs/ddt.c \ + ../../../zfs/ddt_zap.c \ + ../../../zfs/dmu.c \ + ../../../zfs/dmu_diff.c \ + ../../../zfs/dmu_object.c \ + ../../../zfs/dmu_objset.c \ + ../../../zfs/dmu_recv.c \ + ../../../zfs/dmu_redact.c \ + ../../../zfs/dmu_send.c \ + ../../../zfs/dmu_traverse.c \ + ../../../zfs/dmu_tx.c \ + ../../../zfs/dmu_zfetch.c \ + ../../../zfs/dnode.c \ + ../../../zfs/dnode_sync.c \ + ../../../zfs/dsl_bookmark.c \ + ../../../zfs/dsl_crypt.c \ + ../../../zfs/dsl_dataset.c \ + ../../../zfs/dsl_deadlist.c \ + ../../../zfs/dsl_deleg.c \ + ../../../zfs/dsl_destroy.c \ + ../../../zfs/dsl_dir.c \ + ../../../zfs/dsl_pool.c 
\ + ../../../zfs/dsl_prop.c \ + ../../../zfs/dsl_scan.c \ + ../../../zfs/dsl_synctask.c \ + ../../../zfs/dsl_userhold.c \ + ../../../zfs/edonr_zfs.c \ + ../../../zfs/fm.c \ + ../../../zfs/gzip.c \ + ../../../zfs/hkdf.c \ + ldi_osx.c \ + ldi_vnode.c \ + ldi_iokit.cpp \ + ../../../zfs/lz4.c \ + ../../../zfs/lzjb.c \ + ../../../zfs/metaslab.c \ + ../../../zfs/mmp.c \ + ../../../zfs/multilist.c \ + ../../../zfs/objlist.c \ + ../../../zfs/pathname.c \ + ../../../zfs/range_tree.c \ + ../../../zfs/refcount.c \ + ../../../zfs/rrwlock.c \ + ../../../zfs/sa.c \ + ../../../zfs/sha256.c \ + ../../../zfs/skein_zfs.c \ + ../../../zfs/spa.c \ + ../../../zfs/spa_boot.c \ + ../../../zfs/spa_checkpoint.c \ + ../../../zfs/spa_config.c \ + ../../../zfs/spa_errlog.c \ + ../../../zfs/spa_history.c \ + ../../../zfs/spa_log_spacemap.c \ + ../../../zfs/spa_misc.c \ + spa_misc_os.c \ + spa_stats.c \ + ../../../zfs/space_map.c \ + ../../../zfs/space_reftree.c \ + ../../../zfs/txg.c \ + ../../../zfs/uberblock.c \ + ../../../zfs/unique.c \ + ../../../zfs/vdev.c \ + ../../../zfs/vdev_cache.c \ + vdev_disk.c \ + vdev_file.c \ + ../../../zfs/vdev_indirect.c \ + ../../../zfs/vdev_indirect_births.c \ + ../../../zfs/vdev_indirect_mapping.c \ + ../../../zfs/vdev_initialize.c \ + ../../../zfs/vdev_label.c \ + ../../../zfs/vdev_mirror.c \ + ../../../zfs/vdev_missing.c \ + ../../../zfs/vdev_queue.c \ + ../../../zfs/vdev_raidz.c \ + ../../../zfs/vdev_raidz_math.c \ + ../../../zfs/vdev_raidz_math_scalar.c \ + ../../../zfs/vdev_removal.c \ + ../../../zfs/vdev_root.c \ + ../../../zfs/vdev_trim.c \ + ../../../zfs/zap.c \ + ../../../zfs/zap_leaf.c \ + ../../../zfs/zap_micro.c \ + ../../../zfs/zcp.c \ + ../../../zfs/zcp_get.c \ + ../../../zfs/zcp_global.c \ + ../../../zfs/zcp_iter.c \ + ../../../zfs/zcp_set.c \ + ../../../zfs/zcp_synctask.c \ + ../../../zfs/zfeature.c \ + ../../../zcommon/zfeature_common.c \ + zfs_acl.c \ + zfs_boot.cpp \ + ../../../zfs/zfs_byteswap.c \ + zfs_ctldir.c \ + zfs_debug.c \ + zfs_dir.c \ + ../../../zfs/zfs_fm.c \ + zfs_file_os.c \ + ../../../zfs/zfs_fuid.c \ + zfs_fuid_os.c \ + ../../../zfs/zfs_ioctl.c \ + zfs_ioctl_os.c \ + zfs_kstat_osx.c \ + ../../../zfs/zfs_log.c \ + ../../../zfs/zfs_onexit.c \ + zfs_osx.cpp \ + ../../../zfs/zfs_quota.c \ + ../../../zfs/zfs_ratelimit.c \ + ../../../zfs/zfs_replay.c \ + ../../../zfs/zfs_rlock.c \ + ../../../zfs/zfs_sa.c \ + zfs_vfsops.c \ + zfs_vnops.c \ + zfs_vnops_osx.c \ + zfs_vnops_osx_lib.c \ + zfs_znode.c \ + ../../../zfs/zil.c \ + ../../../zfs/zio.c \ + ../../../zfs/zio_checksum.c \ + zio_crypt.c \ + ../../../zfs/zio_compress.c \ + ../../../zfs/zio_inject.c \ + ../../../zfs/zle.c \ + ../../../zfs/zrlock.c \ + ../../../zfs/zthr.c \ + ../../../zfs/zvol.c \ + zvol_os.c \ + zvolIO.cpp \ + ZFSDatasetProxy.cpp \ + ZFSDatasetScheme.cpp \ + ZFSDataset.cpp \ + ZFSPool.cpp \ + ../../../nvpair/fnvpair.c \ + ../../../nvpair/nvpair.c \ + ../../../nvpair/nvpair_alloc_fixed.c \ + ../../../nvpair/nvpair_alloc_spl.c \ + ../../../unicode/u8_textprep.c \ + ../../../unicode/uconv.c \ + ../../../zcommon/zfs_comutil.c \ + ../../../zcommon/zfs_deleg.c \ + ../../../zcommon/zfs_fletcher.c \ + ../../../zcommon/zfs_fletcher_superscalar.c \ + ../../../zcommon/zfs_fletcher_superscalar4.c \ + ../../../zcommon/zfs_namecheck.c \ + ../../../zcommon/zfs_prop.c \ + ../../../zcommon/zpool_prop.c \ + ../../../zcommon/zprop_common.c \ + ../../../icp/api/kcf_cipher.c \ + ../../../icp/api/kcf_digest.c \ + ../../../icp/api/kcf_mac.c \ + ../../../icp/api/kcf_miscapi.c \ + ../../../icp/api/kcf_ctxops.c \ 
+ ../../../icp/core/kcf_callprov.c \ + ../../../icp/core/kcf_prov_tabs.c \ + ../../../icp/core/kcf_sched.c \ + ../../../icp/core/kcf_mech_tabs.c \ + ../../../icp/core/kcf_prov_lib.c \ + ../../../icp/spi/kcf_spi.c \ + ../../../icp/io/aes.c \ + ../../../icp/io/edonr_mod.c \ + ../../../icp/io/sha2_mod.c \ + ../../../icp/io/sha1_mod.c \ + ../../../icp/io/skein_mod.c \ + ../../../icp/os/modhash.c \ + ../../../icp/os/modconf.c \ + ../../../icp/algs/edonr/edonr.c \ + ../../../icp/algs/modes/cbc.c \ + ../../../icp/algs/modes/ccm.c \ + ../../../icp/algs/modes/ctr.c \ + ../../../icp/algs/modes/ecb.c \ + ../../../icp/algs/modes/gcm_generic.c \ + ../../../icp/algs/modes/gcm.c \ + ../../../icp/algs/modes/modes.c \ + ../../../icp/algs/sha2/sha2.c \ + ../../../icp/algs/skein/skein.c \ + ../../../icp/algs/skein/skein_block.c \ + ../../../icp/algs/skein/skein_iv.c \ + ../../../icp/algs/aes/aes_impl_aesni.c \ + ../../../icp/algs/aes/aes_impl_generic.c \ + ../../../icp/algs/aes/aes_impl_x86-64.c \ + ../../../icp/algs/aes/aes_impl.c \ + ../../../icp/algs/aes/aes_modes.c \ + ../../../icp/illumos-crypto.c \ + ../../../lua/lapi.c \ + ../../../lua/lauxlib.c \ + ../../../lua/lbaselib.c \ + ../../../lua/lcode.c \ + ../../../lua/lcompat.c \ + ../../../lua/lcorolib.c \ + ../../../lua/lctype.c \ + ../../../lua/ldebug.c \ + ../../../lua/ldo.c \ + ../../../lua/lfunc.c \ + ../../../lua/lgc.c \ + ../../../lua/llex.c \ + ../../../lua/lmem.c \ + ../../../lua/lobject.c \ + ../../../lua/lopcodes.c \ + ../../../lua/lparser.c \ + ../../../lua/lstate.c \ + ../../../lua/lstring.c \ + ../../../lua/lstrlib.c \ + ../../../lua/ltable.c \ + ../../../lua/ltablib.c \ + ../../../lua/ltm.c \ + ../../../lua/lvm.c \ + ../../../lua/lzio.c \ + ../../../lua/setjmp/setjmp.S \ + $(zfs_ASM_SOURCES_C) \ + $(zfs_ASM_SOURCES_AS) + +# Ensure these files are always built with -O2 to avoid stack overflow. 
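+# (GNU make substitution reference: $(CFLAGS:-O0%=-O2) rewrites any word
+# beginning with -O0 in CFLAGS to -O2 for just these two objects, leaving
+# all other flags untouched.)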
+../../../zfs/zfs-dsl_scan.$(OBJEXT): CFLAGS := $(CFLAGS:-O0%=-O2) +../../../lua/zfs-lvm.$(OBJEXT): CFLAGS := $(CFLAGS:-O0%=-O2) + +KERNEL_MODDIR= $(DESTDIR)@KERNEL_MODPREFIX@/zfs.kext + +dist_noinst_DATA = $(PLIST_STRING) $(INFO_PLIST) + +zfs.kext$(EXEEXT): zfs $(PLIST_STRING) $(INFO_PLIST) + @echo "" + @mkdir -p zfs.kext/Contents/Resources/English.lproj zfs.kext/Contents/MacOS + @cp -f $(INFO_PLIST) zfs.kext/Contents/ + /usr/libexec/PlistBuddy -c "Set :CFBundleShortVersionString $(ZFS_META_VERSION)" zfs.kext/Contents/Info.plist + /usr/libexec/PlistBuddy -c "Delete :OSBundleLibraries:net.lundman.kernel.dependencies" zfs.kext/Contents/Info.plist + /usr/libexec/PlistBuddy -c "Add :OSBundleLibraries:net.lundman.kernel.dependencies.$(ZFS_META_VERSION) string 12.5.0" zfs.kext/Contents/Info.plist + @cp -f $(PLIST_STRING) zfs.kext/Contents/Resources/English.lproj/ + @cp -f zfs zfs.kext/Contents/MacOS/ + @mkdir -p zfs.kext/Contents/PlugIns/KernelExports.kext/ + @cp -f $(top_srcdir)/module/os/macos/kernel/kernelexports zfs.kext/Contents/PlugIns/KernelExports.kext/KernelExports + @cp -f $(top_srcdir)/module/os/macos/kernel/Info.plist zfs.kext/Contents/PlugIns/KernelExports.kext/ + /usr/libexec/PlistBuddy -c "Set :CFBundleIdentifier net.lundman.kernel.dependencies.$(ZFS_META_VERSION)" zfs.kext/Contents/PlugIns/KernelExports.kext/Info.plist + /usr/libexec/PlistBuddy -c "Add :OSBundleRequired string Root" zfs.kext/Contents/Info.plist + cp -f $(top_srcdir)/module/os/macos/kernel/version.plist zfs.kext/Contents/PlugIns/KernelExports.kext/ + @kextlibs -unsupported -undef-symbols -xml zfs.kext/ || echo "Ignoring errors..(Most of these are expected)" | grep -v -f $(top_srcdir)/module/os/macos/kernel/zfs.exports + +install-exec-local: zfs.kext + rm -rf $(KERNEL_MODDIR) + mkdir -p $(KERNEL_MODDIR) + rsync -r zfs.kext/ $(KERNEL_MODDIR) + chown -R root:wheel $(KERNEL_MODDIR) || echo "Unable to chown root:wheel $(KERNEL_MODDIR)" + @echo + @echo "To load module: kextload -v $(KERNEL_MODDIR)" + @echo "To uninstall module: rm -rf $(KERNEL_MODDIR)" + @echo + +uninstall-am: + rm -rf $(KERNEL_MODDIR) + +clean: + rm -rf zfs.kext/ + rm -f *.o *.lo zfs diff --git a/module/os/macos/zfs/ZFSDataset.cpp b/module/os/macos/zfs/ZFSDataset.cpp new file mode 100644 index 0000000000..9dde5613f6 --- /dev/null +++ b/module/os/macos/zfs/ZFSDataset.cpp @@ -0,0 +1,867 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ +/* + * ZFSDataset - proxy disk for legacy and com.apple.devicenode mounts. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(DEBUG) || defined(ZFS_DEBUG) +#ifdef dprintf +#undef dprintf +#endif +#define dprintf(fmt, ...) do { \ + IOLog("ZFSDataset %s " fmt "\n", __func__, ##__VA_ARGS__); \ +_NOTE(CONSTCOND) } while (0) +#else +#ifndef dprintf +#define dprintf(fmt, ...) do { } while (0); +#endif +#endif /* if DEBUG or ZFS_DEBUG */ + +#define DPRINTF_FUNC() do { dprintf(""); } while (0); + +OSDefineMetaClassAndStructors(ZFSDataset, IOMedia); + +#if 0 +/* XXX Only for debug tracing */ +bool +ZFSDataset::open(IOService *client, + IOOptionBits options, IOStorageAccess access) +{ + bool ret; + DPRINTF_FUNC(); + + ret = IOMedia::open(client, options, access); + + dprintf("ZFSDataset %s ret %d", ret); + return (ret); +} + +bool +ZFSDataset::isOpen(const IOService *forClient) const +{ + DPRINTF_FUNC(); + return (false); +} + +void +ZFSDataset::close(IOService *client, + IOOptionBits options) +{ + DPRINTF_FUNC(); + IOMedia::close(client, options); +} + +bool +ZFSDataset::handleOpen(IOService *client, + IOOptionBits options, void *access) +{ + bool ret; + DPRINTF_FUNC(); + + ret = IOMedia::handleOpen(client, options, access); + + dprintf("ZFSDataset %s ret %d", ret); + return (ret); +} + +bool +ZFSDataset::handleIsOpen(const IOService *client) const +{ + bool ret; + DPRINTF_FUNC(); + + ret = IOMedia::handleIsOpen(client); + + dprintf("ZFSDataset %s ret %d", ret); + return (ret); +} + +void +ZFSDataset::handleClose(IOService *client, + IOOptionBits options) +{ + DPRINTF_FUNC(); + IOMedia::handleClose(client, options); +} + +bool +ZFSDataset::attach(IOService *provider) +{ + DPRINTF_FUNC(); + return (IOMedia::attach(provider)); +} + +void +ZFSDataset::detach(IOService *provider) +{ + DPRINTF_FUNC(); + IOMedia::detach(provider); +} + +bool +ZFSDataset::start(IOService *provider) +{ + DPRINTF_FUNC(); + return (IOMedia::start(provider)); +} + +void +ZFSDataset::stop(IOService *provider) +{ + DPRINTF_FUNC(); + IOMedia::stop(provider); +} +#endif + +/* XXX Only for debug tracing */ +void +ZFSDataset::free() +{ + DPRINTF_FUNC(); + IOMedia::free(); +} + +/* + * Override init to call IOMedia init then setup properties. 
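+ *
+ * A device-characteristics dictionary advertising a 4096-byte physical and
+ * 512-byte logical block size is merged into the supplied properties before
+ * they are handed to IOMedia::init().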
+ */ +bool +ZFSDataset::init(UInt64 base, UInt64 size, + UInt64 preferredBlockSize, + IOMediaAttributeMask attributes, + bool isWhole, bool isWritable, + const char *contentHint, + OSDictionary *properties) +{ + OSDictionary *newProps = NULL, *deviceDict; + OSNumber *physSize, *logSize; +#if 0 + OSDictionary *protocolDict; + const OSSymbol *virtualSymbol, *internalSymbol; +#endif + bool ret; + + DPRINTF_FUNC(); + + /* Clone or create new properties dictionary */ + if (properties) newProps = OSDictionary::withDictionary(properties); + if (!newProps) newProps = OSDictionary::withCapacity(2); + + /* Allocate dictionaries, numbers, and string symbols */ + deviceDict = OSDictionary::withCapacity(2); +#if 0 + protocolDict = OSDictionary::withCapacity(2); +#endif + + physSize = OSNumber::withNumber(4096, 32); + logSize = OSNumber::withNumber(512, 32); + +#if 0 + kIOPropertyPhysicalInterconnectTypeVirtual + kIOPropertyPhysicalInterconnectTypeKey + kIOPropertyInterconnectFileKey + kIOPropertyInternalKey + kIOPropertyPhysicalInterconnectLocationKey + + kIOPropertyProtocolCharacteristicsKey + kIOPropertyMediumTypeKey + kIOPropertyLogicalBlockSizeKey + kIOPropertyPhysicalBlockSizeKey + kIOPropertyBytesPerPhysicalSectorKey + kIOPropertyDeviceCharacteristicsKey + kIOBlockStorageDeviceTypeKey + kIOBlockStorageDeviceTypeGeneric +#endif + +#if 0 + virtualSymbol = OSSymbol::withCString( + kIOPropertyPhysicalInterconnectTypeVirtual); + internalSymbol = OSSymbol::withCString( + kIOPropertyInternalKey); +#endif + + /* Validate allocations */ + if (!newProps || !deviceDict || !physSize || !logSize +#if 0 + // || !protocolDict || !virtualSymbol || !internalSymbol +#endif + ) { + dprintf("symbol allocation failed"); + OSSafeReleaseNULL(newProps); + OSSafeReleaseNULL(deviceDict); +#if 0 + OSSafeReleaseNULL(protocolDict); +#endif + OSSafeReleaseNULL(physSize); + OSSafeReleaseNULL(logSize); +#if 0 + OSSafeReleaseNULL(virtualSymbol); + OSSafeReleaseNULL(internalSymbol); +#endif + return (false); + } + + /* Setup device characteristics */ + deviceDict->setObject(kIOPropertyPhysicalBlockSizeKey, physSize); + deviceDict->setObject(kIOPropertyLogicalBlockSizeKey, logSize); + OSSafeReleaseNULL(physSize); + OSSafeReleaseNULL(logSize); + +#if 0 + /* Setup protocol characteristics */ + protocolDict->setObject(kIOPropertyPhysicalInterconnectTypeKey, + virtualSymbol); + protocolDict->setObject(kIOPropertyPhysicalInterconnectLocationKey, + internalSymbol); + OSSafeReleaseNULL(virtualSymbol); + OSSafeReleaseNULL(internalSymbol); +#endif + + /* XXX Setup required IOMedia props */ + + /* Set new device and protocol dictionaries */ + if (newProps->setObject(kIOPropertyDeviceCharacteristicsKey, + deviceDict) == false +#if 0 + // || + // newProps->setObject(kIOPropertyProtocolCharacteristicsKey, + // protocolDict) == false +#endif + ) { + dprintf("setup properties failed"); + OSSafeReleaseNULL(newProps); + OSSafeReleaseNULL(deviceDict); +#if 0 + OSSafeReleaseNULL(protocolDict); +#endif + return (false); + } + OSSafeReleaseNULL(deviceDict); +#if 0 + OSSafeReleaseNULL(protocolDict); +#endif + + /* Call IOMedia init with size and newProps */ + ret = IOMedia::init(base, size, preferredBlockSize, + attributes, isWhole, isWritable, contentHint, + newProps); + OSSafeReleaseNULL(newProps); + + if (!ret) dprintf("IOMedia init failed"); + + return (ret); + +#if 0 + /* Get current device and protocol dictionaries */ + lockForArbitration(); + oldDeviceDict = OSDynamicCast(OSDictionary, + getProperty(kIOStorageDeviceCharacteristicsKey)); + 
oldProtocolDict = OSDynamicCast(OSDictionary, + getProperty(kIOStorageProtocolCharacteristicsKey)); + if (oldDeviceDict) oldDeviceDict->retain(); + if (oldProtocolDict) oldProtocolDict->retain(); + unlockForArbitration(); + + /* Clone existing dictionaries */ + if (oldDeviceDict) { + newDeviceDict = OSDictionary::withDict(oldDeviceDict); + OSSafeReleaseNULL(oldDeviceDict); + } + if (oldProtocolDict) { + newProtocolDict = OSDictionary::withDict(oldProtocolDict); + OSSafeReleaseNULL(oldDeviceDict); + } + + /* Make new if missing */ + if (!newDeviceDict) + newDeviceDict = OSDictionary::withCapacity(2); + if (!newProtocolDict) + newProtocolDict = OSDictionary::withCapacity(2); + + /* Setup device characteristics */ + newDeviceDict->setObject(kIOStoragePhysicalBlocksizeKey, physSize); + newDeviceDict->setObject(kIOStorageLogicalBlocksizeKey, logSize); + OSSafeReleaseNULL(physSize); + OSSafeReleaseNULL(logSize); + + /* Setup protocol characteristics */ + newProtocolDict->setObject(kIOStorageProtocolInterconnectTypeKey, + virtualSymbol); + newProtocolDict->setObject(kIOStorageProtocolInterconnectNameKey, + internalSymbol); + OSSafeReleaseNULL(virtualSymbol); + OSSafeReleaseNULL(internalSymbol); + + /* XXX Setup required IOMedia props */ + + /* Set new device and protocol dictionaries */ + lockForArbitration(); + setProperty(kIOStorageDeviceCharacteristicsKey, newDeviceDict); + setProperty(kIOStorageProtocolCharacteristicsKey, newProtocolDict); + unlockForArbitration(); + + /* Cleanup and return success */ + OSSafeReleaseNULL(newDeviceDict); + OSSafeReleaseNULL(newProtocolDict); + return (true); +#endif +} + +/* + * Set both the IOService name and the ZFS Dataset property. + */ +bool +ZFSDataset::setDatasetName(const char *name) +{ + OSDictionary *prevDict, *newDict = NULL; + OSString *datasetString; + const char *newname; + + if (!name || name[0] == '\0') { + dprintf("missing name"); + return (false); + } + + if ((newname = strrchr(name, '/')) == NULL) { + newname = name; + } else { + /* Advance beyond slash */ + newname++; + } + +#if 0 + size_t len; + /* Length of IOMedia name plus null terminator */ + len = (strlen(kZFSIOMediaPrefix) + strlen(name) + + strlen(kZFSIOMediaSuffix) + 1); + // len = strlen("ZFS ") + strlen(name) + strlen(" Media") + 1; + + newname = (char *)kmem_alloc(len, KM_SLEEP); +#endif + datasetString = OSString::withCString(name); + +#if 0 + nameString = OSString::withCString(newname); + if (newname == NULL || nameString == NULL) { + dprintf("couldn't make name strings"); + OSSafeReleaseNULL(nameString); + if (newname) kmem_free(newname, len); + return (false); + } +#else + if (datasetString == NULL) { + dprintf("couldn't make name strings"); + return (false); + } +#endif + +#if 0 + bzero(newname, len); + snprintf(newname, len, "%s%s%s", kZFSIOMediaPrefix, + name, kZFSIOMediaSuffix); + + ASSERT3U(strlen(newname), ==, len-1); +#endif + + /* Lock IORegistryEntry and get current prop dict */ + lockForArbitration(); + if ((prevDict = OSDynamicCast(OSDictionary, + getProperty(kIOPropertyDeviceCharacteristicsKey))) == NULL) { + /* Unlock IORegistryEntry */ + unlockForArbitration(); + dprintf("couldn't get prop dict"); + } + prevDict->retain(); + unlockForArbitration(); + + /* Clone existing dictionary */ + if (prevDict) { + if ((newDict = OSDictionary::withDictionary(prevDict)) == + NULL) { + dprintf("couldn't clone prop dict"); + } + OSSafeReleaseNULL(prevDict); + /* Non-fatal at the moment */ + } + + /* If prevDict did not exist or couldn't be copied, make new */ + if (!newDict 
&& (newDict = OSDictionary::withCapacity(1)) == NULL) { + dprintf("couldn't make new prop dict"); + } + + /* If we have a new or copied dict at this point */ + if (newDict) { + /* Add or replace dictionary Product Name string */ + if (newDict->setObject(kIOPropertyProductNameKey, + datasetString) == false) { + dprintf("couldn't set name"); + OSSafeReleaseNULL(datasetString); + // OSSafeReleaseNULL(nameString); + // kmem_free(newname, len); + OSSafeReleaseNULL(newDict); + return (false); + } + + /* Lock IORegistryEntry and replace prop dict */ + lockForArbitration(); + if (setProperty(kIOPropertyDeviceCharacteristicsKey, + newDict) == false) { + unlockForArbitration(); + dprintf("couldn't set name"); + OSSafeReleaseNULL(datasetString); + // OSSafeReleaseNULL(nameString); + // kmem_free(newname, len); + OSSafeReleaseNULL(newDict); + return (false); + } + unlockForArbitration(); + OSSafeReleaseNULL(newDict); + } + + /* Lock IORegistryEntry to replace property and set name */ + lockForArbitration(); + /* Assign plain ZFS Dataset name */ + setProperty(kZFSDatasetNameKey, datasetString); + /* Assign IOMedia name */ + // setName(name); + setName(newname); + + /* Unlock IORegistryEntry and cleanup allocations */ + unlockForArbitration(); + // kmem_free(newname, len); + // OSSafeReleaseNULL(nameString); + return (true); +} + +#if 0 +static inline uint64_t +get_objnum(const char *name) +{ + objset_t *os = NULL; + uint64_t objnum; + int error; + + if (!name) + return (0); + + error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, FTAG, &os); + if (error != 0) { + dprintf("couldn't open dataset %d", error); + return (0); + } + + objnum = dmu_objset_id(os); + + dmu_objset_disown(os, FTAG); + + return (objnum); +} +#endif + +/* + * Create a proxy device, name it appropriately, and return it. 
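+ *
+ * The name is the full dataset name (e.g. "pool/fs") and size is the pool
+ * size in bytes; ZFSDatasetScheme::addDataset() is the expected caller.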
+ */ +ZFSDataset * +ZFSDataset::withDatasetNameAndSize(const char *name, uint64_t size) +{ + ZFSDataset *dataset = NULL; + objset_t *os = NULL; + OSString *uuidStr = NULL; + OSObject *property = NULL; + char uuid_cstr[37]; + uint64_t objnum, readonly, guid; +#if 0 + // uint64_t ref_size, avail_size, obj_count, obj_free; +#endif + uuid_t uuid; + int error; + bool isWritable; + + DPRINTF_FUNC(); + + if (!name || name[0] == '\0') { + dprintf("missing name"); + /* Nothing allocated or retained yet */ + return (NULL); + } + bzero(uuid_cstr, sizeof (uuid_cstr)); + +#if 0 + OSNumber *sizeNum = NULL; + property = copyProperty(kZFSPoolSizeKey, gIOServicePlane, + kIORegistryIterateRecursively|kIORegistryIterateParents); + if (!property) { + dprintf("couldn't get pool size"); + /* Nothing allocated or retained yet */ + return (NULL); + } + if ((sizeNum = OSDynamicCast(OSNumber, property)) == NULL) { + dprintf("couldn't cast pool size"); + goto error; + } + size = sizeNum->unsigned64BitValue(); + sizeNum = NULL; + OSSafeReleaseNULL(property); +#endif + + if (zfs_vfs_uuid_gen(name, uuid) != 0) { + dprintf("UUID gen failed"); + goto error; + } + // uuid_unparse(uuid, uuid_cstr); + zfs_vfs_uuid_unparse(uuid, uuid_cstr); + // snprintf(uuid_cstr, sizeof (uuid_cstr), ""); + + uuidStr = OSString::withCString(uuid_cstr); + if (!uuidStr) { + dprintf("uuidStr alloc failed"); + goto error; + } + + dataset = new ZFSDataset; + if (!dataset) { + dprintf("allocation failed"); + goto error; + } + + /* Own the dmu objset to get properties */ + error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os); + if (error != 0) { + dprintf("couldn't open dataset %d", error); + goto error; + } + + /* Get the dsl_dir to lookup object number */ + objnum = dmu_objset_id(os); + +#if 0 + dmu_objset_space(os, &ref_size, &avail_size, &obj_count, &obj_free); +#endif + + // if (os->os_dsl_dataset) + // guid = dsl_dataset_phys(os->os_dsl_dataset)->ds_guid; + guid = dmu_objset_fsid_guid(os); + // dsl_prop_get_integer(name, "guid", &guid, NULL) != 0) { + + if (dsl_prop_get_integer(name, "readonly", &readonly, NULL) != 0) { + dmu_objset_disown(os, B_FALSE, FTAG); + dprintf("get readonly property failed"); + goto error; + } + // size = (1<<30); + // isWritable = true; + dmu_objset_disown(os, B_FALSE, FTAG); + +#if 0 + size = ref_size + avail_size; +#endif + + isWritable = (readonly == 0ULL); + + if (dataset->init(/* base */ 0, size, DEV_BSIZE, + /* attributes */ 0, /* isWhole */ false, isWritable, + kZFSContentHint, /* properties */ NULL) == false) { + dprintf("init failed"); + goto error; + } + + if (dataset->setDatasetName(name) == false) { + dprintf("invalid name"); + goto error; + } + + /* Set media UUID */ + dataset->setProperty(kIOMediaUUIDKey, uuidStr); + OSSafeReleaseNULL(uuidStr); + + return (dataset); + +error: + OSSafeReleaseNULL(property); + OSSafeReleaseNULL(uuidStr); + OSSafeReleaseNULL(dataset); + return (NULL); +} + +/* + * Compatibility method simulates a read but returns all zeros. 
+ */ +void +ZFSDataset::read(IOService *client, + UInt64 byteStart, IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + IOByteCount total, cur_len, done = 0; + addr64_t cur; + + DPRINTF_FUNC(); + if (!buffer) { + if (completion) complete(completion, kIOReturnInvalid, 0); + return; + } + + total = buffer->getLength(); + + /* XXX Get each physical segment of the buffer and zero it */ + while (done < total) { + cur_len = 0; + cur = buffer->getPhysicalSegment(done, &cur_len); + if (cur == 0) break; + if (cur_len != 0) bzero_phys(cur, cur_len); + done += cur_len; + ASSERT3U(done, <=, total); + } + ASSERT3U(done, ==, total); + + // if (!completion || !completion->action) { + if (!completion) { + dprintf("invalid completion"); + return; + } + +// (completion->action)(completion->target, completion->parameter, +// kIOReturnSuccess, total); + complete(completion, kIOReturnSuccess, total); +} + +/* + * Compatibility method simulates a write as a no-op. + */ +void +ZFSDataset::write(IOService *client, + UInt64 byteStart, IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + IOByteCount total; + DPRINTF_FUNC(); + + if (!buffer) { + if (completion) complete(completion, kIOReturnInvalid); + return; + } + + total = buffer->getLength(); + + // if (!completion || !completion->action) { + if (!completion) { + dprintf("invalid completion"); + return; + } + + /* XXX No-op, just return success */ +// (completion->action)(completion->target, completion->parameter, +// kIOReturnSuccess, total); + complete(completion, kIOReturnSuccess, total); +} + +#ifdef DEBUG +volatile SInt64 num_sync = 0; +#endif + +/* + * Compatibility method simulates a barrier sync as a no-op. + */ +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) +IOReturn +ZFSDataset::synchronize(IOService *client, + UInt64 byteStart, UInt64 byteCount, + IOStorageSynchronizeOptions options) +#else +IOReturn +ZFSDataset::synchronizeCache(IOService *client) +#endif +{ +#ifdef DEBUG + SInt64 cur_sync = 0; + DPRINTF_FUNC(); + cur_sync = OSIncrementAtomic64(&num_sync); + dprintf("sync called %lld times", cur_sync); +#endif + + /* XXX Needs to report success for mount_common() */ + return (kIOReturnSuccess); +} + +/* + * Compatibility method returns failure (unsupported). + */ +IOReturn +ZFSDataset::unmap(IOService *client, + IOStorageExtent *extents, UInt32 extentsCount, +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + IOStorageUnmapOptions options) +#else + UInt32 options) +#endif +{ + DPRINTF_FUNC(); + return (kIOReturnUnsupported); +} + +/* + * Compatibility method returns failure (no result). + */ +IOStorage * +ZFSDataset::copyPhysicalExtent(IOService *client, + UInt64 *byteStart, UInt64 *byteCount) +{ + DPRINTF_FUNC(); + return (0); + // return (IOMedia::copyPhysicalExtent(client, byteStart, byteCount)); +} + +/* + * Compatibility method simulates lock as a no-op. + */ +bool +ZFSDataset::lockPhysicalExtents(IOService *client) +{ + DPRINTF_FUNC(); + // return (IOMedia::unlockPhysicalExtents(client)); + return (true); +} + +/* + * Compatibility method simulates unlock as a no-op. + */ +void +ZFSDataset::unlockPhysicalExtents(IOService *client) +{ + DPRINTF_FUNC(); + // IOMedia::unlockPhysicalExtents(client); +} + +/* + * Compatibility method returns failure (unsupported). 
+ */ +#if defined(MAC_OS_X_VERSION_10_10) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10) +IOReturn +ZFSDataset::setPriority(IOService *client, + IOStorageExtent *extents, UInt32 extentsCount, + IOStoragePriority priority) +{ + DPRINTF_FUNC(); + return (kIOReturnUnsupported); + // return (IOMedia::setPriority(client, extents, + // extentsCount, priority)); +} +#endif + +/* + * Compatibility method returns default system blocksize. + */ +UInt64 +ZFSDataset::getPreferredBlockSize() const +{ + DPRINTF_FUNC(); + return (DEV_BSIZE); + // return (IOMedia::getPreferredBlockSize()); +} + +/* XXX Only for debug tracing */ +UInt64 +ZFSDataset::getSize() const +{ + DPRINTF_FUNC(); + return (IOMedia::getSize()); +} + +/* XXX Only for debug tracing */ +UInt64 +ZFSDataset::getBase() const +{ + DPRINTF_FUNC(); + return (IOMedia::getBase()); +} + +/* XXX Only for debug tracing */ +bool +ZFSDataset::isEjectable() const +{ + DPRINTF_FUNC(); + return (IOMedia::isEjectable()); +} + +/* XXX Only for debug tracing */ +bool +ZFSDataset::isFormatted() const +{ + DPRINTF_FUNC(); + return (IOMedia::isFormatted()); +} + +/* XXX Only for debug tracing */ +bool +ZFSDataset::isWhole() const +{ + DPRINTF_FUNC(); + return (IOMedia::isWhole()); +} + +/* XXX Only for debug tracing */ +bool +ZFSDataset::isWritable() const +{ + DPRINTF_FUNC(); + return (IOMedia::isWritable()); +} + +/* XXX Only for debug tracing */ +const char * +ZFSDataset::getContent() const +{ + DPRINTF_FUNC(); + return (IOMedia::getContent()); +} + +/* XXX Only for debug tracing */ +const char * +ZFSDataset::getContentHint() const +{ + DPRINTF_FUNC(); + return (IOMedia::getContentHint()); +} + +/* XXX Only for debug tracing */ +IOMediaAttributeMask +ZFSDataset::getAttributes() const +{ + DPRINTF_FUNC(); + return (IOMedia::getAttributes()); +} diff --git a/module/os/macos/zfs/ZFSDatasetProxy.cpp b/module/os/macos/zfs/ZFSDatasetProxy.cpp new file mode 100644 index 0000000000..4036bd74b1 --- /dev/null +++ b/module/os/macos/zfs/ZFSDatasetProxy.cpp @@ -0,0 +1,478 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ + +#include +#include +#include +#include + +#if defined(DEBUG) || defined(ZFS_DEBUG) +#ifdef dprintf +#undef dprintf +#endif +#define dprintf(fmt, ...) do { \ + IOLog("ZFSDatasetProxy %s " fmt "\n", __func__, ##__VA_ARGS__); \ +_NOTE(CONSTCOND) } while (0) +#else +#ifndef dprintf +#define dprintf(fmt, ...) 
do { } while (0); +#endif +#endif /* if DEBUG or ZFS_DEBUG */ + +#define DPRINTF_FUNC() do { dprintf(""); } while (0); + +/* block size is 512 B, count is 512 M blocks */ +#define ZFS_PROXY_DEV_BSIZE (UInt64)(1<<9) +#define ZFS_PROXY_DEV_BCOUNT (UInt64)(2<<29) +#define kZFSProxyGUIDKey "ZFS Pool GUID" +#define kZFSProxyReadOnlyKey "ZFS Pool Read-Only" + +OSDefineMetaClassAndStructors(ZFSDatasetProxy, IOBlockStorageDevice); + +void +ZFSDatasetProxy::free() +{ + char *str; + + /* vendor, revision, and info share a null char */ + if (vendorString) { + str = (char *)vendorString; + vendorString = 0; + if (revisionString == str) revisionString = 0; + if (infoString == str) infoString = 0; + IOFree(str, strlen(str)+1); + } + + /* Product string contains pool name */ + if (productString) { + str = (char *)productString; + productString = 0; + IOFree(str, strlen(str)+1); + } + + IOBlockStorageDevice::free(); +} + +bool +ZFSDatasetProxy::init(OSDictionary *properties) +{ + char *str = (char *)IOMalloc(1); + + if (!str) { + dprintf("string allocation failed\n"); + return (false); + } + str[0] = '\0'; + vendorString = str; + revisionString = str; + infoString = str; + + if (IOBlockStorageDevice::init(properties) == false) { + dprintf("BlockStorageDevice start failed"); + goto error; + } + + return (true); + +error: + if (str) { + vendorString = 0; + revisionString = 0; + infoString = 0; + IOFree(str, 1); + } + return (false); +} + +bool +ZFSDatasetProxy::start(IOService *provider) +{ + OSObject *property = NULL, *size = NULL; + OSString *nameString = NULL; + OSNumber *sizeNum = NULL; + OSDictionary *deviceDict = NULL, *protocolDict = NULL; + const OSSymbol *virtualSymbol = NULL, *internalSymbol = NULL; + const char *cstr = NULL; + char *pstring = NULL; + int plen = 0; + bool started = false; + + size = copyProperty(kZFSPoolSizeKey, gIOServicePlane, + (kIORegistryIterateRecursively|kIORegistryIterateParents)); + property = copyProperty(kZFSPoolNameKey, gIOServicePlane, + (kIORegistryIterateRecursively|kIORegistryIterateParents)); + + if (!size || !property) { + dprintf("couldn't get pool name or size"); + goto error; + } + + nameString = OSDynamicCast(OSString, property); + if (!nameString) { + dprintf("missing pool name"); + goto error; + } +#if 0 + /* Try hard to get the name string */ + do { + nameString = OSDynamicCast(OSString, property); + + if (nameString) nameString->retain(); + + if (!nameString) { + OSSymbol *nameSymbol; + nameSymbol = OSDynamicCast(OSSymbol, property); + if (!nameSymbol) { + dprintf("couldn't get name"); + goto error; + } + nameString = OSString::withCString( + nameSymbol->getCStringNoCopy()); + } + } while (0); +#endif + + sizeNum = OSDynamicCast(OSNumber, size); + if (!sizeNum) { + dprintf("invalid size"); + goto error; + } + _pool_bcount = sizeNum->unsigned64BitValue() / DEV_BSIZE; + sizeNum = 0; + size->release(); + size = 0; + + cstr = nameString->getCStringNoCopy(); + if (!cstr || (plen = strlen(cstr) + 1) == 1) { + goto error; + } + pstring = (char *)IOMalloc(plen); + if (!pstring) { + goto error; + } + snprintf(pstring, plen, "%s", cstr); + productString = pstring; + pstring = 0; + + if (IOBlockStorageDevice::start(provider) == false) { + dprintf("BlockStorageDevice start failed"); + goto error; + } + started = true; + + deviceDict = OSDynamicCast(OSDictionary, + getProperty(kIOPropertyDeviceCharacteristicsKey)); + if (deviceDict) { + /* Clone a new dictionary */ + deviceDict = OSDictionary::withDictionary(deviceDict); + if (!deviceDict) { + dprintf("dict clone 
failed"); + goto error; + } + } + + if (!deviceDict) { + dprintf("creating new device dict"); + deviceDict = OSDictionary::withCapacity(1); + } + + if (!deviceDict) { + dprintf("missing device dict"); + goto error; + } + + deviceDict->setObject(kIOPropertyProductNameKey, nameString); + OSSafeReleaseNULL(nameString); + + if (setProperty(kIOPropertyDeviceCharacteristicsKey, + deviceDict) == false) { + dprintf("device dict setProperty failed"); + goto error; + } + OSSafeReleaseNULL(deviceDict); + + protocolDict = OSDynamicCast(OSDictionary, + getProperty(kIOPropertyProtocolCharacteristicsKey)); + if (protocolDict) { + /* Clone a new dictionary */ + protocolDict = OSDictionary::withDictionary(protocolDict); + if (!protocolDict) { + dprintf("dict clone failed"); + goto error; + } + } + + if (!protocolDict) { + dprintf("creating new protocol dict"); + protocolDict = OSDictionary::withCapacity(1); + } + + if (!protocolDict) { + dprintf("missing protocol dict"); + goto error; + } + + virtualSymbol = OSSymbol::withCString( + kIOPropertyPhysicalInterconnectTypeVirtual); + internalSymbol = OSSymbol::withCString( + kIOPropertyInternalKey); + if (!virtualSymbol || !internalSymbol) { + dprintf("symbol alloc failed"); + goto error; + } + + protocolDict->setObject(kIOPropertyPhysicalInterconnectTypeKey, + virtualSymbol); + protocolDict->setObject(kIOPropertyPhysicalInterconnectLocationKey, + internalSymbol); + + OSSafeReleaseNULL(virtualSymbol); + OSSafeReleaseNULL(internalSymbol); + + if (setProperty(kIOPropertyProtocolCharacteristicsKey, + protocolDict) == false) { + dprintf("protocol dict setProperty failed"); + goto error; + } + OSSafeReleaseNULL(protocolDict); + registerService(kIOServiceAsynchronous); + + return (true); + +error: + OSSafeReleaseNULL(size); + OSSafeReleaseNULL(property); + OSSafeReleaseNULL(deviceDict); + OSSafeReleaseNULL(protocolDict); + OSSafeReleaseNULL(nameString); + OSSafeReleaseNULL(virtualSymbol); + OSSafeReleaseNULL(internalSymbol); + if (pstring) IOFree(pstring, plen); + if (started) IOBlockStorageDevice::stop(provider); + return (false); +} + +/* XXX IOBlockStorageDevice */ +IOReturn +ZFSDatasetProxy::doSynchronizeCache(void) +{ + DPRINTF_FUNC(); + return (kIOReturnSuccess); +} + +IOReturn +ZFSDatasetProxy::doAsyncReadWrite(IOMemoryDescriptor *buffer, + UInt64 block, UInt64 nblks, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + char zero[ZFS_PROXY_DEV_BSIZE]; + size_t len, cur, off = 0; + + DPRINTF_FUNC(); + + if (!buffer) { + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); + } + + /* Read vs. write */ + if (buffer->getDirection() == kIODirectionIn) { + /* Zero the read buffer */ + bzero(zero, ZFS_PROXY_DEV_BSIZE); + len = buffer->getLength(); + while (len > 0) { + cur = (len > ZFS_PROXY_DEV_BSIZE ? + ZFS_PROXY_DEV_BSIZE : len); + buffer->writeBytes(/* offset */ off, + /* buf */ zero, /* length */ cur); + off += cur; + len -= cur; + } + // dprintf("%s: read: %llu %llu", + // __func__, block, nblks); + IOStorage::complete(completion, kIOReturnSuccess, + buffer->getLength()); + return (kIOReturnSuccess); + } + + if (buffer->getDirection() != kIODirectionOut) { + dprintf("invalid direction %d", buffer->getDirection()); + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); + } + + /* + * XXX For now this just returns error for all writes. + * If it turns out that mountroot/bdevvp try to + * verify writable status by reading a block and writing + * it back to disk, lie and say it succeeded. 
+ */ + dprintf("write: %llu %llu", block, nblks); + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); +} + +IOReturn +ZFSDatasetProxy::doEjectMedia() +{ + DPRINTF_FUNC(); + /* XXX Called at shutdown, maybe return success? */ + return (kIOReturnError); +} + +IOReturn +ZFSDatasetProxy::doFormatMedia(UInt64 byteCapacity) +{ + DPRINTF_FUNC(); + /* XXX shouldn't need it */ + return (kIOReturnError); + // return (kIOReturnSuccess); +} + +UInt32 +ZFSDatasetProxy::doGetFormatCapacities(UInt64 *capacities, + UInt32 capacitiesMaxCount) const +{ + DPRINTF_FUNC(); + if (capacities && capacitiesMaxCount > 0) { + capacities[0] = (ZFS_PROXY_DEV_BSIZE * ZFS_PROXY_DEV_BCOUNT); + dprintf("capacity %llu", capacities[0]); + } + + /* Always inform caller of capacity count */ + return (1); +} + +/* Returns full pool name from instance private var */ +char * +ZFSDatasetProxy::getProductString() +{ + if (productString) dprintf("[%s]", productString); + /* Return class private string */ + return ((char *)productString); +} + +/* Returns readonly status from instance private var */ +IOReturn +ZFSDatasetProxy::reportWriteProtection(bool *isWriteProtected) +{ + DPRINTF_FUNC(); + if (isWriteProtected) *isWriteProtected = isReadOnly; + return (kIOReturnSuccess); +} + +/* These return class static string for all instances */ +char * +ZFSDatasetProxy::getVendorString() +{ + dprintf("[%s]", vendorString); + /* Return class static string */ + return ((char *)vendorString); +} +char * +ZFSDatasetProxy::getRevisionString() +{ + dprintf("[%s]", revisionString); + /* Return class static string */ + return ((char *)revisionString); +} +char * +ZFSDatasetProxy::getAdditionalDeviceInfoString() +{ + dprintf("[%s]", infoString); + /* Return class static string */ + return ((char *)infoString); +} + +/* Always return media present and unchanged */ +IOReturn +ZFSDatasetProxy::reportMediaState(bool *mediaPresent, + bool *changedState) +{ + DPRINTF_FUNC(); + if (mediaPresent) *mediaPresent = true; + if (changedState) *changedState = false; + return (kIOReturnSuccess); +} + +/* Always report nonremovable and nonejectable */ +IOReturn +ZFSDatasetProxy::reportRemovability(bool *isRemoveable) +{ + DPRINTF_FUNC(); + if (isRemoveable) *isRemoveable = false; + return (kIOReturnSuccess); +} +IOReturn +ZFSDatasetProxy::reportEjectability(bool *isEjectable) +{ + DPRINTF_FUNC(); + if (isEjectable) *isEjectable = false; + return (kIOReturnSuccess); +} + +/* Always report 512b blocksize */ +IOReturn +ZFSDatasetProxy::reportBlockSize(UInt64 *blockSize) +{ + DPRINTF_FUNC(); + if (!blockSize) + return (kIOReturnError); + + *blockSize = ZFS_PROXY_DEV_BSIZE; + return (kIOReturnSuccess); +} + +/* XXX Calculate from dev_bcount, should get size from objset */ +/* XXX Can issue message kIOMessageMediaParametersHaveChanged to update */ +IOReturn +ZFSDatasetProxy::reportMaxValidBlock(UInt64 *maxBlock) +{ + DPRINTF_FUNC(); + if (!maxBlock) + return (kIOReturnError); + + // *maxBlock = 0; + // *maxBlock = ZFS_PROXY_DEV_BCOUNT - 1; + *maxBlock = _pool_bcount - 1; + dprintf("maxBlock %llu", *maxBlock); + + return (kIOReturnSuccess); +} + +IOReturn +ZFSDatasetProxy::getWriteCacheState(bool *enabled) +{ + dprintf("getCacheState\n"); + if (enabled) *enabled = true; + return (kIOReturnSuccess); +} + +IOReturn +ZFSDatasetProxy::setWriteCacheState(bool enabled) +{ + dprintf("setWriteCache\n"); + return (kIOReturnSuccess); +} diff --git a/module/os/macos/zfs/ZFSDatasetScheme.cpp b/module/os/macos/zfs/ZFSDatasetScheme.cpp new file mode 
100644 index 0000000000..6bba6e3e58 --- /dev/null +++ b/module/os/macos/zfs/ZFSDatasetScheme.cpp @@ -0,0 +1,1121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + * Copyright (c) 2017, Jorgen Lundman. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(DEBUG) || defined(ZFS_DEBUG) +#ifdef dprintf +#undef dprintf +#endif +#define dprintf(fmt, ...) do { \ + IOLog("ZFSDatasetScheme %s " fmt "\n", __func__, ##__VA_ARGS__);\ +_NOTE(CONSTCOND) } while (0) +#else +#ifndef dprintf +#define dprintf(fmt, ...) do { } while (0); +#endif +#endif /* if DEBUG or ZFS_DEBUG */ + +static ZFSDatasetScheme * +zfs_osx_proxy_scheme_by_osname(const char *osname) +{ + ZFSDatasetScheme *scheme = NULL; + OSDictionary *matching; + OSObject *object; + OSString *str; + OSIterator *iter; + char *pool_name, *slash; + size_t len; + + slash = strchr(osname, '/'); + if (slash) { + len = (slash - osname) + 1; + } else { + len = strlen(osname) + 1; + } + + pool_name = (char *)kmem_alloc(len, KM_SLEEP); + if (!pool_name) { + dprintf("string alloc failed"); + return (NULL); + } + snprintf(pool_name, len, "%s", osname); + dprintf("pool_name [%s] from %s", pool_name, osname); + + matching = IOService::serviceMatching(kZFSDatasetSchemeClass); + if (!matching) { + dprintf("couldn't get match dict"); + kmem_free(pool_name, len); + return (NULL); + } + + /* Add the pool name for exact match */ + str = OSString::withCString(pool_name); + matching->setObject(kZFSPoolNameKey, str); + OSSafeReleaseNULL(str); + + object = IOService::copyMatchingService(matching); + + if (object && (scheme = OSDynamicCast(ZFSDatasetScheme, + object)) == NULL) { + object->release(); + } + object = NULL; + + if (scheme && ((str = OSDynamicCast(OSString, + scheme->getProperty(kZFSPoolNameKey))) == NULL || + str->isEqualTo(pool_name) == false)) { + scheme->release(); + scheme = NULL; + } + + if (!scheme) { + int i; + for (i = 0; i < 12; i++) { // up to 6s + iter = IOService::getMatchingServices(matching); + if (iter) break; + IOSleep(500); + } + + if (i) dprintf("%s: tried %d times\n", __func__, i); + + if (!iter) { + dprintf("couldn't get iterator"); + kmem_free(pool_name, len); + OSSafeReleaseNULL(matching); + return (NULL); + } + + while ((object = iter->getNextObject())) { + if (iter->isValid() == false) { + iter->reset(); + continue; + } + scheme = OSDynamicCast(ZFSDatasetScheme, object); + if (!scheme) continue; + + object = scheme->getProperty(kZFSPoolNameKey, + gIOServicePlane, kIORegistryIterateParents | + kIORegistryIterateRecursively); + if (!object) continue; + + str = OSDynamicCast(OSString, object); + if (!str) 
continue; + + if (str->isEqualTo(pool_name)) break; + + str = NULL; + object = NULL; + scheme = NULL; + } + + if (scheme) scheme->retain(); + OSSafeReleaseNULL(iter); + } + OSSafeReleaseNULL(matching); + kmem_free(pool_name, len); + pool_name = 0; + + if (scheme == NULL) { + dprintf("no matching pool proxy"); + } + return (scheme); + +#if 0 + spa_t *spa; + ZFSPool *pool = 0; + + if (!osname || osname[0] == '\0') { + dprintf("missing dataset argument"); + return (EINVAL); + } + + /* Lookup the pool spa */ + mutex_enter(&spa_namespace_lock); + spa = spa_lookup(osname); + if (spa && spa->spa_iokit_proxy) { + pool = spa->spa_iokit_proxy->proxy; + if (pool) pool->retain(); + } + mutex_exit(&spa_namespace_lock); + + /* Need a pool proxy to attach to */ + if (!pool) { + dprintf("couldn't get pool proxy"); + return (EINVAL); + } + return (0); +#endif +} + +/* + * Get the proxy device by matching a property name and value. + * + * Inputs: + * property: const char string. + * value: const char string. + * + * Return: + * Pointer to proxy on success, NULL on error or missing. + */ +static ZFSDataset * +zfs_osx_proxy_lookup(const char *property, OSObject *value) +{ + OSIterator *iter = NULL; + OSDictionary *matching = NULL; + OSObject *next = NULL, *prop = NULL; + ZFSDataset *dataset = NULL; + + /* Validate arguments */ + if (!property || !value || property[0] == '\0') { + dprintf("invalid argument"); + return (NULL); + } + + /* + * Create the matching dictionary for class. + * Add property and value to match dict. + */ + matching = IOService::serviceMatching(kZFSDatasetClassKey); + if ((matching) == NULL || + (matching->setObject(property, value) == false)) { + dprintf("match dictionary create failed"); + OSSafeReleaseNULL(matching); + return (NULL); + } + + /* Try to copy if there is only one match */ + next = IOService::copyMatchingService(matching); + if (next != NULL && ((dataset = OSDynamicCast(ZFSDataset, + next)) != NULL) && + (prop = dataset->getProperty(property)) != NULL && + (prop->isEqualTo(value))) { + dprintf("quick matched dataset"); + OSSafeReleaseNULL(matching); + /* Leave retain taken by copyMatching */ + return (dataset); + } + /* Unretained references */ + prop = NULL; + dataset = NULL; + /* If set, it was retained by copyMatchingService */ + OSSafeReleaseNULL(next); + + iter = IOService::getMatchingServices(matching); + OSSafeReleaseNULL(matching); + if (iter == NULL) { + dprintf("iterator failed"); + return (NULL); + } + + while ((next = iter->getNextObject())) { + dataset = OSDynamicCast(ZFSDataset, next); + if (!dataset) continue; + + if ((prop = dataset->getProperty(property)) == NULL) { + dataset = NULL; + continue; + } + + if (prop->isEqualTo(value)) { + /* Take a reference on the match */ + dprintf("found match"); + dataset->retain(); + prop = NULL; + break; + } + + prop = NULL; + dataset = NULL; + } + /* Release iterator */ + OSSafeReleaseNULL(iter); + + /* Leave retain */ + return (dataset); +#if 0 + /* + * Copy (first) matching service. + * Cast service to proxy class. + */ + if ((service = IOService::copyMatchingService(matching)) == NULL || + (dataset = OSDynamicCast(ZFSDataset, service)) == NULL) { + dprintf("matching failed"); + OSSafeReleaseNULL(service); + return (NULL); + } + + /* Leave retain from copyMatchingService */ + return (dataset); +#endif +} + +/* + * Get the proxy device for a given dataset name. + * + * Input: + * osname: dataset name e.g. pool/dataset + * + * Return: + * Valid ZFSDataset service, or NULL on error or missing. 
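+ *
+ * The returned service carries a retain from the lookup; callers such as
+ * zfs_osx_proxy_get_bsdname() below release it with OSSafeReleaseNULL()
+ * when done.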
+ */ +ZFSDataset * +zfs_osx_proxy_get(const char *osname) +{ + ZFSDataset *dataset; + OSString *osstr; + + /* Validate arguments, osname is limited to MAXNAMELEN */ + if (!osname || osname[0] == '\0' || osname[0] == '/' || + strnlen(osname, MAXNAMELEN+1) == (MAXNAMELEN+1)) { + dprintf("invalid argument"); + return (NULL); + } + + osstr = OSString::withCString(osname); + if (!osstr) { + dprintf("string alloc failed"); + return (NULL); + } + + dataset = zfs_osx_proxy_lookup(kZFSDatasetNameKey, osstr); + OSSafeReleaseNULL(osstr); + + if (!dataset) { + dprintf("lookup failed"); + return (NULL); + } + + return (dataset); +} + +/* + * Get the proxy device for a given a device name or path. + * + * Input: + * devpath: BSD name as const char* string, e.g. "/dev/diskN" or "diskN" + * must be null-terminated + * + * Return: + * Valid ZFSDataset service, or NULL on error or missing. + */ +static ZFSDataset * +zfs_osx_proxy_from_devpath(const char *devpath) +{ + /* XXX No need to init, will be assigned */ + ZFSDataset *dataset; + OSString *bsdstr; + const char *bsdname; + + /* Validate arguments, devpath is limited to MAXPATHLEN */ + if (!devpath || devpath[0] == '\0' || + strnlen(devpath, MAXPATHLEN+1) == (MAXPATHLEN+1)) { + dprintf("invalid argument"); + return (NULL); + } + + /* If we have a path, remove prefix */ + if (strncmp(devpath, "/dev/", 5) == 0) { + bsdname = devpath + 5; + } else { + bsdname = devpath; + } + + /* Make sure we have (at least) "diskN" at this point */ + if (strncmp(bsdname, "disk", 4) != 0 || bsdname[4] == '\0') { + dprintf("invalid bsdname %s from %s", bsdname, devpath); + return (NULL); + } + + bsdstr = OSString::withCString(bsdname); + if (!bsdstr) { + dprintf("string alloc failed"); + return (NULL); + } + + dataset = zfs_osx_proxy_lookup(kIOBSDNameKey, bsdstr); + OSSafeReleaseNULL(bsdstr); + + if (!dataset) { + dprintf("lookup with %s failed", bsdname); + return (NULL); + } + + return (dataset); +} + +/* + * Given a dataset, get the desired property and write its + * value to the caller-supplied buffer. + * + * Inputs: + * dataset: valid ZFSDataset object, should be retained by + * caller. + * property: const char* of the desired property name key. + * value: char* buffer which should be at least 'len' bytes. + * len: length of value buffer. + * + * Return: + * 0 on success, positive int on error. + */ +static int +zfs_osx_proxy_get_prop_string(ZFSDataset *dataset, + const char *property, char *value, int len) +{ + OSObject *obj; + OSString *valueString; + + /* Validate arguments */ + if (!dataset || !property || !value || len == 0) { + dprintf("invalid argument"); + return (EINVAL); + } + + /* Lock proxy while getting property */ + dataset->lockForArbitration(); + obj = dataset->copyProperty(property); + dataset->unlockForArbitration(); + + if (!obj) { + dprintf("no property %s", property); + return (ENXIO); + } + + valueString = OSDynamicCast(OSString, obj); + /* Validate property value */ + if (!valueString) { + dprintf("couldn't cast value for %s", property); + OSSafeReleaseNULL(obj); + return (ENXIO); + } + + /* Write up to len bytes */ + snprintf(value, len, "%s", valueString->getCStringNoCopy()); + + /* Release string and proxy */ + valueString = 0; + OSSafeReleaseNULL(obj); + + return (0); +} + +extern "C" { + +/* + * Given a ZFS dataset name, get the proxy device and write the + * BSD Name to the caller-supplied buffer. + * + * Inputs: + * osname: dataset name as char* string, e.g. 
"pool/dataset" + * must be null-terminated + * bsdname: char* string buffer where bsdname will be written + * len: length of bsdname buffer + * + * Return: + * 0 on success, positive int errno on failure. + */ +int +zfs_osx_proxy_get_bsdname(const char *osname, + char *bsdname, int len) +{ + /* XXX No need to init, will be assigned */ + ZFSDataset *dataset; + int ret; + + /* Validate arguments */ + if (!osname || !bsdname || len == 0) { + dprintf("invalid argument"); + return (EINVAL); + } + + /* Get dataset proxy (takes a retain) */ + dataset = zfs_osx_proxy_get(osname); + if (!dataset) { + dprintf("no proxy matching %s", osname); + return (ENOENT); + } + + /* Get BSD name property and write to bsdname buffer */ + ret = zfs_osx_proxy_get_prop_string(dataset, + kIOBSDNameKey, bsdname, len); + OSSafeReleaseNULL(dataset); + + if (ret != 0) { + dprintf("ret %d", ret); + } + + return (ret); +} + +/* + * Given a device name or path, get the proxy device and write the + * ZFS Dataset name to the caller-supplied buffer. + * + * Inputs: + * devpath: BSD name as const char* string, e.g. "/dev/diskN" or "diskN" + * must be null-terminated + * osname: char* string buffer where osname will be written + * len: length of osname buffer + * + * Return: + * 0 on success, positive int errno on failure. + */ +int +zfs_osx_proxy_get_osname(const char *devpath, char *osname, int len) +{ + /* XXX No need to init, will be assigned */ + ZFSDataset *dataset; + int ret; + + /* Validate arguments */ + if (!devpath || !osname || len == 0) { + dprintf("invalid argument"); + return (EINVAL); + } + + /* Get dataset proxy (takes a retain) */ + dataset = zfs_osx_proxy_from_devpath(devpath); + if (!dataset) { + dprintf("no proxy matching %s", devpath); + return (ENOENT); + } + + /* Get dataset name property and write to osname buffer */ + ret = zfs_osx_proxy_get_prop_string(dataset, + kZFSDatasetNameKey, osname, len); + OSSafeReleaseNULL(dataset); + + if (ret != 0) { + dprintf("ret %d", ret); + } + + return (ret); +} + +/* + * Check if a dataset has a proxy device. + * + * Input: + * osname: dataset name e.g. pool/dataset + * + * Return: + * 1 if exists, 0 on error or missing. + */ +int +zfs_osx_proxy_exists(const char *osname) +{ + ZFSDataset *dataset; + + /* Get dataset proxy (takes a retain) */ + if ((dataset = zfs_osx_proxy_get(osname)) != NULL) { + OSSafeReleaseNULL(dataset); + return (1); + } + + return (0); +} + +/* + * Remove the proxy device for a given dataset name. + * + * Input: + * osname: dataset name e.g. pool/dataset + */ +void +zfs_osx_proxy_remove(const char *osname) +{ + ZFSDataset *dataset; + ZFSDatasetScheme *provider; + + /* Get dataset proxy (takes a retain) */ + dataset = zfs_osx_proxy_get(osname); + if (dataset == NULL) { + dprintf("couldn't get dataset"); + return; + } +#if 0 + /* Terminate and release retain */ + dataset->terminate(kIOServiceSynchronous | kIOServiceRequired); + OSSafeReleaseNULL(dataset); +#endif + provider = OSDynamicCast(ZFSDatasetScheme, + dataset->getProvider()); + if (!provider) { + dprintf("invalid provider"); + return; + } + + OSSafeReleaseNULL(dataset); + dprintf("removing %s", osname); + provider->removeDataset(osname, /* force */ true); +} + +/* + * Create a proxy device for a given dataset name, unless one exists. + * + * Input: + * osname: dataset name e.g. pool/dataset + * + * Return: + * 0 on success, or positive int on error. 
+ */ +int +zfs_osx_proxy_create(const char *osname) +{ + ZFSDatasetScheme *provider = NULL; + + if (!osname || osname[0] == '\0') { + dprintf("missing dataset argument"); + return (EINVAL); + } + + provider = zfs_osx_proxy_scheme_by_osname(osname); + if (provider == NULL) { + dprintf("can't get pool proxy"); + return (ENOENT); + } + + if (provider->addDataset(osname) == false) { + dprintf("couldn't add dataset"); + provider->release(); + return (ENXIO); + } + + provider->release(); + return (0); +} + +} /* extern "C" */ + +static SInt32 +orderHoles(const OSMetaClassBase *obj1, const OSMetaClassBase *obj2, + __unused void *context) +{ + OSNumber *num1, *num2; + + if (obj1 == NULL || + (num1 = OSDynamicCast(OSNumber, obj1)) == NULL) { + /* Push invalid OSNumbers to end of list */ + return (-1); + } + if (obj2 == NULL || + (num2 = OSDynamicCast(OSNumber, obj2)) == NULL) { + /* If both are non-OSNumber, same ordering */ + if (num1 == NULL) + return (0); + /* If num1 is a valid OSNumber, push num2 to end */ + return (1); + } + + /* + * A comparison result of the object: + *
 *
+ *  - a negative value if obj2 should precede obj1,
+ *  - a positive value if obj1 should precede obj2,
+ *  - and 0 if obj1 and obj2 have an equivalent ordering.
+ *
+ */ + if (num1->isEqualTo(num2)) + return (0); + + if (num1->unsigned32BitValue() < num2->unsigned32BitValue()) { + return (1); + } else { + return (-1); + } +} + +OSDefineMetaClassAndStructors(ZFSDatasetScheme, IOPartitionScheme); + +void +ZFSDatasetScheme::free() +{ + OSSafeReleaseNULL(_datasets); + OSSafeReleaseNULL(_holes); + _max_id = 0; + + IOPartitionScheme::free(); +} + +bool +ZFSDatasetScheme::init(OSDictionary *properties) +{ + _datasets = OSSet::withCapacity(1); + _holes = OSOrderedSet::withCapacity(1, orderHoles); + _max_id = 0; + + if (!_datasets || !_holes) { + dprintf("OSSet allocation failed"); + OSSafeReleaseNULL(_datasets); + OSSafeReleaseNULL(_holes); + return (false); + } + + OSDictionary *newProps = NULL; + if (properties) newProps = OSDictionary::withDictionary(properties); + if (!newProps) newProps = OSDictionary::withCapacity(2); + OSString *str; + str = OSString::withCString("IOGUIDPartitionScheme"); + newProps->setObject("IOClass", str); + OSSafeReleaseNULL(str); + str = OSString::withCString("GUID_partition_scheme"); + newProps->setObject("Content Mask", str); + OSSafeReleaseNULL(str); + + if (IOPartitionScheme::init(newProps) == false) { + dprintf("IOPartitionScheme init failed"); + OSSafeReleaseNULL(newProps); + OSSafeReleaseNULL(_datasets); + OSSafeReleaseNULL(_holes); + return (false); + } + OSSafeReleaseNULL(newProps); + + return (true); +} + +bool +ZFSDatasetScheme::start(IOService *provider) +{ + OSObject *pool_name; + + if (IOPartitionScheme::start(provider) == false) { + dprintf("IOPartitionScheme start failed"); + return (false); + } + + pool_name = getProperty(kZFSPoolNameKey, + gIOServicePlane, kIORegistryIterateRecursively| + kIORegistryIterateParents); + if (pool_name) { + setProperty(kZFSPoolNameKey, pool_name); + } + + // registerService(kIOServiceAsynchronous); + registerService(kIOServiceSynchronous); + + return (true); +} + +IOService * +ZFSDatasetScheme::probe(IOService *provider, SInt32 *score) +{ + OSObject *property; + IOService *parent; + + /* First ask IOPartitionScheme to probe */ + if (IOPartitionScheme::probe(provider, score) == 0) { + dprintf("IOPartitionScheme probe failed"); + return (0); + } + + /* Check for ZFS Pool Name first */ + property = getProperty(kZFSPoolNameKey, gIOServicePlane, + kIORegistryIterateRecursively|kIORegistryIterateParents); + if (!property) { + dprintf("no pool name"); + return (0); + } + + /* Make sure we have a target, and valid provider below */ + if (provider == NULL || + OSDynamicCast(IOMedia, provider) == NULL || + (parent = provider->getProvider()) == NULL) { + dprintf("invalid provider"); + return (0); + } + + /* Make sure provider is driver, and has valid provider below */ + if (OSDynamicCast(IOBlockStorageDriver, parent) == NULL || + (parent = parent->getProvider()) == NULL) { + dprintf("invalid parent"); + return (0); + } + + /* Make sure the parent provider is a proxy */ + if (OSDynamicCast(ZFSDatasetProxy, parent) == NULL) { + dprintf("invalid grandparent"); + return (0); + } + + /* Successful match */ + dprintf("Match"); + // *score = 5000; + return (this); +} + +uint32_t +ZFSDatasetScheme::getNextPartitionID() +{ + uint32_t ret_id = 0ULL; + + /* Try to lock, unless service is terminated */ + if (lockForArbitration(false) == false) { + dprintf("service is terminated"); + return (0ULL); + } + + /* If the partiton list is sparse (has holes) */ + if (_holes->getCount() != 0) { + OSNumber *id_num = OSDynamicCast(OSNumber, + _holes->getFirstObject()); + + /* Just in case the list is invalid */ 
+#ifdef DEBUG + if (!id_num) panic("invalid hole list"); +#endif + + if (id_num) { + id_num->retain(); + _holes->removeObject(id_num); + ret_id = id_num->unsigned32BitValue(); + OSSafeReleaseNULL(id_num); + goto out; + } + } + + /* If no holes were found, just get next id */ + ret_id = (_max_id += 1); + +out: + unlockForArbitration(); + return (ret_id); +} + +void ZFSDatasetScheme::returnPartitionID(uint32_t part_id) +{ + OSNumber *id_num = OSNumber::withNumber(part_id, 32); + + if (!id_num) dprintf("alloc failed"); + /* XXX Continue and try to decrement max_id if possible */ + + if (lockForArbitration(false) == false) { + dprintf("service is terminated"); + OSSafeReleaseNULL(id_num); + return; + } + + /* Decrementing highest part id */ + if (part_id == _max_id) { + /* First, decrement max */ + _max_id--; + /* no longer needed */ + OSSafeReleaseNULL(id_num); + + /* Now iterate down the hole list */ + while ((id_num = OSDynamicCast(OSNumber, + _holes->getLastObject()))) { + /* Only need to remove consecutive matches */ + if (id_num->unsigned32BitValue() != (_max_id)) { + break; + } + + /* Remove this num from hole list */ + id_num->retain(); + _holes->removeObject(id_num); + OSSafeReleaseNULL(id_num); + /* Decrement max */ + _max_id--; + } + /* Creating a new 'hole' in the ID namespace */ + } else { + /* Better have been able to allocate OSNum */ + if (!id_num) { + unlockForArbitration(); +#ifdef DEBUG + panic("ZFSDatasetScheme %s failed to return partID", + __func__); +#endif + return; + } + + /* + * OSOrderedSet only enforces ordering when + * using setObject(anObject) interface. + * Therefore _holes must not use setFirstObject, + * setLastObject, setObject(index, anObject) + */ + + /* Add a new OSNum to hole list */ + _holes->setObject(id_num); + OSSafeReleaseNULL(id_num); + } + + unlockForArbitration(); +} + +bool +ZFSDatasetScheme::addDataset(const char *osname) +{ + ZFSDataset *dataset; + OSObject *obj; + OSNumber *sizeNum; + char location[24]; + uint64_t size; + uint32_t part_id; + + obj = copyProperty(kZFSPoolSizeKey, gIOServicePlane, + kIORegistryIterateRecursively|kIORegistryIterateParents); + if (!obj) { + dprintf("missing pool size"); + return (false); + } + sizeNum = OSDynamicCast(OSNumber, obj); + if (!sizeNum) { + dprintf("invalid pool size"); + return (false); + } + size = sizeNum->unsigned64BitValue(); + sizeNum = 0; + OSSafeReleaseNULL(obj); + + part_id = getNextPartitionID(); + /* Only using non-zero partition ids */ + if (part_id == 0) { + dprintf("invalid partition ID"); + return (false); + } + snprintf(location, sizeof (location), "%u", part_id); + +#if 0 + OSString *locationStr; + locationStr = OSString::withCString(location); + if (!locationStr) { + dprintf("location string alloc failed"); + return (false); + } + OSSafeReleaseNULL(locationStr); +#endif + + dataset = ZFSDataset::withDatasetNameAndSize(osname, size); + if (!dataset) { + dprintf("couldn't add %s", osname); + return (false); + } + + /* Set location in plane and partiton ID property */ + dataset->setLocation(location); +#ifdef kIOMediaBaseKey + dataset->setProperty(kIOMediaBaseKey, 0ULL, 64); +#endif + dataset->setProperty(kIOMediaPartitionIDKey, part_id, 32); + + // This sets the "diskutil list -> TYPE" field + dataset->setProperty("Content", "ZFS Dataset"); + // This matches with Info.plist, so it calls zfs.util for NAME + dataset->setProperty("Content Hint", + "6A898CC3-1DD2-11B2-99A6-080020736631"); + + if (dataset->attach(this) == false) { + dprintf("attach failed"); + OSSafeReleaseNULL(dataset); + 
return (false); + } + + if (dataset->start(this) == false) { + dprintf("start failed"); + dataset->detach(this); + OSSafeReleaseNULL(dataset); + return (false); + } + + /* Protect the OSSet by taking IOService lock */ + lockForArbitration(); + _datasets->setObject(dataset); + unlockForArbitration(); + + // dataset->registerService(kIOServiceAsynchronous); + dataset->registerService(kIOServiceSynchronous); + + /* Adding to OSSet takes a retain */ + OSSafeReleaseNULL(dataset); + + return (true); +} + +bool +ZFSDatasetScheme::removeDataset(const char *osname, bool force) +{ + OSCollectionIterator *iter; + ZFSDataset *dataset = NULL; + OSNumber *partNum; + uint32_t part_id = 0; + bool locked; + + if ((locked = lockForArbitration(false)) == false) { + dprintf("couldn't lock terminated service"); + } + + iter = OSCollectionIterator::withCollection(_datasets); + if (!iter) { + dprintf("couldn't get dataset iterator"); + return (false); + } + + while ((dataset = OSDynamicCast(ZFSDataset, + iter->getNextObject())) != NULL) { + OSObject *property; + OSString *str; + + property = dataset->getProperty(kZFSDatasetNameKey); + if (!property) continue; + + str = OSDynamicCast(OSString, property); + if (!str) continue; + + if (str->isEqualTo(osname)) { + _datasets->removeObject(dataset); + break; + } + } + + if (!dataset) { + dprintf("couldn't get dataset"); + iter->release(); + return (false); + } + + dataset->retain(); + iter->release(); + iter = 0; + + if (locked) unlockForArbitration(); + + partNum = OSDynamicCast(OSNumber, + dataset->getProperty(kIOMediaPartitionIDKey)); + if (!partNum) { + dprintf("couldn't get partition number"); + } else { + part_id = partNum->unsigned32BitValue(); + } + + if (force) { + dataset->terminate(kIOServiceSynchronous| + kIOServiceRequired); + } else { + dataset->terminate(kIOServiceSynchronous); + } + + dataset->release(); + dataset = 0; + + /* Only return non-zero partition ids */ + if (part_id != 0) { + dprintf("terminated partition %u", part_id); + returnPartitionID(part_id); + } + + return (true); +} + +/* Compatibility shims */ +void +ZFSDatasetScheme::read(IOService *client, + UInt64 byteStart, + IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + IOStorage::complete(completion, kIOReturnError, 0); +} + +void +ZFSDatasetScheme::write(IOService *client, + UInt64 byteStart, + IOMemoryDescriptor *buffer, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + IOStorage::complete(completion, kIOReturnError, 0); +} + +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) +IOReturn +ZFSDatasetScheme::synchronize(IOService *client, + UInt64 byteStart, + UInt64 byteCount, + IOStorageSynchronizeOptions options) +#else +IOReturn +ZFSDatasetScheme::synchronizeCache(IOService *client) +#endif +{ + return (kIOReturnUnsupported); +} + +IOReturn +ZFSDatasetScheme::unmap(IOService *client, + IOStorageExtent *extents, + UInt32 extentsCount, +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + IOStorageUnmapOptions options) +#else + UInt32 options) +#endif +{ + return (kIOReturnUnsupported); +} + +bool +ZFSDatasetScheme::lockPhysicalExtents(IOService *client) +{ + return (false); +} + +IOStorage * +ZFSDatasetScheme::copyPhysicalExtent(IOService *client, + UInt64 * byteStart, + UInt64 * byteCount) +{ + return (NULL); +} + +void +ZFSDatasetScheme::unlockPhysicalExtents(IOService *client) +{ +} + +#if 
defined(MAC_OS_X_VERSION_10_10) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_10) +IOReturn +ZFSDatasetScheme::setPriority(IOService *client, + IOStorageExtent *extents, + UInt32 extentsCount, + IOStoragePriority priority) +{ + return (kIOReturnUnsupported); +} +#endif diff --git a/module/os/macos/zfs/ZFSPool.cpp b/module/os/macos/zfs/ZFSPool.cpp new file mode 100644 index 0000000000..9d62b204f6 --- /dev/null +++ b/module/os/macos/zfs/ZFSPool.cpp @@ -0,0 +1,881 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2016, Evan Susarret. All rights reserved. + */ + +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +} /* extern "C" */ + +#include + +#if defined(DEBUG) || defined(ZFS_DEBUG) +#ifdef dprintf +#undef dprintf +#endif +#define dprintf(fmt, ...) do { \ + printf("ZFSPool %s " fmt "\n", __func__, ##__VA_ARGS__); \ +_NOTE(CONSTCOND) } while (0) +#else +#ifndef dprintf +#define dprintf(fmt, ...) do { } while (0); +#endif +#endif /* if DEBUG or ZFS_DEBUG */ + +#define DPRINTF_FUNC() do { dprintf("%s\n", __func__); } while (0); + +#if 0 +/* block size is 512 B, count is 512 M blocks */ +#define ZFS_POOL_DEV_BSIZE (UInt64)(1<<9) +#define ZFS_POOL_DEV_BCOUNT (UInt64)(2<<29) +#endif + +/* + * Returns handle to ZFS IOService, with a retain count. 
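+ * The service is located by class name ("net_lundman_zfs_zvol") via
+ * IOService::serviceMatching()/copyMatchingService(), so the caller is
+ * responsible for dropping the retain (e.g. with OSSafeReleaseNULL())
+ * once done with the handle.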
+ */ +static IOService * +copy_zfs_handle() +{ + /* Get the ZFS service handle the 'hard way' */ + OSDictionary *matching; + IOService *service = 0; + + matching = IOService::serviceMatching("net_lundman_zfs_zvol"); + if (matching) { + service = IOService::copyMatchingService(matching); + OSSafeReleaseNULL(matching); + } + + if (!service) { + dprintf("couldn't get zfs IOService"); + return (NULL); + } + + return (service); +#if 0 + /* Got service, make sure it casts */ + zfs_hl = OSDynamicCast(net_lundman_zfs_zvol, service); + if (zfs_hl == NULL) { + dprintf("couldn't get zfs_hl"); + /* Drop retain from copyMatchingService */ + OSSafeReleaseNULL(service); + return (NULL); + } + + return (zfs_hl); +#endif +} + +OSDefineMetaClassAndStructors(ZFSPool, IOService); + +#if 0 +bool +ZFSPool::open(IOService *client, IOOptionBits options, void *arg) +{ + bool ret; + + IOLog("ZFSPool %s\n", __func__); + + ret = IOService::open(client, options, arg); + + IOLog("ZFSPool %s ret %d\n", __func__, ret); + + return (ret); +} + +bool +ZFSPool::isOpen(const IOService *forClient) const +{ + IOLog("ZFSPool %s\n", __func__); + return (false); +} + +void +ZFSPool::close(IOService *client, IOOptionBits options) +{ + IOLog("ZFSPool %s\n", __func__); + IOService::close(client, options); +} +#endif + +bool +ZFSPool::handleOpen(IOService *client, + IOOptionBits options, void *arg) +{ + bool ret = true; + + dprintf(""); + // IOLog("ZFSPool %s\n", __func__); + + /* XXX IOService open() locks for arbitration around handleOpen */ + // lockForArbitration(); + _openClients->setObject(client); + ret = _openClients->containsObject(client); + // unlockForArbitration(); + + return (ret); +// return (IOService::handleOpen(client, options, NULL)); +} + +bool +ZFSPool::handleIsOpen(const IOService *client) const +{ + bool ret; + + dprintf(""); + // IOLog("ZFSPool %s\n", __func__); + + /* XXX IOService isOpen() locks for arbitration around handleIsOpen */ + // lockForArbitration(); + ret = _openClients->containsObject(client); + // unlockForArbitration(); + + return (ret); +// return (IOService::handleIsOpen(client)); +} + +void +ZFSPool::handleClose(IOService *client, + IOOptionBits options) +{ + dprintf(""); + // IOLog("ZFSPool %s\n", __func__); + + /* XXX IOService close() locks for arbitration around handleClose */ + // lockForArbitration(); + if (_openClients->containsObject(client) == false) { + dprintf("not open"); + } + /* Remove client from set */ + _openClients->removeObject(client); + // unlockForArbitration(); + +// IOService::handleClose(client, options); +} + +#if 0 +/* XXX IOBlockStorageDevice */ +void +ZFSPool::read(IOService *client, UInt64 byteStart, + IOMemoryDescriptor *buffer, IOStorageAttributes *attr, + IOStorageCompletion *completion) +{ + IOLog("ZFSPool %s\n", __func__); + IOStorage::complete(completion, kIOReturnError, 0); +} + +void +ZFSPool::write(IOService *client, UInt64 byteStart, + IOMemoryDescriptor *buffer, IOStorageAttributes *attr, + IOStorageCompletion *completion) +{ + IOLog("ZFSPool %s\n", __func__); + IOStorage::complete(completion, kIOReturnError, 0); +} +#endif + +bool +ZFSPool::setPoolName(const char *name) +{ +/* Assign dataset name from null-terminated string */ + OSString *dsstr; + // const OSSymbol *dsstr; +#if 0 + OSDictionary *dict; + char *newname, *oldname; +#else + char *newname; +#endif + size_t len; + + DPRINTF_FUNC(); + + /* Validate arguments */ + if (!name || (len = strnlen(name, + ZFS_MAX_DATASET_NAME_LEN)) == 0) { + dprintf("missing argument"); + return (false); + } + + /* 
Truncate too-long names (shouldn't happen) */ + if (len == ZFS_MAX_DATASET_NAME_LEN && + name[ZFS_MAX_DATASET_NAME_LEN] != '\0') { + dprintf("name too long [%s]", name); + /* XXX Just truncate the name */ + len--; + } + + /* Allocate room for name plus null char */ + newname = (char *)kmem_alloc(len+1, KM_SLEEP); + if (!newname) { + dprintf("string alloc failed"); + return (false); + } + snprintf(newname, len+1, "%s", name); + newname[len] = '\0'; /* just in case */ + + /* Save an OSString copy for IORegistry */ + dsstr = OSString::withCString(newname); + // dsstr = OSSymbol::withCString(newname); + + kmem_free(newname, len+1); + + if (!dsstr) { + dprintf("OSString failed"); + return (false); + } + +#if 0 + /* Swap into class private var */ + oldname = (char *)productString; + productString = newname; + newname = 0; + if (oldname) { + kmem_free(oldname, strlen(oldname)+1); + oldname = 0; + } + + /* Get and clone device characteristics prop dict */ + if ((dict = OSDynamicCast(OSDictionary, + getProperty(kIOPropertyDeviceCharacteristicsKey))) == NULL || + (dict = OSDictionary::withDictionary(dict)) == NULL) { + dprintf("couldn't clone prop dict"); + /* Should only happen during initialization */ + } + + if (dict) { + /* Copy string, add to dictionary, and replace prop dict */ + if (dict->setObject(kIOPropertyProductNameKey, + dsstr) == false || + setProperty(kIOPropertyDeviceCharacteristicsKey, + dict) == false) { + dprintf("couldn't set name"); + OSSafeReleaseNULL(dsstr); + OSSafeReleaseNULL(dict); + return (false); + } + OSSafeReleaseNULL(dict); + } +#endif + + /* Set Pool name IOregistry property */ + setProperty(kZFSPoolNameKey, dsstr); + + /* Finally, set the IORegistryEntry/IOService name */ + setName(dsstr->getCStringNoCopy()); + OSSafeReleaseNULL(dsstr); + + return (true); +} + +bool +ZFSPool::init(OSDictionary *properties, spa_t *spa) +{ +#if 0 + /* Allocate dictionaries and symbols */ + OSDictionary *pdict = OSDictionary::withCapacity(2); + OSDictionary *ddict = OSDictionary::withCapacity(4); + const OSSymbol *virtualSymbol = OSSymbol::withCString( + kIOPropertyPhysicalInterconnectTypeVirtual); + const OSSymbol *locationSymbol = OSSymbol::withCString( + kIOPropertyInternalExternalKey); + const OSSymbol *ssdSymbol = OSSymbol::withCString( + kIOPropertyMediumTypeSolidStateKey); + OSNumber *physSize = NULL, *logSize = NULL; + const OSSymbol *vendorSymbol = 0; + const OSSymbol *revisionSymbol = 0; + const OSSymbol *blankSymbol = 0; + OSBoolean *rdonly = 0; + UInt64 phys_bsize, log_bsize; + OSString *str = 0; + const char *cstr = 0; +#endif + uint64_t space; + bool ret = false; + + DPRINTF_FUNC(); + +#if 0 + physSize = OSNumber::withNumber((uint32_t)ZFS_POOL_DEV_BSIZE, 32); + logSize = OSNumber::withNumber((uint32_t)ZFS_POOL_DEV_BSIZE, 32); +#endif + if (!spa) { + dprintf("missing spa"); + goto error; + } + +#if 0 + /* Get physical and logical size from spa */ + phys_bsize = (1ULL<spa_max_ashift); + log_bsize = (1ULL<spa_min_ashift); +#endif + +#if 0 + /* Workaround glitchy behavior with large bsize in xnu */ + if (log_bsize > 8192) log_bsize = 8192; +#endif + +#if 0 + /* XXX Shouldn't be possible */ + if (log_bsize == 0) log_bsize = DEV_BSIZE; + + physSize = OSNumber::withNumber((uint32_t)phys_bsize, 32); + logSize = OSNumber::withNumber((uint32_t)log_bsize, 32); + + /* Validate allocations */ + if (!pdict || !ddict || !virtualSymbol || !locationSymbol || + !ssdSymbol || !physSize || !logSize) { + dprintf("allocation failed"); + goto error; + } +#endif + + /* Need an OSSet for open 
clients */ + _openClients = OSSet::withCapacity(1); + if (_openClients == NULL) { + dprintf("client OSSet failed"); + goto error; + } + + /* Set spa pointer and this Pool object's name to match */ + if (!spa) { + dprintf("missing spa"); + goto error; + } + _spa = spa; + // setName(spa_name(spa)); + +#if 0 + /* Init class statics every time an instance inits */ + /* Shared across instances, but doesn't hurt to reprint */ + if (vendorString == NULL) { + char *string; + int len = strlen("zpool")+1; + string = (char *)kmem_alloc(len, KM_SLEEP); + if (!string) goto error; + snprintf(string, len, "zpool"); + vendorString = string; + } + + if (revisionString == NULL) { + char *string; + int len = strlen("0.1")+1; + string = (char *)kmem_alloc(len, KM_SLEEP); + if (!string) goto error; + snprintf(string, len, "0.1"); + revisionString = string; + } + + if (revisionString == NULL) { + char *string; + int len = strlen("ZFS Pool")+1; + string = (char *)kmem_alloc(len, KM_SLEEP); + if (!string) goto error; + snprintf(string, len, "ZFS pool"); + infoString = string; + } + + /* For IORegistry keys, cache OSSymbols for class statics */ + /* Leverages OSSymbol cahce pool to reuse across instances */ + vendorSymbol = OSSymbol::withCString(vendorString); + revisionSymbol = OSSymbol::withCString(revisionString); + blankSymbol = OSSymbol::withCString(""); + if (!vendorSymbol || !revisionSymbol || !blankSymbol) { + dprintf("class symbols failed"); + goto error; + } +#endif + + /* Call super init */ + if (IOService::init(properties) == false) { + dprintf("device init failed"); + goto error; + } + +#if 0 + /* Set class private vars */ + productString = NULL; + isReadOnly = false; // XXX should really be true initially + + /* Set Protocol Characteristics */ + if (pdict->setObject(kIOPropertyPhysicalInterconnectLocationKey, + locationSymbol) == false || + pdict->setObject(kIOPropertyPhysicalInterconnectTypeKey, + virtualSymbol) == false) { + dprintf("pdict set properties failed"); + goto error; + } + setProperty(kIOPropertyProtocolCharacteristicsKey, pdict); + + /* Set Device Characteristics */ + if (ddict->setObject(kIOPropertyVendorNameKey, + vendorSymbol) == false || + ddict->setObject(kIOPropertyProductRevisionLevelKey, + revisionSymbol) == false || + ddict->setObject(kIOPropertyProductSerialNumberKey, + blankSymbol) == false || + ddict->setObject(kIOPropertyPhysicalBlockSizeKey, + physSize) == false || + ddict->setObject(kIOPropertyLogicalBlockSizeKey, + logSize) == false || + ddict->setObject(kIOPropertyMediumTypeKey, + ssdSymbol) == false) { + dprintf("ddict set properties failed"); + goto error; + } + setProperty(kIOPropertyDeviceCharacteristicsKey, ddict); + + /* Check for passed in readonly status */ + if (properties && (rdonly = OSDynamicCast(OSBoolean, + properties->getObject(kZFSPoolReadOnlyKey))) != NULL) { + /* Got the boolean */ + isReadOnly = rdonly->getValue(); + dprintf("set %s", (isReadOnly ? 
"readonly" : "readwrite")); + } + + /* Check for passed in pool GUID */ + if (properties && (str = OSDynamicCast(OSString, + properties->getObject(kZFSPoolGUIDKey))) != NULL) { + /* Got the string, try to set GUID */ + str->retain(); + if (ddict->setObject(kZFSPoolGUIDKey, str) == false) { + dprintf("couldn't set GUID"); + OSSafeReleaseNULL(str); + goto error; + } +#ifdef DEBUG + cstr = str->getCStringNoCopy(); + dprintf("set GUID"); + cstr = 0; +#endif + OSSafeReleaseNULL(str); + } +#endif + + if (setPoolName(spa_name(spa)) == false) { + dprintf("setPoolName failed"); + goto error; + } + + space = spa_get_dspace(spa); +dprintf("space %llu", space); + setProperty(kZFSPoolSizeKey, space, 64); + +#if 0 + /* Check for passed in pool name */ + if (properties && (str = OSDynamicCast(OSString, + properties->getObject(kZFSPoolNameKey))) != NULL && + (cstr = str->getCStringNoCopy()) != NULL) { + /* Got the string, try to set name */ + str->retain(); + if (setPoolName(cstr) == false) { + /* Unlikely */ + dprintf("couldn't setup pool" + " name property [%s]", cstr); + OSSafeReleaseNULL(str); + goto error; + } + + dprintf("set pool name [%s]", cstr); + OSSafeReleaseNULL(str); + } else { + if (setPoolName("invalid") == false) { + dprintf("setPoolName failed"); + goto error; + } + dprintf("set name [invalid]"); + } +#endif + + /* Success */ + ret = true; + +error: +#if 0 + /* All of these will be released on error */ + OSSafeReleaseNULL(pdict); + OSSafeReleaseNULL(ddict); + OSSafeReleaseNULL(virtualSymbol); + OSSafeReleaseNULL(locationSymbol); + OSSafeReleaseNULL(ssdSymbol); + OSSafeReleaseNULL(physSize); + OSSafeReleaseNULL(logSize); + OSSafeReleaseNULL(vendorSymbol); + OSSafeReleaseNULL(revisionSymbol); + OSSafeReleaseNULL(blankSymbol); + OSSafeReleaseNULL(str); +#endif + return (ret); +} + +void +ZFSPool::free() +{ + OSSet *oldSet; +#if 0 + char *pstring; +#endif + + if (_openClients) { + oldSet = _openClients; + _openClients = 0; + OSSafeReleaseNULL(oldSet); + } + _spa = 0; + +#if 0 + pstring = (char *)productString; + productString = 0; + if (pstring) kmem_free(pstring, strlen(pstring) + 1); +#endif + + IOService::free(); +} + +extern "C" { + +void +spa_iokit_pool_proxy_destroy(spa_t *spa) +{ + ZFSPool *proxy; + spa_iokit_t *wrapper; + + if (!spa) { + printf("missing spa"); + return; + } + + /* Get pool proxy */ + wrapper = spa->spa_iokit_proxy; + spa->spa_iokit_proxy = NULL; + + if (wrapper == NULL) { + printf("missing spa_iokit_proxy"); + return; + } + + proxy = wrapper->proxy; + + /* Free the struct */ + kmem_free(wrapper, sizeof (spa_iokit_t)); + if (!proxy) { + printf("missing proxy"); + return; + } + + if (proxy->terminate(kIOServiceSynchronous| + kIOServiceRequired) == false) { + dprintf("terminate failed"); + } + proxy->release(); + + /* + * IOService *provider; + * provider = proxy->getProvider(); + * + * proxy->detach(provider); + * proxy->stop(provider); + * + * proxy->release(); + */ +} + +int +spa_iokit_pool_proxy_create(spa_t *spa) +{ + IOService *zfs_hl; + ZFSPool *proxy; + spa_iokit_t *wrapper; + + if (!spa) { + dprintf("missing spa"); + return (EINVAL); + } + + /* Allocate C struct */ + if ((wrapper = (spa_iokit_t *)kmem_alloc(sizeof (spa_iokit_t), + KM_SLEEP)) == NULL) { + dprintf("couldn't allocate wrapper"); + return (ENOMEM); + } + + /* Get ZFS IOService */ + if ((zfs_hl = copy_zfs_handle()) == NULL) { + dprintf("couldn't get ZFS handle"); + kmem_free(wrapper, sizeof (spa_iokit_t)); + return (ENODEV); + } + + /* Allocate and init ZFS pool proxy */ + proxy = 
ZFSPool::withProviderAndPool(zfs_hl, spa); + if (!proxy) { + dprintf("Pool proxy creation failed"); + kmem_free(wrapper, sizeof (spa_iokit_t)); + OSSafeReleaseNULL(zfs_hl); + return (ENOMEM); + } + /* Drop retain from copy_zfs_handle */ + OSSafeReleaseNULL(zfs_hl); + + /* Set pool proxy */ + wrapper->proxy = proxy; + spa->spa_iokit_proxy = wrapper; + + return (0); +} + +} /* extern "C" */ + +ZFSPool * +ZFSPool::withProviderAndPool(IOService *zfs_hl, spa_t *spa) +{ + ZFSPool *proxy = new ZFSPool; + + if (!proxy) { + printf("allocation failed"); + return (0); + } + + if (proxy->init(0, spa) == false || + proxy->attach(zfs_hl) == false) { + printf("init/attach failed"); + OSSafeReleaseNULL(proxy); + return (0); + } + + if (proxy->start(zfs_hl) == false) { + printf("start failed"); + proxy->detach(zfs_hl); + OSSafeReleaseNULL(proxy); + return (0); + } + + /* Open zfs_hl, adding proxy to its open clients */ + // if (proxy->open(zfs_hl) == false) { + if (zfs_hl->open(proxy) == false) { + printf("open failed"); + proxy->stop(zfs_hl); + proxy->detach(zfs_hl); + OSSafeReleaseNULL(proxy); + return (0); + } + proxy->registerService(kIOServiceAsynchronous); + + return (proxy); +} + +#if 0 +/* XXX IOBlockStorageDevice */ +IOReturn +ZFSPool::doSynchronizeCache(void) +{ + dprintf(""); + return (kIOReturnSuccess); +} + +IOReturn +ZFSPool::doAsyncReadWrite(IOMemoryDescriptor *buffer, + UInt64 block, UInt64 nblks, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + char zero[ZFS_POOL_DEV_BSIZE]; + size_t len, cur, off = 0; + + DPRINTF_FUNC(); + + if (!buffer) { + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); + } + + /* Read vs. write */ + if (buffer->getDirection() == kIODirectionIn) { + /* Zero the read buffer */ + bzero(zero, ZFS_POOL_DEV_BSIZE); + len = buffer->getLength(); + while (len > 0) { + cur = (len > ZFS_POOL_DEV_BSIZE ? + ZFS_POOL_DEV_BSIZE : len); + buffer->writeBytes(/* offset */ off, + /* buf */ zero, /* length */ cur); + off += cur; + len -= cur; + } + // dprintf("read: %llu %llu", block, nblks); + IOStorage::complete(completion, kIOReturnSuccess, + buffer->getLength()); + return (kIOReturnSuccess); + } + + if (buffer->getDirection() != kIODirectionOut) { + dprintf("invalid direction %d", buffer->getDirection()); + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); + } + + /* + * XXX For now this just returns error for all writes. + * If it turns out that mountroot/bdevvp try to + * verify writable status by reading a block and writing + * it back to disk, lie and say it succeeded. + */ + dprintf("write: %llu %llu", block, nblks); + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); +} + +IOReturn +ZFSPool::doEjectMedia() +{ + DPRINTF_FUNC(); + /* XXX Called at shutdown, maybe return success? 
*/ + return (kIOReturnError); +} + +IOReturn +ZFSPool::doFormatMedia(UInt64 byteCapacity) +{ + DPRINTF_FUNC(); + /* XXX shouldn't need it */ + return (kIOReturnError); + // return (kIOReturnSuccess); +} + +UInt32 +ZFSPool::doGetFormatCapacities(UInt64 *capacities, + UInt32 capacitiesMaxCount) const +{ + DPRINTF_FUNC(); + if (capacities && capacitiesMaxCount > 0) { + capacities[0] = (ZFS_POOL_DEV_BSIZE * ZFS_POOL_DEV_BCOUNT); + dprintf("capacity %llu", capacities[0]); + } + + /* Always inform caller of capacity count */ + return (1); +} + +/* Returns full pool name from instance private var */ +char * +ZFSPool::getProductString() +{ + if (productString) dprintf("[%s]", productString); + /* Return class private string */ + return ((char *)productString); +} + +/* Returns readonly status from instance private var */ +IOReturn +ZFSPool::reportWriteProtection(bool *isWriteProtected) +{ + DPRINTF_FUNC(); + if (isWriteProtected) *isWriteProtected = isReadOnly; + return (kIOReturnSuccess); +} + +/* These return class static string for all instances */ +char * +ZFSPool::getVendorString() +{ + dprintf("[%s]", vendorString); + /* Return class static string */ + return ((char *)vendorString); +} +char * +ZFSPool::getRevisionString() +{ + dprintf("[%s]", revisionString); + /* Return class static string */ + return ((char *)revisionString); +} +char * +ZFSPool::getAdditionalDeviceInfoString() +{ + dprintf("[%s]", infoString); + /* Return class static string */ + return ((char *)infoString); +} + +/* Always return media present and unchanged */ +IOReturn +ZFSPool::reportMediaState(bool *mediaPresent, + bool *changedState) +{ + DPRINTF_FUNC(); + if (mediaPresent) *mediaPresent = true; + if (changedState) *changedState = false; + return (kIOReturnSuccess); +} + +/* Always report nonremovable and nonejectable */ +IOReturn +ZFSPool::reportRemovability(bool *isRemoveable) +{ + DPRINTF_FUNC(); + if (isRemoveable) *isRemoveable = false; + return (kIOReturnSuccess); +} +IOReturn +ZFSPool::reportEjectability(bool *isEjectable) +{ + DPRINTF_FUNC(); + if (isEjectable) *isEjectable = false; + return (kIOReturnSuccess); +} + +/* Always report 512b blocksize */ +IOReturn +ZFSPool::reportBlockSize(UInt64 *blockSize) +{ + DPRINTF_FUNC(); + if (!blockSize) + return (kIOReturnError); + + *blockSize = ZFS_POOL_DEV_BSIZE; + return (kIOReturnSuccess); +} + +/* XXX Calculate from dev_bcount, should get size from objset */ +/* XXX Can issue message kIOMessageMediaParametersHaveChanged to update */ +IOReturn +ZFSPool::reportMaxValidBlock(UInt64 *maxBlock) +{ + DPRINTF_FUNC(); + if (!maxBlock) + return (kIOReturnError); + + // *maxBlock = 0; + *maxBlock = ZFS_POOL_DEV_BCOUNT - 1; + dprintf("maxBlock %llu", *maxBlock); + + return (kIOReturnSuccess); +} +#endif diff --git a/module/os/macos/zfs/abd_os.c b/module/os/macos/zfs/abd_os.c new file mode 100644 index 0000000000..4a702b1ab4 --- /dev/null +++ b/module/os/macos/zfs/abd_os.c @@ -0,0 +1,482 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 by Chunwei Chen. All rights reserved. + * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright (c) 2020 by Jorgen Lundman. 
All rights reserved. + */ + +/* + * See abd.c for a general overview of the arc buffered data (ABD). + * + * Using a large proportion of scattered ABDs decreases ARC fragmentation since + * when we are at the limit of allocatable space, using equal-size chunks will + * allow us to quickly reclaim enough space for a new large allocation (assuming + * it is also scattered). + * + * ABDs are allocated scattered by default unless the caller uses + * abd_alloc_linear() or zfs_abd_scatter_enabled is disabled. + */ + +#include +#include +#include +#include +#include + +typedef struct abd_stats { + kstat_named_t abdstat_struct_size; + kstat_named_t abdstat_scatter_cnt; + kstat_named_t abdstat_scatter_data_size; + kstat_named_t abdstat_scatter_chunk_waste; + kstat_named_t abdstat_linear_cnt; + kstat_named_t abdstat_linear_data_size; +} abd_stats_t; + +static abd_stats_t abd_stats = { + /* Amount of memory occupied by all of the abd_t struct allocations */ + { "struct_size", KSTAT_DATA_UINT64 }, + /* + * The number of scatter ABDs which are currently allocated, excluding + * ABDs which don't own their data (for instance the ones which were + * allocated through abd_get_offset()). + */ + { "scatter_cnt", KSTAT_DATA_UINT64 }, + /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */ + { "scatter_data_size", KSTAT_DATA_UINT64 }, + /* + * The amount of space wasted at the end of the last chunk across all + * scatter ABDs tracked by scatter_cnt. + */ + { "scatter_chunk_waste", KSTAT_DATA_UINT64 }, + /* + * The number of linear ABDs which are currently allocated, excluding + * ABDs which don't own their data (for instance the ones which were + * allocated through abd_get_offset() and abd_get_from_buf()). If an + * ABD takes ownership of its buf then it will become tracked. + */ + { "linear_cnt", KSTAT_DATA_UINT64 }, + /* Amount of data stored in all linear ABDs tracked by linear_cnt */ + { "linear_data_size", KSTAT_DATA_UINT64 }, +}; + +/* + * The size of the chunks ABD allocates. Because the sizes allocated from the + * kmem_cache can't change, this tunable can only be modified at boot. Changing + * it at runtime would cause ABD iteration to work incorrectly for ABDs which + * were allocated with the old size, so a safeguard has been put in place which + * will cause the machine to panic if you change it and try to access the data + * within a scattered ABD. + */ +size_t zfs_abd_chunk_size = 4096; + +kmem_cache_t *abd_chunk_cache; +static kstat_t *abd_ksp; + + +/* + * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are + * just a single zero'd sized zfs_abd_chunk_size buffer. This + * allows us to conserve memory by only using a single zero buffer + * for the scatter chunks. + */ +abd_t *abd_zero_scatter = NULL; +static char *abd_zero_buf = NULL; + +static void +abd_free_chunk(void *c) +{ + kmem_cache_free(abd_chunk_cache, c); +} + +static size_t +abd_chunkcnt_for_bytes(size_t size) +{ + return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size); +} + +static inline size_t +abd_scatter_chunkcnt(abd_t *abd) +{ + ASSERT(!abd_is_linear(abd)); + return (abd_chunkcnt_for_bytes( + ABD_SCATTER(abd).abd_offset + abd->abd_size)); +} + +boolean_t +abd_size_alloc_linear(size_t size) +{ + return (size <= zfs_abd_chunk_size ? 
B_TRUE : B_FALSE); +} + +void +abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) +{ + size_t n = abd_scatter_chunkcnt(abd); + ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); + if (op == ABDSTAT_INCR) { + ABDSTAT_BUMP(abdstat_scatter_cnt); + ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size); + ABDSTAT_INCR(abdstat_scatter_chunk_waste, + n * zfs_abd_chunk_size - abd->abd_size); + } else { + ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); + ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); + ABDSTAT_INCR(abdstat_scatter_chunk_waste, + abd->abd_size - n * zfs_abd_chunk_size); + } +} + +void +abd_update_linear_stats(abd_t *abd, abd_stats_op_t op) +{ + ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); + if (op == ABDSTAT_INCR) { + ABDSTAT_BUMP(abdstat_linear_cnt); + ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size); + } else { + ABDSTAT_BUMPDOWN(abdstat_linear_cnt); + ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); + } +} + +void +abd_verify_scatter(abd_t *abd) +{ + /* + * There is no scatter linear pages in FreeBSD so there is an + * if an error if the ABD has been marked as a linear page. + */ + VERIFY(!abd_is_linear_page(abd)); + ASSERT3U(ABD_SCATTER(abd).abd_offset, <, + zfs_abd_chunk_size); + size_t n = abd_scatter_chunkcnt(abd); + for (int i = 0; i < n; i++) { + ASSERT3P( + ABD_SCATTER(abd).abd_chunks[i], !=, NULL); + } +} + +void +abd_alloc_chunks(abd_t *abd, size_t size) +{ + size_t n = abd_chunkcnt_for_bytes(size); + for (int i = 0; i < n; i++) { + void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); + ASSERT3P(c, !=, NULL); + ABD_SCATTER(abd).abd_chunks[i] = c; + } + ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size; +} + +void +abd_free_chunks(abd_t *abd) +{ + size_t n = abd_scatter_chunkcnt(abd); + for (int i = 0; i < n; i++) { + abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]); + } +} + +abd_t * +abd_alloc_struct(size_t size) +{ + size_t chunkcnt = abd_chunkcnt_for_bytes(size); + size_t abd_size = offsetof(abd_t, + abd_u.abd_scatter.abd_chunks[chunkcnt]); + abd_t *abd = kmem_alloc(MAX(abd_size, sizeof (abd_t)), KM_PUSHPAGE); + ASSERT3P(abd, !=, NULL); + abd->abd_orig_size = MAX(abd_size, sizeof (abd_t)); + list_link_init(&abd->abd_gang_link); + mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL); + ABDSTAT_INCR(abdstat_struct_size, abd_size); + + return (abd); +} + +void +abd_free_struct(abd_t *abd) +{ + size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd); + int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); + mutex_destroy(&abd->abd_mtx); + ASSERT(!list_link_active(&abd->abd_gang_link)); + kmem_free(abd, MAX(size, sizeof(abd_t))); + ABDSTAT_INCR(abdstat_struct_size, -size); +} + +/* + * Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where + * each chunk in the scatterlist will be set to abd_zero_buf. 
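+ *
+ * For example, with the default zfs_abd_chunk_size of 4096 bytes and
+ * SPA_MAXBLOCKSIZE (16 MiB), the zero ABD carries 4096 chunk pointers
+ * that all reference the same single zeroed 4 KiB buffer.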
+ */ +static void +abd_alloc_zero_scatter(void) +{ + size_t n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE); + abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP); + abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); + + abd_zero_scatter->abd_flags = ABD_FLAG_OWNER | ABD_FLAG_ZEROS; + abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; + abd_zero_scatter->abd_parent = NULL; + zfs_refcount_create(&abd_zero_scatter->abd_children); + + ABD_SCATTER(abd_zero_scatter).abd_offset = 0; + ABD_SCATTER(abd_zero_scatter).abd_chunk_size = + zfs_abd_chunk_size; + + for (int i = 0; i < n; i++) { + ABD_SCATTER(abd_zero_scatter).abd_chunks[i] = + abd_zero_buf; + } + + ABDSTAT_BUMP(abdstat_scatter_cnt); + ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size); +} + +static void +abd_free_zero_scatter(void) +{ + zfs_refcount_destroy(&abd_zero_scatter->abd_children); + ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); + ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size); + + abd_free_struct(abd_zero_scatter); + abd_zero_scatter = NULL; + kmem_free(abd_zero_buf, zfs_abd_chunk_size); +} + +void +abd_init(void) +{ + abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, + NULL, NULL, NULL, NULL, 0, KMC_NOTOUCH | KMC_NODEBUG); + + abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, + sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); + if (abd_ksp != NULL) { + abd_ksp->ks_data = &abd_stats; + kstat_install(abd_ksp); + } + + abd_alloc_zero_scatter(); +} + +void +abd_fini(void) +{ + abd_free_zero_scatter(); + + if (abd_ksp != NULL) { + kstat_delete(abd_ksp); + abd_ksp = NULL; + } + + kmem_cache_destroy(abd_chunk_cache); + abd_chunk_cache = NULL; +} + +void +abd_free_linear_page(abd_t *abd) +{ + /* + * FreeBSD does not have have scatter linear pages + * so there is an error. + */ + VERIFY(0); +} + +/* + * If we're going to use this ABD for doing I/O using the block layer, the + * consumer of the ABD data doesn't care if it's scattered or not, and we don't + * plan to store this ABD in memory for a long period of time, we should + * allocate the ABD type that requires the least data copying to do the I/O. + * + * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os + * using a scatter/gather list we should switch to that and replace this call + * with vanilla abd_alloc(). + */ +abd_t * +abd_alloc_for_io(size_t size, boolean_t is_metadata) +{ + return (abd_alloc_linear(size, is_metadata)); +} + +/* + * This is just a helper function to abd_get_offset_scatter() to alloc a + * scatter ABD using the calculated chunkcnt based on the offset within the + * parent ABD. 
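+ *
+ * Only the chunk-pointer array is sized here; abd_get_offset_scatter()
+ * then fills it in by copying the tail of the parent's chunk list and
+ * records the remaining intra-chunk offset in abd_offset.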
+ */ +static abd_t * +abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt) +{ + size_t abd_size = offsetof(abd_t, + abd_u.abd_scatter.abd_chunks[chunkcnt]); + abd_t *abd = kmem_alloc(MAX(abd_size, sizeof (abd_t)), KM_PUSHPAGE); + ASSERT3P(abd, !=, NULL); + abd->abd_orig_size = MAX(abd_size, sizeof (abd_t)); + list_link_init(&abd->abd_gang_link); + mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL); + ABDSTAT_INCR(abdstat_struct_size, abd_size); + + return (abd); +} + +abd_t * +abd_get_offset_scatter(abd_t *sabd, size_t off) +{ + abd_t *abd = NULL; + + abd_verify(sabd); + ASSERT3U(off, <=, sabd->abd_size); + + size_t new_offset = ABD_SCATTER(sabd).abd_offset + off; + size_t chunkcnt = abd_scatter_chunkcnt(sabd) - + (new_offset / zfs_abd_chunk_size); + + abd = abd_alloc_scatter_offset_chunkcnt(chunkcnt); + + /* + * Even if this buf is filesystem metadata, we only track that + * if we own the underlying data buffer, which is not true in + * this case. Therefore, we don't ever use ABD_FLAG_META here. + */ + abd->abd_flags = 0; + + ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size; + ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size; + + /* Copy the scatterlist starting at the correct offset */ + (void) memcpy(&ABD_SCATTER(abd).abd_chunks, + &ABD_SCATTER(sabd).abd_chunks[new_offset / + zfs_abd_chunk_size], + chunkcnt * sizeof (void *)); + + return (abd); +} + +static inline size_t +abd_iter_scatter_chunk_offset(struct abd_iter *aiter) +{ + ASSERT(!abd_is_linear(aiter->iter_abd)); + return ((ABD_SCATTER(aiter->iter_abd).abd_offset + + aiter->iter_pos) % zfs_abd_chunk_size); +} + +static inline size_t +abd_iter_scatter_chunk_index(struct abd_iter *aiter) +{ + ASSERT(!abd_is_linear(aiter->iter_abd)); + return ((ABD_SCATTER(aiter->iter_abd).abd_offset + + aiter->iter_pos) / zfs_abd_chunk_size); +} + +/* + * Initialize the abd_iter. + */ +void +abd_iter_init(struct abd_iter *aiter, abd_t *abd) +{ + ASSERT(!abd_is_gang(abd)); + abd_verify(abd); + aiter->iter_abd = abd; + aiter->iter_pos = 0; + aiter->iter_mapaddr = NULL; + aiter->iter_mapsize = 0; +} + +/* + * This is just a helper function to see if we have exhausted the + * abd_iter and reached the end. + */ +boolean_t +abd_iter_at_end(struct abd_iter *aiter) +{ + return (aiter->iter_pos == aiter->iter_abd->abd_size); +} + +/* + * Advance the iterator by a certain amount. Cannot be called when a chunk is + * in use. This can be safely called when the aiter has already exhausted, in + * which case this does nothing. + */ +void +abd_iter_advance(struct abd_iter *aiter, size_t amount) +{ + ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0(aiter->iter_mapsize); + + /* There's nothing left to advance to, so do nothing */ + if (abd_iter_at_end(aiter)) + return; + + aiter->iter_pos += amount; +} + +/* + * Map the current chunk into aiter. This can be safely called when the aiter + * has already exhausted, in which case this does nothing. 
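+ *
+ * For a linear ABD the whole remaining buffer is mapped at once; for a
+ * scatter ABD the mapping never extends past the end of the current
+ * chunk.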
+ */ +void +abd_iter_map(struct abd_iter *aiter) +{ + void *paddr; + size_t offset = 0; + + ASSERT3P(aiter->iter_mapaddr, ==, NULL); + ASSERT0(aiter->iter_mapsize); + + /* Panic if someone has changed zfs_abd_chunk_size */ + IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size == + ABD_SCATTER(aiter->iter_abd).abd_chunk_size); + + /* There's nothing left to iterate over, so do nothing */ + if (abd_iter_at_end(aiter)) + return; + + if (abd_is_linear(aiter->iter_abd)) { + offset = aiter->iter_pos; + aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; + paddr = ABD_LINEAR_BUF(aiter->iter_abd); + } else { + size_t index = abd_iter_scatter_chunk_index(aiter); + offset = abd_iter_scatter_chunk_offset(aiter); + aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset, + aiter->iter_abd->abd_size - aiter->iter_pos); + paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index]; + } + aiter->iter_mapaddr = (char *)paddr + offset; +} + +/* + * Unmap the current chunk from aiter. This can be safely called when the aiter + * has already exhausted, in which case this does nothing. + */ +void +abd_iter_unmap(struct abd_iter *aiter) +{ + /* There's nothing left to unmap, so do nothing */ + if (abd_iter_at_end(aiter)) + return; + + ASSERT3P(aiter->iter_mapaddr, !=, NULL); + ASSERT3U(aiter->iter_mapsize, >, 0); + + aiter->iter_mapaddr = NULL; + aiter->iter_mapsize = 0; +} + +void +abd_cache_reap_now(void) +{ + kmem_cache_reap_now(abd_chunk_cache); +} diff --git a/module/os/macos/zfs/arc_os.c b/module/os/macos/zfs/arc_os.c new file mode 100644 index 0000000000..2bac6ee73a --- /dev/null +++ b/module/os/macos/zfs/arc_os.c @@ -0,0 +1,845 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _KERNEL +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +int64_t last_free_memory; +free_memory_reason_t last_free_reason; + +extern arc_stats_t arc_stats; + +static kmutex_t arc_reclaim_lock; +static kcondvar_t arc_reclaim_thread_cv; +static boolean_t arc_reclaim_thread_exit; +static kcondvar_t arc_reclaim_waiters_cv; + +/* + * log2(fraction of ARC which must be free to allow growing). + * I.e. 
If there is less than arc_c >> arc_no_grow_shift free memory, + * when reading a new block into the ARC, we will evict an equal-sized block + * from the ARC. + * + * This must be less than arc_shrink_shift, so that when we shrink the ARC, + * we will still not allow it to grow. + */ +extern int arc_no_grow_shift; + + +/* + * Return a default max arc size based on the amount of physical memory. + */ +uint64_t +arc_default_max(uint64_t min, uint64_t allmem) +{ + /* Default to 1/3 of all memory. */ + return (MAX(allmem / 3, min)); +} + +#ifdef _KERNEL + +/* Remove these uses of _Atomic */ +static _Atomic boolean_t arc_reclaim_in_loop = B_FALSE; +static _Atomic int64_t reclaim_shrink_target = 0; + +/* + * Return maximum amount of memory that we could possibly use. Reduced + * to half of all memory in user space which is primarily used for testing. + */ +uint64_t +arc_all_memory(void) +{ + return (kmem_size()); +} + +/* + * Return the amount of memory that is considered free. In user space + * which is primarily used for testing we pretend that free memory ranges + * from 0-20% of all memory. + */ +uint64_t +arc_free_memory(void) +{ + int64_t avail; + + avail = spl_free_wrapper(); + return (avail >= 0LL ? avail : 0LL); +} + +/* + * Return the amount of memory that can be consumed before reclaim will be + * needed. Positive if there is sufficient free memory, negative indicates + * the amount of memory that needs to be freed up. + */ +int64_t +arc_available_memory(void) +{ + int64_t lowest = INT64_MAX; + free_memory_reason_t r = FMR_UNKNOWN; + + /* + * memory logic is in spl_free_wrapper(), including absorption + * of pressure terms + */ + lowest = spl_free_wrapper(); + r = FMR_NEEDFREE; + if (spl_free_fast_pressure_wrapper() != FALSE) { + /* wake up arc_reclaim_thread() if it is sleeping */ + cv_signal(&arc_reclaim_thread_cv); + } + + last_free_memory = lowest; + last_free_reason = r; + + return (lowest); +} + +int +arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) +{ + int64_t available_memory = spl_free_wrapper(); + int64_t freemem = available_memory / PAGESIZE; + static uint64_t page_load = 0; + static uint64_t last_txg = 0; + +#if defined(__i386) + available_memory = + MIN(available_memory, vmem_size(heap_arena, VMEM_FREE)); +#endif + + if (txg > last_txg) { + last_txg = txg; + page_load = 0; + } + + if (freemem > physmem * arc_lotsfree_percent / 100) { + page_load = 0; + return (0); + } + + /* + * If we are in pageout, we know that memory is already tight, + * the arc is already going to be evicting, so we just want to + * continue to let page writes occur as quickly as possible. 
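+	 *
+	 * On this port that state is approximated with the SPL pressure
+	 * hooks below: manual pressure wakes the reclaim thread, and once
+	 * page_load has built up under low memory the caller is throttled
+	 * with EAGAIN (or ERESTART).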
+ */ + + if (spl_free_manual_pressure_wrapper() != 0 && + arc_reclaim_in_loop == B_FALSE) { + cv_signal(&arc_reclaim_thread_cv); + kpreempt(KPREEMPT_SYNC); + page_load = 0; + } + + if (!spl_minimal_physmem_p() && page_load > 0) { + ARCSTAT_INCR(arcstat_memory_throttle_count, 1); + printf("ZFS: %s: !spl_minimal_physmem_p(), available_memory " + "== %lld, page_load = %llu, txg = %llu, reserve = %llu\n", + __func__, available_memory, page_load, txg, reserve); + if (arc_reclaim_in_loop == B_FALSE) + cv_signal(&arc_reclaim_thread_cv); + kpreempt(KPREEMPT_SYNC); + page_load = 0; + return (SET_ERROR(EAGAIN)); + } + + if (arc_reclaim_needed() && page_load > 0) { + ARCSTAT_INCR(arcstat_memory_throttle_count, 1); + printf("ZFS: %s: arc_reclaim_needed(), available_memory " + "== %lld, page_load = %llu, txg = %llu, reserve = %lld\n", + __func__, available_memory, page_load, txg, reserve); + if (arc_reclaim_in_loop == B_FALSE) + cv_signal(&arc_reclaim_thread_cv); + kpreempt(KPREEMPT_SYNC); + page_load = 0; + return (SET_ERROR(EAGAIN)); + } + + /* as with sun, assume we are reclaiming */ + if (available_memory <= 0 || page_load > available_memory / 4) { + return (SET_ERROR(ERESTART)); + } + + if (!spl_minimal_physmem_p()) { + page_load += reserve/8; + return (0); + } + + page_load = 0; + + return (0); +} + +int64_t +arc_shrink(int64_t to_free) +{ + int64_t shrank = 0; + int64_t arc_c_before = arc_c; + int64_t arc_adjust_evicted = 0; + + uint64_t asize = aggsum_value(&arc_size); + if (arc_c > arc_c_min) { + + if (arc_c > arc_c_min + to_free) + atomic_add_64(&arc_c, -to_free); + else + arc_c = arc_c_min; + + atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); + if (asize < arc_c) + arc_c = MAX(asize, arc_c_min); + if (arc_p > arc_c) + arc_p = (arc_c >> 1); + ASSERT(arc_c >= arc_c_min); + ASSERT((int64_t)arc_p >= 0); + } + + shrank = arc_c_before - arc_c; + + if (aggsum_value(&arc_size) > arc_c) { + zthr_wakeup(arc_adjust_zthr); + arc_adjust_evicted = 0; + } + return (shrank + arc_adjust_evicted); +} + + +/* + * arc.c has a arc_reap_zthr we should probably use, instead of + * having our own legacy arc_reclaim_thread(). + */ +static void arc_kmem_reap_now(void) +{ + + /* arc.c will do the heavy lifting */ + arc_kmem_reap_soon(); + + /* Now some OsX additionals */ + extern kmem_cache_t *abd_chunk_cache; + extern kmem_cache_t *znode_cache; + + kmem_cache_reap_now(abd_chunk_cache); + if (znode_cache) kmem_cache_reap_now(znode_cache); + + if (zio_arena_parent != NULL) { + /* + * Ask the vmem arena to reclaim unused memory from its + * quantum caches. + */ + vmem_qcache_reap(zio_arena_parent); + } +} + + + +/* + * Threads can block in arc_get_data_impl() waiting for this thread to evict + * enough data and signal them to proceed. When this happens, the threads in + * arc_get_data_impl() are sleeping while holding the hash lock for their + * particular arc header. Thus, we must be careful to never sleep on a + * hash lock in this thread. This is to prevent the following deadlock: + * + * - Thread A sleeps on CV in arc_get_data_impl() holding hash lock "L", + * waiting for the reclaim thread to signal it. + * + * - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter, + * fails, and goes to sleep forever. + * + * This possible deadlock is avoided by always acquiring a hash lock + * using mutex_tryenter() from arc_reclaim_thread(). 
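+ *
+ * In this port the actual eviction is driven by arc_adjust_zthr, which
+ * the reclaim thread wakes via zthr_wakeup(); the reclaim thread itself
+ * does not take header hash locks directly.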
+ */ +static void +arc_reclaim_thread(void *unused) +{ + hrtime_t growtime = 0; + callb_cpr_t cpr; + + CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG); + + mutex_enter(&arc_reclaim_lock); + while (!arc_reclaim_thread_exit) { + arc_reclaim_in_loop = B_TRUE; + uint64_t evicted = 0; + + mutex_exit(&arc_reclaim_lock); + + if (reclaim_shrink_target > 0) { + int64_t t = reclaim_shrink_target; + reclaim_shrink_target = 0; + evicted = arc_shrink(t); + extern kmem_cache_t *abd_chunk_cache; + kmem_cache_reap_now(abd_chunk_cache); + IOSleep(1); + goto lock_and_sleep; + } + + int64_t pre_adjust_free_memory = MIN(spl_free_wrapper(), + arc_available_memory()); + + int64_t manual_pressure = spl_free_manual_pressure_wrapper(); + spl_free_set_pressure(0); // clears both spl pressure variables + + /* + * We call arc_adjust() before (possibly) calling + * arc_kmem_reap_now(), so that we can wake up + * arc_get_data_impl() sooner. + */ + zthr_wakeup(arc_adjust_zthr); + + int64_t free_memory = arc_available_memory(); + + int64_t post_adjust_manual_pressure = + spl_free_manual_pressure_wrapper(); + manual_pressure = MAX(manual_pressure, + post_adjust_manual_pressure); + spl_free_set_pressure(0); + + int64_t post_adjust_free_memory = + MIN(spl_free_wrapper(), arc_available_memory()); + + // if arc_adjust() evicted, we expect post_adjust_free_memory + // to be larger than pre_adjust_free_memory (as there should + // be more free memory). + int64_t d_adj = post_adjust_free_memory - + pre_adjust_free_memory; + + if (manual_pressure > 0 && post_adjust_manual_pressure == 0) { + // pressure did not get re-signalled during arc_adjust() + if (d_adj >= 0) { + manual_pressure -= MIN(evicted, d_adj); + } else { + manual_pressure -= evicted; + } + } else if (evicted > 0 && manual_pressure > 0 && + post_adjust_manual_pressure > 0) { + // otherwise use the most recent pressure value + manual_pressure = post_adjust_manual_pressure; + } + + free_memory = post_adjust_free_memory; + + if (free_memory >= 0 && manual_pressure <= 0 && evicted > 0) { + extern kmem_cache_t *abd_chunk_cache; + kmem_cache_reap_now(abd_chunk_cache); + } + + if (free_memory < 0 || manual_pressure > 0) { + + if (free_memory <= + (arc_c >> arc_no_grow_shift) + SPA_MAXBLOCKSIZE) { + arc_no_grow = B_TRUE; + + /* + * Absorb occasional low memory conditions, as they + * may be caused by a single sequentially writing thread + * pushing a lot of dirty data into the ARC. + * + * In particular, we want to quickly + * begin re-growing the ARC if we are + * not in chronic high pressure. + * However, if we're in chronic high + * pressure, we want to reduce reclaim + * thread work by keeping arc_no_grow set. + * + * If growtime is in the past, then set it to last + * half a second (which is the length of the + * cv_timedwait_hires() call below; if this works, + * that value should be a parameter, #defined or constified. + * + * If growtime is in the future, then make sure that it + * is no further than 60 seconds into the future. + * If it's in the nearer future, then grow growtime by + * an exponentially increasing value starting with 500msec. + * + */ + const hrtime_t curtime = gethrtime(); + const hrtime_t agr = SEC2NSEC(arc_grow_retry); + static int grow_pass = 0; + + if (growtime == 0) { + growtime = curtime + MSEC2NSEC(500); + grow_pass = 0; + } else { + // check for 500ms not being enough + ASSERT3U(growtime, >, curtime); + if (growtime <= curtime) + growtime = curtime + + MSEC2NSEC(500); + + // growtime is in the future! 
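+				// Back off exponentially: extend growtime by
+				// 500 ms, 1 s, 2 s, ... (grow_pass doubles the
+				// step), capping each step at half of
+				// arc_grow_retry and the total at roughly
+				// arc_grow_retry seconds ahead.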
+ const hrtime_t difference = + growtime - curtime; + + if (difference >= agr) { + // cap arc_grow_retry secs now + growtime = curtime + agr - 1LL; + grow_pass = 0; + } else { + hrtime_t grow_by = + MSEC2NSEC(500) * + (1LL << grow_pass); + + if (grow_by > (agr >> 1)) + grow_by = agr >> 1; + + growtime += grow_by; + + // add 512 seconds maximum + if (grow_pass < 10) + grow_pass++; + } + } + } + + arc_warm = B_TRUE; + + arc_kmem_reap_now(); + + /* + * If we are still low on memory, shrink the ARC + * so that we have arc_shrink_min free space. + */ + free_memory = arc_available_memory(); + + static int64_t old_to_free = 0; + + int64_t to_free = + (arc_c >> arc_shrink_shift) - free_memory; + + if (to_free > 0 || manual_pressure != 0) { + // 2 * SPA_MAXBLOCKSIZE + const int64_t large_amount = + 32LL * 1024LL * 1024LL; + const int64_t huge_amount = + 128LL * 1024LL * 1024LL; + + if (to_free > large_amount || + evicted > huge_amount) + printf("SPL: %s: post-reap %lld " + "post-evict %lld adjusted %lld " + "pre-adjust %lld to-free %lld" + " pressure %lld\n", + __func__, free_memory, d_adj, + evicted, pre_adjust_free_memory, + to_free, manual_pressure); + to_free = MAX(to_free, manual_pressure); + + int64_t old_arc_size = + (int64_t)aggsum_value(&arc_size); + (void) arc_shrink(to_free); + int64_t new_arc_size = + (int64_t)aggsum_value(&arc_size); + int64_t arc_shrink_freed = + old_arc_size - new_arc_size; + int64_t left_to_free = + to_free - arc_shrink_freed; + if (left_to_free <= 0) { + if (arc_shrink_freed > large_amount) { + printf("ZFS: %s, arc_shrink " + "freed %lld, zeroing " + "old_to_free from %lld\n", + __func__, arc_shrink_freed, + old_to_free); + } + old_to_free = 0; + } else if (arc_shrink_freed > 2LL * + (int64_t)SPA_MAXBLOCKSIZE) { + printf("ZFS: %s, arc_shrink freed " + "%lld, setting old_to_free to " + "%lld from %lld\n", + __func__, arc_shrink_freed, + left_to_free, old_to_free); + old_to_free = left_to_free; + } else { + old_to_free = left_to_free; + } + + // If we have reduced ARC by a lot before + // this point, try to give memory back to + // lower arenas (and possibly xnu). + + int64_t total_freed = + arc_shrink_freed + evicted; + if (total_freed >= huge_amount) { + if (zio_arena_parent != NULL) + vmem_qcache_reap( + zio_arena_parent); + } + if (arc_shrink_freed > 0) + evicted += arc_shrink_freed; + } else if (old_to_free > 0) { + printf("ZFS: %s, (old_)to_free has " + "returned to zero from %lld\n", + __func__, old_to_free); + old_to_free = 0; + } + + } else if (free_memory < (arc_c >> arc_no_grow_shift) && + aggsum_value(&arc_size) > + arc_c_min + SPA_MAXBLOCKSIZE) { + // relatively low memory and arc is above arc_c_min + arc_no_grow = B_TRUE; + growtime = gethrtime() + SEC2NSEC(1); + } + + if (growtime > 0 && gethrtime() >= growtime) { + if (arc_no_grow == B_TRUE) + dprintf("ZFS: arc growtime expired\n"); + growtime = 0; + arc_no_grow = B_FALSE; + } + +lock_and_sleep: + + mutex_enter(&arc_reclaim_lock); + + /* + * If evicted is zero, we couldn't evict anything via + * arc_adjust(). This could be due to hash lock + * collisions, but more likely due to the majority of + * arc buffers being unevictable. Therefore, even if + * arc_size is above arc_c, another pass is unlikely to + * be helpful and could potentially cause us to enter an + * infinite loop. + */ + if (aggsum_compare(&arc_size, arc_c) <= 0 || evicted == 0) { + /* + * We're either no longer overflowing, or we + * can't evict anything more, so we should wake + * up any threads before we go to sleep. 
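+			 * Waiters are either all released (cv_broadcast) or
+			 * released one at a time (cv_signal) in the branches
+			 * below, depending on how much was actually evicted.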
+ */ + cv_broadcast(&arc_reclaim_waiters_cv); + + arc_reclaim_in_loop = B_FALSE; + /* + * Block until signaled, or after one second (we + * might need to perform arc_kmem_reap_now() + * even if we aren't being signalled) + */ + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait_hires(&arc_reclaim_thread_cv, + &arc_reclaim_lock, MSEC2NSEC(500), MSEC2NSEC(1), 0); + CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_lock); + + } else if (evicted >= SPA_MAXBLOCKSIZE * 3) { + // we evicted plenty of buffers, so let's wake up + // all the waiters rather than having them stall + cv_broadcast(&arc_reclaim_waiters_cv); + } else { + // we evicted some buffers but are still overflowing, + // so wake up only one waiter + cv_signal(&arc_reclaim_waiters_cv); + } + } + + arc_reclaim_thread_exit = B_FALSE; + cv_broadcast(&arc_reclaim_thread_cv); + CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_lock */ + thread_exit(); +} + +/* This is called before arc is initialized, and threads are not running */ +void +arc_lowmem_init(void) +{ +} + +/* This is called after arc is initialized, and thread are running */ +void +arc_os_init(void) +{ + mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL); + cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL); + + arc_reclaim_thread_exit = B_FALSE; + + (void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0, + TS_RUN, minclsyspri); + + arc_warm = B_FALSE; + +} + +void +arc_lowmem_fini(void) +{ +} + +void +arc_os_fini(void) +{ + mutex_enter(&arc_reclaim_lock); + arc_reclaim_thread_exit = B_TRUE; + /* + * The reclaim thread will set arc_reclaim_thread_exit back to + * B_FALSE when it is finished exiting; we're waiting for that. + */ + while (arc_reclaim_thread_exit) { + cv_signal(&arc_reclaim_thread_cv); + cv_wait(&arc_reclaim_thread_cv, &arc_reclaim_lock); + } + mutex_exit(&arc_reclaim_lock); + + mutex_destroy(&arc_reclaim_lock); + cv_destroy(&arc_reclaim_thread_cv); + cv_destroy(&arc_reclaim_waiters_cv); +} + +/* + * Uses ARC static variables in logic. + */ +#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */ +/* max size for dnodes */ +#define arc_dnode_size_limit ARCSTAT(arcstat_dnode_limit) +#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ +#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ + +/* So close, they made arc_min_prefetch_ms be static, but no others */ + +int +arc_kstat_update_osx(kstat_t *ksp, int rw) +{ + osx_kstat_t *ks = ksp->ks_data; + + if (rw == KSTAT_WRITE) { + + /* Did we change the value ? 
*/ + if (ks->arc_zfs_arc_max.value.ui64 != zfs_arc_max) { + + /* Assign new value */ + zfs_arc_max = ks->arc_zfs_arc_max.value.ui64; + + /* Update ARC with new value */ + if (zfs_arc_max > 64<<20 && zfs_arc_max < + physmem * PAGESIZE) + arc_c_max = zfs_arc_max; + + arc_c = arc_c_max; + arc_p = (arc_c >> 1); + + /* If meta_limit is not set, adjust it automatically */ + if (!zfs_arc_meta_limit) + arc_meta_limit = arc_c_max / 4; + } + + if (ks->arc_zfs_arc_min.value.ui64 != zfs_arc_min) { + zfs_arc_min = ks->arc_zfs_arc_min.value.ui64; + if (zfs_arc_min > 64<<20 && zfs_arc_min <= arc_c_max) { + arc_c_min = zfs_arc_min; + printf("ZFS: set arc_c_min %llu, arc_meta_min " + "%llu, zfs_arc_meta_min %llu\n", + arc_c_min, arc_meta_min, zfs_arc_meta_min); + if (arc_c < arc_c_min) { + printf("ZFS: raise arc_c %llu to " + "arc_c_min %llu\n", arc_c, + arc_c_min); + arc_c = arc_c_min; + if (arc_p < (arc_c >> 1)) { + printf("ZFS: raise arc_p %llu " + "to %llu\n", + arc_p, (arc_c >> 1)); + arc_p = (arc_c >> 1); + } + } + } + } + + if (ks->arc_zfs_arc_meta_limit.value.ui64 != + zfs_arc_meta_limit) { + zfs_arc_meta_limit = + ks->arc_zfs_arc_meta_limit.value.ui64; + + /* Allow the tunable to override if it is reasonable */ + if (zfs_arc_meta_limit > 0 && + zfs_arc_meta_limit <= arc_c_max) + arc_meta_limit = zfs_arc_meta_limit; + + if (arc_c_min < arc_meta_limit / 2 && + zfs_arc_min == 0) + arc_c_min = arc_meta_limit / 2; + + printf("ZFS: set arc_meta_limit %llu, arc_c_min %llu," + "zfs_arc_meta_limit %lu\n", + arc_meta_limit, arc_c_min, zfs_arc_meta_limit); + } + + if (ks->arc_zfs_arc_meta_min.value.ui64 != zfs_arc_meta_min) { + zfs_arc_meta_min = ks->arc_zfs_arc_meta_min.value.ui64; + if (zfs_arc_meta_min >= arc_c_min) { + printf("ZFS: probable error, zfs_arc_meta_min " + "%llu >= arc_c_min %llu\n", + zfs_arc_meta_min, arc_c_min); + } + if (zfs_arc_meta_min > 0 && + zfs_arc_meta_min <= arc_meta_limit) + arc_meta_min = zfs_arc_meta_min; + printf("ZFS: set arc_meta_min %llu\n", arc_meta_min); + } + + zfs_arc_grow_retry = ks->arc_zfs_arc_grow_retry.value.ui64; + arc_grow_retry = zfs_arc_grow_retry; + zfs_arc_shrink_shift = ks->arc_zfs_arc_shrink_shift.value.ui64; + zfs_arc_p_min_shift = ks->arc_zfs_arc_p_min_shift.value.ui64; + zfs_arc_average_blocksize = + ks->arc_zfs_arc_average_blocksize.value.ui64; + + } else { + + ks->arc_zfs_arc_max.value.ui64 = zfs_arc_max; + ks->arc_zfs_arc_min.value.ui64 = zfs_arc_min; + + ks->arc_zfs_arc_meta_limit.value.ui64 = zfs_arc_meta_limit; + ks->arc_zfs_arc_meta_min.value.ui64 = zfs_arc_meta_min; + + ks->arc_zfs_arc_grow_retry.value.ui64 = + zfs_arc_grow_retry ? zfs_arc_grow_retry : arc_grow_retry; + ks->arc_zfs_arc_shrink_shift.value.ui64 = zfs_arc_shrink_shift; + ks->arc_zfs_arc_p_min_shift.value.ui64 = zfs_arc_p_min_shift; + ks->arc_zfs_arc_average_blocksize.value.ui64 = + zfs_arc_average_blocksize; + } + return (0); +} + +/* + * Helper function for arc_prune_async() it is responsible for safely + * handling the execution of a registered arc_prune_func_t. + */ +static void +arc_prune_task(void *ptr) +{ + arc_prune_t *ap = (arc_prune_t *)ptr; + arc_prune_func_t *func = ap->p_pfunc; + + if (func != NULL) + func(ap->p_adjust, ap->p_private); + + zfs_refcount_remove(&ap->p_refcnt, func); +} + +/* + * Notify registered consumers they must drop holds on a portion of the ARC + * buffered they reference. This provides a mechanism to ensure the ARC can + * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. 
This + * is analogous to dnlc_reduce_cache() but more generic. + * + * This operation is performed asynchronously so it may be safely called + * in the context of the arc_reclaim_thread(). A reference is taken here + * for each registered arc_prune_t and the arc_prune_task() is responsible + * for releasing it once the registered arc_prune_func_t has completed. + */ +void +arc_prune_async(int64_t adjust) +{ + arc_prune_t *ap; + + mutex_enter(&arc_prune_mtx); + for (ap = list_head(&arc_prune_list); ap != NULL; + ap = list_next(&arc_prune_list, ap)) { + + if (zfs_refcount_count(&ap->p_refcnt) >= 2) + continue; + + zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc); + ap->p_adjust = adjust; + if (taskq_dispatch(arc_prune_taskq, arc_prune_task, + ap, TQ_SLEEP) == TASKQID_INVALID) { + zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); + continue; + } + ARCSTAT_BUMP(arcstat_prune); + } + mutex_exit(&arc_prune_mtx); +} + +#else /* KERNEL */ + +int64_t +arc_available_memory(void) +{ + int64_t lowest = INT64_MAX; + free_memory_reason_t r = FMR_UNKNOWN; + + /* Every 100 calls, free a small amount */ + if (spa_get_random(100) == 0) + lowest = -1024; + + last_free_memory = lowest; + last_free_reason = r; + + return (lowest); +} + +int +arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) +{ + return (0); +} + +uint64_t +arc_all_memory(void) +{ + return (ptob(physmem) / 2); +} + +uint64_t +arc_free_memory(void) +{ + return (spa_get_random(arc_all_memory() * 20 / 100)); +} + +#endif /* KERNEL */ diff --git a/module/os/macos/zfs/ldi_iokit.cpp b/module/os/macos/zfs/ldi_iokit.cpp new file mode 100644 index 0000000000..bae0060efb --- /dev/null +++ b/module/os/macos/zfs/ldi_iokit.cpp @@ -0,0 +1,2000 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ +/* + * Portions of this document are copyright Oracle and Joyent. + * OS X implementation of ldi_ named functions for ZFS written by + * Evan Susarret in 2015. + */ + +/* + * Apple IOKit (c++) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ZFS internal + */ +#include + +/* + * LDI Includes + */ +#include + +/* Debug prints */ +#if defined(DEBUG) || defined(ZFS_DEBUG) + +#ifdef dprintf +#undef dprintf +#endif + +#define dprintf(fmt, ...) do { \ + IOLog(fmt, __VA_ARGS__); \ +_NOTE(CONSTCOND) } while (0) +#endif + +/* Attach created IOService objects to the IORegistry under ZFS. 
*/ +// #define LDI_IOREGISTRY_ATTACH + +/* + * Globals + */ +static IOService *ldi_zfs_handle; + +/* Exposed to c callers */ +extern "C" { + +struct _handle_iokit { + IOMedia *media; + IOService *client; +}; /* 16b */ + +struct _handle_notifier { + IONotifier *obj; +}; /* 8b */ + +#define LH_MEDIA(lhp) lhp->lh_tsd.iokit_tsd->media +#define LH_CLIENT(lhp) lhp->lh_tsd.iokit_tsd->client +#define LH_NOTIFIER(lhp) lhp->lh_notifier->obj + +void +handle_free_iokit(struct ldi_handle *lhp) { + if (!lhp) { + dprintf("%s missing lhp\n", __func__); + return; + } + + if (!lhp->lh_tsd.iokit_tsd) { + dprintf("%s missing iokit_tsd\n", __func__); + return; + } + + /* Free IOService client */ + if (handle_free_ioservice(lhp) != 0) { + dprintf("%s lhp %p client %s\n", + __func__, lhp, "couldn't be removed"); + } + + kmem_free(lhp->lh_tsd.iokit_tsd, sizeof (struct _handle_iokit)); + lhp->lh_tsd.iokit_tsd = 0; +} + +/* Returns handle with lock still held */ +struct ldi_handle * +handle_alloc_iokit(dev_t device, int fmode) +{ + struct ldi_handle *lhp, *retlhp; + + /* Search for existing handle */ + if ((retlhp = handle_find(device, fmode, B_TRUE)) != NULL) { + dprintf("%s found handle before alloc\n", __func__); + return (retlhp); + } + + /* Allocate an LDI IOKit handle */ + if ((lhp = handle_alloc_common(LDI_TYPE_IOKIT, device, + fmode)) == NULL) { + dprintf("%s couldn't allocate handle\n", __func__); + return (NULL); + } + + /* Allocate and clear type-specific device data */ + lhp->lh_tsd.iokit_tsd = (struct _handle_iokit *)kmem_alloc( + sizeof (struct _handle_iokit), KM_SLEEP); + LH_MEDIA(lhp) = 0; + LH_CLIENT(lhp) = 0; + + /* Allocate an IOService client for open/close */ + if (handle_alloc_ioservice(lhp) != 0) { + dprintf("%s couldn't allocate IOService client\n", __func__); + handle_release(lhp); + return (NULL); + } + + /* Add the handle to the list, or return match */ + if ((retlhp = handle_add(lhp)) == NULL) { + dprintf("%s handle_add failed\n", __func__); + handle_release(lhp); + return (NULL); + } + + /* Check if new or found handle was returned */ + if (retlhp != lhp) { + dprintf("%s found handle after alloc\n", __func__); + handle_release(lhp); + lhp = 0; + } + + return (retlhp); +} + +int +handle_free_ioservice(struct ldi_handle *lhp) +{ + /* Validate handle pointer */ + ASSERT3U(lhp, !=, NULL); +#ifdef DEBUG + if (!lhp) { + dprintf("%s missing handle\n", __func__); + return (EINVAL); + } + if (!LH_CLIENT(lhp)) { + dprintf("%s missing client\n", __func__); + return (ENODEV); + } +#endif + +#ifdef LDI_IOREGISTRY_ATTACH + /* Detach client from ZFS in IORegistry */ + LH_CLIENT(lhp)->detach(ldi_zfs_handle); +#endif + + LH_CLIENT(lhp)->stop(ldi_zfs_handle); + LH_CLIENT(lhp)->release(); + LH_CLIENT(lhp) = 0; + + return (0); +} + +int +handle_alloc_ioservice(struct ldi_handle *lhp) +{ + IOService *client; + + /* Validate handle pointer */ + ASSERT3U(lhp, !=, NULL); + if (lhp == NULL) { + dprintf("%s missing handle\n", __func__); + return (EINVAL); + } + + /* Allocate and init an IOService client for open/close */ + if ((client = new IOService) == NULL) { + dprintf("%s couldn't allocate new IOService\n", __func__); + return (ENOMEM); + } + if (client->init(0) != true) { + dprintf("%s IOService init failed\n", __func__); + client->release(); + return (ENOMEM); + } + +#ifdef LDI_IOREGISTRY_ATTACH + /* Attach client to ZFS in IORegistry */ + if (client->attach(ldi_zfs_handle) != true) { + dprintf("%s IOService attach failed\n", __func__); + client->release(); + return (ENOMEM); + } +#endif + + /* Start 
service */ + if (client->start(ldi_zfs_handle) != true) { + dprintf("%s IOService attach failed\n", __func__); + /* Detach client from ZFS in IORegistry */ +#ifdef LDI_IOREGISTRY_ATTACH + client->detach(ldi_zfs_handle); +#endif + client->release(); + return (ENOMEM); + } + + LH_CLIENT(lhp) = client; + return (0); +} + +/* Set status to Offline and post event */ +static bool +handle_media_terminate_cb(void* target, void* refCon, + IOService* newService, IONotifier* notifier) +{ + struct ldi_handle *lhp = (struct ldi_handle *)refCon; + +#ifdef DEBUG + if (!lhp) { + dprintf("%s missing refCon ldi_handle\n", __func__); + return (false); + } +#endif + + /* Take hold on handle to prevent removal */ + handle_hold(lhp); + + dprintf("%s setting lhp %p to Offline status\n", __func__, lhp); + if (handle_status_change(lhp, LDI_STATUS_OFFLINE) != 0) { + dprintf("%s handle_status_change failed\n", __func__); + handle_release(lhp); + return (false); + } + + handle_release(lhp); + return (true); +} + +int +handle_close_iokit(struct ldi_handle *lhp) +{ +#ifdef DEBUG + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_type, ==, LDI_TYPE_IOKIT); + ASSERT3U(lhp->lh_status, ==, LDI_STATUS_CLOSING); + + /* Validate IOMedia and IOService client */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp)) || + !OSDynamicCast(IOService, LH_CLIENT(lhp))) { + dprintf("%s invalid IOMedia or client\n", __func__); + return (ENODEV); + } +#endif /* DEBUG */ + + LH_MEDIA(lhp)->close(LH_CLIENT(lhp)); + LH_MEDIA(lhp) = 0; + return (0); +} + +static int +handle_open_iokit(struct ldi_handle *lhp, IOMedia *media) +{ +#ifdef DEBUG + ASSERT3U(lhp, !=, NULL); + ASSERT3U(media, !=, NULL); + ASSERT3U(lhp->lh_type, ==, LDI_TYPE_IOKIT); + ASSERT3U(lhp->lh_status, ==, LDI_STATUS_OPENING); + + /* Validate IOMedia and IOService client */ + if (!OSDynamicCast(IOMedia, media) || + !OSDynamicCast(IOService, LH_CLIENT(lhp))) { + dprintf("%s invalid IOMedia or client\n", __func__); + return (ENODEV); + } +#endif /* DEBUG */ + /* Retain until open or error */ + media->retain(); + + /* + * If read/write mode is requested, check that the + * device is actually writeable. + */ + if (lhp->lh_fmode & FWRITE && media->isWritable() == false) { + dprintf("%s read-write requested on %s\n", + __func__, "read-only IOMedia"); + media->release(); + return (EPERM); + } + + /* Call open with the IOService client handle */ + if (media->IOMedia::open(LH_CLIENT(lhp), 0, + (lhp->lh_fmode & FWRITE ? 
kIOStorageAccessReaderWriter : + kIOStorageAccessReader)) == false) { + dprintf("%s IOMedia->open failed\n", __func__); + media->release(); + return (EIO); + } + media->release(); + + /* Assign IOMedia device */ + LH_MEDIA(lhp) = media; + return (0); +} + +int +handle_get_size_iokit(struct ldi_handle *lhp, uint64_t *dev_size) +{ + if (!lhp || !dev_size) { + dprintf("%s missing lhp or dev_size\n", __func__); + return (EINVAL); + } + +#ifdef DEBUG + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s no IOMedia\n", __func__); + return (ENODEV); + } +#endif + + *dev_size = LH_MEDIA(lhp)->getSize(); + if (*dev_size == 0) { + dprintf("%s %s\n", __func__, + "IOMedia getSize returned 0"); + return (EINVAL); + } + + return (0); +} + +int +handle_get_dev_path_iokit(struct ldi_handle *lhp, + char *path, int len) +{ + int retlen = len; + + if (!lhp || !path || len == 0) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + +#ifdef DEBUG + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s no IOMedia\n", __func__); + return (ENODEV); + } +#endif + + if (LH_MEDIA(lhp)->getPath(path, &retlen, gIODTPlane) == false) { + dprintf("%s getPath failed\n", __func__); + return (EIO); + } + +dprintf("%s got path [%s]\n", __func__, path); + return (0); +} + +int handle_get_bootinfo_iokit(struct ldi_handle *lhp, + struct io_bootinfo *bootinfo) +{ + int error = 0; + + if (!lhp || !bootinfo) { + dprintf("%s missing argument\n", __func__); +printf("%s missing argument\n", __func__); + return (EINVAL); + } + + if ((error = handle_get_size_iokit(lhp, + &bootinfo->dev_size)) != 0 || + (error = handle_get_dev_path_iokit(lhp, bootinfo->dev_path, + sizeof (bootinfo->dev_path))) != 0) { + dprintf("%s get size or dev_path error %d\n", + __func__, error); + } + + return (error); +} + +int +handle_sync_iokit(struct ldi_handle *lhp) +{ +#ifdef DEBUG + /* Validate IOMedia and client */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp)) || + !OSDynamicCast(IOService, LH_CLIENT(lhp))) { + dprintf("%s invalid IOMedia or client\n", __func__); + return (ENODEV); + } +#endif + +#if defined(MAC_OS_X_VERSION_10_11) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_11) + /* Issue device sync */ + if (LH_MEDIA(lhp)->synchronize(LH_CLIENT(lhp), 0, 0, 0) != + kIOReturnSuccess) { + dprintf("%s %s\n", __func__, + "IOMedia synchronizeCache failed"); + return (ENOTSUP); + } +#else + /* Issue device sync */ + if (LH_MEDIA(lhp)->synchronizeCache(LH_CLIENT(lhp)) != + kIOReturnSuccess) { + dprintf("%s %s\n", __func__, + "IOMedia synchronizeCache failed"); + return (ENOTSUP); + } +#endif + + /* Success */ + return (0); +} + +static dev_t +dev_from_media(IOMedia *media) +{ + OSObject *property; + OSNumber *number; + uint32_t major, minor; + dev_t device = 0; + + /* Validate media */ + if (!media || !OSDynamicCast(IOMedia, media)) { + dprintf("%s no device\n", __func__); + return (0); + } + media->retain(); + + /* Get device major */ + if (NULL == (property = media->getProperty(kIOBSDMajorKey, + gIOServicePlane, kIORegistryIterateRecursively)) || + NULL == (number = OSDynamicCast(OSNumber, property))) { + dprintf("%s couldn't get BSD major\n", __func__); + media->release(); + return (0); + } + major = number->unsigned32BitValue(); + number = NULL; + property = NULL; + + /* Get device minor */ + if (NULL == (property = media->getProperty(kIOBSDMinorKey, + gIOServicePlane, kIORegistryIterateRecursively)) || + NULL == (number = OSDynamicCast(OSNumber, property))) { 
+ dprintf("%s couldn't get BSD major\n", __func__); + media->release(); + return (0); + } + minor = number->unsigned32BitValue(); + number = NULL; + property = NULL; + + /* Cleanup */ + media->release(); + media = NULL; + + device = makedev(major, minor); + + /* Return 0 or valid dev_t */ + return (device); +} + +/* Returns NULL or dictionary with a retain count */ +static OSDictionary * +media_matchdict_from_dev(dev_t device) +{ + OSDictionary *matchDict; + OSNumber *majorNum, *minorNum; + + /* Validate dev_t */ + if (device == 0) { + dprintf("%s no dev_t provided\n", __func__); + return (NULL); + } + + /* Allocate OSNumbers for BSD major and minor (32-bit) */ + if (NULL == (majorNum = OSNumber::withNumber(major(device), 32)) || + NULL == (minorNum = OSNumber::withNumber(minor(device), 32))) { + dprintf("%s couldn't alloc major/minor as OSNumber\n", + __func__); + if (majorNum) { + majorNum->release(); + } + return (NULL); + } + + /* Match on IOMedia */ + if (NULL == (matchDict = IOService::serviceMatching("IOMedia")) || + !(matchDict->setObject(kIOBSDMajorKey, majorNum)) || + !(matchDict->setObject(kIOBSDMinorKey, minorNum))) { + dprintf("%s couldn't get matching dictionary\n", __func__); + if (matchDict) { + matchDict->release(); + } + majorNum->release(); + minorNum->release(); + return (NULL); + } + majorNum->release(); + minorNum->release(); + + /* Return NULL or valid OSDictionary with retain count */ + return (matchDict); +} + +/* Returns NULL or dictionary with a retain count */ +/* + * media_matchdict_from_path + * translate from paths of the form /dev/diskNsN + * or /private/var/run/disk/by-id/media- to a matching + * dictionary. + */ +static OSDictionary * +media_matchdict_from_path(const char *path) +{ + OSDictionary *matchDict = 0; + OSString *bsdName = NULL; + OSString *uuid = NULL; + const char *substr = 0; + bool ret; + + /* Validate path */ + if (path == 0 || strlen(path) <= 1) { + dprintf("%s no path provided\n", __func__); + return (NULL); + } + /* Translate /dev/diskN and InvariantDisks paths */ + if (strncmp(path, "/dev/", 5) != 0 && + strncmp(path, "/var/run/disk/by-id/", 20) != 0 && + strncmp(path, "/private/var/run/disk/by-id/", 28) != 0) { + dprintf("%s Unrecognized path %s\n", __func__, path); + return (NULL); + } + + /* Validate path and alloc bsdName */ + if (strncmp(path, "/dev/", 5) == 0) { + + /* substr starts after '/dev/' */ + substr = path + 5; + /* Get diskN from /dev/diskN or /dev/rdiskN */ + if (strncmp(substr, "disk", 4) == 0) { + bsdName = OSString::withCString(substr); + } else if (strncmp(substr, "rdisk", 5) == 0) { + bsdName = OSString::withCString(substr + 1); + } + } else if (strncmp(path, "/var/run/disk/by-id/", 20) == 0 || + strncmp(path, "/private/var/run/disk/by-id/", 28) == 0) { + /* InvariantDisks paths */ + + /* substr starts after '/by-id/' */ + substr = path + 20; + if (strncmp(path, "/private", 8) == 0) substr += 8; + + /* Handle media UUID, skip volume UUID or device GUID */ + if (strncmp(substr, "media-", 6) == 0) { + /* Lookup IOMedia with UUID */ + uuid = OSString::withCString(substr+strlen("media-")); + } else if (strncmp(substr, "volume-", 7) == 0) { + /* + * volume-UUID is specified by DiskArbitration + * when a Filesystem bundle is able to probe + * the media and retrieve/generate a UUID for + * it's contents. 
+ * So while we could use this and have zfs.util + * probe for vdev GUID (and pool GUID) and + * generate a UUID, we would need to do the same + * here to find the disk, possibly probing + * devices to get the vdev GUID in the process. + */ + dprintf("%s Unsupported volume-UUID path %s\n", + __func__, path); + } else if (strncmp(substr, "device-", 7) == 0) { + /* Lookup IOMedia with device GUID */ + /* + * XXX Not sure when this is used, no devices + * seem to be presented this way. + */ + dprintf("%s Unsupported device-GUID path %s\n", + __func__, path); + } else { + dprintf("%s unrecognized path %s\n", __func__, path); + } + /* by-path and by-serial are handled separately */ + } + + if (!bsdName && !uuid) { + dprintf("%s Invalid path %s\n", __func__, path); + return (NULL); + } + + /* Match on IOMedia by BSD disk name */ + matchDict = IOService::serviceMatching("IOMedia"); + if (!matchDict) { + dprintf("%s couldn't get matching dictionary\n", __func__); + if (bsdName) bsdName->release(); + if (uuid) uuid->release(); + return (NULL); + } + if (bsdName) { + ret = matchDict->setObject(kIOBSDNameKey, bsdName); + bsdName->release(); + + if (!ret) { + dprintf("%s couldn't setup bsd name matching" + " dictionary\n", __func__); + matchDict->release(); + matchDict = 0; + } + if (uuid) uuid->release(); + } else if (uuid) { + if (matchDict->setObject(kIOMediaUUIDKey, uuid) == false) { + dprintf("%s couldn't setup UUID matching" + " dictionary\n", __func__); + uuid->release(); + matchDict->release(); + matchDict = 0; + } + } else { + dprintf("%s missing matching property\n", __func__); + matchDict->release(); + matchDict = 0; + } + + /* Return NULL or valid OSDictionary with retain count */ + return (matchDict); +} + +/* Returns NULL or matched IOMedia with a retain count */ +static IOMedia * +media_from_matchdict(OSDictionary *matchDict) +{ + OSIterator *iter = 0; + OSObject *obj = 0; + IOMedia *media = 0; + + if (!matchDict) { + dprintf("%s missing matching dictionary\n", __func__); + return (NULL); + } + + /* + * We could instead use copyMatchingService, since + * there should only be one match. + */ + iter = IOService::getMatchingServices(matchDict); + if (!iter) { + dprintf("%s No iterator from getMatchingServices\n", + __func__); + return (NULL); + } + + /* Get first object from iterator */ + while ((obj = iter->getNextObject()) != NULL) { + if ((media = OSDynamicCast(IOMedia, obj)) == NULL) { + obj = 0; + continue; + } + if (media->isFormatted() == false) { + obj = 0; + media = 0; + continue; + } + + media->retain(); + break; + } + + if (!media) { + dprintf("%s no match found\n", __func__); + iter->release(); + return (NULL); + } + +#ifdef DEBUG + /* Report if there were additional matches */ + if (iter->getNextObject() != NULL) { + dprintf("%s Had more potential matches\n", __func__); + } +#endif + iter->release(); + iter = 0; + + /* Return valid IOMedia with retain count */ + return (media); +} + +/* + * media_from_dev is intended to be called by ldi_open_by_name + * and ldi_open_by_dev with a dev_t, and returns NULL or an IOMedia + * device with a retain count that should be released on open. 
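The comment in media_from_matchdict() above notes that IOService::copyMatchingService() could be used instead of an iterator, since at most one IOMedia should match. A minimal sketch of that alternative is shown here for illustration only; it is not part of this change, and the helper name is hypothetical:

static IOMedia *
media_from_matchdict_single(OSDictionary *matchDict)
{
	IOService *svc;
	IOMedia *media;

	/* Returns the first matching service, already retained, or NULL */
	if ((svc = IOService::copyMatchingService(matchDict)) == NULL)
		return (NULL);

	if ((media = OSDynamicCast(IOMedia, svc)) == NULL ||
	    media->isFormatted() == false) {
		svc->release();
		return (NULL);
	}

	/* Keep the retain taken by copyMatchingService() for the caller */
	return (media);
}

As in media_from_matchdict(), the caller still owns (and releases) the matching dictionary.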
+ */ +static IOMedia * +media_from_dev(dev_t device = 0) +{ + IOMedia *media; + OSDictionary *matchDict; + + /* Get matchDict, will need to be released */ + matchDict = media_matchdict_from_dev(device); + if (!matchDict) { + dprintf("%s couldn't get matching dictionary\n", __func__); + return (NULL); + } + + /* Get first matching IOMedia */ + media = media_from_matchdict(matchDict); + matchDict->release(); + matchDict = 0; + + if (!media) { + dprintf("%s no IOMedia found for dev_t %d\n", __func__, + device); + } + + /* Return NULL or valid media with retain count */ + return (media); +} + +/* + * media_from_device_path + * + * translate /private/var/run/disk/by-path/ to an IOMedia + * handle. The remainder of the path should be a valid + * path in the IORegistry IODTPlane device tree. + */ +static IOMedia * +media_from_device_path(const char *path = 0) +{ + IORegistryEntry *entry = 0; + IOMedia *media = 0; + OSString *osstr; + const char *string, *dash; + + /* Must be /var/run/disk/by-path/, but may have /private prefix */ + if (!path || path[0] == 0 || + (strncmp(path, "/var/run/disk/by-path/", 22) != 0 && + strncmp(path, "/private/var/run/disk/by-path/", 30) != 0)) { + dprintf("%s invalid path [%s]\n", __func__, + (path && path[0] != '\0' ? path : "")); + return (NULL); + } + + /* We need the leading slash in the string, so trim 21 or 29 */ + if (strncmp(path, "/private", 8) == 0) { + osstr = OSString::withCString(path+29); + } else { + osstr = OSString::withCString(path+21); + } + if (!osstr) { + dprintf("%s couldn't get string from path\n", __func__); + return (NULL); + } + + string = osstr->getCStringNoCopy(); + ASSERT(string); + + /* Convert dashes to slashes */ + while ((dash = strchr(string, '-')) != NULL) { + osstr->setChar('/', dash - string); + } + dprintf("%s string [%s]\n", __func__, string); + + entry = IORegistryEntry::fromPath(string, gIODTPlane); + string = 0; + osstr->release(); + osstr = 0; + + if (!entry) { + dprintf("%s IORegistryEntry::fromPath failed\n", __func__); + return (NULL); + } + + if ((media = OSDynamicCast(IOMedia, entry)) == NULL) { + entry->release(); + return (0); + } + + /* Leave a retain count on the media */ + return (media); +} + +/* + * media_from_serial + * + * translate /private/var/run/disk/by-serial/model-serial[:location] + * to an IOMedia handle. The path format is determined by + * InvariantDisks logic in IDSerialLinker.cpp. + */ +static IOMedia * +media_from_serial(const char *path = 0) +{ + IORegistryEntry *entry = 0; + IOMedia *media = 0; + OSDictionary *matching = 0; + OSDictionary *deviceCharacteristics = 0; + OSIterator *iter = 0; + OSString *osstr = 0; + OSString *model = 0; + OSString *serial = 0; + OSNumber *bsdUnit = 0; + OSObject *property = 0; + OSObject *propDict = 0; + OSObject *obj = 0; + const char *substr = 0; + const char *sep1 = 0, *sep2 = 0; + const char *string = 0, *space = 0; + const char *location = 0, *entryLocation = 0; + int newlen = 0, soff = 0; + bool matched = false; + + /* Must be /var/run/disk/by-serial/, but may have /private prefix */ + if (!path || path[0] == 0 || + (strncmp(path, "/var/run/disk/by-serial/", 24) != 0 && + strncmp(path, "/private/var/run/disk/by-serial/", 32) != 0)) { + dprintf("%s invalid path [%s]\n", __func__, + (path && path[0] != '\0' ? path : "")); + return (NULL); + } + + /* substr starts after '/by-serial/' */ + substr = path + 24; + if (strncmp(path, "/private", 8) == 0) substr += 8; + + /* + * For each whole-disk IOMedia: + * Search parents for deviceCharacteristics, or skip. 
+ * Check for Model and Serial Number properties, or skip. + * Trim trailing space and swap underscores within string. + * If "model-serial" matches path so far: + * Match whole-disk IOMedia if no slice specified. + * Or get child IOMedia with matching Location property. + */ + + sep1 = strchr(substr, '-'); + sep2 = strrchr(substr, ':'); + if (sep1 == 0) { + dprintf("%s invalid by-serial path [%s]\n", __func__, substr); + return (NULL); + } + if (sep2 == 0) { + dprintf("%s no slice, whole disk [%s]\n", __func__, substr); + sep2 = substr + (strlen(substr)); + } + + if ((matching = IOService::serviceMatching("IOMedia")) == NULL) { + dprintf("%s couldn't get matching dictionary\n", __func__); + return (NULL); + } + + if ((matching->setObject(kIOMediaWholeKey, kOSBooleanTrue) == false) || + (iter = IOService::getMatchingServices(matching)) == NULL) { + dprintf("%s couldn't get IOMedia iterator\n", __func__); + matching->release(); + return (NULL); + } + matching->release(); + matching = 0; + + while ((obj = iter->getNextObject()) != NULL) { + if ((entry = OSDynamicCast(IORegistryEntry, obj)) == NULL || + (media = OSDynamicCast(IOMedia, entry)) == NULL || + media->isFormatted() == false) { + // media->isWhole() == false) { + continue; + } + + propDict = media->getProperty( + kIOPropertyDeviceCharacteristicsKey, gIOServicePlane, + (kIORegistryIterateRecursively | + kIORegistryIterateParents)); + if ((deviceCharacteristics = OSDynamicCast(OSDictionary, + propDict)) == NULL) { + dprintf("%s no device characteristics, skipping\n", + __func__); + continue; + } + + /* + * Get each property, cast as OSString, then copy + * to a new OSString. + */ + if ((property = deviceCharacteristics->getObject( + kIOPropertyProductNameKey)) == NULL || + (osstr = OSDynamicCast(OSString, property)) == NULL || + (model = OSString::withString(osstr)) == NULL) { + dprintf("%s no product name, skipping\n", __func__); + continue; + } + if ((property = deviceCharacteristics->getObject( + kIOPropertyProductSerialNumberKey)) == NULL || + (osstr = OSDynamicCast(OSString, property)) == NULL || + (serial = OSString::withString(osstr)) == NULL) { + dprintf("%s no serial number, skipping\n", __func__); + model->release(); + model = 0; + continue; + } + + string = model->getCStringNoCopy(); + if (!string) { + model->release(); + model = 0; + serial->release(); + serial = 0; + continue; + } + /* Trim trailing whitespace */ + for (newlen = strlen(string); newlen > 0; newlen--) { + if (string[newlen-1] != ' ') { + model->setChar('\0', newlen); + break; + } + } + + /* + * sep1 is the location of the first '-' in the path. + * even if there is a '-' in the model name, we can skip + * media with model names shorter than that. 
+ */ + if (newlen == 0 || + (newlen < (sep1 - substr)) || + (substr[newlen] != '-')) { + model->release(); + model = 0; + serial->release(); + serial = 0; + continue; + } + + /* Convert spaces to underscores */ + while ((space = strchr(string, ' ')) != NULL) { + model->setChar('_', space - string); + } + + /* Compare the model string with the path */ + if (strncmp(substr, string, newlen) != 0) { + model->release(); + model = 0; + serial->release(); + serial = 0; + continue; + } + dprintf("%s model string matched [%s]\n", + __func__, model->getCStringNoCopy()); + model->release(); + model = 0; + + soff = newlen + 1; + + string = serial->getCStringNoCopy(); + if (!string) { + serial->release(); + serial = 0; + continue; + } + /* Trim trailing whitespace */ + for (newlen = strlen(string); newlen > 0; newlen--) { + if (string[newlen-1] != ' ') { + serial->setChar('\0', newlen); + break; + } + } + /* + * sep2 is the location of the last ':' in the path, or + * the end of the string if there is none. + * even if there is a ':' in the serial number, we can skip + * media with serial number strings shorter than that. + */ + if (newlen == 0 || + (newlen < (sep2 - sep1 - 1)) || + (substr[soff+newlen] != '\0' && + substr[soff+newlen] != ':')) { + serial->release(); + serial = 0; + continue; + } + + /* Convert spaces to underscores */ + while ((space = strchr(string, ' ')) != NULL) { + serial->setChar('_', space - string); + } + + /* Compare the serial string with the path */ + if (strncmp(substr+soff, string, newlen) != 0) { + serial->release(); + serial = 0; + continue; + } + dprintf("%s serial string matched [%s]\n", + __func__, serial->getCStringNoCopy()); + serial->release(); + serial = 0; + + /* + * Still need to get the slice - the component + * after an optional ':' at the end of the + * string, by searching for IOMedia with that + * location string below the whole-disk IOMedia. 
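To make the splitting above concrete (hypothetical device, for illustration only): a path such as /private/var/run/disk/by-serial/Samsung_SSD_860-S3Z9NB0K123456X:1 is compared against the model "Samsung SSD 860" (trailing blanks trimmed, spaces swapped for underscores) up to the first '-', against the serial "S3Z9NB0K123456X" up to the trailing ':', and the remaining "1" selects the child IOMedia whose location string matches that slice; without the ':' suffix the whole-disk IOMedia itself is returned.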
+ */ + /* Set new location of ':' */ + sep2 = substr + (soff + newlen); + /* Found match */ + matched = true; + media->retain(); + break; + } + iter->release(); + iter = 0; + + if (!matched || !media) { + dprintf("%s no matching devices found\n", __func__); + return (NULL); + } + + /* Whole disk path will not end with ':' */ + if (sep2[0] != ':') { + dprintf("%s Found whole disk [%s]\n", __func__, path); + /* Leave a retain count on the media */ + return (media); + } + + /* Remainder of string is location */ + location = sep2 + 1; + dprintf("%s location string [%s]\n", __func__, location); + + if ((bsdUnit = OSDynamicCast(OSNumber, + media->getProperty(kIOBSDUnitKey))) == NULL) { + dprintf("%s couldn't get BSD unit number\n", __func__); + media->release(); + return (NULL); + } + if ((matching = IOService::serviceMatching("IOMedia")) == NULL || + (matching->setObject(kIOMediaWholeKey, kOSBooleanFalse)) == false || + (matching->setObject(kIOBSDUnitKey, bsdUnit)) == false || + (iter = IOService::getMatchingServices(matching)) == NULL) { + dprintf("%s iterator for location failed\n", + __func__); + + if (matching) matching->release(); + /* We had a candidate, but couldn't get the location */ + media->release(); + return (NULL); + } + matching->release(); + matching = 0; + + /* Iterate over children checking for matching location */ + matched = false; + entry = 0; + while ((obj = iter->getNextObject()) != NULL) { + if ((entry = OSDynamicCast(IORegistryEntry, obj)) == NULL || + (OSDynamicCast(IOMedia, entry)) == NULL) { + entry = 0; + continue; + } + + if ((entryLocation = entry->getLocation()) == NULL || + (strlen(entryLocation) != strlen(location)) || + strcmp(entryLocation, location) != 0) { + entry = 0; + continue; + } + + dprintf("%s found match\n", __func__); + matched = true; + entry->retain(); + break; + } + iter->release(); + iter = 0; + + /* Drop the whole-disk media */ + media->release(); + media = 0; + + /* Cast the new entry, if there is one */ + if (!entry || (media = OSDynamicCast(IOMedia, entry)) == NULL) { +if (entry) dprintf("%s had entry but couldn't cast\n", __func__); + dprintf("%s no media found for path %s\n", + __func__, path); + if (entry) entry->release(); + return (NULL); + } + + dprintf("%s media from serial number succeeded\n", __func__); + + /* Leave a retain count on the media */ + return (matched ? media : NULL); +} + +/* + * media_from_path is intended to be called by ldi_open_by_name + * with a char* path, and returns NULL or an IOMedia device with a + * retain count that should be released on open. + */ +static IOMedia * +media_from_path(const char *path = 0) +{ + IOMedia *media; + OSDictionary *matchDict; + + /* Validate path */ + if (path == 0 || strlen(path) <= 1) { + dprintf("%s no path provided\n", __func__); + return (NULL); + } + + if (strncmp(path, "/var/run/disk/by-path/", 22) == 0 || + strncmp(path, "/private/var/run/disk/by-path/", 30) == 0) { + media = media_from_device_path(path); + dprintf("%s media_from_device_path %s\n", __func__, + (media ? "succeeded" : "failed")); + return (media); + } + + if (strncmp(path, "/var/run/disk/by-serial/", 24) == 0 || + strncmp(path, "/private/var/run/disk/by-serial/", 32) == 0) { + media = media_from_serial(path); + dprintf("%s media_from_serial %s\n", __func__, + (media ? 
"succeeded" : "failed")); + return (media); + } + + /* Try to get /dev/disk or /private/var/run/disk/by-id path */ + matchDict = media_matchdict_from_path(path); + if (!matchDict) { + dprintf("%s couldn't get matching dictionary\n", __func__); + return (NULL); + } + + media = media_from_matchdict(matchDict); + matchDict->release(); + matchDict = 0; + + if (!media) { + dprintf("%s no IOMedia found for path %s\n", __func__, path); + } + + /* Return NULL or valid media with retain count */ + return (media); +} + +/* Define an IOKit buffer for buf_strategy_iokit */ +typedef struct ldi_iokit_buf { + IOMemoryDescriptor *iomem; + IOStorageCompletion iocompletion; + IOStorageAttributes ioattr; +} ldi_iokit_buf_t; /* XXX Currently 64b */ + +/* Completion handler for IOKit strategy */ +static void +ldi_iokit_io_intr(void *target, void *parameter, + IOReturn status, UInt64 actualByteCount) +{ + ldi_iokit_buf_t *iobp = (ldi_iokit_buf_t *)target; + ldi_buf_t *lbp = (ldi_buf_t *)parameter; + +#ifdef DEBUG + /* In debug builds, verify buffer pointers */ + ASSERT3U(lbp, !=, 0); + ASSERT3U(iobp, !=, 0); + + if (!iobp || !lbp) { + printf("%s missing a buffer\n", __func__); + return; + } + + ASSERT3U(iobp->iomem, !=, 0); + + if (!iobp->iomem) { + printf("%s missing iobp->iomem\n", __func__); + return; + } + + // this is very very very noisy in --enable-boot + // ASSERT3U(ldi_zfs_handle, !=, 0); + + if (actualByteCount == 0 || + actualByteCount != lbp->b_bcount || + status != kIOReturnSuccess) { + printf("%s %s %llx / %llx\n", __func__, + "actualByteCount != lbp->b_bcount", + actualByteCount, lbp->b_bcount); + if (ldi_zfs_handle) + printf("%s status %d %d %s\n", __func__, status, + ldi_zfs_handle->errnoFromReturn(status), + ldi_zfs_handle->stringFromReturn(status)); + else + printf("%s status %d ldi_zfs_handle is NULL\n", + __func__, status); + } +#endif + + /* Complete and release IOMemoryDescriptor */ + iobp->iomem->complete(); + iobp->iomem->release(); + iobp->iomem = 0; + + /* Compute resid */ + ASSERT3U(lbp->b_bcount, >=, actualByteCount); + lbp->b_resid = (lbp->b_bcount - actualByteCount); + + /* Set error status */ + if (status == kIOReturnSuccess && + actualByteCount != 0 && lbp->b_resid == 0) { + lbp->b_error = 0; + } else { + lbp->b_error = EIO; + } + + /* Free IOKit buffer */ + kmem_free(iobp, sizeof (ldi_iokit_buf_t)); + + /* Call original completion function */ + if (lbp->b_iodone) { + (void) lbp->b_iodone(lbp); + } +} + +/* Synchronous IO, called by buf_strategy_iokit */ +static int +buf_sync_strategy_iokit(ldi_buf_t *lbp, ldi_iokit_buf_t *iobp, + struct ldi_handle *lhp) +{ + UInt64 actualByteCount = 0; + IOReturn result; + + /* Read or write */ + if (lbp->b_flags & B_READ) { + result = LH_MEDIA(lhp)->IOStorage::read(LH_CLIENT(lhp), + dbtolb(lbp->b_lblkno), iobp->iomem, + &iobp->ioattr, &actualByteCount); + } else { + result = LH_MEDIA(lhp)->IOStorage::write(LH_CLIENT(lhp), + dbtolb(lbp->b_lblkno), iobp->iomem, + &iobp->ioattr, &actualByteCount); + } + + /* Call completion */ + ldi_iokit_io_intr((void *)iobp, (void *)lbp, + result, actualByteCount); + + /* Return success based on result */ + return (result == kIOReturnSuccess ? 0 : EIO); +} + +/* + * Uses IOMedia::read asynchronously or IOStorage::read synchronously. 
+ * virtual void read(IOService * client, + * UInt64 byteStart, + * IOMemoryDescriptor * buffer, + * IOStorageAttributes * attributes, + * IOStorageCompletion * completion); + * virtual IOReturn read(IOService * client, + * UInt64 byteStart, + * IOMemoryDescriptor * buffer, + * IOStorageAttributes * attributes = 0, + * UInt64 * actualByteCount = 0); + */ +int +buf_strategy_iokit(ldi_buf_t *lbp, struct ldi_handle *lhp) +{ + ldi_iokit_buf_t *iobp = 0; + + ASSERT3U(lbp, !=, NULL); + ASSERT3U(lhp, !=, NULL); + +#ifdef DEBUG + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp)) || + !OSDynamicCast(IOService, LH_CLIENT(lhp))) { + dprintf("%s invalid IOMedia or client\n", __func__); + return (ENODEV); + } +#endif /* DEBUG */ + + /* Allocate an IOKit buffer */ + iobp = (ldi_iokit_buf_t *)kmem_alloc(sizeof (ldi_iokit_buf_t), + KM_SLEEP); + if (!iobp) { + dprintf("%s couldn't allocate buf_iokit_t\n", __func__); + return (ENOMEM); + } +#ifdef LDI_ZERO + /* Zero the new buffer struct */ + bzero(iobp, sizeof (ldi_iokit_buf_t)); +#endif + + /* Set completion and attributes for async IO */ + if (lbp->b_iodone != NULL) { + iobp->iocompletion.target = iobp; + iobp->iocompletion.parameter = lbp; + iobp->iocompletion.action = &ldi_iokit_io_intr; + } + +/* XXX Zeroed above if LDI_ZERO, otherwise here */ +#ifndef LDI_ZERO + /* XXX Zero the ioattr struct */ + bzero(&iobp->ioattr, sizeof (IOStorageAttributes)); +#endif + + /* Allocate a memory descriptor pointing to the data address */ + iobp->iomem = IOMemoryDescriptor::withAddress( + lbp->b_un.b_addr, lbp->b_bcount, + (lbp->b_flags & B_READ ? kIODirectionIn : kIODirectionOut)); + + /* Verify the buffer */ + if (!iobp->iomem || iobp->iomem->getLength() != lbp->b_bcount || + iobp->iomem->prepare() != kIOReturnSuccess) { + dprintf("%s couldn't allocate IO buffer\n", + __func__); + if (iobp->iomem) { + iobp->iomem->release(); + } + kmem_free(iobp, sizeof (ldi_iokit_buf_t)); + return (ENOMEM); + } + + /* Recheck instantaneous value of handle status */ + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s device not online\n", __func__); + iobp->iomem->complete(); + iobp->iomem->release(); + kmem_free(iobp, sizeof (ldi_iokit_buf_t)); + return (ENODEV); + } + + /* Synchronous or async */ + if (lbp->b_iodone == NULL) { + return (buf_sync_strategy_iokit(lbp, iobp, lhp)); + } + + /* Read or write */ + if (lbp->b_flags & B_READ) { + LH_MEDIA(lhp)->IOMedia::read(LH_CLIENT(lhp), + dbtolb(lbp->b_lblkno), iobp->iomem, + &iobp->ioattr, &iobp->iocompletion); + } else { + LH_MEDIA(lhp)->IOMedia::write(LH_CLIENT(lhp), + dbtolb(lbp->b_lblkno), iobp->iomem, + &iobp->ioattr, &iobp->iocompletion); + } + + /* Return success, will call io_intr when done */ + return (0); +} + +/* Client interface, alloc and open IOKit handle */ +int +ldi_open_by_media(IOMedia *media = 0, dev_t device = 0, + int fmode = 0, ldi_handle_t *lhp = 0) +{ + struct ldi_handle *retlhp; + ldi_status_t status; + int error; + + /* Validate IOMedia */ + if (!media || !lhp) { + dprintf("%s invalid argument %p or %p\n", + __func__, media, lhp); + return (EINVAL); + } + + /* Retain for duration of open */ + media->retain(); + + /* Get dev_t if not supplied */ + if (device == 0 && (device = dev_from_media(media)) == 0) { + dprintf("%s dev_from_media failed: %p %d\n", __func__, + media, device); + media->release(); + return (ENODEV); + } + + /* In debug build, be loud if we potentially leak a handle */ + ASSERT3U(*(struct ldi_handle **)lhp, ==, NULL); + + /* Allocate IOKit handle */ + retlhp = 
handle_alloc_iokit(device, fmode); + if (retlhp == NULL) { + dprintf("%s couldn't allocate IOKit handle\n", __func__); + media->release(); + return (ENOMEM); + } + + /* Try to open device with IOMedia */ + status = handle_open_start(retlhp); + if (status == LDI_STATUS_ONLINE) { + dprintf("%s already online, refs %d, openrefs %d\n", __func__, + retlhp->lh_ref, retlhp->lh_openref); + /* Cast retlhp and assign to lhp (may be 0) */ + *lhp = (ldi_handle_t)retlhp; + media->release(); + /* Successfully incremented open ref */ + return (0); + } + if (status != LDI_STATUS_OPENING) { + dprintf("%s invalid status %d\n", __func__, status); + handle_release(retlhp); + retlhp = 0; + media->release(); + return (ENODEV); + } + + error = handle_open_iokit(retlhp, media); + media->release(); + + if (error) { + dprintf("%s Couldn't open handle\n", __func__); + handle_open_done(retlhp, LDI_STATUS_CLOSED); + handle_release(retlhp); + retlhp = 0; + return (EIO); + } + handle_open_done(retlhp, LDI_STATUS_ONLINE); + + /* Register for disk notifications */ + handle_register_notifier(retlhp); + + /* Cast retlhp and assign to lhp (may be 0) */ + *lhp = (ldi_handle_t)retlhp; + /* Pass error from open */ + return (error); +} + +/* Client interface, find IOMedia from dev_t, alloc and open handle */ +int +ldi_open_media_by_dev(dev_t device = 0, int fmode = 0, + ldi_handle_t *lhp = 0) +{ + IOMedia *media = 0; + int error = EINVAL; + + /* Validate arguments */ + if (!lhp || device == 0) { + dprintf("%s missing argument %p %d\n", + __func__, lhp, device); + return (EINVAL); + } + /* In debug build, be loud if we potentially leak a handle */ + ASSERT3U(*((struct ldi_handle **)lhp), ==, NULL); + + /* Get IOMedia from major/minor */ + if ((media = media_from_dev(device)) == NULL) { + dprintf("%s media_from_dev error %d\n", + __func__, error); + return (ENODEV); + } + + /* Try to open by media */ + error = ldi_open_by_media(media, device, fmode, lhp); + + /* Release IOMedia and clear */ + media->release(); + media = 0; + + /* Pass error from open */ + return (error); +} + +/* Client interface, find dev_t and IOMedia/vnode, alloc and open handle */ +int +ldi_open_media_by_path(char *path = 0, int fmode = 0, + ldi_handle_t *lhp = 0) +{ + IOMedia *media = 0; + dev_t device = 0; + int error = EINVAL; + + /* Validate arguments */ + if (!lhp || !path) { + dprintf("%s %s %p %s %d\n", __func__, + "missing lhp or path", lhp, path, fmode); + return (EINVAL); + } + /* In debug build, be loud if we potentially leak a handle */ + ASSERT3U(*((struct ldi_handle **)lhp), ==, NULL); + + /* For /dev/disk*, and InvariantDisk paths */ + if ((media = media_from_path(path)) == NULL) { + dprintf("%s media_from_path failed\n", __func__); + return (ENODEV); + } + + error = ldi_open_by_media(media, device, fmode, lhp); + + /* Release IOMedia and clear */ + media->release(); + media = 0; + + /* Error check open */ + if (error) { + dprintf("%s ldi_open_by_media failed %d\n", + __func__, error); + } + + return (error); +} + +int +handle_remove_notifier(struct ldi_handle *lhp) +{ + handle_notifier_t notifier; + +#ifdef DEBUG + if (!lhp) { + dprintf("%s missing handle\n", __func__); + return (EINVAL); + } +#endif + + if (lhp->lh_notifier == 0) { + dprintf("%s no notifier installed\n", __func__); + return (0); + } + + /* First clear notifier pointer */ + notifier = lhp->lh_notifier; + lhp->lh_notifier = 0; + +#ifdef DEBUG + /* Validate IONotifier object */ + if (!OSDynamicCast(IONotifier, notifier->obj)) { + dprintf("%s %p is not an IONotifier\n", __func__, 
+ notifier->obj); + return (EINVAL); + } +#endif + + notifier->obj->remove(); + kmem_free(notifier, sizeof (handle_notifier_t)); + return (0); +} + +int +handle_register_notifier(struct ldi_handle *lhp) +{ + OSDictionary *matchDict; + handle_notifier_t notifier; + + /* Make sure we have a handle and dev_t */ + if (!lhp || lhp->lh_dev == 0) { + dprintf("%s no handle or missing dev_t\n", __func__); + return (EINVAL); + } + + notifier = (handle_notifier_t)kmem_alloc( + sizeof (struct _handle_notifier), KM_SLEEP); + if (!notifier) { + dprintf("%s couldn't alloc notifier struct\n", __func__); + return (ENOMEM); + } + + /* Get matchDict, will need to be released */ + matchDict = media_matchdict_from_dev(lhp->lh_dev); + if (!matchDict) { + dprintf("%s couldn't get matching dictionary\n", __func__); + kmem_free(notifier, sizeof (handle_notifier_t)); + return (EINVAL); + } + + /* Register IOMedia termination notification */ + notifier->obj = IOService::addMatchingNotification( + gIOTerminatedNotification, matchDict, + handle_media_terminate_cb, /* target */ 0, + /* refCon */ (void *)lhp, /* priority */ 0); + matchDict->release(); + + /* Error check notifier */ + if (!notifier->obj) { + dprintf("%s addMatchingNotification failed\n", + __func__); + kmem_free(notifier, sizeof (handle_notifier_t)); + return (ENOMEM); + } + + /* Assign notifier to handle */ + lhp->lh_notifier = notifier; + return (0); +} + +/* Supports both IOKit and vnode handles by finding IOMedia from dev_t */ +int +handle_set_wce_iokit(struct ldi_handle *lhp, int *wce) +{ + IOMedia *media; + IORegistryEntry *parent; + IOBlockStorageDevice *device; + IOReturn result; + bool value; + + if (!lhp || !wce) { + return (EINVAL); + } + + switch (lhp->lh_type) { + case LDI_TYPE_IOKIT: + if ((media = LH_MEDIA(lhp)) == NULL) { + dprintf("%s couldn't get IOMedia\n", __func__); + return (ENODEV); + } + /* Add a retain count */ + media->retain(); + break; + case LDI_TYPE_VNODE: + if (lhp->lh_dev == 0 || + (media = media_from_dev(lhp->lh_dev)) == 0) { + dprintf("%s couldn't find IOMedia for dev_t %d\n", + __func__, lhp->lh_dev); + return (ENODEV); + } + /* Returned media has a retain count */ + break; + default: + dprintf("%s invalid handle\n", __func__); + return (EINVAL); + } + + /* Walk the parents of this media */ + for (parent = media->getParentEntry(gIOServicePlane); + parent != NULL; + parent = parent->getParentEntry(gIOServicePlane)) { + /* Until a valid device is found */ + device = OSDynamicCast(IOBlockStorageDevice, parent); + if (device != NULL) { + device->retain(); + break; + } + /* Next parent */ + } + media->release(); + media = 0; + + /* If no matching device was found */ + if (!device) { + dprintf("%s no IOBlockStorageDevice found\n", __func__); + return (ENODEV); + } + + result = device->getWriteCacheState(&value); + if (result != kIOReturnSuccess) { + // dprintf("%s couldn't get current write cache state %d\n", + // __func__, ldi_zfs_handle->errnoFromReturn(result)); + return (ENXIO); + } + + /* If requested value does not match current */ + if (value != *wce) { + value = (*wce == 1); + /* Attempt to change the value */ + result = device->setWriteCacheState(value); + } + + /* Set error and wce to return */ + if (result != kIOReturnSuccess) { + // dprintf("%s couldn't set write cache %d\n", + // __func__, ldi_zfs_handle->errnoFromReturn(result)); + /* Flip wce to indicate current status */ + *wce = !(*wce); + return (ENXIO); + } + + return (0); +} + +int +handle_get_media_info_iokit(struct ldi_handle *lhp, + struct dk_minfo 
*dkm) +{ + uint32_t blksize; + uint64_t blkcount; + + if (!lhp || !dkm) { + return (EINVAL); + } + + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s invalid IOKit handle\n", __func__); + return (ENODEV); + } + + LH_MEDIA(lhp)->retain(); + + if ((blksize = LH_MEDIA(lhp)->getPreferredBlockSize()) == 0) { + dprintf("%s invalid blocksize\n", __func__); + LH_MEDIA(lhp)->release(); + return (ENXIO); + } + + if ((blkcount = LH_MEDIA(lhp)->getSize() / blksize) == 0) { + dprintf("%s invalid block count\n", __func__); + LH_MEDIA(lhp)->release(); + return (ENXIO); + } + + LH_MEDIA(lhp)->release(); + + /* Set the return values */ + dkm->dki_capacity = blkcount; + dkm->dki_lbsize = blksize; + + return (0); +} + +int +handle_get_media_info_ext_iokit(struct ldi_handle *lhp, + struct dk_minfo_ext *dkmext) +{ + OSObject *prop; + OSNumber *number; + uint32_t blksize, pblksize; + uint64_t blkcount; + + if (!lhp || !dkmext) { + dprintf("%s missing lhp or dkmext\n", __func__); + return (EINVAL); + } + + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s invalid IOKit handle\n", __func__); + return (ENODEV); + } + + LH_MEDIA(lhp)->retain(); + + prop = LH_MEDIA(lhp)->getProperty(kIOPropertyPhysicalBlockSizeKey, + gIOServicePlane, kIORegistryIterateRecursively | + kIORegistryIterateParents); + + number = OSDynamicCast(OSNumber, prop); + if (!prop || !number) { + dprintf("%s couldn't get physical blocksize\n", __func__); + LH_MEDIA(lhp)->release(); + return (ENXIO); + } + + pblksize = number->unsigned32BitValue(); + number = 0; + prop = 0; + + if ((blksize = LH_MEDIA(lhp)->getPreferredBlockSize()) == 0) { + dprintf("%s invalid blocksize\n", __func__); + LH_MEDIA(lhp)->release(); + return (ENXIO); + } + + if ((blkcount = LH_MEDIA(lhp)->getSize() / blksize) == 0) { + dprintf("%s invalid block count\n", __func__); + LH_MEDIA(lhp)->release(); + return (ENXIO); + } + + LH_MEDIA(lhp)->release(); + +#ifdef DEBUG + dprintf("%s phys blksize %u, logical blksize %u, blockcount %llu\n", + __func__, pblksize, blksize, blkcount); +#endif + + /* Set the return values */ + dkmext->dki_capacity = blkcount; + dkmext->dki_lbsize = blksize; + dkmext->dki_pbsize = pblksize; + + return (0); +} + +int +handle_check_media_iokit(struct ldi_handle *lhp, int *status) +{ + /* Validate arguments */ + if (!lhp || !status) { + return (EINVAL); + } + + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s invalid IOKit handle\n", __func__); + return (ENODEV); + } + + LH_MEDIA(lhp)->retain(); + + /* Validate device size */ + if (LH_MEDIA(lhp)->getSize() == 0) { + dprintf("%s media reported 0 size\n", __func__); + LH_MEDIA(lhp)->release(); + return (ENXIO); + } + + /* Validate write status if handle fmode is read-write */ + if ((lhp->lh_fmode & FWRITE) && + LH_MEDIA(lhp)->isWritable() == false) { + dprintf("%s media is not writeable\n", __func__); + LH_MEDIA(lhp)->release(); + return (EPERM); + } + + LH_MEDIA(lhp)->release(); + + /* Success */ + *status = 0; + return (0); +} + +int +handle_is_solidstate_iokit(struct ldi_handle *lhp, int *isssd) +{ + OSDictionary *propDict = 0; + OSString *property = 0; + + /* Validate arguments */ + if (!lhp || !isssd) { + return (EINVAL); + } + + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s invalid IOKit handle\n", __func__); + return (ENODEV); + } + + LH_MEDIA(lhp)->retain(); + + propDict = OSDynamicCast(OSDictionary, LH_MEDIA(lhp)->getProperty( + 
kIOPropertyDeviceCharacteristicsKey, gIOServicePlane)); + + if (propDict != 0) { + property = OSDynamicCast(OSString, + propDict->getObject(kIOPropertyMediumTypeKey)); + propDict = 0; + } + + if (property != 0 && + property->isEqualTo(kIOPropertyMediumTypeSolidStateKey)) { + *isssd = 1; + } + property = 0; + + LH_MEDIA(lhp)->release(); + + return (0); +} + +int +handle_features_iokit(struct ldi_handle *lhp, + uint32_t *data) +{ + if (!lhp || !data) { + return (EINVAL); + } + + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s invalid IOKit handle\n", __func__); + return (ENODEV); + } + + LH_MEDIA(lhp)->retain(); + + OSDictionary *dictionary = OSDynamicCast( + /* class */ OSDictionary, + /* object */ LH_MEDIA(lhp)->getProperty( + /* key */ kIOStorageFeaturesKey, + /* plane */ gIOServicePlane)); + + *data = 0; + + if (dictionary) { + OSBoolean *boolean; + +#ifdef DK_FEATURE_BARRIER + boolean = OSDynamicCast( + /* class */ OSBoolean, + /* object */ dictionary->getObject( + /* key */ kIOStorageFeatureBarrier)); + + if (boolean == kOSBooleanTrue) + *(uint32_t *)data |= DK_FEATURE_BARRIER; +#endif + + boolean = OSDynamicCast( + /* class */ OSBoolean, + /* object */ dictionary->getObject( + /* key */ kIOStorageFeatureForceUnitAccess)); + + if (boolean == kOSBooleanTrue) + *(uint32_t *)data |= DK_FEATURE_FORCE_UNIT_ACCESS; + +#ifdef DK_FEATURE_PRIORITY + boolean = OSDynamicCast( + /* class */ OSBoolean, + /* object */ dictionary->getObject( + /* key */ kIOStorageFeaturePriority)); + + if (boolean == kOSBooleanTrue) + *(uint32_t *)data |= DK_FEATURE_PRIORITY; +#endif + + boolean = OSDynamicCast( + /* class */ OSBoolean, + /* object */ dictionary->getObject( + /* key */ kIOStorageFeatureUnmap)); + + if (boolean == kOSBooleanTrue) + *(uint32_t *)data |= DK_FEATURE_UNMAP; + } + + LH_MEDIA(lhp)->release(); + return (0); +} + +int +handle_unmap_iokit(struct ldi_handle *lhp, + dkioc_free_list_ext_t *dkm) +{ + int error = 0; + + if (!lhp || !dkm) { + return (EINVAL); + } + + /* Validate IOMedia */ + if (!OSDynamicCast(IOMedia, LH_MEDIA(lhp))) { + dprintf("%s invalid IOKit handle\n", __func__); + return (ENODEV); + } + + LH_MEDIA(lhp)->retain(); + + /* We need to convert illumos' dkioc_free_list_t to dk_unmap_t */ + IOStorageExtent *extents; + extents = IONew(IOStorageExtent, 1); + extents[0].byteStart = dkm->dfle_start; + extents[0].byteCount = dkm->dfle_length; + + /* + * dkm->dfl_flags vs IOStorageUnmapOptions + * #define DF_WAIT_SYNC 0x00000001 + * Wait for full write-out of free. + * IOStorageUnmapOptions is only 0 + */ + + /* issue unmap */ + error = LH_MEDIA(lhp)->unmap(LH_CLIENT(lhp), + extents, 1, 0); + + if (error != 0) { + dprintf("%s unmap: 0x%x\n", __func__, error); + // Convert IOReturn to errno + error = LH_MEDIA(lhp)->errnoFromReturn(error); + } + + IODelete(extents, IOStorageExtent, 1); + LH_MEDIA(lhp)->release(); + + return (error); +} + + +} /* extern "C" */ diff --git a/module/os/macos/zfs/ldi_osx.c b/module/os/macos/zfs/ldi_osx.c new file mode 100644 index 0000000000..7f0902d258 --- /dev/null +++ b/module/os/macos/zfs/ldi_osx.c @@ -0,0 +1,2440 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ +/* + * Portions of this document are copyright Oracle and Joyent. + * OS X implementation of ldi_ named functions for ZFS written by + * Evan Susarret in 2015. + */ + +/* + * LDI Subsystem on OS X: + * + * Designed as a drop-in replacement for sunldi.h and driver_lyr.c, + * LDI abstracts away platform-specific device handling. This allows + * vdev_disk.c to more closely match 'upstream' illumos/OpenZFS. + * + * LDI handles may use IOKit or vnode ops to locate and use devices. + * - This reduces the call stack and work needed for almost all IO. + * - Allows for vdev discovery and use during early boot, before the + * root device is mounted. + * - Having both types allows use of non-standard kexts which publish + * bdevsw block devices (without IOMedia). + * + * XXX Review correct call stack using dtrace, annotate stack size. + * Previously, vnode_open and VNOP_STRATEGY were used, which required + * allocating buf_t for IO. This meant translating byte offsets to + * block numbers for every IO. Once issued, dtrace showed that a very + * large stack was required: + * VNOP_STRATEGY macro performs work then calls + * spec_strategy (vop->vop_strategy) which performs work then calls + * dkiostrategy (syscall) which passes the IO to an IOMediaBSDClient + * IOMediaBSDClient performed work and passes to its IOMedia provider + * + * Beyond that is a common path shared by vnode and IOMedia: + * IOMedia performs work, then does prepareRequest, breakUpRequest, + * deBlockRequest, and executeRequest. + * Potentially passed down the provider stack through IOPartitionMap + * then to the whole-disk IOMedia, with more work + * Passed down through IOBlockStorageDriver, with more work + * Passed down through IOBlockStorageDevice, with more work + * Finally passed to Family-specific driver (AHCI, diskimage, etc.) + * + * By directly accessing IOMedia, the stack is reduced, and byte + * offsets are passed to read()/write() via ldi_strategy. + * We still need to allocate an IOMemoryDescriptor for the data buf, + * however only an IOMemoryDescriptor::withAddress() reference is + * required, similar to buf_setdataptr. + */ + +/* + * LDI Handle hash lists: + * + * During ldi_init, LH_HASH_SZ lists and locks are allocated. New handles + * will be added to the list indexed by the hash of the dev_t number. + * + * The hash function simply performs a modulus on the dev_t number based on + * the LH_HASH_SZ, as opposed to illumos which hashes based on the vnode + * pointer. + * This has been tested by hashing disk0, disk0s1, disk0s2, disk1, disk1s1, + * etc. to verify results were distributed across hash range. 
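As a rough illustration of the hash just described (not the actual implementation; the real LH_HASH_SZ value is defined later in ldi_osx.c and may differ):

#define	LH_HASH_SZ	32	/* hypothetical size, for illustration */

/* Index a handle's hash list directly from its dev_t number */
static inline int
ldi_handle_hash_sketch(dev_t device)
{
	return ((int)(device % LH_HASH_SZ));
}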
+ * + * OS X dev_t numbers should be unique unless a new device claims the same + * dev_t as a removed/failed device. This would only be a collision if we + * still have a handle for the failed device (notification/event handlers + * should remove these before that occurs). + * Since Offline status is a dead-end and the handle cannot be dereferenced + * or freed while iterating the hash list, it is safe to check the status + * and skip a handle if the status is Offline (without taking handle lock). + * + * XXX On illumos the hash function uses the vnode's pointer address as the + * unique key. Since vnode addresses are aligned to the size of the vnode + * struct, the hash function shifts the pointer address to the right in order + * to hash the unique bits of the address. OS X dev_t use all the bits of + * an unsigned 32-bit int. + */ + +/* + * LDI Handle locks: + * + * Handle references and list membership are protected by the hash list + * locks. + * Handle status and other fields are protected by a per-handle mutex. + * + * To prevent deadlocks and artificial delays, the hash list locks should + * be held only for handle hold/release and handle_add/remove (list + * iterate/insert/remove). Those functions avoid blocking. + * Use the handle mutex to change state, and avoid blocking there, too. + * + * XXX Right now handle_status_change does allocate for taskq_dispatch + * with the handle lock held, but uses TQ_NOSLEEP and verifies result. + * + * Non-locking ops such as ldi_strategy, ldi_get_size, and ldi_sync will + * check the instantaneous status/refs before attempting to proceed, and + * can only perform IO while the device is Online. + */ + +/* + * LDI Handle allocation: + * + * ldi_open_by_name and ldi_open_by_dev locate the device and call + * ldi_open_media_by_path, ldi_open_media_by_dev, or ldi_open_vnode_by_path. + * + * From ldi_open_by_media and _by_vnode, we call handle_alloc_{type}. Both + * call handle_alloc_common to allocate and configure the handle. + * + * A handle is allocated in the Closed state with 1 reference. The handle + * is added to the hash list on allocation, unless a duplicate handle exists + * (same dev_t as well as fmode, not in Offline status). If an existing + * handle is found, the newly allocated handle is freed. + * + * handle_open_start is called, which takes the handle lock to check current + * status. Each of these states is possible: + * Offline: device has disappeared between allocation and now (unlikely). + * Closed: new or recently closed handle, changes status to Opening. + * Closing: already in progress. Sleeps on lock and rechecks the status. + * Opening: already in progress. Sleeps on lock and rechecks the status. + * Online: no need to open device, just increment openref count. + * + * If handle_open_start changes the status to Opening, the device is opened + * by calling handle_open_iokit or handle_open_vnode. + * + * This differs from illumos driver_lyr.c where handle_alloc first opens a + * vnode for the device, allocates a handle by vnode, and finally checks for + * a duplicate handle in the list (open, alloc, find vs. alloc, open, find). + * To do so, illumos has a VOP_OPEN that is aware of layered-driver opens. + */ + +/* + * LDI Handle list membership: + * + * Allocate with one reference, to be used or released by the caller. + * Call handle_hold if additional references are needed. + * + * Call handle_release to drop reference. On last release, this calls + * handle_free (but does not remove the handle from the list, see below). 
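A condensed sketch of the handle_open_start() decision described above (illustrative only; the lh_lock and lh_cv field names are assumptions of this sketch, and the real routine appears later in ldi_osx.c):

static ldi_status_t
handle_open_start_sketch(struct ldi_handle *lhp)
{
	mutex_enter(&lhp->lh_lock);
	for (;;) {
		switch (lhp->lh_status) {
		case LDI_STATUS_ONLINE:
			/* Already open: just share it */
			lhp->lh_openref++;
			mutex_exit(&lhp->lh_lock);
			return (LDI_STATUS_ONLINE);
		case LDI_STATUS_CLOSED:
			/* Claim the open; caller performs the device open */
			lhp->lh_status = LDI_STATUS_OPENING;
			mutex_exit(&lhp->lh_lock);
			return (LDI_STATUS_OPENING);
		case LDI_STATUS_OPENING:
		case LDI_STATUS_CLOSING:
			/* Another caller is mid-transition; wait and recheck */
			cv_wait(&lhp->lh_cv, &lhp->lh_lock);
			continue;
		default:
			/* LDI_STATUS_OFFLINE: device is gone, dead end */
			mutex_exit(&lhp->lh_lock);
			return (LDI_STATUS_OFFLINE);
		}
	}
}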
+ * + * Call handle_add to determine if this handle is a duplicate, inserting + * handle into list or returning an existing handle with a hold. + * Check the result and call handle_release on the new handle if another + * handle was returned (new handle is not added to list). + * + * Each call to handle_find will take optionally take a hold, which should + * be released when no longer needed (used by handle_add). + * + * Calling handle_open increments lh_openref but does not change lh_ref. + * Caller should already have called handle_hold to get a reference. + * + * If lh_ref is 1, call handle_remove_locked (with list lock) to remove the + * handle from the list, then call handle_release_locked to remove last ref + * and free. + * A handle shouldn't remain in the list in Closed status with no refs. + * + * Calling handle_close with the last openref will automatically take list + * lock, call handle_remove_locked, and then handle_release_locked. + */ + +/* + * LDI Handle device objects: + * + * Multiple read-only opens share one read-only handle. + * Multiple read-write opens share one read-write handle. + * + * IOKit handles are allocated with the dev_t number and fmode. + * handle_open_iokit is passed an IOMedia object (which should have a + * retain held). + * Once handle_open returns, the IOMedia can be released by the caller. + * + * Vnode handles are allocated with the dev_t number and fmode. + * handle_open_vnode is passed a path (null-terminated C string). + * vnode_open increments both iocount and refcount, vnode_ref increments + * usecount, vnode_put drops iocount between ops. + * vnode_getwithref takes an iocount, and vnode_rele drops usecount + * before vnode_close decrements iocount and refcount. + */ + +/* + * LDI Handle status: + * + * #define LDI_STATUS_OFFLINE 0x0 + * #define LDI_STATUS_CLOSED 0x1 + * #define LDI_STATUS_CLOSING 0x2 + * #define LDI_STATUS_OPENING 0x3 + * #define LDI_STATUS_ONLINE 0x4 + * + * The handle lock will be taken to change status. + * + * Handle state can only progress from Closed to Opening status, and must + * have a reference held to do so. The lock is dropped for open and close + * ops while the handle is in Opening or Closing status. + * + * If the open is successful, the state is set to Online (with handle lock + * held). This state is required for IO operations to be started. The state + * may have changed by the time an IO completes. + * + * For IOKit devices, and vnode devices that have an IOMedia, a callback is + * registered for IOMedia termination which changes the state to Offline and + * posts event callbacks. + * + * Closing a handle, by the user or as a result of an event, sets the state + * to Closing. Once device close is issued, the state changes from Closing + * to Closed (even if close returned failure). + * + * A handle that still has refs and openrefs will remain in the Online + * state, dropping refs and openrefs each time ldi_close is called. + * + * If there are refs but no openrefs, it remains in the Closed state, and + * drops refs each time handle_release is called. + * This allows clients to call ldi_open_by_* to reopen the handle, in the + * case where one client is opening the handle at the same time another is + * closing it. + * + * If the device has gone missing (IOMedia terminated), the handle will + * change to Offline status. This is a dead-end which issues Offline Notify + * and Finalize events, then cleans up the handle once all clients have + * called ldi_close. 
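+ *
+ * As a sketch, a single-client lifetime (ref/openref counts after each
+ * step, per the functions below) looks like:
+ *
+ *	handle_alloc_common()		ref 1, openref 0, Closed
+ *	handle_open_start()/..._done()	ref 1, openref 1, Online
+ *	...ldi_strategy, ldi_ioctl, ldi_get_size...
+ *	ldi_close()			ref 0, openref 0, removed and freed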
+ * + * Once all references have been dropped, the handle is removed from the + * hash list with the hash list lock held, then freed. + */ + +/* + * LDI Events: + * + * XXX Degrade event is not implemented, doubt it will be useful. Intended + * to be set when a vdev that is backed by RAID becomes degraded. This is + * not a recommended use case for ZFS, and on OS X we only have AppleRAID + * or custom hardware or software RAID. Also per the comments, the vdev + * would be marked Degraded only to inform the user via zpool status. + * + * XXX Tested in VirtualBox by hotplugging a SATA device, have yet to + * test with USB removal, etc. + * + * ldi_register_ev_callback can be used to add a struct to the event + * callback list containing the handle pointer, a notify callback, and + * a finalize callback. + * + * Supported events are Offline Notify/Finalize, which will be + * posted when the device enters the Offline state (IOMedia terminated). + * + * The event callback functions should be non-blocking. It is recommended + * to update a flag that can be checked prior to calling ldi_strategy. + */ + +/* + * LDI client interfaces: + * + * ldi_open_by_name + * ldi_open_by_dev + * ldi_close + * + * ldi_register_ev_callback + * ldi_unregister_ev_callback + * + * ldi_get_size + * ldi_sync + * ldi_ioctl + * ldi_strategy + * + * ldi_bioinit + * ldi_biofini + */ + +/* + * LDI Buffers: + * + * ldi_strategy uses an abstract buffer for IO, so clients do not need to + * be concerned with type-specific buf_t and IOMemoryDescriptor handling. + * + * Allocate and free ldi_buf_t manually, calling ldi_bioinit after alloc + * and ldi_biofini prior to free. + * + * Synchronous IO can be performed by setting b_iodone to NULL. + * + * Allocate and use a buffer like this: + * + * ldi_buf_t *bp = (ldi_buf_t *)kmem_alloc(sizeof (ldi_buf_t), KM_SLEEP); + * // Verify allocation before proceeding + * error = ldi_bioinit(bp); + * bp->b_bcount = size; + * bp->b_bufsize = size; + * bp->b_offset = offset; + * bp->b_data = data_ptr; + * bp->b_flags = B_BUSY | B_NOCACHE | B_READ; // For example + * bp->b_iodone = &io_intr_func; // For async IO, omit for sync IO + * ldi_strategy(handle, bp); // Issue IO + * + * With an async callback function such as: + * void io_intr_func(ldi_buf_t bp, void *param) + * { + * // Check/copyout bp->b_error and bp->b_resid + * ldi_biofini(bp); + * kmem_free(bp, sizeof (ldi_buf_t)); + * } + */ + +/* + * XXX LDI TO DO + * + * LDI handle stats. In debug builds, we have IO counters - number of IOs, + * number of bytes in/out. + * kstats for handle counts and sysctls for vnode/IOKit modes also implemented. + * + * To implement events, both vnode and IOKit handles register for matching + * notifications from the IOMedia object (if found). + * Using subclassed IOService can also receive IOMessage events, which + * would be issued earlier. + * + * Vnode handles with no IOMedia could post events on (multiple) IO failures. + */ + +/* + * ZFS internal + */ +#include +#include +#include +#include +#include + +/* + * LDI Includes + */ +#include + +/* Debug prints */ +#ifdef DEBUG +#define LDI_EVDBG(args) cmn_err args +#define LDI_EVTRC(args) cmn_err args +#else +#define LDI_EVDBG(args) do {} while (0) +#define LDI_EVTRC(args) do {} while (0) +#endif + +#ifdef DEBUG +#ifdef dprintf +#undef dprintf +#endif + +#define dprintf ldi_log + +#define ldi_log(fmt, ...) 
do { \
+	printf(fmt, __VA_ARGS__); \
+	/* delay(hz>>1); */ \
+_NOTE(CONSTCOND) } while (0)
+#endif
+
+/*
+ * Defines
+ * Comment out defines to alter behavior.
+ */
+// #define LDI_ZERO /* For debugging, zero allocations */
+
+/* Find IOMedia by matching on the BSD disk name. */
+static boolean_t ldi_use_iokit_from_path = 1;
+
+/* Find IOMedia by matching on the BSD major/minor (dev_t) number. */
+static boolean_t ldi_use_iokit_from_dev = 1;
+
+/*
+ * Find dev_t by vnode_lookup.
+ * Resolves symlinks to block devices, including InvariantDisk links.
+ */
+static boolean_t ldi_use_dev_from_path = 1;
+
+/*
+ * Open device by vnode if all else fails.
+ * Not intended to be a fallback for unsuccessful IOMedia open, but rather
+ * for bdev devices that do not have an IOMedia (published by other KEXTs).
+ */
+static boolean_t ldi_use_vnode_from_path = 1;
+
+/*
+ * Sysctls
+ */
+#include
+SYSCTL_DECL(_ldi);
+SYSCTL_NODE(, OID_AUTO, ldi, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "");
+SYSCTL_NODE(_ldi, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "");
+SYSCTL_UINT(_ldi_debug, OID_AUTO, use_iokit_from_dev,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ldi_use_iokit_from_dev, 0,
+    "ZFS LDI use iokit_from_dev");
+SYSCTL_UINT(_ldi_debug, OID_AUTO, use_iokit_from_path,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ldi_use_iokit_from_path, 0,
+    "ZFS LDI use iokit_from_path");
+SYSCTL_UINT(_ldi_debug, OID_AUTO, use_dev_from_path,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ldi_use_dev_from_path, 0,
+    "ZFS LDI use dev_from_path");
+SYSCTL_UINT(_ldi_debug, OID_AUTO, use_vnode_from_path,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ldi_use_vnode_from_path, 0,
+    "ZFS LDI use vnode_from_path");
+
+/*
+ * Globals
+ */
+static volatile int64_t ldi_handle_hash_count;
+
+static list_t ldi_handle_hash_list[LH_HASH_SZ];
+static kmutex_t ldi_handle_hash_lock[LH_HASH_SZ];
+
+/*
+ * Use of "ldi_ev_callback_list" must be protected by ldi_ev_lock()
+ * and ldi_ev_unlock().
+ */ +static struct ldi_ev_callback_list ldi_ev_callback_list; + +static uint32_t ldi_ev_id_pool = 0; + +struct ldi_ev_cookie { + char *ck_evname; + uint_t ck_sync; + uint_t ck_ctype; +}; + +#define CT_DEV_EV_OFFLINE 0x1 +#define CT_DEV_EV_DEGRADED 0x2 +static struct ldi_ev_cookie ldi_ev_cookies[] = { + {LDI_EV_OFFLINE, 1, CT_DEV_EV_OFFLINE}, + {LDI_EV_DEGRADE, 0, CT_DEV_EV_DEGRADED}, + {LDI_EV_DEVICE_REMOVE, 0, 0}, + {NULL} /* must terminate list */ +}; + +/* + * kstats + */ +static kstat_t *ldi_ksp; + +typedef struct ldi_stats { + kstat_named_t handle_count; + kstat_named_t handle_count_iokit; + kstat_named_t handle_count_vnode; + kstat_named_t handle_refs; + kstat_named_t handle_open_rw; + kstat_named_t handle_open_ro; +} ldi_stats_t; + +static ldi_stats_t ldi_stats = { + { "handle_count", KSTAT_DATA_UINT64 }, + { "handle_count_iokit", KSTAT_DATA_UINT64 }, + { "handle_count_vnode", KSTAT_DATA_UINT64 }, + { "handle_refs", KSTAT_DATA_UINT64 }, + { "handle_open_rw", KSTAT_DATA_UINT64 }, + { "handle_open_ro", KSTAT_DATA_UINT64 } +}; + +#define LDISTAT(stat) (ldi_stats.stat.value.ui64) +#define LDISTAT_INCR(stat, val) \ +atomic_add_64(&ldi_stats.stat.value.ui64, (val)) +#define LDISTAT_BUMP(stat) LDISTAT_INCR(stat, 1) +#define LDISTAT_BUMPDOWN(stat) LDISTAT_INCR(stat, -1) + +/* + * Define macros for accessing layered driver hash structures + */ +#define LH_HASH(dev) handle_hash_func(dev) + +static inline uint_t +handle_hash_func(dev_t device) +{ + /* Just cast, macro does modulus to hash value */ + return ((uint_t)device % LH_HASH_SZ); +} + +typedef struct status_change_args { + struct ldi_handle *lhp; + int new_status; +} status_change_args_t; + +static void +handle_status_change_callback(void *arg) +{ + status_change_args_t *sc = (status_change_args_t *)arg; + + /* Validate arg struct */ + if (!sc || !sc->lhp) { + dprintf("%s missing callback struct %p or lh\n", + __func__, sc); + return; + } + if (sc->new_status > LDI_STATUS_ONLINE) { + dprintf("%s invalid status %d\n", + __func__, sc->new_status); + return; + } + + dprintf("%s Invoking notify for handle %p status %d\n", + __func__, sc->lhp, sc->new_status); + ldi_invoke_notify(0 /* dip */, sc->lhp->lh_dev, S_IFBLK, + LDI_EV_OFFLINE, sc->lhp); + + dprintf("%s Invoking finalize for handle %p status %d\n", + __func__, sc->lhp, sc->new_status); + ldi_invoke_finalize(0 /* dip */, sc->lhp->lh_dev, S_IFBLK, + LDI_EV_OFFLINE, LDI_EV_SUCCESS, sc->lhp); + + /* Free callback struct */ + kmem_free(sc, sizeof (status_change_args_t)); +} + +/* Protected by handle lock */ +static int +handle_status_change_locked(struct ldi_handle *lhp, int new_status) +{ + status_change_args_t *sc = 0; + + /* Validate lhp */ + if (!lhp) { + dprintf("%s missing handle\n", __func__); + return (EINVAL); + } + if (new_status > LDI_STATUS_ONLINE) { + dprintf("%s invalid status %d\n", __func__, new_status); + return (EINVAL); + } + + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_dev, !=, 0); + ASSERT(MUTEX_HELD(&lhp->lh_lock)); + + /* Set the status first */ + lhp->lh_status = new_status; + + /* Only Offline needs an event */ + if (new_status != LDI_STATUS_OFFLINE) { + dprintf("%s skipping status %d\n", __func__, new_status); + return (0); + } + + dprintf("%s new_status is Offline %d\n", __func__, new_status); + + /* Allocate struct to pass to event callback */ + /* Allocating with lock held, use KM_NOSLEEP */ + sc = (status_change_args_t *)kmem_alloc(sizeof (status_change_args_t), + KM_NOSLEEP); + if (!sc) { + dprintf("%s couldn't allocate callback struct\n", + __func__); + 
return (ENOMEM); + } + sc->lhp = lhp; + sc->new_status = new_status; + + mutex_exit(&lhp->lh_lock); /* Currently needs to drop lock */ + handle_status_change_callback((void *)sc); + mutex_enter(&lhp->lh_lock); /* Retake before return */ + + return (0); +} + +/* Protected by handle lock */ +int +handle_status_change(struct ldi_handle *lhp, int new_status) +{ + int error; + + /* Validate lh and new_status */ + if (!lhp) { + dprintf("%s missing handle\n", __func__); + return (EINVAL); + } + if (new_status > LDI_STATUS_ONLINE) { + dprintf("%s invalid state %d\n", __func__, new_status); + return (EINVAL); + } + + mutex_enter(&lhp->lh_lock); + error = handle_status_change_locked(lhp, new_status); + mutex_exit(&lhp->lh_lock); + + return (error); +} + +/* Protected by hash list lock */ +void +handle_hold_locked(struct ldi_handle *lhp) +{ +#ifdef DEBUG + int index; + + ASSERT3U(lhp, !=, NULL); + index = LH_HASH(lhp->lh_dev); + ASSERT(MUTEX_HELD(&ldi_handle_hash_lock[index])); +#endif + + /* Increment ref count and kstat */ + lhp->lh_ref++; + LDISTAT_BUMP(handle_refs); +} + +/* Protected by hash list lock */ +void +handle_hold(struct ldi_handle *lhp) +{ + int index; + + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_dev, !=, 0); + + index = LH_HASH(lhp->lh_dev); + mutex_enter(&ldi_handle_hash_lock[index]); + handle_hold_locked(lhp); + mutex_exit(&ldi_handle_hash_lock[index]); +} + +/* + * Locate existing handle in linked list, may return NULL. Optionally places a + * hold on found handle. + */ +static struct ldi_handle * +handle_find_locked(dev_t device, int fmode, boolean_t hold) +{ + struct ldi_handle *retlhp = NULL, *lhp; + int index = LH_HASH(device); + + /* Validate device */ + if (device == 0) { + dprintf("%s invalid device\n", __func__); + return (NULL); + } + /* If fmode is 0, find any handle with matching dev_t */ + + ASSERT(MUTEX_HELD(&ldi_handle_hash_lock[index])); + + /* Iterate over handle hash list */ + for (lhp = list_head(&ldi_handle_hash_list[index]); + lhp != NULL; + lhp = list_next(&ldi_handle_hash_list[index], lhp)) { + /* Check for matching dev_t and fmode (if set) */ + if (lhp->lh_dev != device) { + continue; + } + + /* Special case for find any */ + if (fmode == 0) { + /* Found a match */ + retlhp = lhp; + break; + } + + /* fmode must match write level */ + if (((lhp->lh_fmode & FWRITE) && !(fmode & FWRITE)) || + (!(lhp->lh_fmode & FWRITE) && (fmode & FWRITE))) { + continue; + } + + /* Found a match */ + retlhp = lhp; + break; + } + + /* Take hold, if requested */ + if (hold && retlhp) { + /* Caller asked for hold on found handle */ + handle_hold_locked(retlhp); + } + + return (retlhp); +} + +/* + * Call without lock held to find a handle by dev_t, + * optionally placing a hold on the found handle. 
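+ *
+ * For example (a sketch; fmode 0 matches any handle for the dev_t):
+ *
+ *	lhp = handle_find(device, 0, B_TRUE);
+ *	if (lhp != NULL) {
+ *		...inspect lhp...
+ *		handle_release(lhp);	// drop the hold taken above
+ *	}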
+ */ +struct ldi_handle * +handle_find(dev_t device, int fmode, boolean_t hold) +{ + struct ldi_handle *lhp; + int index = LH_HASH(device); + + if (device == 0) { + dprintf("%s invalid device\n", __func__); + return (NULL); + } + + /* Lock for duration of find */ + mutex_enter(&ldi_handle_hash_lock[index]); + + /* Find handle by dev_t (with hold) */ + lhp = handle_find_locked(device, fmode, hold); + + /* Unlock and return handle (could be NULL) */ + mutex_exit(&ldi_handle_hash_lock[index]); + return (lhp); +} + +static void +handle_free(struct ldi_handle *lhp) +{ + ASSERT3U(lhp, !=, NULL); + + /* Validate lhp, references, and status */ + if (lhp->lh_ref != 0 || + lhp->lh_status != LDI_STATUS_CLOSED) { + dprintf("%s ref %d status %d\n", __func__, lhp->lh_ref, + lhp->lh_status); + } + + /* Remove notification handler */ + if (handle_remove_notifier(lhp) != 0) { + dprintf("%s lhp %p notifier %s\n", + __func__, lhp, "couldn't be removed"); + } + + /* Destroy condvar and mutex */ + cv_destroy(&lhp->lh_cv); + mutex_destroy(&lhp->lh_lock); + + /* Decrement kstat handle count */ + LDISTAT_BUMPDOWN(handle_count); + /* IOKit or vnode */ + switch (lhp->lh_type) { + case LDI_TYPE_IOKIT: + /* Decrement kstat handle count and free iokit_tsd */ + LDISTAT_BUMPDOWN(handle_count_iokit); + handle_free_iokit(lhp); + break; + + case LDI_TYPE_VNODE: + /* Decrement kstat handle count and free vnode_tsd */ + LDISTAT_BUMPDOWN(handle_count_vnode); + handle_free_vnode(lhp); + break; + default: + dprintf("%s invalid handle type\n", __func__); + break; + } + + /* Deallocate handle */ + dprintf("%s freeing %p\n", __func__, lhp); + kmem_free(lhp, sizeof (struct ldi_handle)); + lhp = 0; +} + +/* + * Remove handle from list, decrementing counters + */ +static void +handle_remove_locked(struct ldi_handle *lhp) +{ + int index; + + ASSERT3U(lhp, !=, NULL); + index = LH_HASH(lhp->lh_dev); + ASSERT(MUTEX_HELD(&ldi_handle_hash_lock[index])); + + /* Remove from list, update handle count */ + list_remove(&ldi_handle_hash_list[index], lhp); + OSDecrementAtomic(&ldi_handle_hash_count); +} + +void +handle_remove(struct ldi_handle *lhp) +{ + int index = LH_HASH(lhp->lh_dev); + + mutex_enter(&ldi_handle_hash_lock[index]); + handle_remove_locked(lhp); + mutex_exit(&ldi_handle_hash_lock[index]); +} + +/* Protected by hash list lock */ +static void +handle_release_locked(struct ldi_handle *lhp) +{ + boolean_t lastrelease = B_FALSE; + +#ifdef DEBUG + ASSERT3U(lhp, !=, NULL); + int index = LH_HASH(lhp->lh_dev); + ASSERT(MUTEX_HELD(&ldi_handle_hash_lock[index])); +#endif + + if (lhp->lh_ref != 0) { + lhp->lh_ref--; + LDISTAT_BUMPDOWN(handle_refs); + } else { + dprintf("%s with 0 refs\n", __func__); + } + + dprintf("%s %x remaining holds\n", __func__, lhp->lh_ref); + + /* If last open ref was dropped */ + lastrelease = (lhp->lh_ref == 0); + + if (lastrelease) { + dprintf("%s removing handle %p from list\n", __func__, lhp); + handle_remove_locked(lhp); + dprintf("%s freeing handle %p\n", __func__, lhp); + handle_free(lhp); + } +} + +/* Protected by hash list lock */ +void +handle_release(struct ldi_handle *lhp) +{ + int index; + + ASSERT3U(lhp, !=, NULL); + index = LH_HASH(lhp->lh_dev); + + mutex_enter(&ldi_handle_hash_lock[index]); + handle_release_locked(lhp); + mutex_exit(&ldi_handle_hash_lock[index]); +} + +/* + * Add new handle to list. 
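+ *
+ * The expected caller pattern is sketched below (handle_alloc_vnode in
+ * ldi_vnode.c follows the same shape):
+ *
+ *	lhp = handle_alloc_common(type, device, fmode);
+ *	retlhp = handle_add(lhp);
+ *	if (retlhp != lhp) {
+ *		handle_release(lhp);	// duplicate exists; drop new handle
+ *		lhp = retlhp;		// use the existing (held) handle
+ *	}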
+ */ +static struct ldi_handle * +handle_add_locked(struct ldi_handle *lhp) +{ + struct ldi_handle *retlhp; + int index = 0; + + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_dev, !=, 0); + + /* Lock should be held */ + index = LH_HASH(lhp->lh_dev); + ASSERT(MUTEX_HELD(&ldi_handle_hash_lock[index])); + + /* Search for existing handle */ + if ((retlhp = handle_find_locked(lhp->lh_dev, lhp->lh_fmode, + B_TRUE)) != NULL) { + dprintf("%s found handle %p\n", __func__, retlhp); + return (retlhp); + } + + /* Insert into list */ + list_insert_head(&ldi_handle_hash_list[index], lhp); + + /* Update handle count */ + OSIncrementAtomic(&ldi_handle_hash_count); + + /* Return success */ + return (lhp); +} + +/* + * Caller should check if returned handle is the same and free new + * handle if an existing handle was returned + */ +struct ldi_handle * +handle_add(struct ldi_handle *lhp) +{ + struct ldi_handle *retlhp; + int index; + + ASSERT3U(lhp, !=, NULL); + index = LH_HASH(lhp->lh_dev); + + mutex_enter(&ldi_handle_hash_lock[index]); + retlhp = handle_add_locked(lhp); + mutex_exit(&ldi_handle_hash_lock[index]); + + return (retlhp); +} + +/* + * Returns a handle with 1 reference and status Closed + */ +#ifdef illumos +static struct ldi_handle * +handle_alloc(vnode_t *vp, struct ldi_ident_t *li) +#else /* illumos */ +struct ldi_handle * +handle_alloc_common(uint_t type, dev_t device, int fmode) +#endif /* !illumos */ +{ + struct ldi_handle *new_lh; + size_t len; + + /* Validate arguments */ + if ((type != LDI_TYPE_IOKIT && type != LDI_TYPE_VNODE) || + device == 0 || fmode == 0) { + dprintf("%s Invalid type %d, device %d, or fmode %d\n", + __func__, type, device, fmode); + return (NULL); + } + + /* Allocate and verify */ + len = sizeof (struct ldi_handle); + if (NULL == (new_lh = (struct ldi_handle *)kmem_alloc(len, + KM_SLEEP))) { + dprintf("%s couldn't allocate ldi_handle\n", __func__); + return (NULL); + } +#ifdef LDI_ZERO + /* Clear the struct for safety */ + bzero(new_lh, len); +#endif + + /* Create handle lock */ + mutex_init(&new_lh->lh_lock, NULL, MUTEX_DEFAULT, NULL); + /* And condvar */ + cv_init(&new_lh->lh_cv, NULL, CV_DEFAULT, NULL); + + /* + * Set the handle type, which dictates the type of device pointer + * and buffers used for the lifetime of the ldi_handle + */ + new_lh->lh_type = type; + /* Set dev_t (major/minor) device number */ + new_lh->lh_dev = device; + + /* Clear list head */ + new_lh->lh_node.list_next = NULL; + new_lh->lh_node.list_prev = NULL; + + /* Initialize with 1 handle ref and 0 open refs */ + new_lh->lh_ref = 1; + new_lh->lh_openref = 0; + + /* Clear type-specific device data */ + new_lh->lh_tsd.iokit_tsd = 0; + /* No need to clear vnode_tsd in union */ + new_lh->lh_notifier = 0; + + /* Assign fmode */ + new_lh->lh_fmode = fmode; + + /* Alloc in status Closed */ + new_lh->lh_status = LDI_STATUS_CLOSED; + + /* Increment kstats */ + LDISTAT_BUMP(handle_count); + LDISTAT_BUMP(handle_refs); + if (type == LDI_TYPE_IOKIT) { + LDISTAT_BUMP(handle_count_iokit); + } else if (type == LDI_TYPE_VNODE) { + LDISTAT_BUMP(handle_count_vnode); + } + + return (new_lh); +} + +static void +handle_set_open_locked(struct ldi_handle *lhp) +{ + ASSERT3U(lhp, !=, NULL); + ASSERT(MUTEX_HELD(&lhp->lh_lock)); + + /* Increment number of open clients */ + lhp->lh_openref++; + + /* Increment kstats */ + if (lhp->lh_fmode & FWRITE) { + LDISTAT_BUMP(handle_open_rw); + } else { + LDISTAT_BUMP(handle_open_ro); + } +} + +#if 0 +static void +handle_set_open(struct ldi_handle *lhp) +{ + ASSERT3U(lhp, !=, 
NULL); + + mutex_enter(&lhp->lh_lock); + handle_set_open_locked(lhp); + mutex_exit(&lhp->lh_lock); +} +#endif + +static void +handle_clear_open_locked(struct ldi_handle *lhp) +{ + ASSERT3U(lhp, !=, NULL); + ASSERT(MUTEX_HELD(&lhp->lh_lock)); + + /* Decrement number of open clients */ + if (lhp->lh_openref == 0) { + dprintf("%s with 0 open refs\n", __func__); + return; + } + + /* Decrement kstats */ + lhp->lh_openref--; + if (lhp->lh_fmode & FWRITE) { + LDISTAT_BUMPDOWN(handle_open_rw); + } else { + LDISTAT_BUMPDOWN(handle_open_ro); + } +} + +#if 0 +static inline void +handle_clear_open(struct ldi_handle *lhp) +{ + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_dev, !=, 0); + ASSERT3U(lhp->lh_openref, !=, 0); + + mutex_enter(&lhp->lh_lock); + handle_clear_open_locked(lhp, lhp->lh_fmode); + mutex_exit(&lhp->lh_lock); +} +#endif + +static int +handle_close(struct ldi_handle *lhp) +{ +#ifdef DEBUG + int openrefs; +#endif + int error = EINVAL; + + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_ref, !=, 0); + ASSERT3U(lhp->lh_openref, !=, 0); + ASSERT(lhp->lh_type == LDI_TYPE_IOKIT || + lhp->lh_type == LDI_TYPE_VNODE); + + /* Take lock */ + mutex_enter(&lhp->lh_lock); + + /* + * Possible statuses: + * Online with one or more openref + * Offline due to IOMedia termination, one or more openref remain + * Impossible or programming error: + * Closing and Closed should only be set with 0 openref + * Opening should have 0 openref so far, and clients should not be + * calling ldi_close + */ + switch (lhp->lh_status) { + case LDI_STATUS_ONLINE: + if (lhp->lh_openref == 0) { + /* Unlock and return error */ + mutex_exit(&lhp->lh_lock); + /* Shouldn't happen */ + dprintf("%s status Online with 0 openrefs\n", + __func__); + return (ENXIO); + } + + /* If multiple open refs are held */ + if (lhp->lh_openref > 1) { + goto drop_openref; + } + + /* Otherwise open with last open ref */ + /* change status to closing and proceed */ + handle_status_change_locked(lhp, LDI_STATUS_CLOSING); + /* Unlock and exit loop */ + mutex_exit(&lhp->lh_lock); + goto do_close; + + case LDI_STATUS_OFFLINE: + if (lhp->lh_openref == 0) { + /* Unlock and return error */ + mutex_exit(&lhp->lh_lock); + /* Shouldn't happen */ + dprintf("%s status Offline with 0 openrefs\n", + __func__); + return (ENXIO); + } + + /* + * Otherwise the device was marked missing and clients need + * to drop openrefs until it can be released. 
+ */ + goto drop_openref; + + default: + mutex_exit(&lhp->lh_lock); + dprintf("%s invalid handle status %d\n", + __func__, lhp->lh_status); + return (ENXIO); + } + +drop_openref: + /* Just decrement open refs/stats */ + handle_clear_open_locked(lhp); +#ifdef DEBUG + /* Save openrefs to report after unlock */ + openrefs = lhp->lh_openref; +#endif + mutex_exit(&lhp->lh_lock); + +#ifdef DEBUG + dprintf("%s has %d remaining openrefs\n", __func__, openrefs); +#endif + return (0); + +do_close: + /* Remove notification handler */ + if (lhp->lh_notifier) { + error = handle_remove_notifier(lhp); + if (error) { + dprintf("%s lhp %p notifier %p error %d %s\n", + __func__, lhp, lhp->lh_notifier, error, + "couldn't be removed"); + /* Proceeds with close */ + } + } + + /* IOMedia or vnode */ + switch (lhp->lh_type) { + case LDI_TYPE_IOKIT: + error = handle_close_iokit(lhp); + /* Preserve error for return */ + break; + case LDI_TYPE_VNODE: + error = handle_close_vnode(lhp); + /* Preserve error for return */ + break; + } + +#ifdef DEBUG + if (error != 0) { + /* We will still set the handle to Closed status */ + dprintf("%s error %d from handle_close_{type}\n", + __func__, error); + } +#endif + + /* Take lock to drop openref and set status */ + mutex_enter(&lhp->lh_lock); + handle_clear_open_locked(lhp); + handle_status_change_locked(lhp, LDI_STATUS_CLOSED); + + /* Wake any waiting opens and unlock */ + cv_signal(&lhp->lh_cv); + mutex_exit(&lhp->lh_lock); + +dprintf("%s returning %d\n", __func__, error); + return (error); +} + +ldi_status_t +handle_open_start(struct ldi_handle *lhp) +{ + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_ref, !=, 0); + + /* Take lock */ + mutex_enter(&lhp->lh_lock); + /* Loop if the handle is in opening or closing status */ + do { + /* XXX Needs sleep timeout */ + switch (lhp->lh_status) { + case LDI_STATUS_ONLINE: + /* Increment readonly / readwrite count */ + handle_set_open_locked(lhp); + mutex_exit(&lhp->lh_lock); + + /* Success */ + return (LDI_STATUS_ONLINE); + + case LDI_STATUS_CLOSED: + /* Not yet open, change status to opening and proceed */ + handle_status_change_locked(lhp, LDI_STATUS_OPENING); + + /* Unlock and exit loop */ + mutex_exit(&lhp->lh_lock); + /* Return success */ + return (LDI_STATUS_OPENING); + + case LDI_STATUS_OPENING: + case LDI_STATUS_CLOSING: + /* Open or close in progress, sleep until signaled */ + dprintf("%s sleeping on lock\n", __func__); + cv_wait(&lhp->lh_cv, &lhp->lh_lock); + continue; + default: + mutex_exit(&lhp->lh_lock); + dprintf("%s invalid handle status %d\n", + __func__, lhp->lh_status); + return (LDI_STATUS_OFFLINE); + } + } while (1); + + /* Shouldn't reach this */ + return (LDI_STATUS_CLOSED); +} + +void +handle_open_done(struct ldi_handle *lhp, ldi_status_t new_status) +{ + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_status, ==, LDI_STATUS_OPENING); + + /* Lock to change status */ + mutex_enter(&lhp->lh_lock); + + if (new_status != LDI_STATUS_ONLINE) { + /* Set status, issues event */ + handle_status_change_locked(lhp, LDI_STATUS_CLOSED); + } else { + /* Increment open count and fmode */ + handle_set_open_locked(lhp); + /* Set status, issues event */ + handle_status_change_locked(lhp, LDI_STATUS_ONLINE); + } + + /* Wake any waiting opens and unlock */ + cv_signal(&lhp->lh_cv); + mutex_exit(&lhp->lh_lock); + + /* + * Flush out any old buffers remaining from + * a previous use, only if opening read-write. 
+ */ + if (new_status == LDI_STATUS_ONLINE && + (lhp->lh_fmode & FWRITE) && + ldi_sync((ldi_handle_t)lhp) != 0) { + dprintf("%s ldi_sync failed\n", __func__); + } +} + +/* + * Release all remaining handles (during ldi_fini) + * Unless something went wrong, all handles should + * be closed and have zero references. + */ +static void +handle_hash_release() +{ + struct ldi_handle *lhp; + int index, refs, j; + + for (index = 0; index < LH_HASH_SZ; index++) { + mutex_enter(&ldi_handle_hash_lock[index]); + if (!list_empty(&ldi_handle_hash_list[index])) { + dprintf("%s still have LDI handle(s) in list %d\n", + __func__, index); + } + + /* Iterate over the list */ + while ((lhp = list_head(&ldi_handle_hash_list[index]))) { + /* remove from list to deallocate */ + list_remove(&ldi_handle_hash_list[index], lhp); + + /* Update handle count */ + OSDecrementAtomic(&ldi_handle_hash_count); + + dprintf("%s releasing %p with %u refs and status %d\n", + __func__, lhp, lhp->lh_ref, lhp->lh_status); + /* release holds */ + refs = lhp->lh_ref; + for (j = 0; j < refs; j++) { + handle_release_locked(lhp); + } + lhp = 0; + } + + list_destroy(&ldi_handle_hash_list[index]); + mutex_exit(&ldi_handle_hash_lock[index]); + mutex_destroy(&ldi_handle_hash_lock[index]); + } +} + +/* + * LDI Event functions + */ +char * +ldi_ev_get_type(ldi_ev_cookie_t cookie) +{ + int i; + struct ldi_ev_cookie *cookie_impl = (struct ldi_ev_cookie *)cookie; + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (&ldi_ev_cookies[i] == cookie_impl) { + LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: LDI: %s", + ldi_ev_cookies[i].ck_evname)); + return (ldi_ev_cookies[i].ck_evname); + } + } + + return ("UNKNOWN EVENT"); +} + +static int +ldi_native_cookie(ldi_ev_cookie_t cookie) +{ + int i; + struct ldi_ev_cookie *cookie_impl = (struct ldi_ev_cookie *)cookie; + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (&ldi_ev_cookies[i] == cookie_impl) { + LDI_EVTRC((CE_NOTE, "ldi_native_cookie: native LDI")); + return (1); + } + } + + LDI_EVTRC((CE_NOTE, "ldi_native_cookie: is NDI")); + return (0); +} + +static ldi_ev_cookie_t +ldi_get_native_cookie(const char *evname) +{ + int i; + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0) { + LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: found")); + return ((ldi_ev_cookie_t)&ldi_ev_cookies[i]); + } + } + + LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: NOT found")); + return (NULL); +} + +/* + * ldi_ev_lock() needs to be recursive, since layered drivers may call + * other LDI interfaces (such as ldi_close() from within the context of + * a notify callback. Since the notify callback is called with the + * ldi_ev_lock() held and ldi_close() also grabs ldi_ev_lock, the lock needs + * to be recursive. 
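+ *
+ * For reference, a consumer registration might look like this sketch
+ * (my_notify and going_away are placeholder names; the notify callback
+ * only flags the device, as recommended in the LDI Events comment above,
+ * and may also legally call ldi_close(), which is why this lock must be
+ * recursive):
+ *
+ *	static int
+ *	my_notify(ldi_handle_t lh, ldi_ev_cookie_t cookie, void *arg,
+ *	    void *ev_data)
+ *	{
+ *		*(boolean_t *)arg = B_TRUE;	// stop issuing new IO
+ *		return (LDI_EV_SUCCESS);
+ *	}
+ *
+ *	boolean_t going_away = B_FALSE;	// consumer-owned flag
+ *	ldi_ev_cookie_t cookie;
+ *	ldi_callback_id_t id;
+ *	ldi_ev_callback_t cb;
+ *
+ *	cb.cb_vers = LDI_EV_CB_VERS;
+ *	cb.cb_notify = my_notify;
+ *	cb.cb_finalize = NULL;		// notify-only is accepted
+ *	if (ldi_ev_get_cookie(lh, LDI_EV_OFFLINE, &cookie) == LDI_EV_SUCCESS)
+ *		(void) ldi_ev_register_callbacks(lh, cookie, &cb,
+ *		    &going_away, &id);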
+ */ +static void +ldi_ev_lock(void) +{ + LDI_EVTRC((CE_NOTE, "ldi_ev_lock: entered")); + + mutex_enter(&ldi_ev_callback_list.le_lock); + if (ldi_ev_callback_list.le_thread == curthread) { + ASSERT(ldi_ev_callback_list.le_busy >= 1); + ldi_ev_callback_list.le_busy++; + } else { + while (ldi_ev_callback_list.le_busy) + cv_wait(&ldi_ev_callback_list.le_cv, + &ldi_ev_callback_list.le_lock); + ASSERT(ldi_ev_callback_list.le_thread == NULL); + ldi_ev_callback_list.le_busy = 1; + ldi_ev_callback_list.le_thread = curthread; + } + mutex_exit(&ldi_ev_callback_list.le_lock); + + LDI_EVTRC((CE_NOTE, "ldi_ev_lock: exit")); +} + +static void +ldi_ev_unlock(void) +{ + LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: entered")); + mutex_enter(&ldi_ev_callback_list.le_lock); + ASSERT(ldi_ev_callback_list.le_thread == curthread); + ASSERT(ldi_ev_callback_list.le_busy >= 1); + + ldi_ev_callback_list.le_busy--; + if (ldi_ev_callback_list.le_busy == 0) { + ldi_ev_callback_list.le_thread = NULL; + cv_signal(&ldi_ev_callback_list.le_cv); + } + mutex_exit(&ldi_ev_callback_list.le_lock); + LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: exit")); +} + +int +ldi_ev_get_cookie(ldi_handle_t lh, char *evname, ldi_ev_cookie_t *cookiep) +{ + ldi_ev_cookie_t tcookie; + + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: entered: evname=%s", + evname ? evname : "")); + + if (lh == NULL || evname == NULL || + strlen(evname) == 0 || cookiep == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: invalid args")); + return (LDI_EV_FAILURE); + } + + *cookiep = NULL; + + /* + * First check if it is a LDI native event + */ + tcookie = ldi_get_native_cookie(evname); + if (tcookie) { + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: got native cookie")); + *cookiep = tcookie; + return (LDI_EV_SUCCESS); + } + + return (LDI_EV_FAILURE); +} + +int +ldi_ev_register_callbacks(ldi_handle_t lh, ldi_ev_cookie_t cookie, + ldi_ev_callback_t *callb, void *arg, ldi_callback_id_t *id) +{ + struct ldi_handle *lhp = (struct ldi_handle *)lh; + ldi_ev_callback_impl_t *lecp; + + if (lh == NULL || cookie == NULL || callb == NULL || id == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid args")); + return (LDI_EV_FAILURE); + } + + if (callb->cb_vers != LDI_EV_CB_VERS) { + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid vers")); + return (LDI_EV_FAILURE); + } + + if (callb->cb_notify == NULL && callb->cb_finalize == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: NULL callb")); + return (LDI_EV_FAILURE); + } + + *id = 0; + + lecp = kmem_zalloc(sizeof (ldi_ev_callback_impl_t), KM_SLEEP); + + ldi_ev_lock(); + + /* + * Add the notify/finalize callback to the LDI's list of callbacks. 
+ */ + lecp->lec_lhp = lhp; + + lecp->lec_dev = lhp->lh_dev; + lecp->lec_spec = S_IFBLK; + + lecp->lec_notify = callb->cb_notify; + lecp->lec_finalize = callb->cb_finalize; + lecp->lec_arg = arg; + lecp->lec_cookie = cookie; + + lecp->lec_id = (void *)(uintptr_t)(++ldi_ev_id_pool); + + list_insert_tail(&ldi_ev_callback_list.le_head, lecp); + + *id = (ldi_callback_id_t)lecp->lec_id; + + ldi_ev_unlock(); + + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: registered " + "notify/finalize")); + + return (LDI_EV_SUCCESS); +} + +static int +ldi_ev_device_match(ldi_ev_callback_impl_t *lecp, __unused dev_info_t *dip, + dev_t dev, int spec_type) +{ + ASSERT(lecp); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(dev != NODEV); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFCHR || spec_type == S_IFBLK)); + ASSERT(lecp->lec_spec == S_IFCHR || lecp->lec_spec == S_IFBLK); + ASSERT(lecp->lec_dev != DDI_DEV_T_ANY); + ASSERT(lecp->lec_dev != DDI_DEV_T_NONE); + ASSERT(lecp->lec_dev != NODEV); + + if (dev != DDI_DEV_T_ANY) { + if (dev != lecp->lec_dev || spec_type != lecp->lec_spec) + return (0); + } + + LDI_EVTRC((CE_NOTE, "ldi_ev_device_match: MATCH dev=%d", + (uint32_t)dev)); + + return (1); +} + +/* + * LDI framework function to post a "notify" event to all layered drivers + * that have registered for that event + * + * Returns: + * LDI_EV_SUCCESS - registered callbacks allow event + * LDI_EV_FAILURE - registered callbacks block event + * LDI_EV_NONE - No matching LDI callbacks + * + * This function is *not* to be called by layered drivers. It is for I/O + * framework code in Solaris, such as the I/O retire code and DR code + * to call while servicing a device event such as offline or degraded. + */ +int +ldi_invoke_notify(__unused dev_info_t *dip, dev_t dev, int spec_type, + char *event, void *ev_data) +{ + ldi_ev_callback_impl_t *lecp; + list_t *listp; + int ret; + char *lec_event; + + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(dev != NODEV); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFCHR || spec_type == S_IFBLK)); + ASSERT(event); + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): entered: dip=%p, ev=%s", + (void *)dip, event)); + + ret = LDI_EV_NONE; + ldi_ev_lock(); + + VERIFY(ldi_ev_callback_list.le_walker_next == NULL); + listp = &ldi_ev_callback_list.le_head; + for (lecp = list_head(listp); lecp; lecp = + ldi_ev_callback_list.le_walker_next) { + ldi_ev_callback_list.le_walker_next = list_next(listp, lecp); + + /* Check if matching device */ + if (!ldi_ev_device_match(lecp, dip, dev, spec_type)) + continue; + + if (lecp->lec_lhp == NULL) { + /* + * Consumer has unregistered the handle and so + * is no longer interested in notify events. + */ + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No LDI " + "handle, skipping")); + continue; + } + + if (lecp->lec_notify == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No notify " + "callback. skipping")); + continue; /* not interested in notify */ + } + + /* + * Check if matching event + */ + lec_event = ldi_ev_get_type(lecp->lec_cookie); + if (strcmp(event, lec_event) != 0) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching" + " event {%s,%s}. 
skipping", event, lec_event)); + continue; + } + + lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY; + if (lecp->lec_notify((ldi_handle_t)lecp->lec_lhp, + lecp->lec_cookie, lecp->lec_arg, ev_data) != + LDI_EV_SUCCESS) { + ret = LDI_EV_FAILURE; + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify" + " FAILURE")); + break; + } + + /* We have a matching callback that allows the event to occur */ + ret = LDI_EV_SUCCESS; + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success")); + } + + if (ret != LDI_EV_FAILURE) + goto out; + +#ifdef __APPLE__ + dprintf("%s offline notify failed, shouldn't happen\n", __func__); + goto out; +#endif +#ifdef illumos + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify")); + + /* + * Undo notifies already sent + */ + lecp = list_prev(listp, lecp); + VERIFY(ldi_ev_callback_list.le_walker_prev == NULL); + for (; lecp; lecp = ldi_ev_callback_list.le_walker_prev) { + ldi_ev_callback_list.le_walker_prev = list_prev(listp, lecp); + + /* + * Check if matching device + */ + if (!ldi_ev_device_match(lecp, dip, dev, spec_type)) + continue; + + if (lecp->lec_finalize == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no finalize, " + "skipping")); + continue; /* not interested in finalize */ + } + + /* + * it is possible that in response to a notify event a + * layered driver closed its LDI handle so it is ok + * to have a NULL LDI handle for finalize. The layered + * driver is expected to maintain state in its "arg" + * parameter to keep track of the closed device. + */ + + /* Check if matching event */ + lec_event = ldi_ev_get_type(lecp->lec_cookie); + if (strcmp(event, lec_event) != 0) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): not matching " + "event: %s,%s, skipping", event, lec_event)); + continue; + } + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): calling finalize")); + + lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie, + LDI_EV_FAILURE, lecp->lec_arg, ev_data); + + /* + * If LDI native event and LDI handle closed in context + * of notify, NULL out the finalize callback as we have + * already called the 1 finalize above allowed in this situation + */ + if (lecp->lec_lhp == NULL && + ldi_native_cookie(lecp->lec_cookie)) { + LDI_EVDBG((CE_NOTE, + "ldi_invoke_notify(): NULL-ing finalize after " + "calling 1 finalize following ldi_close")); + lecp->lec_finalize = NULL; + } + } +#endif /* illumos */ + +out: + ldi_ev_callback_list.le_walker_next = NULL; + ldi_ev_callback_list.le_walker_prev = NULL; + ldi_ev_unlock(); + + if (ret == LDI_EV_NONE) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no matching " + "LDI callbacks")); + } + + return (ret); +} + +/* + * LDI framework function to invoke "finalize" callbacks for all layered + * drivers that have registered callbacks for that event. + * + * This function is *not* to be called by layered drivers. It is for I/O + * framework code in Solaris, such as the I/O retire code and DR code + * to call while servicing a device event such as offline or degraded. 
+ */ +void +ldi_invoke_finalize(__unused dev_info_t *dip, dev_t dev, int spec_type, + char *event, int ldi_result, void *ev_data) +{ + ldi_ev_callback_impl_t *lecp; + list_t *listp; + char *lec_event; + int found = 0; + + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(dev != NODEV); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFCHR || spec_type == S_IFBLK)); + ASSERT(event); + ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE); + + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): entered: dip=%p, result=%d" + " event=%s", (void *)dip, ldi_result, event)); + + ldi_ev_lock(); + VERIFY(ldi_ev_callback_list.le_walker_next == NULL); + listp = &ldi_ev_callback_list.le_head; + for (lecp = list_head(listp); lecp; lecp = + ldi_ev_callback_list.le_walker_next) { + ldi_ev_callback_list.le_walker_next = list_next(listp, lecp); + + if (lecp->lec_finalize == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): No " + "finalize. Skipping")); + continue; /* Not interested in finalize */ + } + + /* + * Check if matching device + */ + if (!ldi_ev_device_match(lecp, dip, dev, spec_type)) + continue; + + /* + * It is valid for the LDI handle to be NULL during finalize. + * The layered driver may have done an LDI close in the notify + * callback. + */ + + /* + * Check if matching event + */ + lec_event = ldi_ev_get_type(lecp->lec_cookie); + if (strcmp(event, lec_event) != 0) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): Not " + "matching event {%s,%s}. Skipping", + event, lec_event)); + continue; + } + + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): calling finalize")); + + found = 1; + + lecp->lec_finalize((ldi_handle_t)lecp->lec_lhp, + lecp->lec_cookie, ldi_result, lecp->lec_arg, + ev_data); + + /* + * If LDI native event and LDI handle closed in context + * of notify, NULL out the finalize callback as we have + * already called the 1 finalize above allowed in this situation + */ + if (lecp->lec_lhp == NULL && + ldi_native_cookie(lecp->lec_cookie)) { + LDI_EVDBG((CE_NOTE, + "ldi_invoke_finalize(): NULLing finalize after " + "calling 1 finalize following ldi_close")); + lecp->lec_finalize = NULL; + } + } + ldi_ev_callback_list.le_walker_next = NULL; + ldi_ev_unlock(); + + if (found) + return; + + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): no matching callbacks")); +} + +int +ldi_ev_remove_callbacks(ldi_callback_id_t id) +{ + ldi_ev_callback_impl_t *lecp; + ldi_ev_callback_impl_t *next; + ldi_ev_callback_impl_t *found; + list_t *listp; + + if (id == 0) { + cmn_err(CE_WARN, "ldi_ev_remove_callbacks: Invalid ID 0"); + return (LDI_EV_FAILURE); + } + + LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: entered: id=%p", + (void *)id)); + + ldi_ev_lock(); + + listp = &ldi_ev_callback_list.le_head; + next = found = NULL; + for (lecp = list_head(listp); lecp; lecp = next) { + next = list_next(listp, lecp); + if (lecp->lec_id == id) { + VERIFY(found == NULL); + + /* + * If there is a walk in progress, shift that walk + * along to the next element so that we can remove + * this one. This allows us to unregister an arbitrary + * number of callbacks from within a callback. + * + * See the struct definition (in sunldi_impl.h) for + * more information. 
+ */ + if (ldi_ev_callback_list.le_walker_next == lecp) + ldi_ev_callback_list.le_walker_next = next; + if (ldi_ev_callback_list.le_walker_prev == lecp) + ldi_ev_callback_list.le_walker_prev = list_prev( + listp, ldi_ev_callback_list.le_walker_prev); + + list_remove(listp, lecp); + found = lecp; + } + } + ldi_ev_unlock(); + + if (found == NULL) { + cmn_err(CE_WARN, "No LDI event handler for id (%p)", + (void *)id); + return (LDI_EV_SUCCESS); + } + + LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: removed " + "LDI native callbacks")); + kmem_free(found, sizeof (ldi_ev_callback_impl_t)); + + return (LDI_EV_SUCCESS); +} +/* + * XXX End LDI Events + */ + +/* Client interface, find IOMedia from dev_t, alloc and open handle */ +int +ldi_open_by_dev(dev_t device, __unused int otyp, int fmode, + __unused cred_t *cred, ldi_handle_t *lhp, + __unused ldi_ident_t ident) +{ + int error = EINVAL; + + dprintf("%s dev_t %d fmode %d\n", __func__, device, fmode); + + /* Validate arguments */ + if (!lhp || device == 0) { + dprintf("%s missing argument %p %d\n", + __func__, lhp, device); + return (EINVAL); + } + /* In debug build, be loud if we potentially leak a handle */ + ASSERT3U(*((struct ldi_handle **)lhp), ==, NULL); + + /* Try to open by media */ + error = ldi_open_media_by_dev(device, fmode, lhp); + + /* Pass error from open */ + return (error); +} + +/* Client interface, find dev_t and IOMedia/vnode, alloc and open handle */ +int +ldi_open_by_name(char *path, int fmode, __unused cred_t *cred, + ldi_handle_t *lhp, __unused ldi_ident_t li) +{ + dev_t device = 0; + int error = EINVAL; + + dprintf("%s dev_t %d fmode %d\n", __func__, device, fmode); + + /* Validate arguments */ + if (!lhp || !path) { + dprintf("%s %s %p %s %d\n", __func__, + "missing lhp or path", lhp, path, fmode); + return (EINVAL); + } + /* In debug build, be loud if we potentially leak a handle */ + ASSERT3U(*((struct ldi_handle **)lhp), ==, NULL); + + /* Validate active open modes */ + if (!ldi_use_iokit_from_path && !ldi_use_dev_from_path && + !ldi_use_vnode_from_path) { + dprintf("%s no valid modes to open device\n", __func__); + return (EINVAL); + } + + /* Try to open IOMedia by path */ + if (ldi_use_iokit_from_path) { + error = ldi_open_media_by_path(path, fmode, lhp); + + /* Error check open */ + if (!error) { + return (0); + } else { + dprintf("%s ldi_open_media_by_path failed\n", + __func__); + /* Not fatal, retry by dev_t or vnode */ + } + } + + /* Get dev_t from path, try to open IOMedia by dev */ + if (ldi_use_dev_from_path) { + /* Uses vnode_lookup */ + device = dev_from_path(path); + if (device == 0) { + dprintf("%s dev_from_path failed %s\n", + __func__, path); + /* + * Both media_from_dev and vnode_from_path will fail + * if dev_from_path fails, since it uses vnode_lookup. 
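+ *
+ * In other words, the overall resolution order attempted here is,
+ * sketched (each step gated by its ldi_use_* tunable):
+ *
+ *	ldi_open_media_by_path(path, ...)
+ *	device = dev_from_path(path);
+ *	ldi_open_media_by_dev(device, ...)
+ *	ldi_open_vnode_by_path(path, device, ...)
+ *
+ * so once dev_from_path has failed there is nothing left to try.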
+ */ + return (ENODEV); + } + + if (ldi_use_iokit_from_dev) { + /* Searches for matching IOMedia */ + error = ldi_open_media_by_dev(device, fmode, lhp); + if (!error) { + return (0); + } else { + dprintf("%s ldi_open_media_by_dev failed %d\n", + __func__, device); + /* Not fatal, retry as vnode */ + } + } + } + + if (!ldi_use_vnode_from_path) { + return (EINVAL); + } + + /* Try to open vnode by path */ + error = ldi_open_vnode_by_path(path, device, fmode, lhp); + if (error) { + dprintf("%s ldi_open_vnode_by_path failed %d\n", __func__, + error); + } + + return (error); +} + +/* Client interface, wrapper for handle_close */ +int +ldi_close(ldi_handle_t lh, int fmode, __unused cred_t *cred) +{ + struct ldi_handle *handlep = (struct ldi_handle *)lh; + int error = EINVAL; + + ASSERT3U(handlep, !=, NULL); + ASSERT3U(handlep->lh_ref, !=, 0); + ASSERT3U(handlep->lh_fmode, ==, fmode); + + dprintf("%s dev_t %d fmode %d\n", __func__, handlep->lh_dev, fmode); + + /* Remove event callbacks */ + boolean_t notify = B_FALSE; + list_t *listp; + ldi_ev_callback_impl_t *lecp; + + /* + * Search the event callback list for callbacks with this + * handle. There are 2 cases + * 1. Called in the context of a notify. The handle consumer + * is releasing its hold on the device to allow a reconfiguration + * of the device. Simply NULL out the handle and the notify callback. + * The finalize callback is still available so that the consumer + * knows of the final disposition of the device. + * 2. Not called in the context of notify. NULL out the handle as well + * as the notify and finalize callbacks. Since the consumer has + * closed the handle, we assume it is not interested in the + * notify and finalize callbacks. + */ + ldi_ev_lock(); + + if (handlep->lh_flags & LH_FLAGS_NOTIFY) + notify = B_TRUE; + listp = &ldi_ev_callback_list.le_head; + for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) { + if (lecp->lec_lhp != handlep) + continue; + lecp->lec_lhp = NULL; + lecp->lec_notify = NULL; + LDI_EVDBG((CE_NOTE, "ldi_close: NULLed lh and notify")); + if (!notify) { + LDI_EVDBG((CE_NOTE, "ldi_close: NULLed finalize")); + lecp->lec_finalize = NULL; + } + } + + if (notify) + handlep->lh_flags &= ~LH_FLAGS_NOTIFY; + ldi_ev_unlock(); + + /* Close device if only one openref, or just decrement openrefs */ + if ((error = handle_close(handlep)) != 0) { + dprintf("%s error from handle_close: %d\n", + __func__, error); + } + + /* Decrement lh_ref, if last ref then remove and free */ + handle_release(handlep); + handlep = 0; + + /* XXX clear pointer arg, and return success? */ + lh = (ldi_handle_t)0; + return (0); + // return (error); +} + +/* + * Client interface, must be in LDI_STATUS_ONLINE + */ +int +ldi_get_size(ldi_handle_t lh, uint64_t *dev_size) +{ + struct ldi_handle *handlep = (struct ldi_handle *)lh; + int error; + + /* + * Ensure we have an LDI handle, and a valid dev_size and/or + * blocksize pointer. Caller must pass at least one of these. + */ + if (!handlep || !dev_size) { + dprintf("%s handle %p\n", __func__, handlep); + dprintf("%s dev_size %p\n", __func__, dev_size); + return (EINVAL); + } + + /* + * Must be in LDI_STATUS_ONLINE + * IOMedia can return getSize without being opened, but vnode + * devices must be opened first. + * Rather than have support differing behaviors, require that + * handle is open to retrieve the size. 
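+ *
+ * A caller therefore opens the handle first, e.g. (sketch):
+ *
+ *	uint64_t size = 0;
+ *	if (ldi_get_size(lh, &size) != 0)
+ *		...handle not open/online, or arguments invalid...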
+ */ + if (handlep->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s device not online\n", __func__); + return (ENODEV); + } + + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + error = handle_get_size_iokit(handlep, dev_size); + return (error); + + case LDI_TYPE_VNODE: + error = handle_get_size_vnode(handlep, dev_size); + return (error); + } + + /* Default case, shouldn't reach this */ + dprintf("%s invalid lh_type %d\n", __func__, + handlep->lh_type); + return (EINVAL); +} + +/* + * Must be in LDI_STATUS_ONLINE + * XXX Needs async callback + */ +int +ldi_sync(ldi_handle_t lh) +{ + struct ldi_handle *handlep = (struct ldi_handle *)lh; + int error; + + /* Ensure we have an LDI handle */ + if (!handlep) { + dprintf("%s no handle\n", __func__); + return (EINVAL); + } + + /* Must be in LDI_STATUS_ONLINE */ + if (handlep->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s device not online\n", __func__); + return (ENODEV); + } + + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + error = handle_sync_iokit(handlep); + return (error); + + case LDI_TYPE_VNODE: + error = handle_sync_vnode(handlep); + return (error); + } + + /* Default case, shouldn't reach this */ + dprintf("%s invalid lh_type %d\n", __func__, + handlep->lh_type); + return (EINVAL); +} + +int +ldi_ioctl(ldi_handle_t lh, int cmd, intptr_t arg, + __unused int mode, __unused cred_t *cr, __unused int *rvalp) +{ + struct ldi_handle *handlep = (struct ldi_handle *)lh; + int error = EINVAL; + struct dk_callback *dkc; + + switch (cmd) { + /* Flush write cache */ + case DKIOCFLUSHWRITECACHE: + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + error = handle_sync_iokit(handlep); + break; + + case LDI_TYPE_VNODE: + error = handle_sync_vnode(handlep); + break; + + default: + error = ENOTSUP; + } + + if (!arg) { + return (error); + } + + dkc = (struct dk_callback *)arg; + /* Issue completion callback if set */ + if (dkc->dkc_callback) { + (*dkc->dkc_callback)(dkc->dkc_cookie, error); + } + + return (error); + + /* Set or clear write cache enabled */ + case DKIOCSETWCE: + /* + * There doesn't seem to be a way to do this by vnode, + * so we need to be able to locate an IOMedia and an + * IOBlockStorageDevice provider. 
+ */ + return (handle_set_wce_iokit(handlep, (int *)arg)); + + /* Get media blocksize and block count */ + case DKIOCGMEDIAINFO: + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_get_media_info_iokit(handlep, + (struct dk_minfo *)arg)); + + case LDI_TYPE_VNODE: + return (handle_get_media_info_vnode(handlep, + (struct dk_minfo *)arg)); + + default: + return (ENOTSUP); + } + + /* Get media logical/physical blocksize and block count */ + case DKIOCGMEDIAINFOEXT: + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_get_media_info_ext_iokit(handlep, + (struct dk_minfo_ext *)arg)); + + case LDI_TYPE_VNODE: + return (handle_get_media_info_ext_vnode(handlep, + (struct dk_minfo_ext *)arg)); + + default: + return (ENOTSUP); + } + + /* Check device status */ + case DKIOCSTATE: + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_check_media_iokit(handlep, + (int *)arg)); + + case LDI_TYPE_VNODE: + return (handle_check_media_vnode(handlep, + (int *)arg)); + + default: + return (ENOTSUP); + } + + case DKIOCISSOLIDSTATE: + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_is_solidstate_iokit(handlep, + (int *)arg)); + + case LDI_TYPE_VNODE: + return (handle_is_solidstate_vnode(handlep, + (int *)arg)); + + default: + return (ENOTSUP); + } + + case DKIOCGETBOOTINFO: + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_get_bootinfo_iokit(handlep, + (struct io_bootinfo *)arg)); + + case LDI_TYPE_VNODE: + return (handle_get_bootinfo_vnode(handlep, + (struct io_bootinfo *)arg)); + + default: + return (ENOTSUP); + } + + case DKIOCGETFEATURES: /* UNMAP? */ + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_features_iokit(handlep, + (uint32_t *)arg)); + + case LDI_TYPE_VNODE: + return (handle_features_vnode(handlep, + (uint32_t *)arg)); + + default: + return (ENOTSUP); + } + + case DKIOCFREE: /* UNMAP */ + /* IOMedia or vnode */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + return (handle_unmap_iokit(handlep, + (dkioc_free_list_ext_t *)arg)); + + case LDI_TYPE_VNODE: + return (handle_unmap_vnode(handlep, + (dkioc_free_list_ext_t *)arg)); + + default: + return (ENOTSUP); + } + + default: + return (ENOTSUP); + } +} + +/* + * Must already have handle_open called on lh. 
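+ *
+ * For example, a synchronous read can be sketched as follows (leaving
+ * b_iodone NULL selects synchronous IO, per the LDI Buffers comment at
+ * the top of this file; size, offset and buf are the caller's values):
+ *
+ *	ldi_buf_t *bp = ldi_getrbuf(KM_SLEEP);
+ *	if (bp != NULL) {
+ *		bp->b_bcount = size;
+ *		bp->b_bufsize = size;
+ *		bp->b_offset = offset;
+ *		bp->b_data = buf;
+ *		bp->b_flags = B_BUSY | B_NOCACHE | B_READ;
+ *		bp->b_iodone = NULL;		// synchronous
+ *		error = ldi_strategy(lh, bp);
+ *		// on return, check error, bp->b_error and bp->b_resid
+ *		ldi_freerbuf(bp);
+ *	}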
+ */ +int +ldi_strategy(ldi_handle_t lh, ldi_buf_t *lbp) +{ + struct ldi_handle *handlep = (struct ldi_handle *)lh; + int error = EINVAL; + + /* Verify arguments */ + if (!handlep || !lbp || lbp->b_bcount == 0) { + dprintf("%s missing something...\n", __func__); + dprintf("handlep [%p]\n", handlep); + dprintf("lbp [%p]\n", lbp); + if (lbp) { + dprintf("lbp->b_bcount %llu\n", + lbp->b_bcount); + } + return (EINVAL); + } + + /* Check instantaneous value of handle status */ + if (handlep->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s device not online\n", __func__); + return (ENODEV); + } + + /* IOMedia or vnode */ + /* Issue type-specific buf_strategy, preserve error */ + switch (handlep->lh_type) { + case LDI_TYPE_IOKIT: + error = buf_strategy_iokit(lbp, handlep); + break; + case LDI_TYPE_VNODE: + error = buf_strategy_vnode(lbp, handlep); + break; + default: + dprintf("%s invalid lh_type %d\n", __func__, handlep->lh_type); + return (EINVAL); + } + + return (error); +} + +/* Client interface to get an LDI buffer */ +ldi_buf_t * +ldi_getrbuf(int flags) +{ +/* Example: bp = getrbuf(KM_SLEEP); */ + ldi_buf_t *lbp; + + /* Allocate with requested flags */ + lbp = kmem_alloc(sizeof (ldi_buf_t), flags); + /* Verify allocation */ + if (!lbp) { + return (NULL); + } + + ldi_bioinit(lbp); + + return (lbp); +} + +/* Client interface to release an LDI buffer */ +void +ldi_freerbuf(ldi_buf_t *lbp) +{ + if (!lbp) { + return; + } + + /* Deallocate */ + kmem_free(lbp, sizeof (ldi_buf_t)); +} + +void +ldi_bioinit(ldi_buf_t *lbp) +{ +#ifdef LDI_ZERO + /* Zero the new buffer struct */ + bzero(lbp, sizeof (ldi_buf_t)); +#endif + + /* Initialize defaults */ + lbp->b_un.b_addr = 0; + lbp->b_flags = 0; + lbp->b_bcount = 0; + lbp->b_bufsize = 0; + lbp->b_lblkno = 0; + lbp->b_resid = 0; + lbp->b_error = 0; +} + +/* + * IOKit C++ functions + */ +int +ldi_init(void *provider) +{ + int index; + + /* Allocate kstat pointer */ + ldi_ksp = kstat_create("zfs", 0, "ldi", "darwin", KSTAT_TYPE_NAMED, + sizeof (ldi_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); + + if (ldi_ksp == NULL) { + dprintf("%s couldn't register kstats\n", __func__); + return (ENOMEM); + } + + /* Register kstats */ + ldi_ksp->ks_data = &ldi_stats; + kstat_install(ldi_ksp); + + /* Register sysctls */ + sysctl_register_oid(&sysctl__ldi); + sysctl_register_oid(&sysctl__ldi_debug); + sysctl_register_oid(&sysctl__ldi_debug_use_iokit_from_path); + sysctl_register_oid(&sysctl__ldi_debug_use_iokit_from_dev); + sysctl_register_oid(&sysctl__ldi_debug_use_dev_from_path); + sysctl_register_oid(&sysctl__ldi_debug_use_vnode_from_path); + + /* Create handle hash lists and locks */ + ldi_handle_hash_count = 0; + for (index = 0; index < LH_HASH_SZ; index++) { + mutex_init(&ldi_handle_hash_lock[index], NULL, + MUTEX_DEFAULT, NULL); + list_create(&ldi_handle_hash_list[index], + sizeof (struct ldi_handle), + offsetof(struct ldi_handle, lh_node)); + } + + /* + * Initialize the LDI event subsystem + */ + mutex_init(&ldi_ev_callback_list.le_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ldi_ev_callback_list.le_cv, NULL, CV_DEFAULT, NULL); + ldi_ev_callback_list.le_busy = 0; + ldi_ev_callback_list.le_thread = NULL; + ldi_ev_callback_list.le_walker_next = NULL; + ldi_ev_callback_list.le_walker_prev = NULL; + list_create(&ldi_ev_callback_list.le_head, + sizeof (ldi_ev_callback_impl_t), + offsetof(ldi_ev_callback_impl_t, lec_list)); + + return (0); +} + +void +ldi_fini() +{ + /* + * Teardown the LDI event subsystem + */ + ldi_ev_lock(); +#ifdef DEBUG + if 
(ldi_ev_callback_list.le_busy != 1 || + ldi_ev_callback_list.le_thread != curthread || + ldi_ev_callback_list.le_walker_next != NULL || + ldi_ev_callback_list.le_walker_prev != NULL) { + dprintf("%s still has %s %llu %s %p %s %p %s %p\n", __func__, + "le_busy", ldi_ev_callback_list.le_busy, + "le_thread", ldi_ev_callback_list.le_thread, + "le_walker_next", ldi_ev_callback_list.le_walker_next, + "le_walker_prev", ldi_ev_callback_list.le_walker_prev); + } +#endif + list_destroy(&ldi_ev_callback_list.le_head); + ldi_ev_unlock(); +#ifdef DEBUG + ldi_ev_callback_list.le_busy = 0; + ldi_ev_callback_list.le_thread = NULL; + ldi_ev_callback_list.le_walker_next = NULL; + ldi_ev_callback_list.le_walker_prev = NULL; +#endif + + cv_destroy(&ldi_ev_callback_list.le_cv); + mutex_destroy(&ldi_ev_callback_list.le_lock); + + if (ldi_handle_hash_count != 0) { + dprintf("%s ldi_handle_hash_count %llu\n", __func__, + ldi_handle_hash_count); + } + + /* Destroy handle hash lists and locks */ + handle_hash_release(); + + /* Unregister sysctls */ + sysctl_unregister_oid(&sysctl__ldi_debug_use_iokit_from_path); + sysctl_unregister_oid(&sysctl__ldi_debug_use_iokit_from_dev); + sysctl_unregister_oid(&sysctl__ldi_debug_use_dev_from_path); + sysctl_unregister_oid(&sysctl__ldi_debug_use_vnode_from_path); + sysctl_unregister_oid(&sysctl__ldi_debug); + sysctl_unregister_oid(&sysctl__ldi); + + /* Unregister kstats */ + if (ldi_ksp != NULL) { + kstat_delete(ldi_ksp); + ldi_ksp = NULL; + } + + if (ldi_handle_hash_count != 0) { + dprintf("%s handle_hash_count still %llu\n", __func__, + ldi_handle_hash_count); + } +} diff --git a/module/os/macos/zfs/ldi_vnode.c b/module/os/macos/zfs/ldi_vnode.c new file mode 100644 index 0000000000..5a6ff4efc0 --- /dev/null +++ b/module/os/macos/zfs/ldi_vnode.c @@ -0,0 +1,1028 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved. + */ +/* + * Portions of this document are copyright Oracle and Joyent. + * OS X implementation of ldi_ named functions for ZFS written by + * Evan Susarret in 2015. + */ + +/* + * ZFS internal + */ +#include + +/* + * LDI Includes + */ +#include + +/* Debug prints */ +#ifdef DEBUG + +#ifdef dprintf +#undef dprintf +#endif + +#define dprintf ldi_log + +#define ldi_log(fmt, ...) 
do { \ + printf(fmt, __VA_ARGS__); \ + /* delay(hz>>1); */ \ +_NOTE(CONSTCOND) } while (0) +#endif + +struct _handle_vnode { + vnode_t *devvp; + char *vd_readlinkname; +}; /* 16b */ + +#define LH_VNODE(lhp) lhp->lh_tsd.vnode_tsd->devvp + +void +handle_free_vnode(struct ldi_handle *lhp) +{ + if (!lhp) { + dprintf("%s missing lhp\n", __func__); + return; + } + + if (!lhp->lh_tsd.vnode_tsd) { + dprintf("%s missing vnode_tsd\n", __func__); + return; + } + + kmem_free(lhp->lh_tsd.vnode_tsd, sizeof (struct _handle_vnode)); + lhp->lh_tsd.vnode_tsd = 0; +} + + +/* Returns handle with lock still held */ +struct ldi_handle * +handle_alloc_vnode(dev_t device, int fmode) +{ + struct ldi_handle *lhp, *retlhp; + + /* Search for existing handle */ + if ((retlhp = handle_find(device, fmode, B_TRUE)) != NULL) { + dprintf("%s found handle before alloc\n", __func__); + return (retlhp); + } + + /* Validate arguments */ + if (device == 0 || fmode == 0) { + dprintf("%s missing dev_t %d or fmode %d\n", + __func__, device, fmode); + return (NULL); + } + + /* Allocate LDI vnode handle */ + if ((lhp = handle_alloc_common(LDI_TYPE_VNODE, device, + fmode)) == NULL) { + dprintf("%s couldn't allocate lhp\n", __func__); + return (NULL); + } + + /* Allocate and clear type-specific device data */ + lhp->lh_tsd.vnode_tsd = (struct _handle_vnode *)kmem_alloc( + sizeof (struct _handle_vnode), KM_SLEEP); + LH_VNODE(lhp) = 0; + + /* Add the handle to the list, or return match */ + if ((retlhp = handle_add(lhp)) == NULL) { + dprintf("%s handle_add failed\n", __func__); + handle_release(lhp); + return (NULL); + } + + /* Check if new or found handle was returned */ + if (retlhp != lhp) { + dprintf("%s found handle after alloc\n", __func__); + handle_release(lhp); + lhp = 0; + } + + return (retlhp); +} + +int +handle_close_vnode(struct ldi_handle *lhp) +{ + vfs_context_t context; + int error = EINVAL; + + ASSERT3U(lhp, !=, NULL); + ASSERT3U(lhp->lh_type, ==, LDI_TYPE_VNODE); + ASSERT3U(LH_VNODE(lhp), !=, NULL); + ASSERT3U(lhp->lh_status, ==, LDI_STATUS_CLOSING); + +#ifdef DEBUG + /* Validate vnode and context */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (ENOMEM); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + /* If getwithref failed, we can't call vnode_close. */ + LH_VNODE(lhp) = NULLVP; + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + /* For read-write, clear mountedon flag and wait for writes */ + if (lhp->lh_fmode & FWRITE) { + /* Wait for writes to complete */ + error = vnode_waitforwrites(LH_VNODE(lhp), 0, 0, 0, + "ldi::handle_close_vnode"); + if (error != 0) { + dprintf("%s waitforwrites returned %d\n", + __func__, error); + } + } + + /* Drop usecount */ + vnode_rele(LH_VNODE(lhp)); + + /* Drop iocount and refcount */ + error = vnode_close(LH_VNODE(lhp), + (lhp->lh_fmode & FWRITE ? 
FWASWRITTEN : 0), + context); + /* Preserve error from vnode_close */ + + /* Clear handle devvp vnode pointer */ + LH_VNODE(lhp) = NULLVP; + /* Drop VFS context */ + vfs_context_rele(context); + + if (error) { + dprintf("%s vnode_close error %d\n", + __func__, error); + } + /* Return error from close */ + return (error); +} + +static int +handle_open_vnode(struct ldi_handle *lhp, char *path) +{ + vfs_context_t context; + int error = EINVAL; + + ASSERT3U(lhp, !=, NULL); + ASSERT3U(path, !=, NULL); + ASSERT3U(lhp->lh_type, ==, LDI_TYPE_VNODE); + ASSERT3U(lhp->lh_status, ==, LDI_STATUS_OPENING); + + /* Validate path string */ + if (!path || strlen(path) <= 1) { + dprintf("%s missing path\n", __func__); + return (EINVAL); + } + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (ENOMEM); + } + + /* Try to open the device by path (takes iocount) */ + error = vnode_open(path, lhp->lh_fmode, 0, 0, + &(LH_VNODE(lhp)), context); + + if (error) { + dprintf("%s vnode_open error %d\n", __func__, error); + /* Return error from vnode_open */ + return (error); + } + + /* Increase usecount, saving error. */ + error = vnode_ref(LH_VNODE(lhp)); + if (error != 0) { + dprintf("%s couldn't vnode_ref\n", __func__); + vnode_close(LH_VNODE(lhp), lhp->lh_fmode, context); + /* Return error from vnode_ref */ + return (error); + } + + /* Verify vnode refers to a block device */ + if (!vnode_isblk(LH_VNODE(lhp))) { + dprintf("%s %s is not a block device\n", + __func__, path); + vnode_rele(LH_VNODE(lhp)); + vnode_close(LH_VNODE(lhp), lhp->lh_fmode, context); + return (ENOTBLK); + } + + /* Drop iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop VFS context */ + vfs_context_rele(context); + + return (0); +} + +int +handle_get_size_vnode(struct ldi_handle *lhp, uint64_t *dev_size) +{ + vfs_context_t context; + uint64_t blkcnt = 0; + uint32_t blksize = 0; + int error = EINVAL; + +#ifdef DEBUG + if (!lhp || !dev_size) { + dprintf("%s missing lhp or dev_size\n", __func__); + return (EINVAL); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (ENOMEM); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + /* Fetch the blocksize */ + error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCGETBLOCKSIZE, + (caddr_t)&blksize, 0, context); + error = (blksize == 0 ? ENODEV : error); + + /* Fetch the block count */ + error = (error ? error : VNOP_IOCTL(LH_VNODE(lhp), + DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, + 0, context)); + error = (blkcnt == 0 ? 
ENODEV : error); + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop VFS context */ + vfs_context_rele(context); + + /* Cast both to 64-bit then multiply */ + *dev_size = ((uint64_t)blksize * (uint64_t)blkcnt); + if (*dev_size == 0) { + dprintf("%s invalid blksize %u or blkcnt %llu\n", + __func__, blksize, blkcnt); + return (ENODEV); + } + return (0); +} + +int +handle_get_dev_path_vnode(struct ldi_handle *lhp, char *path, int len) +{ + vfs_context_t context; + int error; + + if (!lhp || !path || len == 0) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (ENOMEM); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + if ((error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCGETFIRMWAREPATH, + (caddr_t)path, len, context)) != 0) { + dprintf("%s VNOP_IOCTL error %d\n", __func__, error); + /* Preserve error to return */ + } + + /* Drop iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop VFS context */ + vfs_context_rele(context); + +if (error == 0) dprintf("%s got device path [%s]\n", __func__, path); + return (error); +} + +int +handle_get_bootinfo_vnode(struct ldi_handle *lhp, + struct io_bootinfo *bootinfo) +{ + int error; + + if (!lhp || !bootinfo) { + dprintf("%s missing argument\n", __func__); +printf("%s missing argument\n", __func__); + return (EINVAL); + } + + if ((error = handle_get_size_vnode(lhp, + &bootinfo->dev_size)) != 0 || + (error = handle_get_dev_path_vnode(lhp, bootinfo->dev_path, + sizeof (bootinfo->dev_path))) != 0) { + dprintf("%s get size or dev_path error %d\n", + __func__, error); + } + + return (error); +} + +int +handle_sync_vnode(struct ldi_handle *lhp) +{ + vfs_context_t context; + int error = EINVAL; + +#ifdef DEBUG + if (!lhp) { + dprintf("%s missing lhp\n", __func__); + return (EINVAL); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (ENOMEM); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + /* + * Flush out any old buffers remaining from a previous use. + * buf_invalidateblks flushes UPL buffers, VNOP_FSYNC informs + * the disk device to flush write buffers to disk. + */ + error = buf_invalidateblks(LH_VNODE(lhp), BUF_WRITE_DATA, 0, 0); + + error = (error ? 
error : VNOP_FSYNC(LH_VNODE(lhp), + MNT_WAIT, context)); + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop VFS context */ + vfs_context_rele(context); + + if (error) { + dprintf("%s buf_invalidateblks or VNOP_FSYNC error %d\n", + __func__, error); + return (ENOTSUP); + } + return (0); +} + +/* vnode_lookup, find dev_t info */ +dev_t +dev_from_path(char *path) +{ + vfs_context_t context; + vnode_t *devvp = NULLVP; + dev_t device; + int error = EINVAL; + +#ifdef DEBUG + /* Validate path */ + if (path == 0 || strlen(path) <= 1 || path[0] != '/') { + dprintf("%s invalid path provided\n", __func__); + return (0); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (0); + } + + /* Try to lookup the vnode by path */ + error = vnode_lookup(path, 0, &devvp, context); + if (error || devvp == NULLVP) { + dprintf("%s vnode_lookup failed %d\n", __func__, error); + vfs_context_rele(context); + return (0); + } + + /* Get the rdev of this vnode */ + device = vnode_specrdev(devvp); + + /* Drop iocount on devvp */ + vnode_put(devvp); + /* Drop vfs_context */ + vfs_context_rele(context); + +#ifdef DEBUG + /* Validate dev_t */ + if (device == 0) { + dprintf("%s invalid device\n", __func__); + } +#endif + + /* Return 0 or valid dev_t */ + return (device); +} + +/* Completion handler for vnode strategy */ +static void +ldi_vnode_io_intr(buf_t bp, void *arg) +{ + ldi_buf_t *lbp = (ldi_buf_t *)arg; + + ASSERT3U(bp, !=, NULL); + ASSERT3U(lbp, !=, NULL); + + /* Copyout error and resid */ + lbp->b_error = buf_error(bp); + lbp->b_resid = buf_resid(bp); + +#ifdef DEBUG + if (lbp->b_error || lbp->b_resid != 0) { + dprintf("%s io error %d resid %llu\n", __func__, + lbp->b_error, lbp->b_resid); + } +#endif + + /* Teardown */ + buf_free(bp); + + /* Call original completion function */ + if (lbp->b_iodone) { + lbp->b_iodone(lbp); + } +} + +int +buf_strategy_vnode(ldi_buf_t *lbp, struct ldi_handle *lhp) +{ + buf_t bp = 0; + int error = EINVAL; + + ASSERT3U(lbp, !=, NULL); + ASSERT3U(lhp, !=, NULL); + +#ifdef DEBUG + if (!lbp || !lhp) { + dprintf("%s missing lbp or lhp\n", __func__); + return (EINVAL); + } + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s handle is not Online\n", __func__); + return (ENODEV); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and verify buf_t */ + if (NULL == (bp = buf_alloc(LH_VNODE(lhp)))) { + dprintf("%s couldn't allocate buf_t\n", __func__); + return (ENOMEM); + } + + /* Setup buffer */ + buf_setflags(bp, B_NOCACHE | (lbp->b_flags & B_READ ? + B_READ : B_WRITE)); + buf_setcount(bp, lbp->b_bcount); + buf_setdataptr(bp, (uintptr_t)lbp->b_un.b_addr); + buf_setblkno(bp, lbp->b_lblkno); + buf_setlblkno(bp, lbp->b_lblkno); + buf_setsize(bp, lbp->b_bufsize); + + /* For asynchronous IO */ + if (lbp->b_iodone != NULL) { + buf_setcallback(bp, &ldi_vnode_io_intr, lbp); + } + + /* Recheck instantaneous value of handle status */ + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s device not online\n", __func__); + buf_free(bp); + return (ENODEV); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + buf_free(bp); + return (ENODEV); + } + /* All code paths from here must vnode_put. 
*/ + + if (!(lbp->b_flags & B_READ)) { + /* Does not return an error status */ + vnode_startwrite(LH_VNODE(lhp)); + } + + + + /* Issue the IO, preserving error */ + error = VNOP_STRATEGY(bp); + + if (error) { + dprintf("%s VNOP_STRATEGY error %d\n", + __func__, error); + /* Reclaim write count on vnode */ + if (!(lbp->b_flags & B_READ)) { + vnode_writedone(LH_VNODE(lhp)); + } + vnode_put(LH_VNODE(lhp)); + buf_free(bp); + return (EIO); + } + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + + /* For synchronous IO, call completion */ + if (lbp->b_iodone == NULL) { + ldi_vnode_io_intr(bp, (void*)lbp); + } + + /* Pass error from VNOP_STRATEGY */ + return (error); +} + +/* Client interface, alloc and open vnode handle by pathname */ +int +ldi_open_vnode_by_path(char *path, dev_t device, + int fmode, ldi_handle_t *lhp) +{ + struct ldi_handle *retlhp; + ldi_status_t status; + int error = EIO; + + /* Validate arguments */ + if (!path || strlen(path) <= 1 || device == 0 || !lhp) { + dprintf("%s invalid argument %p %d %p\n", __func__, + path, device, lhp); + if (path) { + dprintf("*path string is %s\n", path); + } + return (EINVAL); + } + /* In debug build, be loud if we potentially leak a handle */ + ASSERT3U(*(struct ldi_handle **)lhp, ==, NULL); + + /* Allocate handle with path */ + retlhp = handle_alloc_vnode(device, fmode); + if (retlhp == NULL) { + dprintf("%s couldn't allocate vnode handle\n", __func__); + return (ENOMEM); + } + + /* Mark the handle as Opening, or increment openref */ + status = handle_open_start(retlhp); + if (status == LDI_STATUS_ONLINE) { + dprintf("%s already online, refs %d, openrefs %d\n", __func__, + retlhp->lh_ref, retlhp->lh_openref); + /* Cast retlhp and assign to lhp (may be 0) */ + *lhp = (ldi_handle_t)retlhp; + /* Successfully incremented open ref in open_start */ + return (0); + } + + /* If state is now Opening, try to open device by vnode */ + if (status != LDI_STATUS_OPENING || + (error = handle_open_vnode(retlhp, path)) != 0) { + dprintf("%s Couldn't open handle\n", __func__); + handle_open_done(retlhp, LDI_STATUS_CLOSED); + handle_release(retlhp); + retlhp = 0; + return ((error == EACCES) ? EROFS:EIO); + } + handle_open_done(retlhp, LDI_STATUS_ONLINE); + + /* Register for disk notifications */ + handle_register_notifier(retlhp); + + /* Cast retlhp and assign to lhp (may be 0) */ + *lhp = (ldi_handle_t)retlhp; + /* Pass error from open */ + return (error); +} + +int +handle_get_media_info_vnode(struct ldi_handle *lhp, + struct dk_minfo *dkm) +{ + vfs_context_t context; + uint32_t blksize; + uint64_t blkcount; + int error; + +#ifdef DEBUG + if (!lhp || !dkm) { + dprintf("%s missing lhp or dkm\n", __func__); + return (EINVAL); + } + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s handle is not Online\n", __func__); + return (ENODEV); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (0); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. 
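buf_strategy_vnode() wraps the caller's ldi_buf_t in a Darwin buf_t, brackets the I/O with an iocount, and completes through ldi_vnode_io_intr() either from the buf callback (asynchronous, b_iodone set) or inline (synchronous). A hedged sketch of a caller using the ldi_getrbuf()/ldi_strategy() interface defined earlier in this patch; the two example_* functions are illustrative only:

```c
/*
 * Hypothetical caller, not part of the patch: an asynchronous read
 * issued through ldi_strategy().  Setting b_iodone requests the
 * asynchronous path; leaving it NULL takes the synchronous one.
 */
static void
example_read_done(ldi_buf_t *lbp)
{
	if (lbp->b_error != 0 || lbp->b_resid != 0)
		dprintf("read failed: error %d resid %llu\n",
		    lbp->b_error, lbp->b_resid);
	/* Buffer was allocated by the submitter with ldi_getrbuf(). */
	ldi_freerbuf(lbp);
}

static int
example_async_read(ldi_handle_t lh, void *buf, uint64_t lblkno, uint64_t size)
{
	ldi_buf_t *lbp;

	if ((lbp = ldi_getrbuf(KM_SLEEP)) == NULL)
		return (ENOMEM);

	lbp->b_flags = B_READ;
	lbp->b_un.b_addr = buf;
	lbp->b_bcount = size;		/* bytes requested */
	lbp->b_bufsize = size;
	lbp->b_lblkno = lblkno;		/* device logical block */
	lbp->b_iodone = example_read_done;

	return (ldi_strategy(lh, lbp));
}
```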
*/ + + /* Get the blocksize and block count */ + error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCGETBLOCKSIZE, + (caddr_t)&blksize, 0, context); + error = (error ? error : VNOP_IOCTL(LH_VNODE(lhp), + DKIOCGETBLOCKCOUNT, (caddr_t)&blkcount, + 0, context)); + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop vfs_context */ + vfs_context_rele(context); + + if (error) { + dkm->dki_capacity = 0; + dkm->dki_lbsize = 0; + return (error); + } + + /* If successful, set return values */ + dkm->dki_capacity = blkcount; + dkm->dki_lbsize = blksize; + return (0); +} + +int +handle_get_media_info_ext_vnode(struct ldi_handle *lhp, + struct dk_minfo_ext *dkmext) +{ + vfs_context_t context; + uint32_t blksize, pblksize; + uint64_t blkcount; + int error; + +#ifdef DEBUG + if (!lhp || !dkmext) { + dprintf("%s missing lhp or dkmext\n", __func__); + return (EINVAL); + } + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s handle is not Online\n", __func__); + return (ENODEV); + } + + /* Validate vnode and context */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode or context\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (0); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + /* Get the blocksize, physical blocksize, and block count */ + error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCGETBLOCKSIZE, + (caddr_t)&blksize, 0, context); + error = (error ? error : VNOP_IOCTL(LH_VNODE(lhp), + DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&pblksize, + 0, context)); + error = (error ? error : VNOP_IOCTL(LH_VNODE(lhp), + DKIOCGETBLOCKCOUNT, (caddr_t)&blkcount, + 0, context)); + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop vfs_context */ + vfs_context_rele(context); + + if (error) { + dkmext->dki_capacity = 0; + dkmext->dki_lbsize = 0; + dkmext->dki_pbsize = 0; + return (error); + } + + /* If successful, set return values */ + dkmext->dki_capacity = blkcount; + dkmext->dki_lbsize = blksize; + dkmext->dki_pbsize = pblksize; + return (0); +} + +int +handle_check_media_vnode(struct ldi_handle *lhp, int *status) +{ + if (!lhp || !status) { + dprintf("%s missing lhp or invalid status\n", __func__); + return (EINVAL); + } + + /* Validate vnode and context */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } + + /* XXX As yet unsupported */ + return (ENOTSUP); + + /* Check if the device is available and responding */ + return (0); +} + +int +handle_is_solidstate_vnode(struct ldi_handle *lhp, int *isssd) +{ + vfs_context_t context; + int error; + + if (!lhp || !isssd) { + dprintf("%s missing lhp or invalid status\n", __func__); + return (EINVAL); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (ENOMEM); + } + + /* Take an iocount on devvp vnode. 
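The same lifecycle comment appears in every helper here, so it is worth spelling out once: vnode_getwithref() takes a transient iocount that must be dropped with vnode_put() on every path, while the usecount taken with vnode_ref() at open time is only released in handle_close_vnode(). A minimal sketch of that bracket (the function is illustrative, not part of the patch):

```c
/*
 * Illustrative restatement of the idiom used throughout this file:
 * take an iocount, do the work, drop the iocount on every path.
 */
static int
example_iocount_bracket(struct ldi_handle *lhp)
{
	int error;

	if ((error = vnode_getwithref(LH_VNODE(lhp))) != 0)
		return (ENODEV);

	/* ... VNOP_IOCTL() or VNOP_STRATEGY() work goes here ... */

	vnode_put(LH_VNODE(lhp));
	return (error);
}
```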
*/ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCISSOLIDSTATE, + (caddr_t)isssd, 0, context); + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop vfs_context */ + vfs_context_rele(context); + + return (error); +} + +int +handle_features_vnode(struct ldi_handle *lhp, + uint32_t *features) +{ + vfs_context_t context; + int error; + +#ifdef DEBUG + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s handle is not Online\n", __func__); + return (ENODEV); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (0); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + + /* All code paths from here must vnode_put. */ + + error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCGETFEATURES, + (caddr_t)features, 0, context); + + if (error) { + printf("%s: 0x%x\n", __func__, error); + } + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop vfs_context */ + vfs_context_rele(context); + + return (error); +} + +int +handle_unmap_vnode(struct ldi_handle *lhp, + dkioc_free_list_ext_t *dkm) +{ + vfs_context_t context; + int error; + +#ifdef DEBUG + if (!lhp || !dkm) { + dprintf("%s missing lhp or dkm\n", __func__); + return (EINVAL); + } + if (lhp->lh_status != LDI_STATUS_ONLINE) { + dprintf("%s handle is not Online\n", __func__); + return (ENODEV); + } + + /* Validate vnode */ + if (LH_VNODE(lhp) == NULLVP) { + dprintf("%s missing vnode\n", __func__); + return (ENODEV); + } +#endif + + /* Allocate and validate context */ + context = vfs_context_create(spl_vfs_context_kernel()); + if (!context) { + dprintf("%s couldn't create VFS context\n", __func__); + return (0); + } + + /* Take an iocount on devvp vnode. */ + error = vnode_getwithref(LH_VNODE(lhp)); + if (error) { + dprintf("%s vnode_getwithref error %d\n", + __func__, error); + vfs_context_rele(context); + return (ENODEV); + } + /* All code paths from here must vnode_put. */ + + /* We need to convert illumos' dkioc_free_list_t to dk_unmap_t */ + /* We only support 1 entry now */ + dk_unmap_t dkun = { 0 }; + dk_extent_t ext; + dkun.extentsCount = 1; + dkun.extents = &ext; + ext.offset = dkm->dfle_start; + ext.length = dkm->dfle_length; + + /* + * dkm->dfl_flags vs dkun.options + * #define DF_WAIT_SYNC 0x00000001 Wait for full write-out of free. 
+ * #define _DK_UNMAP_INITIALIZE 0x00000100 + */ + + /* issue unmap */ + error = VNOP_IOCTL(LH_VNODE(lhp), DKIOCUNMAP, + (caddr_t)&dkun, 0, context); + + if (error) { + dprintf("%s unmap: 0x%x for off %llx size %llx\n", __func__, + error, ext.offset, ext.length); + } + + /* Release iocount on vnode (still has usecount) */ + vnode_put(LH_VNODE(lhp)); + /* Drop vfs_context */ + vfs_context_rele(context); + + return (error); +} diff --git a/module/os/macos/zfs/policy.c b/module/os/macos/zfs/policy.c new file mode 100644 index 0000000000..5525302266 --- /dev/null +++ b/module/os/macos/zfs/policy.c @@ -0,0 +1,354 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, Joyent, Inc. All rights reserved. + * Copyright (C) 2016 Lawrence Livermore National Security, LLC. + * + * For Linux the vast majority of this enforcement is already handled via + * the standard Linux VFS permission checks. However certain administrative + * commands which bypass the standard mechanisms may need to make use of + * this functionality. + */ + +#include +#include +#include + +/* + * The passed credentials cannot be directly verified because Linux only + * provides an interface to check the *current* process credentials. In + * order to handle this the capable() test is only run when the passed + * credentials match the current process credentials or the kcred. In + * all other cases this function must fail and return the passed err. + */ +static int +priv_policy_ns(const cred_t *cr, int capability, boolean_t all, int err, + struct user_namespace *ns) +{ + ASSERT3S(all, ==, B_FALSE); + + if (cr != CRED() && (cr != kcred)) + return (err); + +#if defined(CONFIG_USER_NS) + if (!(ns ? ns_capable(ns, capability) : capable(capability))) +#else + if (!capable(capability)) +#endif + return (err); + + return (0); +} + +static int +priv_policy(const cred_t *cr, int capability, boolean_t all, int err) +{ + return (priv_policy_ns(cr, capability, all, err, NULL)); +} + +static int +priv_policy_user(const cred_t *cr, int capability, boolean_t all, int err) +{ + /* + * All priv_policy_user checks are preceded by kuid/kgid_has_mapping() + * checks. If we cannot do them, we shouldn't be using ns_capable() + * since we don't know whether the affected files are valid in our + * namespace. + */ +#if defined(CONFIG_USER_NS) + return (priv_policy_ns(cr, capability, all, err, cr->user_ns)); +#else + return (priv_policy_ns(cr, capability, all, err, NULL)); +#endif +} + +/* + * Checks for operations that are either client-only or are used by + * both clients and servers.
+ */ +int +secpolicy_nfs(const cred_t *cr) +{ + return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM)); +} + +/* + * Catch all system configuration. + */ +int +secpolicy_sys_config(const cred_t *cr, boolean_t checkonly) +{ + return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM)); +} + +/* + * Like secpolicy_vnode_access() but we get the actual wanted mode and the + * current mode of the file, not the missing bits. + * + * Enforced in the Linux VFS. + */ +int +secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner, + mode_t curmode, mode_t wantmode) +{ + return (0); +} + +/* + * This is a special routine for ZFS; it is used to determine whether + * any of the privileges in effect allow any form of access to the + * file. There's no reason to audit this or any reason to record + * this. More work is needed to do the "KPLD" stuff. + */ +int +secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner) +{ + if (crgetfsuid(cr) == owner) + return (0); + + if (inode_owner_or_capable(ip)) + return (0); + +#if defined(CONFIG_USER_NS) + if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) + return (EPERM); +#endif + + if (priv_policy_user(cr, CAP_DAC_OVERRIDE, B_FALSE, EPERM) == 0) + return (0); + + if (priv_policy_user(cr, CAP_DAC_READ_SEARCH, B_FALSE, EPERM) == 0) + return (0); + + return (EPERM); +} + +/* + * Determine if subject can chown owner of a file. + */ +int +secpolicy_vnode_chown(const cred_t *cr, uid_t owner) +{ + if (crgetfsuid(cr) == owner) + return (0); + +#if defined(CONFIG_USER_NS) + if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) + return (EPERM); +#endif + + return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM)); +} + +/* + * Determine if subject can change group ownership of a file. + */ +int +secpolicy_vnode_create_gid(const cred_t *cr) +{ + return (priv_policy(cr, CAP_SETGID, B_FALSE, EPERM)); +} + +/* + * Policy determines whether we can remove an entry from a directory, + * regardless of permission bits. + */ +int +secpolicy_vnode_remove(const cred_t *cr) +{ + return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM)); +} + +/* + * Determine that subject can modify the mode of a file. allzone privilege + * needed when modifying root owned object. + */ +int +secpolicy_vnode_setdac(const cred_t *cr, uid_t owner) +{ + if (crgetfsuid(cr) == owner) + return (0); + +#if defined(CONFIG_USER_NS) + if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) + return (EPERM); +#endif + + return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM)); +} + +/* + * Are we allowed to retain the set-uid/set-gid bits when + * changing ownership or when writing to a file? + * "issuid" should be true when set-uid; only in that case + * root ownership is checked (setgid is assumed). + * + * Enforced in the Linux VFS. + */ +int +secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot) +{ + return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); +} + +/* + * Determine that subject can set the file setgid flag. + */ +int +secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid) +{ +#if defined(CONFIG_USER_NS) + if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid))) + return (EPERM); +#endif + if (crgetfsgid(cr) != gid && !groupmember(gid, cr)) + return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); + + return (0); +} + +/* + * Determine if the subject can inject faults in the ZFS fault injection + * framework. Requires all privileges. 
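The vnode policy helpers above all follow one shape: allow the owning fsuid outright, check that the id maps into the caller's user namespace, and otherwise fall back to a capability test. A hypothetical sketch of how a chmod-style caller might compose them (the function and its arguments are illustrative, not part of the patch):

```c
/*
 * Hypothetical illustration: only the owner (or a caller holding the
 * FOWNER capability) may change mode bits, and setting the setgid bit
 * additionally requires membership in the file's group.
 */
static int
example_can_chmod(uid_t owner, gid_t gid, mode_t newmode, cred_t *cr)
{
	int error;

	if ((error = secpolicy_vnode_setdac(cr, owner)) != 0)
		return (error);

	if ((newmode & S_ISGID) != 0)
		return (secpolicy_vnode_setids_setgids(cr, gid));

	return (0);
}
```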
+ */ +int +secpolicy_zinject(const cred_t *cr) +{ + return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES)); +} + +/* + * Determine if the subject has permission to manipulate ZFS datasets + * (not pools). Equivalent to the SYS_MOUNT privilege. + */ +int +secpolicy_zfs(const cred_t *cr) +{ + return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES)); +} + +void +secpolicy_setid_clear(vattr_t *vap, cred_t *cr) +{ + if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 && + secpolicy_vnode_setid_retain(cr, + (vap->va_mode & S_ISUID) != 0 && + (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) { + vap->va_mask |= AT_MODE; + vap->va_mode &= ~(S_ISUID|S_ISGID); + } +} + +/* + * Determine that subject can set the file setid flags. + */ +static int +secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner) +{ + if (crgetfsuid(cr) == owner) + return (0); + +#if defined(CONFIG_USER_NS) + if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) + return (EPERM); +#endif + + return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); +} + +/* + * Determine that subject can make a file a "sticky". + * + * Enforced in the Linux VFS. + */ +static int +secpolicy_vnode_stky_modify(const cred_t *cr) +{ + return (0); +} + +int +secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap, + const vattr_t *ovap, cred_t *cr) +{ + int error; + + if ((vap->va_mode & S_ISUID) != 0 && + (error = secpolicy_vnode_setid_modify(cr, + ovap->va_uid)) != 0) { + return (error); + } + + /* + * Check privilege if attempting to set the + * sticky bit on a non-directory. + */ + if (!S_ISDIR(ip->i_mode) && (vap->va_mode & S_ISVTX) != 0 && + secpolicy_vnode_stky_modify(cr) != 0) { + vap->va_mode &= ~S_ISVTX; + } + + /* + * Check for privilege if attempting to set the + * group-id bit. + */ + if ((vap->va_mode & S_ISGID) != 0 && + secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) { + vap->va_mode &= ~S_ISGID; + } + + return (0); +} + +/* + * Check privileges for setting xvattr attributes + */ +int +secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, mode_t type) +{ + return (secpolicy_vnode_chown(cr, owner)); +} + +/* + * Check privileges for setattr attributes. + * + * Enforced in the Linux VFS. + */ +int +secpolicy_vnode_setattr(cred_t *cr, struct inode *ip, struct vattr *vap, + const struct vattr *ovap, int flags, + int unlocked_access(void *, int, cred_t *), void *node) +{ + return (0); +} + +/* + * Check privileges for links. + * + * Enforced in the Linux VFS. + */ +int +secpolicy_basic_link(const cred_t *cr) +{ + return (0); +} diff --git a/module/os/macos/zfs/qat.c b/module/os/macos/zfs/qat.c new file mode 100644 index 0000000000..08613b3a20 --- /dev/null +++ b/module/os/macos/zfs/qat.c @@ -0,0 +1,105 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#if defined(_KERNEL) && defined(HAVE_QAT) +#include +#include + +qat_stats_t qat_stats = { + { "comp_requests", KSTAT_DATA_UINT64 }, + { "comp_total_in_bytes", KSTAT_DATA_UINT64 }, + { "comp_total_out_bytes", KSTAT_DATA_UINT64 }, + { "decomp_requests", KSTAT_DATA_UINT64 }, + { "decomp_total_in_bytes", KSTAT_DATA_UINT64 }, + { "decomp_total_out_bytes", KSTAT_DATA_UINT64 }, + { "dc_fails", KSTAT_DATA_UINT64 }, + { "encrypt_requests", KSTAT_DATA_UINT64 }, + { "encrypt_total_in_bytes", KSTAT_DATA_UINT64 }, + { "encrypt_total_out_bytes", KSTAT_DATA_UINT64 }, + { "decrypt_requests", KSTAT_DATA_UINT64 }, + { "decrypt_total_in_bytes", KSTAT_DATA_UINT64 }, + { "decrypt_total_out_bytes", KSTAT_DATA_UINT64 }, + { "crypt_fails", KSTAT_DATA_UINT64 }, + { "cksum_requests", KSTAT_DATA_UINT64 }, + { "cksum_total_in_bytes", KSTAT_DATA_UINT64 }, + { "cksum_fails", KSTAT_DATA_UINT64 }, +}; + +static kstat_t *qat_ksp = NULL; + +CpaStatus +qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes) +{ + *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL); + if (*pp_mem_addr == NULL) + return (CPA_STATUS_RESOURCE); + return (CPA_STATUS_SUCCESS); +} + +void +qat_mem_free_contig(void **pp_mem_addr) +{ + if (*pp_mem_addr != NULL) { + kfree(*pp_mem_addr); + *pp_mem_addr = NULL; + } +} + +int +qat_init(void) +{ + qat_ksp = kstat_create("zfs", 0, "qat", "misc", + KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + if (qat_ksp != NULL) { + qat_ksp->ks_data = &qat_stats; + kstat_install(qat_ksp); + } + + /* + * Just set the disable flag when qat init failed, qat can be + * turned on again in post-process after zfs module is loaded, e.g.: + * echo 0 > /sys/module/zfs/parameters/zfs_qat_compress_disable + */ + if (qat_dc_init() != 0) + zfs_qat_compress_disable = 1; + + if (qat_cy_init() != 0) { + zfs_qat_checksum_disable = 1; + zfs_qat_encrypt_disable = 1; + } + + return (0); +} + +void +qat_fini(void) +{ + if (qat_ksp != NULL) { + kstat_delete(qat_ksp); + qat_ksp = NULL; + } + + qat_cy_fini(); + qat_dc_fini(); +} + +#endif diff --git a/module/os/macos/zfs/qat_compress.c b/module/os/macos/zfs/qat_compress.c new file mode 100644 index 0000000000..ad3ead3b16 --- /dev/null +++ b/module/os/macos/zfs/qat_compress.c @@ -0,0 +1,569 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#if defined(_KERNEL) && defined(HAVE_QAT) +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Max instances in a QAT device, each instance is a channel to submit + * jobs to QAT hardware, this is only for pre-allocating instance and + * session arrays; the actual number of instances are defined in the + * QAT driver's configuration file. + */ +#define QAT_DC_MAX_INSTANCES 48 + +/* + * ZLIB head and foot size + */ +#define ZLIB_HEAD_SZ 2 +#define ZLIB_FOOT_SZ 4 + +static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES]; +static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES]; +static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES]; +static Cpa16U num_inst = 0; +static Cpa32U inst_num = 0; +static boolean_t qat_dc_init_done = B_FALSE; +int zfs_qat_compress_disable = 0; + +boolean_t +qat_dc_use_accel(size_t s_len) +{ + return (!zfs_qat_compress_disable && + qat_dc_init_done && + s_len >= QAT_MIN_BUF_SIZE && + s_len <= QAT_MAX_BUF_SIZE); +} + +static void +qat_dc_callback(void *p_callback, CpaStatus status) +{ + if (p_callback != NULL) + complete((struct completion *)p_callback); +} + +static void +qat_dc_clean(void) +{ + Cpa16U buff_num = 0; + Cpa16U num_inter_buff_lists = 0; + + for (Cpa16U i = 0; i < num_inst; i++) { + cpaDcStopInstance(dc_inst_handles[i]); + QAT_PHYS_CONTIG_FREE(session_handles[i]); + /* free intermediate buffers */ + if (buffer_array[i] != NULL) { + cpaDcGetNumIntermediateBuffers( + dc_inst_handles[i], &num_inter_buff_lists); + for (buff_num = 0; buff_num < num_inter_buff_lists; + buff_num++) { + CpaBufferList *buffer_inter = + buffer_array[i][buff_num]; + if (buffer_inter->pBuffers) { + QAT_PHYS_CONTIG_FREE( + buffer_inter->pBuffers->pData); + QAT_PHYS_CONTIG_FREE( + buffer_inter->pBuffers); + } + QAT_PHYS_CONTIG_FREE( + buffer_inter->pPrivateMetaData); + QAT_PHYS_CONTIG_FREE(buffer_inter); + } + } + } + + num_inst = 0; + qat_dc_init_done = B_FALSE; +} + +int +qat_dc_init(void) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U sess_size = 0; + Cpa32U ctx_size = 0; + Cpa16U num_inter_buff_lists = 0; + Cpa16U buff_num = 0; + Cpa32U buff_meta_size = 0; + CpaDcSessionSetupData sd = {0}; + + if (qat_dc_init_done) + return (0); + + status = cpaDcGetNumInstances(&num_inst); + if (status != CPA_STATUS_SUCCESS) + return (-1); + + /* if the user has configured no QAT compression units just return */ + if (num_inst == 0) + return (0); + + if (num_inst > QAT_DC_MAX_INSTANCES) + num_inst = QAT_DC_MAX_INSTANCES; + + status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]); + if (status != CPA_STATUS_SUCCESS) + return (-1); + + for (Cpa16U i = 0; i < num_inst; i++) { + cpaDcSetAddressTranslation(dc_inst_handles[i], + (void*)virt_to_phys); + + status = cpaDcBufferListGetMetaSize(dc_inst_handles[i], + 1, &buff_meta_size); + + if (status == CPA_STATUS_SUCCESS) + status = cpaDcGetNumIntermediateBuffers( + dc_inst_handles[i], &num_inter_buff_lists); + + if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0) + status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i], + num_inter_buff_lists * + sizeof (CpaBufferList *)); + + for (buff_num = 0; buff_num < num_inter_buff_lists; + buff_num++) { + if (status == CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC( + &buffer_array[i][buff_num], + 
sizeof (CpaBufferList)); + + if (status == CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC( + &buffer_array[i][buff_num]-> + pPrivateMetaData, + buff_meta_size); + + if (status == CPA_STATUS_SUCCESS) + status = QAT_PHYS_CONTIG_ALLOC( + &buffer_array[i][buff_num]->pBuffers, + sizeof (CpaFlatBuffer)); + + if (status == CPA_STATUS_SUCCESS) { + /* + * implementation requires an intermediate + * buffer approximately twice the size of + * output buffer, which is 2x max buffer + * size here. + */ + status = QAT_PHYS_CONTIG_ALLOC( + &buffer_array[i][buff_num]->pBuffers-> + pData, 2 * QAT_MAX_BUF_SIZE); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + buffer_array[i][buff_num]->numBuffers = 1; + buffer_array[i][buff_num]->pBuffers-> + dataLenInBytes = 2 * QAT_MAX_BUF_SIZE; + } + } + + status = cpaDcStartInstance(dc_inst_handles[i], + num_inter_buff_lists, buffer_array[i]); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + sd.compLevel = CPA_DC_L1; + sd.compType = CPA_DC_DEFLATE; + sd.huffType = CPA_DC_HT_FULL_DYNAMIC; + sd.sessDirection = CPA_DC_DIR_COMBINED; + sd.sessState = CPA_DC_STATELESS; + sd.deflateWindowSize = 7; + sd.checksum = CPA_DC_ADLER32; + status = cpaDcGetSessionSize(dc_inst_handles[i], + &sd, &sess_size, &ctx_size); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size); + if (session_handles[i] == NULL) + goto fail; + + status = cpaDcInitSession(dc_inst_handles[i], + session_handles[i], + &sd, NULL, qat_dc_callback); + if (status != CPA_STATUS_SUCCESS) + goto fail; + } + + qat_dc_init_done = B_TRUE; + return (0); +fail: + qat_dc_clean(); + return (-1); +} + +void +qat_dc_fini(void) +{ + if (!qat_dc_init_done) + return; + + qat_dc_clean(); +} + +/* + * The "add" parameter is an additional buffer which is passed + * to QAT as a scratch buffer alongside the destination buffer + * in case the "compressed" data ends up being larger than the + * original source data. This is necessary to prevent QAT from + * generating buffer overflow warnings for incompressible data. + */ +static int +qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, + char *dst, int dst_len, char *add, int add_len, size_t *c_len) +{ + CpaInstanceHandle dc_inst_handle; + CpaDcSessionHandle session_handle; + CpaBufferList *buf_list_src = NULL; + CpaBufferList *buf_list_dst = NULL; + CpaFlatBuffer *flat_buf_src = NULL; + CpaFlatBuffer *flat_buf_dst = NULL; + Cpa8U *buffer_meta_src = NULL; + Cpa8U *buffer_meta_dst = NULL; + Cpa32U buffer_meta_size = 0; + CpaDcRqResults dc_results; + CpaStatus status = CPA_STATUS_FAIL; + Cpa32U hdr_sz = 0; + Cpa32U compressed_sz; + Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2; + Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2; + Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2; + Cpa32U bytes_left; + Cpa32U dst_pages = 0; + Cpa32U adler32 = 0; + char *data; + struct page *page; + struct page **in_pages = NULL; + struct page **out_pages = NULL; + struct page **add_pages = NULL; + Cpa32U page_off = 0; + struct completion complete; + Cpa32U page_num = 0; + Cpa16U i; + + /* + * We increment num_src_buf and num_dst_buf by 2 to allow + * us to handle non page-aligned buffer addresses and buffers + * whose sizes are not divisible by PAGE_SIZE. 
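To make the bound explicit, the sketch below (a hypothetical helper using the same PAGE_SHIFT/PAGE_MASK conventions as this file, and assuming len > 0) counts the pages a buffer actually spans; the worst case adds one partial page at each end, hence the "+ 2".

```c
/*
 * Illustrative only: a buffer of len bytes starting at an arbitrary
 * offset within a page spans at most (len >> PAGE_SHIFT) + 2 pages.
 */
static inline uint32_t
example_pages_spanned(uintptr_t addr, uint32_t len)
{
	uintptr_t first = addr & PAGE_MASK;		/* page holding the start */
	uintptr_t last = (addr + len - 1) & PAGE_MASK;	/* page holding the end */

	return ((uint32_t)((last - first) >> PAGE_SHIFT) + 1);
}
```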
+ */ + Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) + + (num_src_buf * sizeof (CpaFlatBuffer)); + Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) + + ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer)); + + status = QAT_PHYS_CONTIG_ALLOC(&in_pages, + num_src_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + status = QAT_PHYS_CONTIG_ALLOC(&out_pages, + num_dst_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + status = QAT_PHYS_CONTIG_ALLOC(&add_pages, + num_add_buf * sizeof (struct page *)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; + dc_inst_handle = dc_inst_handles[i]; + session_handle = session_handles[i]; + + cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf, + &buffer_meta_size); + status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf, + &buffer_meta_size); + status = QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + /* build source buffer list */ + status = QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1); + + buf_list_src->pBuffers = flat_buf_src; /* always point to first one */ + + /* build destination buffer list */ + status = QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); + + buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */ + + buf_list_src->numBuffers = 0; + buf_list_src->pPrivateMetaData = buffer_meta_src; + bytes_left = src_len; + data = src; + page_num = 0; + while (bytes_left > 0) { + page_off = ((long)data & ~PAGE_MASK); + page = qat_mem_to_page(data); + in_pages[page_num] = page; + flat_buf_src->pData = kmap(page) + page_off; + flat_buf_src->dataLenInBytes = + min((long)PAGE_SIZE - page_off, (long)bytes_left); + + bytes_left -= flat_buf_src->dataLenInBytes; + data += flat_buf_src->dataLenInBytes; + flat_buf_src++; + buf_list_src->numBuffers++; + page_num++; + } + + buf_list_dst->numBuffers = 0; + buf_list_dst->pPrivateMetaData = buffer_meta_dst; + bytes_left = dst_len; + data = dst; + page_num = 0; + while (bytes_left > 0) { + page_off = ((long)data & ~PAGE_MASK); + page = qat_mem_to_page(data); + flat_buf_dst->pData = kmap(page) + page_off; + out_pages[page_num] = page; + flat_buf_dst->dataLenInBytes = + min((long)PAGE_SIZE - page_off, (long)bytes_left); + + bytes_left -= flat_buf_dst->dataLenInBytes; + data += flat_buf_dst->dataLenInBytes; + flat_buf_dst++; + buf_list_dst->numBuffers++; + page_num++; + dst_pages++; + } + + /* map additional scratch pages into the destination buffer list */ + bytes_left = add_len; + data = add; + page_num = 0; + while (bytes_left > 0) { + page_off = ((long)data & ~PAGE_MASK); + page = qat_mem_to_page(data); + flat_buf_dst->pData = kmap(page) + page_off; + add_pages[page_num] = page; + flat_buf_dst->dataLenInBytes = + min((long)PAGE_SIZE - page_off, (long)bytes_left); + + bytes_left -= flat_buf_dst->dataLenInBytes; + data += flat_buf_dst->dataLenInBytes; + flat_buf_dst++; + buf_list_dst->numBuffers++; + page_num++; + } + + init_completion(&complete); + + if (dir == QAT_COMPRESS) { + QAT_STAT_BUMP(comp_requests); + 
QAT_STAT_INCR(comp_total_in_bytes, src_len); + + cpaDcGenerateHeader(session_handle, + buf_list_dst->pBuffers, &hdr_sz); + buf_list_dst->pBuffers->pData += hdr_sz; + buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz; + status = cpaDcCompressData( + dc_inst_handle, session_handle, + buf_list_src, buf_list_dst, + &dc_results, CPA_DC_FLUSH_FINAL, + &complete); + if (status != CPA_STATUS_SUCCESS) { + goto fail; + } + + /* we now wait until the completion of the operation. */ + wait_for_completion(&complete); + + if (dc_results.status != CPA_STATUS_SUCCESS) { + status = CPA_STATUS_FAIL; + goto fail; + } + + compressed_sz = dc_results.produced; + if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) { + status = CPA_STATUS_INCOMPRESSIBLE; + goto fail; + } + + flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); + /* move to the last page */ + flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT; + + /* no space for gzip footer in the last page */ + if (((compressed_sz + hdr_sz) % PAGE_SIZE) + + ZLIB_FOOT_SZ > PAGE_SIZE) { + status = CPA_STATUS_INCOMPRESSIBLE; + goto fail; + } + + /* jump to the end of the buffer and append footer */ + flat_buf_dst->pData = + (char *)((unsigned long)flat_buf_dst->pData & PAGE_MASK) + + ((compressed_sz + hdr_sz) % PAGE_SIZE); + flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ; + + dc_results.produced = 0; + status = cpaDcGenerateFooter(session_handle, + flat_buf_dst, &dc_results); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + *c_len = compressed_sz + dc_results.produced + hdr_sz; + QAT_STAT_INCR(comp_total_out_bytes, *c_len); + } else { + ASSERT3U(dir, ==, QAT_DECOMPRESS); + QAT_STAT_BUMP(decomp_requests); + QAT_STAT_INCR(decomp_total_in_bytes, src_len); + + buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ; + buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ; + status = cpaDcDecompressData(dc_inst_handle, session_handle, + buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL, + &complete); + + if (CPA_STATUS_SUCCESS != status) { + status = CPA_STATUS_FAIL; + goto fail; + } + + /* we now wait until the completion of the operation. */ + wait_for_completion(&complete); + + if (dc_results.status != CPA_STATUS_SUCCESS) { + status = CPA_STATUS_FAIL; + goto fail; + } + + /* verify adler checksum */ + adler32 = *(Cpa32U *)(src + dc_results.consumed + ZLIB_HEAD_SZ); + if (adler32 != BSWAP_32(dc_results.checksum)) { + status = CPA_STATUS_FAIL; + goto fail; + } + *c_len = dc_results.produced; + QAT_STAT_INCR(decomp_total_out_bytes, *c_len); + } + +fail: + if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE) + QAT_STAT_BUMP(dc_fails); + + if (in_pages) { + for (page_num = 0; + page_num < buf_list_src->numBuffers; + page_num++) { + kunmap(in_pages[page_num]); + } + QAT_PHYS_CONTIG_FREE(in_pages); + } + + if (out_pages) { + for (page_num = 0; page_num < dst_pages; page_num++) { + kunmap(out_pages[page_num]); + } + QAT_PHYS_CONTIG_FREE(out_pages); + } + + if (add_pages) { + for (page_num = 0; + page_num < buf_list_dst->numBuffers - dst_pages; + page_num++) { + kunmap(add_pages[page_num]); + } + QAT_PHYS_CONTIG_FREE(add_pages); + } + + QAT_PHYS_CONTIG_FREE(buffer_meta_src); + QAT_PHYS_CONTIG_FREE(buffer_meta_dst); + QAT_PHYS_CONTIG_FREE(buf_list_src); + QAT_PHYS_CONTIG_FREE(buf_list_dst); + + return (status); +} + +/* + * Entry point for QAT accelerated compression / decompression. 
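qat_compress(), defined just below, is the public entry point; the expected caller pattern is to gate on qat_dc_use_accel() and fall back to software when the hardware path declines or fails. A hedged sketch of that pattern (the software fallback is named purely for illustration):

```c
/* Hypothetical software fallback, declared only to keep the sketch self-contained. */
extern size_t software_gzip_compress(char *src, char *dst, size_t s_len, size_t d_len);

/*
 * Hypothetical caller, not part of the patch: try QAT first for
 * eligible buffer sizes, otherwise compress in software.
 */
static size_t
example_gzip_compress(char *src, char *dst, size_t s_len, size_t d_len)
{
	size_t c_len;

	if (qat_dc_use_accel(s_len) &&
	    qat_compress(QAT_COMPRESS, src, s_len, dst, d_len, &c_len) ==
	    CPA_STATUS_SUCCESS)
		return (c_len);

	return (software_gzip_compress(src, dst, s_len, d_len));
}
```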
+ */ +int +qat_compress(qat_compress_dir_t dir, char *src, int src_len, + char *dst, int dst_len, size_t *c_len) +{ + int ret; + size_t add_len = 0; + void *add = NULL; + + if (dir == QAT_COMPRESS) { + add_len = dst_len; + add = zio_data_buf_alloc(add_len); + } + + ret = qat_compress_impl(dir, src, src_len, dst, + dst_len, add, add_len, c_len); + + if (dir == QAT_COMPRESS) + zio_data_buf_free(add, add_len); + + return (ret); +} + +static int +param_set_qat_compress(const char *val, zfs_kernel_param_t *kp) +{ + int ret; + int *pvalue = kp->arg; + ret = param_set_int(val, kp); + if (ret) + return (ret); + /* + * zfs_qat_compress_disable = 0: enable qat compress + * try to initialize qat instance if it has not been done + */ + if (*pvalue == 0 && !qat_dc_init_done) { + ret = qat_dc_init(); + if (ret != 0) { + zfs_qat_compress_disable = 1; + return (ret); + } + } + return (ret); +} + +module_param_call(zfs_qat_compress_disable, param_set_qat_compress, + param_get_int, &zfs_qat_compress_disable, 0644); +MODULE_PARM_DESC(zfs_qat_compress_disable, "Enable/Disable QAT compression"); + +#endif diff --git a/module/os/macos/zfs/qat_crypt.c b/module/os/macos/zfs/qat_crypt.c new file mode 100644 index 0000000000..4771b2f3be --- /dev/null +++ b/module/os/macos/zfs/qat_crypt.c @@ -0,0 +1,630 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * This file represents the QAT implementation of checksums and encryption. + * Internally, QAT shares the same cryptographic instances for both of these + * operations, so the code has been combined here. QAT data compression uses + * compression instances, so that code is separated into qat_compress.c + */ + +#if defined(_KERNEL) && defined(HAVE_QAT) +#include +#include +#include +#include +#include +#include +#include "lac/cpa_cy_im.h" +#include "lac/cpa_cy_common.h" +#include + +/* + * Max instances in a QAT device, each instance is a channel to submit + * jobs to QAT hardware, this is only for pre-allocating instances + * and session arrays; the actual number of instances are defined in + * the QAT driver's configure file. 
+ */ +#define QAT_CRYPT_MAX_INSTANCES 48 + +#define MAX_PAGE_NUM 1024 + +static Cpa32U inst_num = 0; +static Cpa16U num_inst = 0; +static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES]; +static boolean_t qat_cy_init_done = B_FALSE; +int zfs_qat_encrypt_disable = 0; +int zfs_qat_checksum_disable = 0; + +typedef struct cy_callback { + CpaBoolean verify_result; + struct completion complete; +} cy_callback_t; + +static void +symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation, + void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify) +{ + cy_callback_t *cb = p_callback; + + if (cb != NULL) { + /* indicate that the function has been called */ + cb->verify_result = verify; + complete(&cb->complete); + } +} + +boolean_t +qat_crypt_use_accel(size_t s_len) +{ + return (!zfs_qat_encrypt_disable && + qat_cy_init_done && + s_len >= QAT_MIN_BUF_SIZE && + s_len <= QAT_MAX_BUF_SIZE); +} + +boolean_t +qat_checksum_use_accel(size_t s_len) +{ + return (!zfs_qat_checksum_disable && + qat_cy_init_done && + s_len >= QAT_MIN_BUF_SIZE && + s_len <= QAT_MAX_BUF_SIZE); +} + +void +qat_cy_clean(void) +{ + for (Cpa16U i = 0; i < num_inst; i++) + cpaCyStopInstance(cy_inst_handles[i]); + + num_inst = 0; + qat_cy_init_done = B_FALSE; +} + +int +qat_cy_init(void) +{ + CpaStatus status = CPA_STATUS_FAIL; + + if (qat_cy_init_done) + return (0); + + status = cpaCyGetNumInstances(&num_inst); + if (status != CPA_STATUS_SUCCESS) + return (-1); + + /* if the user has configured no QAT encryption units just return */ + if (num_inst == 0) + return (0); + + if (num_inst > QAT_CRYPT_MAX_INSTANCES) + num_inst = QAT_CRYPT_MAX_INSTANCES; + + status = cpaCyGetInstances(num_inst, &cy_inst_handles[0]); + if (status != CPA_STATUS_SUCCESS) + return (-1); + + for (Cpa16U i = 0; i < num_inst; i++) { + status = cpaCySetAddressTranslation(cy_inst_handles[i], + (void *)virt_to_phys); + if (status != CPA_STATUS_SUCCESS) + goto error; + + status = cpaCyStartInstance(cy_inst_handles[i]); + if (status != CPA_STATUS_SUCCESS) + goto error; + } + + qat_cy_init_done = B_TRUE; + return (0); + +error: + qat_cy_clean(); + return (-1); +} + +void +qat_cy_fini(void) +{ + if (!qat_cy_init_done) + return; + + qat_cy_clean(); +} + +static CpaStatus +qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle, + CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key, + Cpa64U crypt, Cpa32U aad_len) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U ctx_size; + Cpa32U ciper_algorithm; + Cpa32U hash_algorithm; + CpaCySymSessionSetupData sd = { 0 }; + + if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM) { + return (CPA_STATUS_FAIL); + } else { + ciper_algorithm = CPA_CY_SYM_CIPHER_AES_GCM; + hash_algorithm = CPA_CY_SYM_HASH_AES_GCM; + } + + sd.cipherSetupData.cipherAlgorithm = ciper_algorithm; + sd.cipherSetupData.pCipherKey = key->ck_data; + sd.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8; + sd.hashSetupData.hashAlgorithm = hash_algorithm; + sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; + sd.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN; + sd.hashSetupData.authModeSetupData.aadLenInBytes = aad_len; + sd.sessionPriority = CPA_CY_PRIORITY_NORMAL; + sd.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING; + sd.digestIsAppended = CPA_FALSE; + sd.verifyDigest = CPA_FALSE; + + if (dir == QAT_ENCRYPT) { + sd.cipherSetupData.cipherDirection = + CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; + sd.algChainOrder = + CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER; + } else { + ASSERT3U(dir, 
==, QAT_DECRYPT); + sd.cipherSetupData.cipherDirection = + CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT; + sd.algChainOrder = + CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH; + } + + status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size); + if (status != CPA_STATUS_SUCCESS) + return (status); + + status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size); + if (status != CPA_STATUS_SUCCESS) + return (status); + + status = cpaCySymInitSession(inst_handle, symcallback, &sd, + *cy_session_ctx); + if (status != CPA_STATUS_SUCCESS) { + QAT_PHYS_CONTIG_FREE(*cy_session_ctx); + return (status); + } + + return (CPA_STATUS_SUCCESS); +} + +static CpaStatus +qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle, + CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U ctx_size; + Cpa32U hash_algorithm; + CpaCySymSessionSetupData sd = { 0 }; + + /* + * ZFS's SHA512 checksum is actually SHA512/256, which uses + * a different IV from standard SHA512. QAT does not support + * SHA512/256, so we can only support SHA256. + */ + if (cksum == ZIO_CHECKSUM_SHA256) + hash_algorithm = CPA_CY_SYM_HASH_SHA256; + else + return (CPA_STATUS_FAIL); + + sd.sessionPriority = CPA_CY_PRIORITY_NORMAL; + sd.symOperation = CPA_CY_SYM_OP_HASH; + sd.hashSetupData.hashAlgorithm = hash_algorithm; + sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN; + sd.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t); + sd.digestIsAppended = CPA_FALSE; + sd.verifyDigest = CPA_FALSE; + + status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size); + if (status != CPA_STATUS_SUCCESS) + return (status); + + status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size); + if (status != CPA_STATUS_SUCCESS) + return (status); + + status = cpaCySymInitSession(inst_handle, symcallback, &sd, + *cy_session_ctx); + if (status != CPA_STATUS_SUCCESS) { + QAT_PHYS_CONTIG_FREE(*cy_session_ctx); + return (status); + } + + return (CPA_STATUS_SUCCESS); +} + +static CpaStatus +qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs, + CpaBufferList *src, CpaBufferList *dst) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U meta_size = 0; + + status = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_size); + if (status != CPA_STATUS_SUCCESS) + return (status); + + status = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_size); + if (status != CPA_STATUS_SUCCESS) + goto error; + + if (src != dst) { + status = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData, + meta_size); + if (status != CPA_STATUS_SUCCESS) + goto error; + } + + return (CPA_STATUS_SUCCESS); + +error: + QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData); + if (src != dst) + QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData); + + return (status); +} + +int +qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, + uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf, + crypto_key_t *key, uint64_t crypt, uint32_t enc_len) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa16U i; + CpaInstanceHandle cy_inst_handle; + Cpa16U nr_bufs = (enc_len >> PAGE_SHIFT) + 2; + Cpa32U bytes_left = 0; + Cpa8S *data = NULL; + CpaCySymSessionCtx *cy_session_ctx = NULL; + cy_callback_t cb; + CpaCySymOpData op_data = { 0 }; + CpaBufferList src_buffer_list = { 0 }; + CpaBufferList dst_buffer_list = { 0 }; + CpaFlatBuffer *flat_src_buf_array = NULL; + CpaFlatBuffer *flat_src_buf = NULL; + CpaFlatBuffer *flat_dst_buf_array = NULL; + CpaFlatBuffer *flat_dst_buf = NULL; + struct page *in_pages[MAX_PAGE_NUM]; + struct 
page *out_pages[MAX_PAGE_NUM]; + Cpa32U in_page_num = 0; + Cpa32U out_page_num = 0; + Cpa32U in_page_off = 0; + Cpa32U out_page_off = 0; + + if (dir == QAT_ENCRYPT) { + QAT_STAT_BUMP(encrypt_requests); + QAT_STAT_INCR(encrypt_total_in_bytes, enc_len); + } else { + QAT_STAT_BUMP(decrypt_requests); + QAT_STAT_INCR(decrypt_total_in_bytes, enc_len); + } + + i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; + cy_inst_handle = cy_inst_handles[i]; + + status = qat_init_crypt_session_ctx(dir, cy_inst_handle, + &cy_session_ctx, key, crypt, aad_len); + if (status != CPA_STATUS_SUCCESS) { + /* don't count CCM as a failure since it's not supported */ + if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM) + QAT_STAT_BUMP(crypt_fails); + return (status); + } + + /* + * We increment nr_bufs by 2 to allow us to handle non + * page-aligned buffer addresses and buffers whose sizes + * are not divisible by PAGE_SIZE. + */ + status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs, + &src_buffer_list, &dst_buffer_list); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array, + nr_bufs * sizeof (CpaFlatBuffer)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array, + nr_bufs * sizeof (CpaFlatBuffer)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + status = QAT_PHYS_CONTIG_ALLOC(&op_data.pDigestResult, + ZIO_DATA_MAC_LEN); + if (status != CPA_STATUS_SUCCESS) + goto fail; + status = QAT_PHYS_CONTIG_ALLOC(&op_data.pIv, + ZIO_DATA_IV_LEN); + if (status != CPA_STATUS_SUCCESS) + goto fail; + if (aad_len > 0) { + status = QAT_PHYS_CONTIG_ALLOC(&op_data.pAdditionalAuthData, + aad_len); + if (status != CPA_STATUS_SUCCESS) + goto fail; + bcopy(aad_buf, op_data.pAdditionalAuthData, aad_len); + } + + bytes_left = enc_len; + data = src_buf; + flat_src_buf = flat_src_buf_array; + while (bytes_left > 0) { + in_page_off = ((long)data & ~PAGE_MASK); + in_pages[in_page_num] = qat_mem_to_page(data); + flat_src_buf->pData = kmap(in_pages[in_page_num]) + in_page_off; + flat_src_buf->dataLenInBytes = + min((long)PAGE_SIZE - in_page_off, (long)bytes_left); + data += flat_src_buf->dataLenInBytes; + bytes_left -= flat_src_buf->dataLenInBytes; + flat_src_buf++; + in_page_num++; + } + src_buffer_list.pBuffers = flat_src_buf_array; + src_buffer_list.numBuffers = in_page_num; + + bytes_left = enc_len; + data = dst_buf; + flat_dst_buf = flat_dst_buf_array; + while (bytes_left > 0) { + out_page_off = ((long)data & ~PAGE_MASK); + out_pages[out_page_num] = qat_mem_to_page(data); + flat_dst_buf->pData = kmap(out_pages[out_page_num]) + + out_page_off; + flat_dst_buf->dataLenInBytes = + min((long)PAGE_SIZE - out_page_off, (long)bytes_left); + data += flat_dst_buf->dataLenInBytes; + bytes_left -= flat_dst_buf->dataLenInBytes; + flat_dst_buf++; + out_page_num++; + } + dst_buffer_list.pBuffers = flat_dst_buf_array; + dst_buffer_list.numBuffers = out_page_num; + + op_data.sessionCtx = cy_session_ctx; + op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL; + op_data.cryptoStartSrcOffsetInBytes = 0; + op_data.messageLenToCipherInBytes = 0; + op_data.hashStartSrcOffsetInBytes = 0; + op_data.messageLenToHashInBytes = 0; + op_data.messageLenToCipherInBytes = enc_len; + op_data.ivLenInBytes = ZIO_DATA_IV_LEN; + bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN); + /* if dir is QAT_DECRYPT, copy digest_buf to pDigestResult */ + if (dir == QAT_DECRYPT) + bcopy(digest_buf, op_data.pDigestResult, ZIO_DATA_MAC_LEN); + + cb.verify_result = CPA_FALSE; + 
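/* symcallback() records the verify result and signals this completion */ +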
init_completion(&cb.complete); + status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data, + &src_buffer_list, &dst_buffer_list, NULL); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + /* we now wait until the completion of the operation. */ + wait_for_completion(&cb.complete); + + if (cb.verify_result == CPA_FALSE) { + status = CPA_STATUS_FAIL; + goto fail; + } + + if (dir == QAT_ENCRYPT) { + /* if dir is QAT_ENCRYPT, save pDigestResult to digest_buf */ + bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); + QAT_STAT_INCR(encrypt_total_out_bytes, enc_len); + } else { + QAT_STAT_INCR(decrypt_total_out_bytes, enc_len); + } + +fail: + if (status != CPA_STATUS_SUCCESS) + QAT_STAT_BUMP(crypt_fails); + + for (i = 0; i < in_page_num; i++) + kunmap(in_pages[i]); + for (i = 0; i < out_page_num; i++) + kunmap(out_pages[i]); + + cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx); + if (aad_len > 0) + QAT_PHYS_CONTIG_FREE(op_data.pAdditionalAuthData); + QAT_PHYS_CONTIG_FREE(op_data.pIv); + QAT_PHYS_CONTIG_FREE(op_data.pDigestResult); + QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData); + QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData); + QAT_PHYS_CONTIG_FREE(cy_session_ctx); + QAT_PHYS_CONTIG_FREE(flat_src_buf_array); + QAT_PHYS_CONTIG_FREE(flat_dst_buf_array); + + return (status); +} + +int +qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) +{ + CpaStatus status; + Cpa16U i; + CpaInstanceHandle cy_inst_handle; + Cpa16U nr_bufs = (size >> PAGE_SHIFT) + 2; + Cpa32U bytes_left = 0; + Cpa8S *data = NULL; + CpaCySymSessionCtx *cy_session_ctx = NULL; + cy_callback_t cb; + Cpa8U *digest_buffer = NULL; + CpaCySymOpData op_data = { 0 }; + CpaBufferList src_buffer_list = { 0 }; + CpaFlatBuffer *flat_src_buf_array = NULL; + CpaFlatBuffer *flat_src_buf = NULL; + struct page *in_pages[MAX_PAGE_NUM]; + Cpa32U page_num = 0; + Cpa32U page_off = 0; + + QAT_STAT_BUMP(cksum_requests); + QAT_STAT_INCR(cksum_total_in_bytes, size); + + i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; + cy_inst_handle = cy_inst_handles[i]; + + status = qat_init_checksum_session_ctx(cy_inst_handle, + &cy_session_ctx, cksum); + if (status != CPA_STATUS_SUCCESS) { + /* don't count unsupported checksums as a failure */ + if (cksum == ZIO_CHECKSUM_SHA256 || + cksum == ZIO_CHECKSUM_SHA512) + QAT_STAT_BUMP(cksum_fails); + return (status); + } + + /* + * We increment nr_bufs by 2 to allow us to handle non + * page-aligned buffer addresses and buffers whose sizes + * are not divisible by PAGE_SIZE. 
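+ * For example, with 4 KiB pages, a 16 KiB buffer that starts mid-page
+ * spans five pages, and nr_bufs = (16384 >> PAGE_SHIFT) + 2 = 6 covers it.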
+ */ + status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs, + &src_buffer_list, &src_buffer_list); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array, + nr_bufs * sizeof (CpaFlatBuffer)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + status = QAT_PHYS_CONTIG_ALLOC(&digest_buffer, + sizeof (zio_cksum_t)); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + bytes_left = size; + data = buf; + flat_src_buf = flat_src_buf_array; + while (bytes_left > 0) { + page_off = ((long)data & ~PAGE_MASK); + in_pages[page_num] = qat_mem_to_page(data); + flat_src_buf->pData = kmap(in_pages[page_num]) + page_off; + flat_src_buf->dataLenInBytes = + min((long)PAGE_SIZE - page_off, (long)bytes_left); + data += flat_src_buf->dataLenInBytes; + bytes_left -= flat_src_buf->dataLenInBytes; + flat_src_buf++; + page_num++; + } + src_buffer_list.pBuffers = flat_src_buf_array; + src_buffer_list.numBuffers = page_num; + + op_data.sessionCtx = cy_session_ctx; + op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL; + op_data.hashStartSrcOffsetInBytes = 0; + op_data.messageLenToHashInBytes = size; + op_data.pDigestResult = digest_buffer; + + cb.verify_result = CPA_FALSE; + init_completion(&cb.complete); + status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data, + &src_buffer_list, &src_buffer_list, NULL); + if (status != CPA_STATUS_SUCCESS) + goto fail; + + /* we now wait until the completion of the operation. */ + wait_for_completion(&cb.complete); + + if (cb.verify_result == CPA_FALSE) { + status = CPA_STATUS_FAIL; + goto fail; + } + + bcopy(digest_buffer, zcp, sizeof (zio_cksum_t)); + +fail: + if (status != CPA_STATUS_SUCCESS) + QAT_STAT_BUMP(cksum_fails); + + for (i = 0; i < page_num; i++) + kunmap(in_pages[i]); + + cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx); + QAT_PHYS_CONTIG_FREE(digest_buffer); + QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData); + QAT_PHYS_CONTIG_FREE(cy_session_ctx); + QAT_PHYS_CONTIG_FREE(flat_src_buf_array); + + return (status); +} + +static int +param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp) +{ + int ret; + int *pvalue = kp->arg; + ret = param_set_int(val, kp); + if (ret) + return (ret); + /* + * zfs_qat_encrypt_disable = 0: enable qat encrypt + * try to initialize qat instance if it has not been done + */ + if (*pvalue == 0 && !qat_cy_init_done) { + ret = qat_cy_init(); + if (ret != 0) { + zfs_qat_encrypt_disable = 1; + return (ret); + } + } + return (ret); +} + +static int +param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp) +{ + int ret; + int *pvalue = kp->arg; + ret = param_set_int(val, kp); + if (ret) + return (ret); + /* + * set_checksum_param_ops = 0: enable qat checksum + * try to initialize qat instance if it has not been done + */ + if (*pvalue == 0 && !qat_cy_init_done) { + ret = qat_cy_init(); + if (ret != 0) { + zfs_qat_checksum_disable = 1; + return (ret); + } + } + return (ret); +} + +module_param_call(zfs_qat_encrypt_disable, param_set_qat_encrypt, + param_get_int, &zfs_qat_encrypt_disable, 0644); +MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Enable/Disable QAT encryption"); + +module_param_call(zfs_qat_checksum_disable, param_set_qat_checksum, + param_get_int, &zfs_qat_checksum_disable, 0644); +MODULE_PARM_DESC(zfs_qat_checksum_disable, "Enable/Disable QAT checksumming"); + +#endif diff --git a/module/os/macos/zfs/spa_misc_os.c b/module/os/macos/zfs/spa_misc_os.c new file mode 100644 index 0000000000..aa546edd1b --- /dev/null +++ b/module/os/macos/zfs/spa_misc_os.c @@ -0,0 
+1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2013 Saso Kiselkov. All rights reserved. + * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_prop.h" + +const char * +spa_history_zone(void) +{ + return ("macos"); +} + +void +spa_create_os(void *arg) +{ + spa_t *spa = (spa_t *)arg; + int haslock = 0; + int error; + + haslock = mutex_owned(&spa_namespace_lock); + + /* Increase open refcount */ + spa_open_ref(spa, FTAG); + + if (haslock) { + mutex_exit(&spa_namespace_lock); + } + + /* Create IOKit pool proxy */ + if ((error = spa_iokit_pool_proxy_create(spa)) != 0) { + printf("%s spa_iokit_pool_proxy_create error %d\n", + __func__, error); + /* spa_create succeeded, ignore proxy error */ + } + + /* Cache vdev info, needs open ref above, and pool proxy */ + + if (error == 0 && (error = zfs_boot_update_bootinfo(spa)) != 0) { + printf("%s update_bootinfo error %d\n", __func__, error); + /* create succeeded, ignore error from bootinfo */ + } + + /* Drop open refcount */ + if (haslock) { + mutex_enter(&spa_namespace_lock); + } + + spa_close(spa, FTAG); +} + +void +spa_export_os(void *arg) +{ + spa_t *spa = (spa_t *)arg; + + /* Remove IOKit pool proxy */ + spa_iokit_pool_proxy_destroy(spa); +} + +void +spa_activate_os(void *arg) +{ + /* spa_t *spa = (spa_t *)arg; */ + /* Lock kext in kernel while mounted */ + OSKextRetainKextWithLoadTag(OSKextGetCurrentLoadTag()); +} + +void +spa_deactivate_os(void *arg) +{ + /* spa_t *spa = (spa_t *)arg; */ + OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag()); +} diff --git a/module/os/macos/zfs/spa_stats.c b/module/os/macos/zfs/spa_stats.c new file mode 100644 index 0000000000..cb7c88f441 --- /dev/null +++ b/module/os/macos/zfs/spa_stats.c @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include + +void +spa_stats_init(spa_t *spa) +{ + +} + +void +spa_stats_destroy(spa_t *spa) +{ + +} + +void +spa_iostats_trim_add(spa_t *spa, trim_type_t type, + uint64_t extents_written, uint64_t bytes_written, + uint64_t extents_skipped, uint64_t bytes_skipped, + uint64_t extents_failed, uint64_t bytes_failed) +{ +} + +void +spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) +{ +} + +void +spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) +{ + +} +/* + * Set txg state completion time and increment current state. + */ +int +spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, + hrtime_t completed_time) +{ + return (0); +} + +txg_stat_t * +spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp) +{ + return (NULL); +} + +void +spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts) +{ + +} + +void +spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) +{ + +} + +void +spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, + uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id, + int error) +{ + +} + +int +spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error, + hrtime_t duration) +{ + return (0); +} + +int +spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id) +{ + return (0); +} diff --git a/module/os/macos/zfs/trace.c b/module/os/macos/zfs/trace.c new file mode 100644 index 0000000000..0c9990e854 --- /dev/null +++ b/module/os/macos/zfs/trace.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Each Linux tracepoints subsystem must define CREATE_TRACE_POINTS in one + * (and only one) C file, so this dummy file exists for that purpose. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/module/os/macos/zfs/vdev_disk.c b/module/os/macos/zfs/vdev_disk.c new file mode 100644 index 0000000000..ef24f9ab6a --- /dev/null +++ b/module/os/macos/zfs/vdev_disk.c @@ -0,0 +1,786 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Based on Apple MacZFS source code + * Copyright (c) 2014,2016 by Jorgen Lundman. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Virtual device vector for disks. + */ + +/* XXX leave extern if declared elsewhere - originally was in zfs_ioctl.c */ +ldi_ident_t zfs_li; + +static void vdev_disk_close(vdev_t *); + +typedef struct vdev_disk_ldi_cb { + list_node_t lcb_next; + ldi_callback_id_t lcb_id; +} vdev_disk_ldi_cb_t; + +static void +vdev_disk_alloc(vdev_t *vd) +{ + vdev_disk_t *dvd; + + dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + + /* + * Create the LDI event callback list. + */ + list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t), + offsetof(vdev_disk_ldi_cb_t, lcb_next)); +} + +static void +vdev_disk_free(vdev_t *vd) +{ + vdev_disk_t *dvd = vd->vdev_tsd; + vdev_disk_ldi_cb_t *lcb; + + if (dvd == NULL) + return; + + /* + * We have already closed the LDI handle. Clean up the LDI event + * callbacks and free vd->vdev_tsd. + */ + while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) { + list_remove(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_remove_callbacks(lcb->lcb_id); + kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t)); + } + list_destroy(&dvd->vd_ldi_cbs); + kmem_free(dvd, sizeof (vdev_disk_t)); + vd->vdev_tsd = NULL; +} + +static int +vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg, + void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + vdev_disk_t *dvd = vd->vdev_tsd; + + /* + * Ignore events other than offline. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) + return (LDI_EV_SUCCESS); + + /* + * All LDI handles must be closed for the state change to succeed, so + * call on vdev_disk_close() to do this. + * + * We inform vdev_disk_close that it is being called from offline + * notify context so it will defer cleanup of LDI event callbacks and + * freeing of vd->vdev_tsd to the offline finalize or a reopen. + */ + dvd->vd_ldi_offline = B_TRUE; + vdev_disk_close(vd); + + /* + * Now that the device is closed, request that the spa_async_thread + * mark the device as REMOVED and notify FMA of the removal. + */ + zfs_post_remove(vd->vdev_spa, vd); + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); + + return (LDI_EV_SUCCESS); +} + +/* ARGSUSED */ +static void +vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie, + int ldi_result, void *arg, void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + + /* + * Ignore events other than offline. 
+ */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) + return; + + /* + * We have already closed the LDI handle in notify. + * Clean up the LDI event callbacks and free vd->vdev_tsd. + */ + vdev_disk_free(vd); + /* + * Request that the vdev be reopened if the offline state change was + * unsuccessful. + */ + if (ldi_result != LDI_EV_SUCCESS) { + vd->vdev_probe_wanted = B_TRUE; + spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE); + } +} + +static ldi_ev_callback_t vdev_disk_off_callb = { + .cb_vers = LDI_EV_CB_VERS, + .cb_notify = vdev_disk_off_notify, + .cb_finalize = vdev_disk_off_finalize +}; + +/* + * We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when + * even a fallback to DKIOCGMEDIAINFO fails. + */ +#ifdef DEBUG +#define VDEV_DEBUG(...) cmn_err(CE_NOTE, __VA_ARGS__) +#else +#define VDEV_DEBUG(...) /* Nothing... */ +#endif + +static int +vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, + uint64_t *ashift) +{ + spa_t *spa = vd->vdev_spa; + vdev_disk_t *dvd = vd->vdev_tsd; + ldi_ev_cookie_t ecookie; + vdev_disk_ldi_cb_t *lcb; + union { + struct dk_minfo_ext ude; + struct dk_minfo ud; + } dks; + struct dk_minfo_ext *dkmext = &dks.ude; + struct dk_minfo *dkm = &dks.ud; + int error; + uint64_t capacity = 0, blksz = 0, pbsize; + int isssd; + + /* + * We must have a pathname, and it must be absolute. + */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (SET_ERROR(EINVAL)); + } + + /* + * Reopen the device if it's not currently open. Otherwise, + * just update the physical size of the device. + */ + if (dvd != NULL) { + if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) { + /* + * If we are opening a device in its offline notify + * context, the LDI handle was just closed. Clean + * up the LDI event callbacks and free vd->vdev_tsd. + */ + vdev_disk_free(vd); + } else { + ASSERT(vd->vdev_reopening); + goto skip_open; + } + } + + /* + * Create vd->vdev_tsd. + */ + vdev_disk_alloc(vd); + dvd = vd->vdev_tsd; + + /* + * When opening a disk device, we want to preserve the user's original + * intent. We always want to open the device by the path the user gave + * us, even if it is one of multiple paths to the same device. But we + * also want to be able to survive disks being removed/recabled. + * Therefore the sequence of opening devices is: + * + * 1. Try opening the device by path. For legacy pools without the + * 'whole_disk' property, attempt to fix the path by appending 's0'. + * + * 2. If the devid of the device matches the stored value, return + * success. + * + * 3. Otherwise, the device may have moved. Try opening the device + * by the devid instead. + */ + + error = EINVAL; /* presume failure */ + + if (vd->vdev_path != NULL) { + + /* + * If we have not yet opened the device, try to open it by the + * specified path. + */ + if (error != 0) { + error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), + kcred, &dvd->vd_lh, zfs_li); + } + + /* + * If we succeeded in opening the device, but 'vdev_wholedisk' + * is not yet set, then this must be a slice. + */ + if (error == 0 && vd->vdev_wholedisk == -1ULL) + vd->vdev_wholedisk = 0; + } + + /* + * If all else fails, then try opening by physical path (if available) + * or the logical path (if we failed due to the devid check). While not + * as reliable as the devid, this will give us something, and the higher + * level vdev validation will prevent us from opening the wrong device. 
+ */ + if (error) { + + /* + * Note that we don't support the legacy auto-wholedisk support + * as above. This hasn't been used in a very long time and we + * don't need to propagate its oddities to this edge condition. + */ + if (error && vd->vdev_path != NULL) + error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), + kcred, &dvd->vd_lh, zfs_li); + } + + if (error) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]", + error); + return (error); + } + + /* + * Register callbacks for the LDI offline event. + */ + if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) == + LDI_EV_SUCCESS) { + lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); + list_insert_tail(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, + &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id); + } + +skip_open: + /* + * Determine the actual size of the device. + */ + if (ldi_get_size(dvd->vd_lh, psize) != 0) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + vdev_dbgmsg(vd, "vdev_disk_open: failed to get size"); + return (SET_ERROR(EINVAL)); + } + + *max_psize = *psize; + + /* + * Determine the device's minimum transfer size. + * If the ioctl isn't supported, assume DEV_BSIZE. + */ + if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, + (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) { + capacity = dkmext->dki_capacity - 1; + blksz = dkmext->dki_lbsize; + pbsize = dkmext->dki_pbsize; + } else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, + (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) { + VDEV_DEBUG( + "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n", + vd->vdev_path); + capacity = dkm->dki_capacity - 1; + blksz = dkm->dki_lbsize; + pbsize = blksz; + } else { + VDEV_DEBUG("vdev_disk_open(\"%s\"): " + "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n", + vd->vdev_path, error); + pbsize = DEV_BSIZE; + } + + *ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1; + + if (vd->vdev_wholedisk == 1) { + int wce = 1; + + /* + * Since we own the whole disk, try to enable disk write + * caching. We ignore errors because it's OK if we can't do it. + */ + (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, + FKIOCTL, kcred, NULL); + } + + /* + * Clear the nowritecache bit, so that on a vdev_reopen() we will + * try again. + */ + vd->vdev_nowritecache = B_FALSE; + + /* Inform the ZIO pipeline that we are non-rotational */ + vd->vdev_nonrot = B_FALSE; + if (ldi_ioctl(dvd->vd_lh, DKIOCISSOLIDSTATE, (intptr_t)&isssd, + FKIOCTL, kcred, NULL) == 0) { + vd->vdev_nonrot = (isssd ? B_TRUE : B_FALSE); + } + + // Assume no TRIM + vd->vdev_has_trim = B_FALSE; + uint32_t features; + if (ldi_ioctl(dvd->vd_lh, DKIOCGETFEATURES, (intptr_t)&features, + FKIOCTL, kcred, NULL) == 0) { + if (features & DK_FEATURE_UNMAP) + vd->vdev_has_trim = B_TRUE; + } + + /* Set when device reports it supports secure TRIM. */ + // No secure trim in Apple yet. + vd->vdev_has_securetrim = B_FALSE; + + return (0); +} + +static void +vdev_disk_close(vdev_t *vd) +{ + vdev_disk_t *dvd = vd->vdev_tsd; + + if (vd->vdev_reopening || dvd == NULL) + return; + + if (dvd->vd_lh != NULL) { + (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred); + dvd->vd_lh = NULL; + } + + vd->vdev_delayed_close = B_FALSE; + /* + * If we closed the LDI handle due to an offline notify from LDI, + * don't free vd->vdev_tsd or unregister the callbacks here; + * the offline finalize callback or a reopen will take care of it. 
+ */ + if (dvd->vd_ldi_offline) + return; + + vdev_disk_free(vd); +} + +int +vdev_disk_physio(vdev_t *vd, caddr_t data, + size_t size, uint64_t offset, int flags, boolean_t isdump) +{ + vdev_disk_t *dvd = vd->vdev_tsd; + + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) + return (EIO); + + ASSERT(vd->vdev_ops == &vdev_disk_ops); + + return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags)); +} + +int +vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data, + size_t size, uint64_t offset, int flags) +{ + ldi_buf_t *bp; + int error = 0; + + if (vd_lh == NULL) + return (SET_ERROR(EINVAL)); + + ASSERT(flags & B_READ || flags & B_WRITE); + + bp = getrbuf(KM_SLEEP); + bp->b_flags = flags | B_BUSY | B_NOCACHE; + bp->b_bcount = size; + bp->b_un.b_addr = (void *)data; + bp->b_lblkno = lbtodb(offset); + bp->b_bufsize = size; + + error = ldi_strategy(vd_lh, bp); + ASSERT(error == 0); + + if ((error = biowait(bp)) == 0 && bp->b_resid != 0) + error = SET_ERROR(EIO); + freerbuf(bp); + + return (error); +} + +static void +vdev_disk_io_intr(ldi_buf_t *bp) +{ + vdev_buf_t *vb = (vdev_buf_t *)bp; + zio_t *zio = vb->vb_io; + + /* + * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO. + * Rather than teach the rest of the stack about other error + * possibilities (EFAULT, etc), we normalize the error value here. + */ + zio->io_error = (geterror(bp) != 0 ? EIO : 0); + + if (zio->io_error == 0 && bp->b_resid != 0) + zio->io_error = SET_ERROR(EIO); + + if (zio->io_type == ZIO_TYPE_READ) { + abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, + zio->io_size); + } else { + abd_return_buf(zio->io_abd, bp->b_un.b_addr, + zio->io_size); + } + + kmem_free(vb, sizeof (vdev_buf_t)); + + zio_delay_interrupt(zio); +} + +static void +vdev_disk_ioctl_free(zio_t *zio) +{ + kmem_free(zio->io_vsd, sizeof (struct dk_callback)); +} + +static const zio_vsd_ops_t vdev_disk_vsd_ops = { + vdev_disk_ioctl_free, + zio_vsd_default_cksum_report +}; + +static void +vdev_disk_ioctl_done(void *zio_arg, int error) +{ + zio_t *zio = zio_arg; + + zio->io_error = error; + + zio_interrupt(zio); +} + +static void +vdev_disk_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_disk_t *dvd = vd->vdev_tsd; + vdev_buf_t *vb; + struct dk_callback *dkc; + ldi_buf_t *bp = 0; + int flags, error = 0; + + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) { + zio->io_error = ENXIO; + zio_interrupt(zio); + return; + } + + switch (zio->io_type) { + case ZIO_TYPE_IOCTL: + + if (!vdev_readable(vd)) { + zio->io_error = SET_ERROR(ENXIO); + zio_interrupt(zio); + return; + } + + switch (zio->io_cmd) { + case DKIOCFLUSHWRITECACHE: + + if (zfs_nocacheflush) + break; + + if (vd->vdev_nowritecache) { + zio->io_error = SET_ERROR(ENOTSUP); + break; + } + + zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP); + zio->io_vsd_ops = &vdev_disk_vsd_ops; + + dkc->dkc_callback = vdev_disk_ioctl_done; + dkc->dkc_flag = FLUSH_VOLATILE; + dkc->dkc_cookie = zio; + + error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, + (uintptr_t)dkc, FKIOCTL, kcred, NULL); + + if (error == 0) { + /* + * The ioctl will be done asychronously, + * and will call vdev_disk_ioctl_done() + * upon completion. 
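+ * vdev_disk_ioctl_done() then records the error and finishes the
+ * zio via zio_interrupt().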
+ */ + return; + } + + zio->io_error = error; + + break; + + default: + zio->io_error = SET_ERROR(ENOTSUP); + } /* io_cmd */ + + zio_execute(zio); + return; + + case ZIO_TYPE_WRITE: + if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE) + flags = B_WRITE; + else + flags = B_WRITE | B_ASYNC; + break; + + case ZIO_TYPE_READ: + if (zio->io_priority == ZIO_PRIORITY_SYNC_READ) + flags = B_READ; + else + flags = B_READ | B_ASYNC; + break; + + case ZIO_TYPE_TRIM: + { + dkioc_free_list_ext_t dfle; + dfle.dfle_start = zio->io_offset; + dfle.dfle_length = zio->io_size; + zio->io_error = ldi_ioctl(dvd->vd_lh, DKIOCFREE, + (uintptr_t)&dfle, FKIOCTL, kcred, NULL); + zio_interrupt(zio); + return; + } + + default: + zio->io_error = SET_ERROR(ENOTSUP); + zio_execute(zio); + return; + } /* io_type */ + + ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + + /* Stop OSX from also caching our data */ + flags |= B_NOCACHE | B_PASSIVE; + + zio->io_target_timestamp = zio_handle_io_delay(zio); + + vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); + + vb->vb_io = zio; + bp = &vb->vb_buf; + + ASSERT(bp != NULL); + ASSERT(zio->io_abd != NULL); + ASSERT(zio->io_size != 0); + + bioinit(bp); + bp->b_flags = B_BUSY | flags; + bp->b_bcount = zio->io_size; + + if (zio->io_type == ZIO_TYPE_READ) { + ASSERT3S(zio->io_abd->abd_size, >=, zio->io_size); + bp->b_un.b_addr = + abd_borrow_buf(zio->io_abd, zio->io_size); + } else { + ASSERT3S(zio->io_abd->abd_size, >=, zio->io_size); + bp->b_un.b_addr = + abd_borrow_buf_copy(zio->io_abd, zio->io_size); + } + + bp->b_lblkno = lbtodb(zio->io_offset); + bp->b_bufsize = zio->io_size; + bp->b_iodone = (int (*)(struct ldi_buf *))vdev_disk_io_intr; + + error = ldi_strategy(dvd->vd_lh, bp); + if (error != 0) { + dprintf("%s error from ldi_strategy %d\n", __func__, error); + zio->io_error = EIO; + kmem_free(vb, sizeof (vdev_buf_t)); + zio_execute(zio); + // zio_interrupt(zio); + } +} + +static void +vdev_disk_io_done(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + + /* + * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if + * the device has been removed. If this is the case, then we trigger an + * asynchronous removal of the device. Otherwise, probe the device and + * make sure it's still accessible. + */ + if (zio->io_error == EIO && !vd->vdev_remove_wanted) { + vdev_disk_t *dvd = vd->vdev_tsd; + int state = DKIO_NONE; + + if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state, + FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) { + /* + * We post the resource as soon as possible, instead of + * when the async removal actually happens, because the + * DE is using this information to discard previous I/O + * errors. + */ + zfs_post_remove(zio->io_spa, vd); + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); + } else if (!vd->vdev_delayed_close) { + vd->vdev_delayed_close = B_TRUE; + } + } +} + +static void +vdev_disk_hold(vdev_t *vd) +{ + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + /* We must have a pathname, and it must be absolute. */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') + return; + + /* + * Only prefetch path and devid info if the device has + * never been opened. 
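+ * No prefetching is currently implemented here, so beyond the early
+ * return below this function is a no-op.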
+ */ + if (vd->vdev_tsd != NULL) + return; + +} + +static void +vdev_disk_rele(vdev_t *vd) +{ + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + /* XXX: Implement me as a vnode rele for the device */ +} + +vdev_ops_t vdev_disk_ops = { + vdev_disk_open, + vdev_disk_close, + vdev_default_asize, + vdev_disk_io_start, + vdev_disk_io_done, + NULL, + NULL, + vdev_disk_hold, + vdev_disk_rele, + NULL, + vdev_default_xlate, + VDEV_TYPE_DISK, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +/* + * Given the root disk device devid or pathname, read the label from + * the device, and construct a configuration nvlist. + */ +int +vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) +{ + ldi_handle_t vd_lh; + vdev_label_t *label; + uint64_t s, size; + int l; + int error = -1; + + /* + * Read the device label and build the nvlist. + */ + + /* Apple: Error will be -1 at this point, allowing open_by_name */ + error = -1; + vd_lh = 0; /* Dismiss compiler warning */ + + if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, + zfs_li))) + return (error); + + if (ldi_get_size(vd_lh, &s)) { + (void) ldi_close(vd_lh, FREAD, kcred); + return (SET_ERROR(EIO)); + } + + size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); + label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP); + + *config = NULL; + for (l = 0; l < VDEV_LABELS; l++) { + uint64_t offset, state, txg = 0; + + /* read vdev label */ + offset = vdev_label_offset(size, l, 0); + if (vdev_disk_ldi_physio(vd_lh, (caddr_t)label, + VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0) + continue; + + if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, + sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) { + *config = NULL; + continue; + } + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state >= POOL_STATE_DESTROYED) { + nvlist_free(*config); + *config = NULL; + continue; + } + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0) { + nvlist_free(*config); + *config = NULL; + continue; + } + + break; + } + + kmem_free(label, sizeof (vdev_label_t)); + (void) ldi_close(vd_lh, FREAD, kcred); + if (*config == NULL) + error = SET_ERROR(EIDRM); + + return (error); +} diff --git a/module/os/macos/zfs/vdev_file.c b/module/os/macos/zfs/vdev_file.c new file mode 100644 index 0000000000..ca04765f1b --- /dev/null +++ b/module/os/macos/zfs/vdev_file.c @@ -0,0 +1,322 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Virtual device vector for files. + */ + +static taskq_t *vdev_file_taskq; + +static void +vdev_file_hold(vdev_t *vd) +{ + ASSERT(vd->vdev_path != NULL); +} + +static void +vdev_file_rele(vdev_t *vd) +{ + ASSERT(vd->vdev_path != NULL); +} + +static mode_t +vdev_file_open_mode(spa_mode_t spa_mode) +{ + mode_t mode = 0; + + if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) { + mode = O_RDWR; + } else if (spa_mode & SPA_MODE_READ) { + mode = O_RDONLY; + } else if (spa_mode & SPA_MODE_WRITE) { + mode = O_WRONLY; + } + + return (mode | O_LARGEFILE); +} + +static int +vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, + uint64_t *ashift) +{ + vdev_file_t *vf; + zfs_file_t *fp; + zfs_file_attr_t zfa; + int error = 0; + + dprintf("vdev_file_open %p\n", vd->vdev_tsd); + + /* + * Rotational optimizations only make sense on block devices. + */ + vd->vdev_nonrot = B_TRUE; + + /* + * Allow TRIM on file based vdevs. This may not always be supported, + * since it depends on your kernel version and underlying filesystem + * type but it is always safe to attempt. + */ + vd->vdev_has_trim = B_TRUE; + + /* + * Disable secure TRIM on file based vdevs. There is no way to + * request this behavior from the underlying filesystem. + */ + vd->vdev_has_securetrim = B_FALSE; + + /* + * We must have a pathname, and it must be absolute. + */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { + vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + return (SET_ERROR(EINVAL)); + } + + /* + * Reopen the device if it's not currently open. Otherwise, + * just update the physical size of the device. + */ +#ifdef _KERNEL + if (vd->vdev_tsd != NULL) { + ASSERT(vd->vdev_reopening); + vf = vd->vdev_tsd; + goto skip_open; + } +#endif + + vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); + + /* + * We always open the files from the root of the global zone, even if + * we're in a local zone. If the user has gotten to this point, the + * administrator has already decided that the pool should be available + * to local zone users, so the underlying devices should be as well. + */ + ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); + + error = zfs_file_open(vd->vdev_path + 1, + vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp); + + if (error) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (error); + } + + vf->vf_file = fp; + + /* + * Make sure it's a regular file. + */ + if (zfs_file_getattr(fp, &zfa)) { + return (SET_ERROR(ENODEV)); + } + +skip_open: + /* + * Determine the physical size of the file. 
+ */ + error = zfs_file_getattr(vf->vf_file, &zfa); + + if (error) { + vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; + return (error); + } + + *max_psize = *psize = zfa.zfa_size; + *ashift = SPA_MINBLOCKSHIFT; + + return (0); +} + +static void +vdev_file_close(vdev_t *vd) +{ + vdev_file_t *vf = vd->vdev_tsd; + + if (vd->vdev_reopening || vf == NULL) + return; + + if (vf->vf_file != NULL) { + zfs_file_close(vf->vf_file); + } + + vd->vdev_delayed_close = B_FALSE; + kmem_free(vf, sizeof (vdev_file_t)); + vd->vdev_tsd = NULL; +} + +static void +vdev_file_io_strategy(void *arg) +{ + zio_t *zio = (zio_t *)arg; + vdev_t *vd = zio->io_vd; + vdev_file_t *vf = vd->vdev_tsd; + ssize_t resid; + loff_t off; + void *data; + ssize_t size; + int err; + + off = zio->io_offset; + size = zio->io_size; + resid = 0; + + if (zio->io_type == ZIO_TYPE_READ) { + data = + abd_borrow_buf(zio->io_abd, size); + err = zfs_file_pread(vf->vf_file, data, size, off, &resid); + abd_return_buf_copy(zio->io_abd, data, size); + } else { + data = + abd_borrow_buf_copy(zio->io_abd, size); + err = zfs_file_pwrite(vf->vf_file, data, size, off, &resid); + abd_return_buf(zio->io_abd, data, size); + } + + zio->io_error = (err != 0 ? EIO : 0); + + if (zio->io_error == 0 && resid != 0) + zio->io_error = SET_ERROR(ENOSPC); + + zio_delay_interrupt(zio); +} + +static void +vdev_file_io_start(zio_t *zio) +{ + vdev_t *vd = zio->io_vd; + vdev_file_t *vf = vd->vdev_tsd; + + if (zio->io_type == ZIO_TYPE_IOCTL) { + + if (!vdev_readable(vd)) { + zio->io_error = SET_ERROR(ENXIO); + zio_interrupt(zio); + return; + } + + switch (zio->io_cmd) { + case DKIOCFLUSHWRITECACHE: + zio->io_error = zfs_file_fsync(vf->vf_file, + O_SYNC|O_DSYNC); + break; + default: + zio->io_error = SET_ERROR(ENOTSUP); + } + + zio_execute(zio); + return; + } else if (zio->io_type == ZIO_TYPE_TRIM) { + int mode = 0; + + ASSERT3U(zio->io_size, !=, 0); + + /* XXX FreeBSD has no fallocate routine in file ops */ + zio->io_error = zfs_file_fallocate(vf->vf_file, + mode, zio->io_offset, zio->io_size); + zio_execute(zio); + return; + } + + ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + zio->io_target_timestamp = zio_handle_io_delay(zio); + + VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, zio, + TQ_SLEEP), !=, 0); +} + + +/* ARGSUSED */ +static void +vdev_file_io_done(zio_t *zio) +{ +} + +vdev_ops_t vdev_file_ops = { + vdev_file_open, + vdev_file_close, + vdev_default_asize, + vdev_file_io_start, + vdev_file_io_done, + NULL, + NULL, + vdev_file_hold, + vdev_file_rele, + NULL, + vdev_default_xlate, + VDEV_TYPE_FILE, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +void +vdev_file_init(void) +{ + vdev_file_taskq = taskq_create("vdev_file_taskq", 100, minclsyspri, + max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT); + + VERIFY(vdev_file_taskq); +} + +void +vdev_file_fini(void) +{ + taskq_destroy(vdev_file_taskq); +} + +/* + * From userland we access disks just like files. 
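+ * The userland vdev_disk_ops defined below therefore simply reuse the
+ * file vdev callbacks.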
+ */ +#ifndef _KERNEL + +vdev_ops_t vdev_disk_ops = { + vdev_file_open, + vdev_file_close, + vdev_default_asize, + vdev_file_io_start, + vdev_file_io_done, + NULL, + NULL, + vdev_file_hold, + vdev_file_rele, + NULL, + vdev_default_xlate, + VDEV_TYPE_DISK, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; + +#endif diff --git a/module/os/macos/zfs/zfs_acl.c b/module/os/macos/zfs/zfs_acl.c new file mode 100644 index 0000000000..0757a8e26a --- /dev/null +++ b/module/os/macos/zfs/zfs_acl.c @@ -0,0 +1,2983 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE +#define DENY ACE_ACCESS_DENIED_ACE_TYPE +#define MAX_ACE_TYPE ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE +#define MIN_ACE_TYPE ALLOW + +#define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP) +#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \ + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE) +#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \ + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) +#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \ + ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) + +#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \ + ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \ + ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \ + ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE) + +#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS) +#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \ + ACE_DELETE|ACE_DELETE_CHILD) +#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS) + +#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) + +#define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ + ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) + +#define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \ + ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE) + +#define RESTRICTED_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER) + +#define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\ + ZFS_ACL_PROTECTED) + +#define ZFS_ACL_WIDE_FLAGS 
(V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\ + ZFS_ACL_OBJ_ACE) + +#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH) + +#define IDMAP_WK_CREATOR_OWNER_UID 2147483648U + +static uint16_t +zfs_ace_v0_get_type(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_type); +} + +static uint16_t +zfs_ace_v0_get_flags(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_flags); +} + +static uint32_t +zfs_ace_v0_get_mask(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_access_mask); +} + +static uint64_t +zfs_ace_v0_get_who(void *acep) +{ + return (((zfs_oldace_t *)acep)->z_fuid); +} + +static void +zfs_ace_v0_set_type(void *acep, uint16_t type) +{ + ((zfs_oldace_t *)acep)->z_type = type; +} + +static void +zfs_ace_v0_set_flags(void *acep, uint16_t flags) +{ + ((zfs_oldace_t *)acep)->z_flags = flags; +} + +static void +zfs_ace_v0_set_mask(void *acep, uint32_t mask) +{ + ((zfs_oldace_t *)acep)->z_access_mask = mask; +} + +static void +zfs_ace_v0_set_who(void *acep, uint64_t who) +{ + ((zfs_oldace_t *)acep)->z_fuid = who; +} + +/*ARGSUSED*/ +static size_t +zfs_ace_v0_size(void *acep) +{ + return (sizeof (zfs_oldace_t)); +} + +static size_t +zfs_ace_v0_abstract_size(void) +{ + return (sizeof (zfs_oldace_t)); +} + +static int +zfs_ace_v0_mask_off(void) +{ + return (offsetof(zfs_oldace_t, z_access_mask)); +} + +/*ARGSUSED*/ +static int +zfs_ace_v0_data(void *acep, void **datap) +{ + *datap = NULL; + return (0); +} + +static acl_ops_t zfs_acl_v0_ops = { + zfs_ace_v0_get_mask, + zfs_ace_v0_set_mask, + zfs_ace_v0_get_flags, + zfs_ace_v0_set_flags, + zfs_ace_v0_get_type, + zfs_ace_v0_set_type, + zfs_ace_v0_get_who, + zfs_ace_v0_set_who, + zfs_ace_v0_size, + zfs_ace_v0_abstract_size, + zfs_ace_v0_mask_off, + zfs_ace_v0_data +}; + +static uint16_t +zfs_ace_fuid_get_type(void *acep) +{ + return (((zfs_ace_hdr_t *)acep)->z_type); +} + +static uint16_t +zfs_ace_fuid_get_flags(void *acep) +{ + return (((zfs_ace_hdr_t *)acep)->z_flags); +} + +static uint32_t +zfs_ace_fuid_get_mask(void *acep) +{ + return (((zfs_ace_hdr_t *)acep)->z_access_mask); +} + +static uint64_t +zfs_ace_fuid_get_who(void *args) +{ + uint16_t entry_type; + zfs_ace_t *acep = args; + + entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; + + if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE) + return (-1); + return (((zfs_ace_t *)acep)->z_fuid); +} + +static void +zfs_ace_fuid_set_type(void *acep, uint16_t type) +{ + ((zfs_ace_hdr_t *)acep)->z_type = type; +} + +static void +zfs_ace_fuid_set_flags(void *acep, uint16_t flags) +{ + ((zfs_ace_hdr_t *)acep)->z_flags = flags; +} + +static void +zfs_ace_fuid_set_mask(void *acep, uint32_t mask) +{ + ((zfs_ace_hdr_t *)acep)->z_access_mask = mask; +} + +static void +zfs_ace_fuid_set_who(void *arg, uint64_t who) +{ + zfs_ace_t *acep = arg; + + uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; + + if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE) + return; + acep->z_fuid = who; +} + +static size_t +zfs_ace_fuid_size(void *acep) +{ + zfs_ace_hdr_t *zacep = acep; + uint16_t entry_type; + + switch (zacep->z_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + return (sizeof (zfs_object_ace_t)); + case ALLOW: + case DENY: + entry_type = + (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS); + if (entry_type == ACE_OWNER || + entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE) + return 
(sizeof (zfs_ace_hdr_t)); + /*FALLTHROUGH*/ + default: + return (sizeof (zfs_ace_t)); + } +} + +static size_t +zfs_ace_fuid_abstract_size(void) +{ + return (sizeof (zfs_ace_hdr_t)); +} + +static int +zfs_ace_fuid_mask_off(void) +{ + return (offsetof(zfs_ace_hdr_t, z_access_mask)); +} + +static int +zfs_ace_fuid_data(void *acep, void **datap) +{ + zfs_ace_t *zacep = acep; + zfs_object_ace_t *zobjp; + + switch (zacep->z_hdr.z_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + zobjp = acep; + *datap = (caddr_t)zobjp + sizeof (zfs_ace_t); + return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t)); + default: + *datap = NULL; + return (0); + } +} + +static acl_ops_t zfs_acl_fuid_ops = { + zfs_ace_fuid_get_mask, + zfs_ace_fuid_set_mask, + zfs_ace_fuid_get_flags, + zfs_ace_fuid_set_flags, + zfs_ace_fuid_get_type, + zfs_ace_fuid_set_type, + zfs_ace_fuid_get_who, + zfs_ace_fuid_set_who, + zfs_ace_fuid_size, + zfs_ace_fuid_abstract_size, + zfs_ace_fuid_mask_off, + zfs_ace_fuid_data +}; + +/* + * The following three functions are provided for compatibility with + * older ZPL version in order to determine if the file use to have + * an external ACL and what version of ACL previously existed on the + * file. Would really be nice to not need this, sigh. + */ +uint64_t +zfs_external_acl(znode_t *zp) +{ + zfs_acl_phys_t acl_phys; + int error; + + if (zp->z_is_sa) + return (0); + + /* + * Need to deal with a potential + * race where zfs_sa_upgrade could cause + * z_isa_sa to change. + * + * If the lookup fails then the state of z_is_sa should have + * changed. + */ + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + &acl_phys, sizeof (acl_phys))) == 0) + return (acl_phys.z_acl_extern_obj); + else { + /* + * after upgrade the SA_ZPL_ZNODE_ACL should have been + * removed + */ + VERIFY(zp->z_is_sa && error == ENOENT); + return (0); + } +} + +/* + * Determine size of ACL in bytes + * + * This is more complicated than it should be since we have to deal + * with old external ACLs. + */ +static int +zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount, + zfs_acl_phys_t *aclphys) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + uint64_t acl_count; + int size; + int error; + + ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + if (zp->z_is_sa) { + if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), + &size)) != 0) + return (error); + *aclsize = size; + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs), + &acl_count, sizeof (acl_count))) != 0) + return (error); + *aclcount = acl_count; + } else { + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), + aclphys, sizeof (*aclphys))) != 0) + return (error); + + if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) { + *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size); + *aclcount = aclphys->z_acl_size; + } else { + *aclsize = aclphys->z_acl_size; + *aclcount = aclphys->z_acl_count; + } + } + return (0); +} + +int +zfs_znode_acl_version(znode_t *zp) +{ + zfs_acl_phys_t acl_phys; + + if (zp->z_is_sa) + return (ZFS_ACL_VERSION_FUID); + else { + int error; + + /* + * Need to deal with a potential + * race where zfs_sa_upgrade could cause + * z_isa_sa to change. + * + * If the lookup fails then the state of z_is_sa should have + * changed. 
+ */ + if ((error = sa_lookup(zp->z_sa_hdl, + SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + &acl_phys, sizeof (acl_phys))) == 0) + return (acl_phys.z_acl_version); + else { + /* + * After upgrade SA_ZPL_ZNODE_ACL should have + * been removed. + */ + VERIFY(zp->z_is_sa && error == ENOENT); + return (ZFS_ACL_VERSION_FUID); + } + } +} + +static int +zfs_acl_version(int version) +{ + if (version < ZPL_VERSION_FUID) + return (ZFS_ACL_VERSION_INITIAL); + else + return (ZFS_ACL_VERSION_FUID); +} + +static int +zfs_acl_version_zp(znode_t *zp) +{ + return (zfs_acl_version(zp->z_zfsvfs->z_version)); +} + +zfs_acl_t * +zfs_acl_alloc(int vers) +{ + zfs_acl_t *aclp; + + aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP); + list_create(&aclp->z_acl, sizeof (zfs_acl_node_t), + offsetof(zfs_acl_node_t, z_next)); + aclp->z_version = vers; + if (vers == ZFS_ACL_VERSION_FUID) + aclp->z_ops = &zfs_acl_fuid_ops; + else + aclp->z_ops = &zfs_acl_v0_ops; + return (aclp); +} + +zfs_acl_node_t * +zfs_acl_node_alloc(size_t bytes) +{ + zfs_acl_node_t *aclnode; + + aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); + if (bytes) { + aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); + aclnode->z_allocdata = aclnode->z_acldata; + aclnode->z_allocsize = bytes; + aclnode->z_size = bytes; + } + + return (aclnode); +} + +static void +zfs_acl_node_free(zfs_acl_node_t *aclnode) +{ + if (aclnode->z_allocsize) + kmem_free(aclnode->z_allocdata, aclnode->z_allocsize); + kmem_free(aclnode, sizeof (zfs_acl_node_t)); +} + +static void +zfs_acl_release_nodes(zfs_acl_t *aclp) +{ + zfs_acl_node_t *aclnode; + + while ((aclnode = list_head(&aclp->z_acl))) { + list_remove(&aclp->z_acl, aclnode); + zfs_acl_node_free(aclnode); + } + aclp->z_acl_count = 0; + aclp->z_acl_bytes = 0; +} + +void +zfs_acl_free(zfs_acl_t *aclp) +{ + zfs_acl_release_nodes(aclp); + list_destroy(&aclp->z_acl); + kmem_free(aclp, sizeof (zfs_acl_t)); +} + +static boolean_t +zfs_acl_valid_ace_type(uint_t type, uint_t flags) +{ + uint16_t entry_type; + + switch (type) { + case ALLOW: + case DENY: + case ACE_SYSTEM_AUDIT_ACE_TYPE: + case ACE_SYSTEM_ALARM_ACE_TYPE: + entry_type = flags & ACE_TYPE_FLAGS; + return (entry_type == ACE_OWNER || + entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE || entry_type == 0 || + entry_type == ACE_IDENTIFIER_GROUP); + default: + if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE) + return (B_TRUE); + } + return (B_FALSE); +} + +static boolean_t +zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) +{ + /* + * first check type of entry + */ + + if (!zfs_acl_valid_ace_type(type, iflags)) + return (B_FALSE); + + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + if (aclp->z_version < ZFS_ACL_VERSION_FUID) + return (B_FALSE); + aclp->z_hints |= ZFS_ACL_OBJ_ACE; + } + + /* + * next check inheritance level flags + */ + + if (S_ISDIR(obj_mode) && + (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) + aclp->z_hints |= ZFS_INHERIT_ACE; + + if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) { + if ((iflags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE)) == 0) + return (B_FALSE); + } + + return (B_TRUE); +} + +static void * +zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who, + uint32_t *access_mask, uint16_t *iflags, uint16_t *type) +{ + zfs_acl_node_t *aclnode; + + ASSERT(aclp); + + if (start == NULL) { + aclnode = list_head(&aclp->z_acl); + if 
(aclnode == NULL) + return (NULL); + + aclp->z_next_ace = aclnode->z_acldata; + aclp->z_curr_node = aclnode; + aclnode->z_ace_idx = 0; + } + + aclnode = aclp->z_curr_node; + + if (aclnode == NULL) + return (NULL); + + if (aclnode->z_ace_idx >= aclnode->z_ace_count) { + aclnode = list_next(&aclp->z_acl, aclnode); + if (aclnode == NULL) + return (NULL); + else { + aclp->z_curr_node = aclnode; + aclnode->z_ace_idx = 0; + aclp->z_next_ace = aclnode->z_acldata; + } + } + + if (aclnode->z_ace_idx < aclnode->z_ace_count) { + void *acep = aclp->z_next_ace; + size_t ace_size; + + /* + * Make sure we don't overstep our bounds + */ + ace_size = aclp->z_ops->ace_size(acep); + + if (((caddr_t)acep + ace_size) > + ((caddr_t)aclnode->z_acldata + aclnode->z_size)) { + return (NULL); + } + + *iflags = aclp->z_ops->ace_flags_get(acep); + *type = aclp->z_ops->ace_type_get(acep); + *access_mask = aclp->z_ops->ace_mask_get(acep); + *who = aclp->z_ops->ace_who_get(acep); + aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size; + aclnode->z_ace_idx++; + + return ((void *)acep); + } + return (NULL); +} + +/*ARGSUSED*/ +static uint64_t +zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt, + uint16_t *flags, uint16_t *type, uint32_t *mask) +{ + zfs_acl_t *aclp = datap; + zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie; + uint64_t who; + + acep = zfs_acl_next_ace(aclp, acep, &who, mask, + flags, type); + return ((uint64_t)(uintptr_t)acep); +} + +#if 0 // unused function +static zfs_acl_node_t * +zfs_acl_curr_node(zfs_acl_t *aclp) +{ + ASSERT(aclp->z_curr_node); + return (aclp->z_curr_node); +} +#endif + +/* + * Copy ACE to internal ZFS format. + * While processing the ACL each ACE will be validated for correctness. + * ACE FUIDs will be created later. + */ +int +zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *aclp, + void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size, + zfs_fuid_info_t **fuidp, cred_t *cr) +{ + int i; + uint16_t entry_type; + zfs_ace_t *aceptr = z_acl; + ace_t *acep = datap; + zfs_object_ace_t *zobjacep; + ace_object_t *aceobjp; + + for (i = 0; i != aclcnt; i++) { + aceptr->z_hdr.z_access_mask = acep->a_access_mask; + aceptr->z_hdr.z_flags = acep->a_flags; + aceptr->z_hdr.z_type = acep->a_type; + entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS; + if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP && + entry_type != ACE_EVERYONE) { + aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who, + cr, (entry_type == 0) ? 
+ ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp); + } + + /* + * Make sure ACE is valid + */ + if (zfs_ace_valid(obj_mode, aclp, aceptr->z_hdr.z_type, + aceptr->z_hdr.z_flags) != B_TRUE) + return (SET_ERROR(EINVAL)); + + switch (acep->a_type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + zobjacep = (zfs_object_ace_t *)aceptr; + aceobjp = (ace_object_t *)acep; + + bcopy(aceobjp->a_obj_type, zobjacep->z_object_type, + sizeof (aceobjp->a_obj_type)); + bcopy(aceobjp->a_inherit_obj_type, + zobjacep->z_inherit_type, + sizeof (aceobjp->a_inherit_obj_type)); + acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t)); + break; + default: + acep = (ace_t *)((caddr_t)acep + sizeof (ace_t)); + } + + aceptr = (zfs_ace_t *)((caddr_t)aceptr + + aclp->z_ops->ace_size(aceptr)); + } + + *size = (caddr_t)aceptr - (caddr_t)z_acl; + + return (0); +} + +/* + * Copy ZFS ACEs to fixed size ace_t layout + */ +#if 0 // unused function +static void +zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, + void *datap, int filter) +{ + uint64_t who; + uint32_t access_mask; + uint16_t iflags, type; + zfs_ace_hdr_t *zacep = NULL; + ace_t *acep = datap; + ace_object_t *objacep; + zfs_object_ace_t *zobjacep; + size_t ace_size; + uint16_t entry_type; + + while ((zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &iflags, &type))) { + + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + if (filter) + continue; + + zobjacep = (zfs_object_ace_t *)zacep; + objacep = (ace_object_t *)acep; + bcopy(zobjacep->z_object_type, + objacep->a_obj_type, + sizeof (zobjacep->z_object_type)); + bcopy(zobjacep->z_inherit_type, + objacep->a_inherit_obj_type, + sizeof (zobjacep->z_inherit_type)); + ace_size = sizeof (ace_object_t); + break; + default: + ace_size = sizeof (ace_t); + break; + } + + entry_type = (iflags & ACE_TYPE_FLAGS); + if ((entry_type != ACE_OWNER && + entry_type != OWNING_GROUP && + entry_type != ACE_EVERYONE)) { + acep->a_who = zfs_fuid_map_id(zfsvfs, who, + cr, (entry_type & ACE_IDENTIFIER_GROUP) ? + ZFS_ACE_GROUP : ZFS_ACE_USER); + } else { + acep->a_who = (uid_t)(int64_t)who; + } + acep->a_access_mask = access_mask; + acep->a_flags = iflags; + acep->a_type = type; + acep = (ace_t *)((caddr_t)acep + ace_size); + } +} +#endif + +static int +zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep, + zfs_oldace_t *z_acl, int aclcnt, size_t *size) +{ + int i; + zfs_oldace_t *aceptr = z_acl; + + for (i = 0; i != aclcnt; i++, aceptr++) { + aceptr->z_access_mask = acep[i].a_access_mask; + aceptr->z_type = acep[i].a_type; + aceptr->z_flags = acep[i].a_flags; + aceptr->z_fuid = acep[i].a_who; + /* + * Make sure ACE is valid + */ + if (zfs_ace_valid(obj_mode, aclp, aceptr->z_type, + aceptr->z_flags) != B_TRUE) + return (SET_ERROR(EINVAL)); + } + *size = (caddr_t)aceptr - (caddr_t)z_acl; + return (0); +} + +/* + * convert old ACL format to new + */ +void +zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) +{ + zfs_oldace_t *oldaclp; + int i; + uint16_t type, iflags; + uint32_t access_mask; + uint64_t who; + void *cookie = NULL; + zfs_acl_node_t *newaclnode; + + ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL); + /* + * First create the ACE in a contiguous piece of memory + * for zfs_copy_ace_2_fuid(). 
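+ * (The flat zfs_oldace_t array built here is only a staging buffer;
+ * the replacement node is sized for the worst case of one
+ * zfs_object_ace_t per entry before zfs_copy_ace_2_fuid() re-encodes
+ * the ACEs into the FUID layout.)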
+ * + * We only convert an ACL once, so this won't happen + * everytime. + */ + oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, + KM_SLEEP); + i = 0; + while ((cookie = zfs_acl_next_ace(aclp, cookie, &who, + &access_mask, &iflags, &type))) { + oldaclp[i].z_flags = iflags; + oldaclp[i].z_type = type; + oldaclp[i].z_fuid = who; + oldaclp[i++].z_access_mask = access_mask; + } + + newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * + sizeof (zfs_object_ace_t)); + aclp->z_ops = &zfs_acl_fuid_ops; + VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, zp->z_mode, aclp, + oldaclp, newaclnode->z_acldata, aclp->z_acl_count, + &newaclnode->z_size, NULL, cr) == 0); + newaclnode->z_ace_count = aclp->z_acl_count; + aclp->z_version = ZFS_ACL_VERSION; + kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t)); + + /* + * Release all previous ACL nodes + */ + + zfs_acl_release_nodes(aclp); + + list_insert_head(&aclp->z_acl, newaclnode); + + aclp->z_acl_bytes = newaclnode->z_size; + aclp->z_acl_count = newaclnode->z_ace_count; + +} + +/* + * Convert unix access mask to v4 access mask + */ +static uint32_t +zfs_unix_to_v4(uint32_t access_mask) +{ + uint32_t new_mask = 0; + + if (access_mask & S_IXOTH) + new_mask |= ACE_EXECUTE; + if (access_mask & S_IWOTH) + new_mask |= ACE_WRITE_DATA; + if (access_mask & S_IROTH) + new_mask |= ACE_READ_DATA; + return (new_mask); +} + +static void +zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask, + uint16_t access_type, uint64_t fuid, uint16_t entry_type) +{ + uint16_t type = entry_type & ACE_TYPE_FLAGS; + + aclp->z_ops->ace_mask_set(acep, access_mask); + aclp->z_ops->ace_type_set(acep, access_type); + aclp->z_ops->ace_flags_set(acep, entry_type); + if ((type != ACE_OWNER && type != OWNING_GROUP && + type != ACE_EVERYONE)) + aclp->z_ops->ace_who_set(acep, fuid); +} + +/* + * Determine mode of file based on ACL. 
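+ *
+ * Rough example, assuming a trivial ACL:
+ *   owner@:read_data/write_data:allow
+ *   group@:read_data:allow
+ *   everyone@:read_data:allow
+ * yields permission bits 0644; only the first allow or deny seen for
+ * a given bit is honoured (tracked in the "seen" mask below).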
+ */ +uint64_t +zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, + uint64_t *pflags, uint64_t fuid, uint64_t fgid) +{ + int entry_type; + mode_t mode; + mode_t seen = 0; + zfs_ace_hdr_t *acep = NULL; + uint64_t who; + uint16_t iflags, type; + uint32_t access_mask; + boolean_t an_exec_denied = B_FALSE; + + + mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); + + while ((acep = zfs_acl_next_ace(aclp, acep, &who, + &access_mask, &iflags, &type))) { + + if (!zfs_acl_valid_ace_type(type, iflags)) + continue; + + entry_type = (iflags & ACE_TYPE_FLAGS); + + /* + * Skip over any inherit_only ACEs + */ + if (iflags & ACE_INHERIT_ONLY_ACE) + continue; + + + /* + * Apple has unusual expectations to emulate hfs in that the + * mode is not updated: + * -rw-r--r-- 1 root wheel 0 Nov 12 12:39 file.txt + * chmod +a "root allow execute" file.txt + * ZFS: -rwxr--r--+ 1 root wheel 0 Nov 12 12:39 file.txt + * HFS: -rw-r--r--+ 1 root wheel 0 Nov 12 12:39 file.txt + * 0: user:root allow execute + */ + if (entry_type == ACE_OWNER) { + if ((access_mask & ACE_READ_DATA) && + (!(seen & S_IRUSR))) { + seen |= S_IRUSR; + if (type == ALLOW) { + mode |= S_IRUSR; + } + } + if ((access_mask & ACE_WRITE_DATA) && + (!(seen & S_IWUSR))) { + seen |= S_IWUSR; + if (type == ALLOW) { + mode |= S_IWUSR; + } + } + if ((access_mask & ACE_EXECUTE) && + (!(seen & S_IXUSR))) { + seen |= S_IXUSR; + if (type == ALLOW) { + mode |= S_IXUSR; + } + } + } else if (entry_type == OWNING_GROUP) { + if ((access_mask & ACE_READ_DATA) && + (!(seen & S_IRGRP))) { + seen |= S_IRGRP; + if (type == ALLOW) { + mode |= S_IRGRP; + } + } + if ((access_mask & ACE_WRITE_DATA) && + (!(seen & S_IWGRP))) { + seen |= S_IWGRP; + if (type == ALLOW) { + mode |= S_IWGRP; + } + } + if ((access_mask & ACE_EXECUTE) && + (!(seen & S_IXGRP))) { + seen |= S_IXGRP; + if (type == ALLOW) { + mode |= S_IXGRP; + } + } + } else if (entry_type == ACE_EVERYONE) { + if ((access_mask & ACE_READ_DATA)) { + if (!(seen & S_IRUSR)) { + seen |= S_IRUSR; + if (type == ALLOW) { + mode |= S_IRUSR; + } + } + if (!(seen & S_IRGRP)) { + seen |= S_IRGRP; + if (type == ALLOW) { + mode |= S_IRGRP; + } + } + if (!(seen & S_IROTH)) { + seen |= S_IROTH; + if (type == ALLOW) { + mode |= S_IROTH; + } + } + } + if ((access_mask & ACE_WRITE_DATA)) { + if (!(seen & S_IWUSR)) { + seen |= S_IWUSR; + if (type == ALLOW) { + mode |= S_IWUSR; + } + } + if (!(seen & S_IWGRP)) { + seen |= S_IWGRP; + if (type == ALLOW) { + mode |= S_IWGRP; + } + } + if (!(seen & S_IWOTH)) { + seen |= S_IWOTH; + if (type == ALLOW) { + mode |= S_IWOTH; + } + } + } + if ((access_mask & ACE_EXECUTE)) { + if (!(seen & S_IXUSR)) { + seen |= S_IXUSR; + if (type == ALLOW) { + mode |= S_IXUSR; + } + } + if (!(seen & S_IXGRP)) { + seen |= S_IXGRP; + if (type == ALLOW) { + mode |= S_IXGRP; + } + } + if (!(seen & S_IXOTH)) { + seen |= S_IXOTH; + if (type == ALLOW) { + mode |= S_IXOTH; + } + } + } + } else { + /* + * Only care if this IDENTIFIER_GROUP or + * USER ACE denies execute access to someone, + * mode is not affected + */ + if ((access_mask & ACE_EXECUTE) && type == DENY) + an_exec_denied = B_TRUE; + } + } + + /* + * Failure to allow is effectively a deny, so execute permission + * is denied if it was never mentioned or if we explicitly + * weren't allowed it. 
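+ *
+ * E.g. if execute was granted only to owner@ and group@ and never
+ * mentioned for everyone@, (seen & ALL_MODE_EXECS) != ALL_MODE_EXECS,
+ * so an_exec_denied is set and ZFS_NO_EXECS_DENIED is cleared below.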
+ */ + if (!an_exec_denied && + ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS || + (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS)) + an_exec_denied = B_TRUE; + + if (an_exec_denied) + *pflags &= ~ZFS_NO_EXECS_DENIED; + else + *pflags |= ZFS_NO_EXECS_DENIED; + + return (mode); +} + +/* + * Read an external acl object. If the intent is to modify, always + * create a new acl and leave any cached acl in place. + */ +int +zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, + boolean_t will_modify) +{ + zfs_acl_t *aclp; + int aclsize = 0; + int acl_count = 0; + zfs_acl_node_t *aclnode; + zfs_acl_phys_t znode_acl; + int version; + int error; + boolean_t drop_lock = B_FALSE; + + ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + + if (zp->z_acl_cached && !will_modify) { + *aclpp = zp->z_acl_cached; + return (0); + } + + /* + * close race where znode could be upgrade while trying to + * read the znode attributes. + * + * But this could only happen if the file isn't already an SA + * znode + */ + if (!zp->z_is_sa && !have_lock) { + mutex_enter(&zp->z_lock); + drop_lock = B_TRUE; + } + version = zfs_znode_acl_version(zp); + + if ((error = zfs_acl_znode_info(zp, &aclsize, + &acl_count, &znode_acl)) != 0) { + goto done; + } + + aclp = zfs_acl_alloc(version); + + aclp->z_acl_count = acl_count; + aclp->z_acl_bytes = aclsize; + + aclnode = zfs_acl_node_alloc(aclsize); + aclnode->z_ace_count = aclp->z_acl_count; + aclnode->z_size = aclsize; + + if (!zp->z_is_sa) { + if (znode_acl.z_acl_extern_obj) { + error = dmu_read(zp->z_zfsvfs->z_os, + znode_acl.z_acl_extern_obj, 0, aclnode->z_size, + aclnode->z_acldata, DMU_READ_PREFETCH); + } else { + bcopy(znode_acl.z_ace_data, aclnode->z_acldata, + aclnode->z_size); + } + } else { + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs), + aclnode->z_acldata, aclnode->z_size); + } + + if (error != 0) { + zfs_acl_free(aclp); + zfs_acl_node_free(aclnode); + /* convert checksum errors into IO errors */ + if (error == ECKSUM) + error = SET_ERROR(EIO); + goto done; + } + + list_insert_head(&aclp->z_acl, aclnode); + + *aclpp = aclp; + if (!will_modify) + zp->z_acl_cached = aclp; +done: + if (drop_lock) + mutex_exit(&zp->z_lock); + return (error); +} + +/*ARGSUSED*/ +void +zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen, + boolean_t start, void *userdata) +{ + zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata; + + if (start) { + cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl); + } else { + cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl, + cb->cb_acl_node); + } + *dataptr = cb->cb_acl_node->z_acldata; + *length = cb->cb_acl_node->z_size; +} + +int +zfs_acl_chown_setattr(znode_t *zp) +{ + int error; + zfs_acl_t *aclp; + + if (zp->z_zfsvfs->z_acl_mode == ZFS_ACLTYPE_POSIXACL) + return (0); + + ASSERT(MUTEX_HELD(&zp->z_lock)); + ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + + error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE); + if (error == 0 && aclp->z_acl_count > 0) + zp->z_mode = zfs_mode_compute(zp->z_mode, aclp, + &zp->z_pflags, zp->z_uid, zp->z_gid); + + /* + * Some ZFS implementations (ZEVO) create neither a ZNODE_ACL + * nor a DACL_ACES SA in which case ENOENT is returned from + * zfs_acl_node_read() when the SA can't be located. + * Allow chown/chgrp to succeed in these cases rather than + * returning an error that makes no sense in the context of + * the caller. + */ + if (error == ENOENT) + return (0); + + return (error); +} + +/* + * common code for setting ACLs. 
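+ *
+ * Roughly: recompute the mode from the ACL, then push MODE, FLAGS and
+ * CTIME plus either DACL_ACES/DACL_COUNT (SA layout) or ZNODE_ACL
+ * (legacy layout) through a single sa_bulk_update() call.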
+ * + * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl. + * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's + * already checked the acl and knows whether to inherit. + */ +int +zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) +{ + int error; + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_object_type_t otype; + zfs_acl_locator_cb_t locate = { 0 }; + uint64_t mode; + sa_bulk_attr_t bulk[5]; + uint64_t ctime[2]; + int count = 0; + zfs_acl_phys_t acl_phys; + + mode = zp->z_mode; + + mode = zfs_mode_compute(mode, aclp, &zp->z_pflags, + zp->z_uid, zp->z_gid); + + zp->z_mode = mode; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + &mode, sizeof (mode)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, sizeof (ctime)); + + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + /* + * Upgrade needed? + */ + if (!zfsvfs->z_use_fuids) { + otype = DMU_OT_OLDACL; + } else { + if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) && + (zfsvfs->z_version >= ZPL_VERSION_FUID)) + zfs_acl_xform(zp, aclp, cr); + ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID); + otype = DMU_OT_ACL; + } + + /* + * Arrgh, we have to handle old on disk format + * as well as newer (preferred) SA format. + */ + + if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */ + locate.cb_aclp = aclp; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs), + zfs_acl_data_locator, &locate, aclp->z_acl_bytes); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs), + NULL, &aclp->z_acl_count, sizeof (uint64_t)); + } else { /* Painful legacy way */ + zfs_acl_node_t *aclnode; + uint64_t off = 0; + uint64_t aoid; + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), + &acl_phys, sizeof (acl_phys))) != 0) + return (error); + + aoid = acl_phys.z_acl_extern_obj; + + if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { + /* + * If ACL was previously external and we are now + * converting to new ACL format then release old + * ACL object and create a new one. + */ + if (aoid && + aclp->z_version != acl_phys.z_acl_version) { + error = dmu_object_free(zfsvfs->z_os, aoid, tx); + if (error) + return (error); + aoid = 0; + } + if (aoid == 0) { + aoid = dmu_object_alloc(zfsvfs->z_os, + otype, aclp->z_acl_bytes, + otype == DMU_OT_ACL ? + DMU_OT_SYSACL : DMU_OT_NONE, + otype == DMU_OT_ACL ? + DN_OLD_MAX_BONUSLEN : 0, tx); + } else { + (void) dmu_object_set_blocksize(zfsvfs->z_os, + aoid, aclp->z_acl_bytes, 0, tx); + } + acl_phys.z_acl_extern_obj = aoid; + for (aclnode = list_head(&aclp->z_acl); aclnode; + aclnode = list_next(&aclp->z_acl, aclnode)) { + if (aclnode->z_ace_count == 0) + continue; + dmu_write(zfsvfs->z_os, aoid, off, + aclnode->z_size, aclnode->z_acldata, tx); + off += aclnode->z_size; + } + } else { + void *start = acl_phys.z_ace_data; + /* + * Migrating back embedded? + */ + if (acl_phys.z_acl_extern_obj) { + error = dmu_object_free(zfsvfs->z_os, + acl_phys.z_acl_extern_obj, tx); + if (error) + return (error); + acl_phys.z_acl_extern_obj = 0; + } + + for (aclnode = list_head(&aclp->z_acl); aclnode; + aclnode = list_next(&aclp->z_acl, aclnode)) { + if (aclnode->z_ace_count == 0) + continue; + bcopy(aclnode->z_acldata, start, + aclnode->z_size); + start = (caddr_t)start + aclnode->z_size; + } + } + /* + * If Old version then swap count/bytes to match old + * layout of znode_acl_phys_t. 
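+ *
+ * I.e. for ZFS_ACL_VERSION_INITIAL the two fields are stored swapped:
+ *   z_acl_size  <- number of ACEs   (z_acl_count in memory)
+ *   z_acl_count <- size in bytes    (z_acl_bytes in memory)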
+ */ + if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { + acl_phys.z_acl_size = aclp->z_acl_count; + acl_phys.z_acl_count = aclp->z_acl_bytes; + } else { + acl_phys.z_acl_size = aclp->z_acl_bytes; + acl_phys.z_acl_count = aclp->z_acl_count; + } + acl_phys.z_acl_version = aclp->z_version; + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + &acl_phys, sizeof (acl_phys)); + } + + /* + * Replace ACL wide bits, but first clear them. + */ + zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS; + + zp->z_pflags |= aclp->z_hints; + + if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0) + zp->z_pflags |= ZFS_ACL_TRIVIAL; + + zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime); + return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); +} + +static void +zfs_acl_chmod(umode_t umode, uint64_t mode, boolean_t split, boolean_t trim, + zfs_acl_t *aclp) +{ + void *acep = NULL; + uint64_t who; + int new_count, new_bytes; + int ace_size; + int entry_type; + uint16_t iflags, type; + uint32_t access_mask; + zfs_acl_node_t *newnode; + size_t abstract_size = aclp->z_ops->ace_abstract_size(); + void *zacep; + boolean_t isdir; + trivial_acl_t masks; + + new_count = new_bytes = 0; + + isdir = S_ISDIR(umode); + + acl_trivial_access_masks((mode_t)mode, isdir, &masks); + + newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes); + + zacep = newnode->z_acldata; + if (masks.allow0) { + zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER); + zacep = (void *)((uintptr_t)zacep + abstract_size); + new_count++; + new_bytes += abstract_size; + } if (masks.deny1) { + zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER); + zacep = (void *)((uintptr_t)zacep + abstract_size); + new_count++; + new_bytes += abstract_size; + } + if (masks.deny2) { + zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP); + zacep = (void *)((uintptr_t)zacep + abstract_size); + new_count++; + new_bytes += abstract_size; + } + + while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, + &iflags, &type))) { + entry_type = (iflags & ACE_TYPE_FLAGS); + /* + * ACEs used to represent the file mode may be divided + * into an equivalent pair of inherit-only and regular + * ACEs, if they are inheritable. + * Skip regular ACEs, which are replaced by the new mode. + */ + if (split && (entry_type == ACE_OWNER || + entry_type == OWNING_GROUP || + entry_type == ACE_EVERYONE)) { + if (!isdir || !(iflags & + (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) + continue; + /* + * We preserve owner@, group@, or @everyone + * permissions, if they are inheritable, by + * copying them to inherit_only ACEs. This + * prevents inheritable permissions from being + * altered along with the file mode. + */ + iflags |= ACE_INHERIT_ONLY_ACE; + } + + /* + * If this ACL has any inheritable ACEs, mark that in + * the hints (which are later masked into the pflags) + * so create knows to do inheritance. + */ + if (isdir && (iflags & + (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) + aclp->z_hints |= ZFS_INHERIT_ACE; + + if ((type != ALLOW && type != DENY) || + (iflags & ACE_INHERIT_ONLY_ACE)) { + switch (type) { + case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: + case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: + case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: + case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: + aclp->z_hints |= ZFS_ACL_OBJ_ACE; + break; + } + } else { + /* + * Limit permissions granted by ACEs to be no greater + * than permissions of the requested group mode. + * Applies when the "aclmode" property is set to + * "groupmask". 
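+ *
+ * Sketch: after chmod 0750 with aclmode=groupmask, an explicit user
+ * ACE granting read/write/execute would be trimmed here to the rights
+ * implied by the group bits (read/execute for 0750).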
+ */ + if ((type == ALLOW) && trim) + access_mask &= masks.group; + } + zfs_set_ace(aclp, zacep, access_mask, type, who, iflags); + ace_size = aclp->z_ops->ace_size(acep); + zacep = (void *)((uintptr_t)zacep + ace_size); + new_count++; + new_bytes += ace_size; + } + zfs_set_ace(aclp, zacep, masks.owner, ALLOW, -1, ACE_OWNER); + zacep = (void *)((uintptr_t)zacep + abstract_size); + zfs_set_ace(aclp, zacep, masks.group, ALLOW, -1, OWNING_GROUP); + zacep = (void *)((uintptr_t)zacep + abstract_size); + zfs_set_ace(aclp, zacep, masks.everyone, ALLOW, -1, ACE_EVERYONE); + + new_count += 3; + new_bytes += abstract_size * 3; + zfs_acl_release_nodes(aclp); + aclp->z_acl_count = new_count; + aclp->z_acl_bytes = new_bytes; + newnode->z_ace_count = new_count; + newnode->z_size = new_bytes; + list_insert_tail(&aclp->z_acl, newnode); +} + +int +zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) +{ + int error = 0; + + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + + if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD) + *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); + else + error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE); + + if (error == 0) { + (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; + zfs_acl_chmod(zp->z_mode, mode, B_TRUE, + (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp); + } + mutex_exit(&zp->z_lock); + mutex_exit(&zp->z_acl_lock); + + return (error); +} + +/* + * Should ACE be inherited? + */ +static int +zfs_ace_can_use(umode_t umode, uint16_t acep_flags) +{ + int iflags = (acep_flags & 0xf); + + if (S_ISDIR(umode) && (iflags & ACE_DIRECTORY_INHERIT_ACE)) + return (1); + else if (iflags & ACE_FILE_INHERIT_ACE) + return (!S_ISDIR((umode) && + (iflags & ACE_NO_PROPAGATE_INHERIT_ACE))); + return (0); +} + +/* + * inherit inheritable ACEs from parent + */ +static zfs_acl_t * +zfs_acl_inherit(zfsvfs_t *zfsvfs, zfs_acl_t *paclp, + uint64_t umode, boolean_t *need_chmod) +{ + void *pacep = NULL; + void *acep; + zfs_acl_node_t *aclnode; + zfs_acl_t *aclp = NULL; + uint64_t who; + uint32_t access_mask; + uint16_t iflags, newflags, type; + size_t ace_size; + void *data1, *data2; + size_t data1sz, data2sz; + uint_t aclinherit; + boolean_t isdir = S_ISDIR(umode); + boolean_t islnk = S_ISLNK(umode); + boolean_t isreg = S_ISREG(umode); + + *need_chmod = B_TRUE; + + aclp = zfs_acl_alloc(paclp->z_version); + aclinherit = zfsvfs->z_acl_inherit; + if (aclinherit == ZFS_ACL_DISCARD || islnk) + return (aclp); + + while ((pacep = zfs_acl_next_ace(paclp, pacep, &who, + &access_mask, &iflags, &type))) { + + /* + * don't inherit bogus ACEs + */ + if (!zfs_acl_valid_ace_type(type, iflags)) + continue; + + /* + * Check if ACE is inheritable by this vnode + */ + if ((aclinherit == ZFS_ACL_NOALLOW && type == ALLOW) || + !zfs_ace_can_use(umode, iflags)) + continue; + + /* + * If owner@, group@, or everyone@ inheritable + * then zfs_acl_chmod() isn't needed. 
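+ *
+ * (With aclinherit=passthrough or passthrough-x, an inheritable
+ * owner@/group@/everyone@ entry already describes the new object's
+ * permissions, so the caller can skip re-deriving trivial ACEs from
+ * the requested mode via zfs_acl_chmod().)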
+ */ + if ((aclinherit == ZFS_ACL_PASSTHROUGH || + aclinherit == ZFS_ACL_PASSTHROUGH_X) && + ((iflags & (ACE_OWNER|ACE_EVERYONE)) || + ((iflags & OWNING_GROUP) == OWNING_GROUP)) && + (isreg || (isdir && (iflags & ACE_DIRECTORY_INHERIT_ACE)))) + *need_chmod = B_FALSE; + + /* + * Strip inherited execute permission from file if + * not in mode + */ + if (aclinherit == ZFS_ACL_PASSTHROUGH_X && type == ALLOW && + !isdir && ((umode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)) { + access_mask &= ~ACE_EXECUTE; + } + + /* + * Strip write_acl and write_owner from permissions + * when inheriting an ACE + */ + if (aclinherit == ZFS_ACL_RESTRICTED && type == ALLOW) { + access_mask &= ~RESTRICTED_CLEAR; + } + + ace_size = aclp->z_ops->ace_size(pacep); + aclnode = zfs_acl_node_alloc(ace_size); + list_insert_tail(&aclp->z_acl, aclnode); + acep = aclnode->z_acldata; + + zfs_set_ace(aclp, acep, access_mask, type, + who, iflags|ACE_INHERITED_ACE); + + /* + * Copy special opaque data if any + */ + if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) { + VERIFY((data2sz = aclp->z_ops->ace_data(acep, + &data2)) == data1sz); + bcopy(data1, data2, data2sz); + } + + aclp->z_acl_count++; + aclnode->z_ace_count++; + aclp->z_acl_bytes += aclnode->z_size; + newflags = aclp->z_ops->ace_flags_get(acep); + + /* + * If ACE is not to be inherited further, or if the vnode is + * not a directory, remove all inheritance flags + */ + if (!isdir || (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)) { + newflags &= ~ALL_INHERIT; + aclp->z_ops->ace_flags_set(acep, + newflags|ACE_INHERITED_ACE); + continue; + } + + /* + * This directory has an inheritable ACE + */ + aclp->z_hints |= ZFS_INHERIT_ACE; + + /* + * If only FILE_INHERIT is set then turn on + * inherit_only + */ + if ((iflags & (ACE_FILE_INHERIT_ACE | + ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) { + newflags |= ACE_INHERIT_ONLY_ACE; + aclp->z_ops->ace_flags_set(acep, + newflags|ACE_INHERITED_ACE); + } else { + newflags &= ~ACE_INHERIT_ONLY_ACE; + aclp->z_ops->ace_flags_set(acep, + newflags|ACE_INHERITED_ACE); + } + } + + return (aclp); +} + +/* + * Create file system object initial permissions + * including inheritable ACEs. + * Also, create FUIDs for owner and group. + */ +int +zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, + vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids) +{ + int error; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_acl_t *paclp; + gid_t gid; + boolean_t need_chmod = B_TRUE; + boolean_t trim = B_FALSE; + boolean_t inherited = B_FALSE; + + bzero(acl_ids, sizeof (zfs_acl_ids_t)); + acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); + + if (vsecp) + if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr, + &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) + return (error); + /* + * Determine uid and gid. 
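+ *
+ * Illustrative summary of the cases handled below:
+ *   - replay, the root node, or an xattr directory: take va_uid and
+ *     va_gid as supplied
+ *   - otherwise: uid comes from the credential; gid from ATTR_GID if
+ *     the caller is allowed to set it, else from the parent directory
+ *     when its set-GID bit is set, else from the credential's gid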
+ */ + if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay || + ((flag & IS_XATTR) && (vap->va_type == VDIR))) { + acl_ids->z_fuid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_uid, cr, + ZFS_OWNER, &acl_ids->z_fuidp); + acl_ids->z_fgid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_gid, cr, + ZFS_GROUP, &acl_ids->z_fuidp); + gid = vap->va_gid; + } else { + acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER, + cr, &acl_ids->z_fuidp); + acl_ids->z_fgid = 0; + if (vap->va_mask & ATTR_GID) { + acl_ids->z_fgid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_gid, + cr, ZFS_GROUP, &acl_ids->z_fuidp); + gid = vap->va_gid; + if (acl_ids->z_fgid != dzp->z_gid && + !groupmember(vap->va_gid, cr) && + secpolicy_vnode_create_gid(cr) != 0) + acl_ids->z_fgid = 0; + } + if (acl_ids->z_fgid == 0) { + if (dzp->z_mode & S_ISGID) { + char *domain; + uint32_t rid; + + acl_ids->z_fgid = dzp->z_gid; + gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid, + cr, ZFS_GROUP); + + if (zfsvfs->z_use_fuids && + IS_EPHEMERAL(acl_ids->z_fgid)) { + domain = zfs_fuid_idx_domain( + &zfsvfs->z_fuid_idx, + FUID_INDEX(acl_ids->z_fgid)); + rid = FUID_RID(acl_ids->z_fgid); + zfs_fuid_node_add(&acl_ids->z_fuidp, + domain, rid, + FUID_INDEX(acl_ids->z_fgid), + acl_ids->z_fgid, ZFS_GROUP); + } + } else { + acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs, + ZFS_GROUP, cr, &acl_ids->z_fuidp); +#ifdef __FreeBSD__ + gid = acl_ids->z_fgid = dzp->z_gid; +#else + gid = crgetgid(cr); +#endif + } + } + } + + /* + * If we're creating a directory, and the parent directory has the + * set-GID bit set, set in on the new directory. + * Otherwise, if the user is neither privileged nor a member of the + * file's new group, clear the file's set-GID bit. + */ + + if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) && + (vap->va_type == VDIR)) { + acl_ids->z_mode |= S_ISGID; + } else { + if ((acl_ids->z_mode & S_ISGID) && + secpolicy_vnode_setids_setgids(ZTOV(dzp), cr, gid) != 0) + acl_ids->z_mode &= ~S_ISGID; + } + + if (acl_ids->z_aclp == NULL) { + mutex_enter(&dzp->z_acl_lock); + mutex_enter(&dzp->z_lock); + if (!(flag & IS_ROOT_NODE) && + (dzp->z_pflags & ZFS_INHERIT_ACE) && + !(dzp->z_pflags & ZFS_XATTR)) { + VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, + &paclp, B_FALSE)); + acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, + paclp, acl_ids->z_mode, &need_chmod); + inherited = B_TRUE; + } else { + acl_ids->z_aclp = + zfs_acl_alloc(zfs_acl_version_zp(dzp)); + acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; + } + mutex_exit(&dzp->z_lock); + mutex_exit(&dzp->z_acl_lock); + + if (need_chmod) { + if (vap->va_type == VDIR) + acl_ids->z_aclp->z_hints |= + ZFS_ACL_AUTO_INHERIT; + + if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK && + zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH && + zfsvfs->z_acl_inherit != ZFS_ACL_PASSTHROUGH_X) + trim = B_TRUE; + zfs_acl_chmod(vap->va_type, acl_ids->z_mode, B_FALSE, + trim, acl_ids->z_aclp); + } + } + + if (inherited || vsecp) { + acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode, + acl_ids->z_aclp, &acl_ids->z_aclp->z_hints, + acl_ids->z_fuid, acl_ids->z_fgid); + if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0) + acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; + } + + return (0); +} + +/* + * Free ACL and fuid_infop, but not the acl_ids structure + */ +void +zfs_acl_ids_free(zfs_acl_ids_t *acl_ids) +{ + if (acl_ids->z_aclp) + zfs_acl_free(acl_ids->z_aclp); + if (acl_ids->z_fuidp) + zfs_fuid_info_free(acl_ids->z_fuidp); + acl_ids->z_aclp = NULL; + acl_ids->z_fuidp = NULL; +} + +boolean_t +zfs_acl_ids_overquota(zfsvfs_t *zv, 
zfs_acl_ids_t *acl_ids, uint64_t projid) +{ + return (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid) || + zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid) || + (projid != ZFS_DEFAULT_PROJID && projid != ZFS_INVALID_PROJID && + zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid))); +} + +/* + * Retrieve a file's ACL + */ +int +zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclcheck, + cred_t *cr) +{ + struct kauth_acl **aclpp = (struct kauth_acl **)vsecp; + zfs_acl_t *aclp; + kauth_acl_t k_acl; + u_int32_t ace_flags = 0; + kauth_ace_rights_t rights = 0; + guid_t *guidp; + uint64_t who; + uint32_t access_mask; + uint16_t flags; + uint16_t type; + int i; + int error; + void *zacep = NULL; + + mutex_enter(&zp->z_acl_lock); + + error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_TRUE); + if (error != 0) { + mutex_exit(&zp->z_acl_lock); + return (error); + } + if ((k_acl = kauth_acl_alloc(aclp->z_acl_count)) == NULL) { + mutex_exit(&zp->z_acl_lock); + *aclpp = (kauth_acl_t)KAUTH_FILESEC_NONE; + return (ENOMEM); + } + + dprintf("acl_count %d\n", aclp->z_acl_count); + + k_acl->acl_entrycount = aclp->z_acl_count; + k_acl->acl_flags = 0; + *aclpp = k_acl; + + /* + * Translate Open Solaris ACEs to Mac OS X ACLs + */ + i = 0; + while ((zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &flags, &type))) { + rights = 0; + ace_flags = 0; + + guidp = &k_acl->acl_ace[i].ace_applicable; + + if (flags & ACE_OWNER) { +#if HIDE_TRIVIAL_ACL + continue; +#endif + who = -1; + nfsacl_set_wellknown(KAUTH_WKG_OWNER, guidp); + } else if ((flags & OWNING_GROUP) == OWNING_GROUP) { +#if HIDE_TRIVIAL_ACL + continue; +#endif + who = -1; + nfsacl_set_wellknown(KAUTH_WKG_GROUP, guidp); + } else if (flags & ACE_EVERYONE) { +#if HIDE_TRIVIAL_ACL + continue; +#endif + who = -1; + nfsacl_set_wellknown(KAUTH_WKG_EVERYBODY, guidp); + /* Try to get a guid from our uid */ + } else { + + dprintf("ZFS: trying to map uid %d flags %x type %x\n", + who, flags, type); + + if (flags & OWNING_GROUP) { + if (kauth_cred_gid2guid(who, guidp) == 0) { + dprintf("ZFS: appears to be a group\n"); + } + } else if (kauth_cred_uid2guid(who, guidp) == 0) { + dprintf("ZFS: appears to be a user\n"); + } else { + dprintf("ZFS: Unable to map\n"); + bzero(guidp, sizeof (guid_t)); + } + } + + // access_mask = aclp->z_acl[i].a_access_mask; + if (access_mask & ACE_READ_DATA) + rights |= KAUTH_VNODE_READ_DATA; + if (access_mask & ACE_WRITE_DATA) + rights |= KAUTH_VNODE_WRITE_DATA; + if (access_mask & ACE_APPEND_DATA) + rights |= KAUTH_VNODE_APPEND_DATA; + if (access_mask & ACE_READ_NAMED_ATTRS) + rights |= KAUTH_VNODE_READ_EXTATTRIBUTES; + if (access_mask & ACE_WRITE_NAMED_ATTRS) + rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES; + if (access_mask & ACE_EXECUTE) + rights |= KAUTH_VNODE_EXECUTE; + if (access_mask & ACE_DELETE_CHILD) + rights |= KAUTH_VNODE_DELETE_CHILD; + if (access_mask & ACE_READ_ATTRIBUTES) + rights |= KAUTH_VNODE_READ_ATTRIBUTES; + if (access_mask & ACE_WRITE_ATTRIBUTES) + rights |= KAUTH_VNODE_WRITE_ATTRIBUTES; + if (access_mask & ACE_DELETE) + rights |= KAUTH_VNODE_DELETE; + if (access_mask & ACE_READ_ACL) + rights |= KAUTH_VNODE_READ_SECURITY; + if (access_mask & ACE_WRITE_ACL) + rights |= KAUTH_VNODE_WRITE_SECURITY; + if (access_mask & ACE_WRITE_OWNER) + rights |= KAUTH_VNODE_TAKE_OWNERSHIP; + if (access_mask & ACE_SYNCHRONIZE) + rights |= KAUTH_VNODE_SYNCHRONIZE; + k_acl->acl_ace[i].ace_rights = rights; + + // flags = aclp->z_acl[i].a_flags; + if (flags & ACE_FILE_INHERIT_ACE) + ace_flags |= 
KAUTH_ACE_FILE_INHERIT; + if (flags & ACE_DIRECTORY_INHERIT_ACE) + ace_flags |= KAUTH_ACE_DIRECTORY_INHERIT; + if (flags & ACE_NO_PROPAGATE_INHERIT_ACE) + ace_flags |= KAUTH_ACE_LIMIT_INHERIT; + if (flags & ACE_INHERIT_ONLY_ACE) + ace_flags |= KAUTH_ACE_ONLY_INHERIT; + + // type = aclp->z_acl[i].a_type; + switch (type) { + case ACE_ACCESS_ALLOWED_ACE_TYPE: + ace_flags |= KAUTH_ACE_PERMIT; + break; + case ACE_ACCESS_DENIED_ACE_TYPE: + ace_flags |= KAUTH_ACE_DENY; + break; + case ACE_SYSTEM_AUDIT_ACE_TYPE: + ace_flags |= KAUTH_ACE_AUDIT; + break; + case ACE_SYSTEM_ALARM_ACE_TYPE: + ace_flags |= KAUTH_ACE_ALARM; + break; + } + k_acl->acl_ace[i].ace_flags = ace_flags; + i++; + } + k_acl->acl_entrycount = i; + mutex_exit(&zp->z_acl_lock); + + zfs_acl_free(aclp); + + return (0); +} + +int +zfs_addacl_trivial(znode_t *zp, ace_t *aces, int *nentries, int seen_type) +{ + zfs_acl_t *aclp; + uint64_t who; + uint32_t access_mask; + uint16_t flags; + uint16_t type; + int i; + int error; + void *zacep = NULL; + + mutex_enter(&zp->z_acl_lock); + + error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_TRUE); + if (error != 0) { + mutex_exit(&zp->z_acl_lock); + return (error); + } + + dprintf("ondisk acl_count %d\n", aclp->z_acl_count); + + // Start at the end + i = *nentries; + + /* + * Translate Open Solaris ACEs to Mac OS X ACLs + */ + while ((zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &flags, &type))) { + + if (flags & ACE_OWNER) { + if (seen_type & ACE_OWNER) continue; + seen_type |= ACE_OWNER; + who = -1; + } else if ((flags & OWNING_GROUP) == OWNING_GROUP) { + if (seen_type & ACE_GROUP) continue; + seen_type |= ACE_GROUP; + who = -1; + } else if (flags & ACE_EVERYONE) { + if (seen_type & ACE_EVERYONE) continue; + seen_type |= ACE_EVERYONE; + who = -1; + /* Try to get a guid from our uid */ + } else { + // Only deal with the trivials + continue; + } + + aces[i].a_who = who; + aces[i].a_access_mask = access_mask; + aces[i].a_flags = flags; + aces[i].a_type = type; + + dprintf("zfs: adding entry %d for type %x sizeof %d\n", i, type, + sizeof (aces[i])); + i++; + } + + *nentries = i; + mutex_exit(&zp->z_acl_lock); + + zfs_acl_free(aclp); + + return (0); +} + + +int +zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode, + vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp) +{ + zfs_acl_t *aclp; + zfs_acl_node_t *aclnode; + int aclcnt = vsecp->vsa_aclcnt; + int error; + + if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) + return (SET_ERROR(EINVAL)); + + aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version)); + + aclp->z_hints = 0; + aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t)); + if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { + if ((error = zfs_copy_ace_2_oldace(obj_mode, aclp, + (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata, + aclcnt, &aclnode->z_size)) != 0) { + zfs_acl_free(aclp); + zfs_acl_node_free(aclnode); + return (error); + } + } else { + if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_mode, aclp, + vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, + &aclnode->z_size, fuidp, cr)) != 0) { + zfs_acl_free(aclp); + zfs_acl_node_free(aclnode); + return (error); + } + } + aclp->z_acl_bytes = aclnode->z_size; + aclnode->z_ace_count = aclcnt; + aclp->z_acl_count = aclcnt; + list_insert_head(&aclp->z_acl, aclnode); + + /* + * If flags are being set then add them to z_hints + */ + if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) { + if (vsecp->vsa_aclflags & ACL_PROTECTED) + aclp->z_hints |= ZFS_ACL_PROTECTED; + if (vsecp->vsa_aclflags & 
ACL_DEFAULTED) + aclp->z_hints |= ZFS_ACL_DEFAULTED; + if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT) + aclp->z_hints |= ZFS_ACL_AUTO_INHERIT; + } + + *zaclp = aclp; + + return (0); +} + + + +/* + * Set a file's ACL + */ +int +zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zilog_t *zilog = zfsvfs->z_log; + dmu_tx_t *tx; + int error; + zfs_acl_t *aclp; + zfs_fuid_info_t *fuidp = NULL; + boolean_t fuid_dirtied; + uint64_t acl_obj; + + if (zp->z_pflags & ZFS_IMMUTABLE) + return (SET_ERROR(EPERM)); + + if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))) + return (error); + + error = zfs_vsec_2_aclp(zfsvfs, vnode_vtype(ZTOV(zp)), vsecp, cr, + &fuidp, &aclp); + if (error) + return (error); + + /* + * If ACL wide flags aren't being set then preserve any + * existing flags. + */ + +top: + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + + tx = dmu_tx_create(zfsvfs->z_os); + + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + + /* + * If old version and ACL won't fit in bonus and we aren't + * upgrading then take out necessary DMU holds + */ + + if ((acl_obj = zfs_external_acl(zp)) != 0) { + if (zfsvfs->z_version >= ZPL_VERSION_FUID && + zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) { + dmu_tx_hold_free(tx, acl_obj, 0, + DMU_OBJECT_END); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, + aclp->z_acl_bytes); + } else { + dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); + } + } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); + } + + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_NOWAIT); + if (error) { + mutex_exit(&zp->z_acl_lock); + mutex_exit(&zp->z_lock); + + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + zfs_acl_free(aclp); + return (error); + } + + error = zfs_aclset_common(zp, aclp, cr, tx); + ASSERT(error == 0); + ASSERT(zp->z_acl_cached == NULL); + zp->z_acl_cached = aclp; + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + zfs_log_acl(zilog, tx, zp, vsecp, fuidp); + + if (fuidp) + zfs_fuid_info_free(fuidp); + dmu_tx_commit(tx); + + mutex_exit(&zp->z_lock); + mutex_exit(&zp->z_acl_lock); + + return (error); +} + + +/* + * Check accesses of interest (AoI) against attributes of the dataset + * such as read-only. Returns zero if no AoI conflict with dataset + * attributes, otherwise an appropriate errno is returned. + */ +static int +zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) +{ + if ((v4_mode & WRITE_MASK) && + (vfs_isrdonly(zp->z_zfsvfs->z_vfs)) && + (!IS_DEVVP(ZTOV(zp)) || + (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) { + return (SET_ERROR(EROFS)); + } + + /* + * Intentionally allow ZFS_READONLY through here. + * See zfs_zaccess_common(). + */ + if ((v4_mode & WRITE_MASK_DATA) && + (zp->z_pflags & ZFS_IMMUTABLE)) { + return (EPERM); + } +#ifdef sun + if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && + (zp->z_pflags & ZFS_NOUNLINK)) { + return (EPERM); +#else + /* + * In FreeBSD we allow to modify directory's content is ZFS_NOUNLINK + * (sunlnk) is set. We just don't allow directory removal, which is + * handled in zfs_zaccess_delete(). 
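+ *
+ * (I.e. only ACE_DELETE of the object itself is refused here when
+ * ZFS_NOUNLINK is set; ACE_DELETE_CHILD on the containing directory
+ * is still evaluated by the delete-permission logic.)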
+ */ + if ((v4_mode & ACE_DELETE) && + (zp->z_pflags & ZFS_NOUNLINK)) { + return (SET_ERROR(EPERM)); + } +#endif + + if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && + (zp->z_pflags & ZFS_AV_QUARANTINED))) { + return (SET_ERROR(EACCES)); + } + + return (0); +} + +/* + * The primary usage of this function is to loop through all of the + * ACEs in the znode, determining what accesses of interest (AoI) to + * the caller are allowed or denied. The AoI are expressed as bits in + * the working_mode parameter. As each ACE is processed, bits covered + * by that ACE are removed from the working_mode. This removal + * facilitates two things. The first is that when the working mode is + * empty (= 0), we know we've looked at all the AoI. The second is + * that the ACE interpretation rules don't allow a later ACE to undo + * something granted or denied by an earlier ACE. Removing the + * discovered access or denial enforces this rule. At the end of + * processing the ACEs, all AoI that were found to be denied are + * placed into the working_mode, giving the caller a mask of denied + * accesses. Returns: + * 0 if all AoI granted + * EACCES if the denied mask is non-zero + * other error if abnormal failure (e.g., IO error) + * + * A secondary usage of the function is to determine if any of the + * AoI are granted. If an ACE grants any access in + * the working_mode, we immediately short circuit out of the function. + * This mode is chosen by setting anyaccess to B_TRUE. The + * working_mode is not a denied access mask upon exit if the function + * is used in this manner. + */ +static int +zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, + boolean_t anyaccess, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_acl_t *aclp; + int error; + uid_t uid = crgetuid(cr); + uint64_t who; + uint16_t type, iflags; + uint16_t entry_type; + uint32_t access_mask; + uint32_t deny_mask = 0; + zfs_ace_hdr_t *acep = NULL; + boolean_t checkit; + uid_t gowner; + uid_t fowner; + + zfs_fuid_map_ids(zp, cr, &fowner, &gowner); + + mutex_enter(&zp->z_acl_lock); + + error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); + if (error != 0) { + mutex_exit(&zp->z_acl_lock); + return (error); + } + + ASSERT(zp->z_acl_cached); + + while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, + &iflags, &type))) { + uint32_t mask_matched; + + if (!zfs_acl_valid_ace_type(type, iflags)) + continue; + + if (vnode_isdir(ZTOV(zp)) && (iflags & ACE_INHERIT_ONLY_ACE)) + continue; + + /* Skip ACE if it does not affect any AoI */ + mask_matched = (access_mask & *working_mode); + if (!mask_matched) + continue; + + entry_type = (iflags & ACE_TYPE_FLAGS); + + checkit = B_FALSE; + + switch (entry_type) { + case ACE_OWNER: + if (uid == fowner) + checkit = B_TRUE; + break; + case OWNING_GROUP: + who = gowner; + /*FALLTHROUGH*/ + case ACE_IDENTIFIER_GROUP: + checkit = zfs_groupmember(zfsvfs, who, cr); + break; + case ACE_EVERYONE: + checkit = B_TRUE; + break; + + /* USER Entry */ + default: + if (entry_type == 0) { + uid_t newid; + + newid = zfs_fuid_map_id(zfsvfs, who, cr, + ZFS_ACE_USER); + if (newid != IDMAP_WK_CREATOR_OWNER_UID && + uid == newid) + checkit = B_TRUE; + break; + } else { + mutex_exit(&zp->z_acl_lock); + return (SET_ERROR(EIO)); + } + } + + if (checkit) { + if (type == DENY) { + DTRACE_PROBE3(zfs__ace__denies, + znode_t *, zp, + zfs_ace_hdr_t *, acep, + uint32_t, mask_matched); + deny_mask |= mask_matched; + } else { + DTRACE_PROBE3(zfs__ace__allows, + znode_t *, zp, + zfs_ace_hdr_t *, acep, + uint32_t, 
mask_matched); + if (anyaccess) { + mutex_exit(&zp->z_acl_lock); + return (0); + } + } + *working_mode &= ~mask_matched; + } + + /* Are we done? */ + if (*working_mode == 0) + break; + } + + mutex_exit(&zp->z_acl_lock); + + /* Put the found 'denies' back on the working mode */ + if (deny_mask) { + *working_mode |= deny_mask; + return (SET_ERROR(EACCES)); + } else if (*working_mode) { + return (-1); + } + + return (0); +} + +/* + * Return true if any access whatsoever granted, we don't actually + * care what access is granted. + */ +boolean_t +zfs_has_access(znode_t *zp, cred_t *cr) +{ + uint32_t have = ACE_ALL_PERMS; + + if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) { + uid_t owner; + + owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0); + } + return (B_TRUE); +} + +static int +zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, + boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int err; + + *working_mode = v4_mode; + *check_privs = B_TRUE; + + /* + * Short circuit empty requests + */ + if (v4_mode == 0 || zfsvfs->z_replay) { + *working_mode = 0; + return (0); + } + + if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) { + *check_privs = B_FALSE; + return (err); + } + + /* + * The caller requested that the ACL check be skipped. This + * would only happen if the caller checked VOP_ACCESS() with a + * 32 bit ACE mask and already had the appropriate permissions. + */ + if (skipaclchk) { + *working_mode = 0; + return (0); + } + + /* + * Note: ZFS_READONLY represents the "DOS R/O" attribute. + * When that flag is set, we should behave as if write access + * were not granted by anything in the ACL. In particular: + * We _must_ allow writes after opening the file r/w, then + * setting the DOS R/O attribute, and writing some more. + * (Similar to how you can write after fchmod(fd, 0444).) + * + * Therefore ZFS_READONLY is ignored in the dataset check + * above, and checked here as if part of the ACL check. + * Also note: DOS R/O is ignored for directories. 
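+ *
+ * Sketch of the intended behaviour:
+ *   open(file, O_RDWR)          -> succeeds, writes succeed
+ *   set the DOS R/O attribute   -> writes on the open fd still succeed
+ *   open(file, O_RDWR) again    -> fails with EPERM in the check below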
+ */ + if ((v4_mode & WRITE_MASK_DATA) && + !vnode_isdir(ZTOV(zp)) && + (zp->z_pflags & ZFS_READONLY)) { + return (SET_ERROR(EPERM)); + } + + return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr)); +} + +static int +zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs, + cred_t *cr) +{ + if (*working_mode != ACE_WRITE_DATA) + return (SET_ERROR(EACCES)); + + return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode, + check_privs, B_FALSE, cr)); +} + +int +zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) +{ + boolean_t owner = B_FALSE; + boolean_t groupmbr = B_FALSE; + boolean_t is_attr; + uid_t uid = crgetuid(cr); + int error; + + if (zdp->z_pflags & ZFS_AV_QUARANTINED) + return (SET_ERROR(EACCES)); + + is_attr = ((zdp->z_pflags & ZFS_XATTR) && + (vnode_isdir(ZTOV(zdp)))); + if (is_attr) + goto slow; + + + mutex_enter(&zdp->z_acl_lock); + + if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + + if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + + if (uid == zdp->z_uid) { + owner = B_TRUE; + if (zdp->z_mode & S_IXUSR) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } else { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + } + if (groupmember(zdp->z_gid, cr)) { + groupmbr = B_TRUE; + if (zdp->z_mode & S_IXGRP) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } else { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + } + if (!owner && !groupmbr) { + if (zdp->z_mode & S_IXOTH) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + } + + mutex_exit(&zdp->z_acl_lock); + +slow: + DTRACE_PROBE(zfs__fastpath__execute__access__miss); + ZFS_ENTER(zdp->z_zfsvfs); + error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); + ZFS_EXIT(zdp->z_zfsvfs); + return (error); +} + +/* + * Determine whether Access should be granted/denied. + * + * The least priv subsytem is always consulted as a basic privilege + * can define any form of access. + */ +int +zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) +{ + uint32_t working_mode; + int error; + int is_attr; + boolean_t check_privs; + znode_t *xzp = NULL; + znode_t *check_zp = zp; + mode_t needed_bits; + uid_t owner; + + is_attr = ((zp->z_pflags & ZFS_XATTR) && (vnode_isdir(ZTOV(zp)))); + +#ifdef __APPLE__ + /* + * In FreeBSD, we don't care about permissions of individual ADS. + * Note that not checking them is not just an optimization - without + * this shortcut, EA operations may bogusly fail with EACCES. + */ + if (zp->z_pflags & ZFS_XATTR) + return (0); +#else + /* + * If attribute then validate against base file + */ + if (is_attr) { + uint64_t parent; + + if ((error = sa_lookup(zp->z_sa_hdl, + SA_ZPL_PARENT(zp->z_zfsvfs), &parent, + sizeof (parent))) != 0) + return (error); + + /* + * Cache the lookup on the parent file znode as + * zp->z_xattr_parent and hold a reference. This + * effectively pins the parent in memory until all + * child xattr znodes have been destroyed and + * release their references in zfs_inode_destroy(). 
+ */ + error = zfs_zget(zp->z_zfsvfs, parent, &check_zp); + if (error) + return (error); + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + if (zp->z_xattr_parent == NULL) + zp->z_xattr_parent = check_zp; + rw_exit(&zp->z_xattr_lock); + } + + check_zp = xzp; + + /* + * fixup mode to map to xattr perms + */ + + if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) { + mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA); + mode |= ACE_WRITE_NAMED_ATTRS; + } + + if (mode & (ACE_READ_DATA|ACE_EXECUTE)) { + mode &= ~(ACE_READ_DATA|ACE_EXECUTE); + mode |= ACE_READ_NAMED_ATTRS; + } + } +#endif + + owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + /* + * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC + * in needed_bits. Map the bits mapped by working_mode (currently + * missing) in missing_bits. + * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode), + * needed_bits. + */ + needed_bits = 0; + + working_mode = mode; + if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + owner == crgetuid(cr)) + working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); + + if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| + ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) + needed_bits |= VREAD; + if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| + ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) + needed_bits |= VWRITE; + if (working_mode & ACE_EXECUTE) + needed_bits |= VEXEC; + + if ((error = zfs_zaccess_common(check_zp, mode, &working_mode, + &check_privs, skipaclchk, cr)) == 0) { + if (is_attr) + VN_RELE(ZTOV(xzp)); + return (secpolicy_vnode_access2(cr, ZTOV(zp), owner, + needed_bits, needed_bits)); + } + + if (error && !check_privs) { + if (is_attr) + VN_RELE(ZTOV(xzp)); + return (error); + } + + if (error && (flags & V_APPEND)) { + error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr); + } + + if (error && check_privs) { + mode_t checkmode = 0; + + /* + * First check for implicit owner permission on + * read_acl/read_attributes + */ + + error = 0; + ASSERT(working_mode != 0); + + if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && + owner == crgetuid(cr))) + working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); + + if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| + ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) + checkmode |= VREAD; + if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| + ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) + checkmode |= VWRITE; + if (working_mode & ACE_EXECUTE) + checkmode |= VEXEC; + + error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner, + needed_bits & ~checkmode, needed_bits); + + if (error == 0 && (working_mode & ACE_WRITE_OWNER)) + error = secpolicy_vnode_chown(ZTOV(check_zp), cr, + owner); + if (error == 0 && (working_mode & ACE_WRITE_ACL)) + error = secpolicy_vnode_setdac(ZTOV(check_zp), cr, + owner); + + if (error == 0 && (working_mode & + (ACE_DELETE|ACE_DELETE_CHILD))) + error = secpolicy_vnode_remove(ZTOV(check_zp), cr); + + if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) { + error = secpolicy_vnode_chown(ZTOV(check_zp), cr, + owner); + } + if (error == 0) { + /* + * See if any bits other than those already checked + * for are still present. 
If so then return EACCES + */ + if (working_mode & ~(ZFS_CHECKED_MASKS)) { + error = SET_ERROR(EACCES); + } + } + } else if (error == 0) { + error = secpolicy_vnode_access2(cr, ZTOV(zp), owner, + needed_bits, needed_bits); + } + + + if (is_attr) + VN_RELE(ZTOV(xzp)); + + return (error); +} + +/* + * Translate traditional unix VREAD/VWRITE/VEXEC mode into + * native ACL format and call zfs_zaccess() + */ +int +zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr) +{ + return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr)); +} + +/* + * Access function for secpolicy_vnode_setattr + */ +int +zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr) +{ + int v4_mode = zfs_unix_to_v4(mode >> 6); + + return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr)); +} + +/* See zfs_zaccess_delete() */ +uint64_t zfs_write_implies_delete_child = 1; + +/* + * Determine whether delete access should be granted. + * + * The following chart outlines how we handle delete permissions which is + * how recent versions of windows (Windows 2008) handles it. The efficiency + * comes from not having to check the parent ACL where the object itself grants + * delete: + * + * ------------------------------------------------------- + * | Parent Dir | Target Object Permissions | + * | permissions | | + * ------------------------------------------------------- + * | | ACL Allows | ACL Denies| Delete | + * | | Delete | Delete | unspecified| + * ------------------------------------------------------- + * | ACL Allows | Permit | Deny * | Permit | + * | DELETE_CHILD | | | | + * ------------------------------------------------------- + * | ACL Denies | Permit | Deny | Deny | + * | DELETE_CHILD | | | | + * ------------------------------------------------------- + * | ACL specifies | | | | + * | only allow | Permit | Deny * | Permit | + * | write and | | | | + * | execute | | | | + * ------------------------------------------------------- + * | ACL denies | | | | + * | write and | Permit | Deny | Deny | + * | execute | | | | + * ------------------------------------------------------- + * ^ + * | + * Re. execute permission on the directory: if that's missing, + * the vnode lookup of the target will fail before we get here. + * + * Re [*] in the table above: NFSv4 would normally Permit delete for + * these two cells of the matrix. + * See acl.h for notes on which ACE_... flags should be checked for which + * operations. Specifically, the NFSv4 committee recommendation is in + * conflict with the Windows interpretation of DENY ACEs, where DENY ACEs + * should take precedence ahead of ALLOW ACEs. + * + * This implementation always consults the target object's ACL first. + * If a DENY ACE is present on the target object that specifies ACE_DELETE, + * delete access is denied. If an ALLOW ACE with ACE_DELETE is present on + * the target object, access is allowed. If and only if no entries with + * ACE_DELETE are present in the object's ACL, check the container's ACL + * for entries with ACE_DELETE_CHILD. + * + * A summary of the logic implemented from the table above is as follows: + * + * First check for DENY ACEs that apply. + * If either target or container has a deny, EACCES. + * + * Delete access can then be summarized as follows: + * 1: The object to be deleted grants ACE_DELETE, or + * 2: The containing directory grants ACE_DELETE_CHILD. + * In a Windows system, that would be the end of the story. + * In this system, (2) has some complications... 
+ * 2a: "sticky" bit on a directory adds restrictions, and + * 2b: existing ACEs from previous versions of ZFS may + * not carry ACE_DELETE_CHILD where they should, so we + * also allow delete when ACE_WRITE_DATA is granted. + * + * Note: 2b is technically a work-around for a prior bug, + * which hopefully can go away some day. For those who + * no longer need the work around, and for testing, this + * work-around is made conditional via the tunable: + * zfs_write_implies_delete_child + */ +int +zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) +{ + uint32_t wanted_dirperms; + uint32_t dzp_working_mode = 0; + uint32_t zp_working_mode = 0; + int dzp_error, zp_error; + boolean_t dzpcheck_privs; + boolean_t zpcheck_privs; + + if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK)) + return (SET_ERROR(EPERM)); + + /* + * Case 1: + * If target object grants ACE_DELETE then we are done. This is + * indicated by a return value of 0. For this case we don't worry + * about the sticky bit because sticky only applies to the parent + * directory and this is the child access result. + * + * If we encounter a DENY ACE here, we're also done (EACCES). + * Note that if we hit a DENY ACE here (on the target) it should + * take precedence over a DENY ACE on the container, so that when + * we have more complete auditing support we will be able to + * report an access failure against the specific target. + * (This is part of why we're checking the target first.) + */ + zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, + &zpcheck_privs, B_FALSE, cr); + if (zp_error == EACCES) { + /* We hit a DENY ACE. */ + if (!zpcheck_privs) + return (SET_ERROR(zp_error)); + + return (secpolicy_vnode_remove(ZTOV(zp), cr)); + } + if (zp_error == 0) + return (0); + + /* + * Case 2: + * If the containing directory grants ACE_DELETE_CHILD, + * or we're in backward compatibility mode and the + * containing directory has ACE_WRITE_DATA, allow. + * Case 2b is handled with wanted_dirperms. + */ + wanted_dirperms = ACE_DELETE_CHILD; + if (zfs_write_implies_delete_child) + wanted_dirperms |= ACE_WRITE_DATA; + dzp_error = zfs_zaccess_common(dzp, wanted_dirperms, + &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr); + if (dzp_error == EACCES) { + /* We hit a DENY ACE. */ + if (!dzpcheck_privs) + return (SET_ERROR(dzp_error)); + return (secpolicy_vnode_remove(ZTOV(zp), cr)); + } + + /* + * Cases 2a, 2b (continued) + * + * Note: dzp_working_mode now contains any permissions + * that were NOT granted. Therefore, if any of the + * wanted_dirperms WERE granted, we will have: + * dzp_working_mode != wanted_dirperms + * We're really asking if ANY of those permissions + * were granted, and if so, grant delete access. + */ + if (dzp_working_mode != wanted_dirperms) + dzp_error = 0; + + /* + * dzp_error is 0 if the container granted us permissions to "modify". + * If we do not have permission via one or more ACEs, our current + * privileges may still permit us to modify the container. + * + * dzpcheck_privs is false when i.e. the FS is read-only. + * Otherwise, do privilege checks for the container. + */ + if (dzp_error != 0 && dzpcheck_privs) { + uid_t owner; + /* + * The secpolicy call needs the requested access and + * the current access mode of the container, but it + * only knows about Unix-style modes (VEXEC, VWRITE), + * so this must condense the fine-grained ACE bits into + * Unix modes. 
+ * + * The VEXEC flag is easy, because we know that it has + * always been checked before we get here (during the + * lookup of the target vnode). The container has not + * granted us permissions to "modify", so we do not set + * the VWRITE flag in the current access mode. + */ + owner = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, + ZFS_OWNER); + dzp_error = secpolicy_vnode_access2(cr, ZTOV(dzp), + owner, VEXEC, VWRITE|VEXEC); + } + if (dzp_error != 0) { + /* + * Note: We may have dzp_error = -1 here (from + * zfs_zaccess_common). Don't return that. + */ + return (SET_ERROR(EACCES)); + } + + /* + * At this point, we know that the directory permissions allow + * us to modify, but we still need to check for the additional + * restrictions that apply when the "sticky bit" is set. + * + * Yes, zfs_sticky_remove_access() also checks this bit, but + * checking it here and skipping the call below is nice when + * you're watching all of this with dtrace. + */ + if ((dzp->z_mode & S_ISVTX) == 0) + return (0); + /* + * zfs_sticky_remove_access will succeed if: + * 1. The sticky bit is absent. + * 2. We pass the sticky bit restrictions. + * 3. We have privileges that always allow file removal. + */ + return (zfs_sticky_remove_access(dzp, zp, cr)); +} + +int +zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, + znode_t *tzp, cred_t *cr) +{ + int add_perm; + int error; + + if (szp->z_pflags & ZFS_AV_QUARANTINED) + return (SET_ERROR(EACCES)); + + add_perm = (vnode_isdir(ZTOV(szp))) ? + ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; + + /* + * Rename permissions are a combination of delete permission + + * add file/subdir permission. + * + * BSD operating systems also require write permission + * on the directory being moved from one parent directory + * to another. + */ + if (vnode_isdir(ZTOV(szp)) && ZTOV(sdzp) != ZTOV(tdzp)) { + if ((error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr))) + return (error); + } + + /* + * First make sure we can do the delete portion. + * + * If that succeeds, then check for add_file/add_subdir permissions. + */ + + if ((error = zfs_zaccess_delete(sdzp, szp, cr))) + return (error); + + /* + * If we have a tzp, see if we can delete it. + */ + if (tzp) { + if ((error = zfs_zaccess_delete(tdzp, tzp, cr))) + return (error); + } + + /* + * Now check for add permissions. + */ + error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr); + + return (error); +} diff --git a/module/os/macos/zfs/zfs_boot.cpp b/module/os/macos/zfs/zfs_boot.cpp new file mode 100644 index 0000000000..7493792ed0 --- /dev/null +++ b/module/os/macos/zfs/zfs_boot.cpp @@ -0,0 +1,2972 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2015, Evan Susarret. All rights reserved.
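The delete-permission chart and the two-step check in zfs_zaccess_delete() above can be condensed into a small decision function. The stand-alone sketch below is illustrative only (type and function names are invented; privilege, sticky-bit and read-only handling are omitted): the target's own ACE_DELETE answer, when present, is final, and only a silent target defers to the parent's ACE_DELETE_CHILD (or ACE_WRITE_DATA when zfs_write_implies_delete_child is set).

#include <stdbool.h>

/* Tri-state outcome of looking up one permission in an ACL. */
typedef enum { PERM_UNSPECIFIED, PERM_ALLOWED, PERM_DENIED } perm_t;

bool
delete_allowed(perm_t target_delete, perm_t parent_delete_child)
{
	if (target_delete == PERM_DENIED)	/* deny on the target is final */
		return (false);
	if (target_delete == PERM_ALLOWED)	/* allow on the target is final */
		return (true);
	/* Target is silent: the containing directory decides. */
	return (parent_delete_child == PERM_ALLOWED);
}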
+ */ +/* + * ZFS boot utils + * + * While loading the kext, check if early boot and zfs-boot + * kernel flag. + * Allocate pool_list (and lock). + * Register matching notification zfs_boot_probe_disk to check + * IOMediaBSDClient devices as they are published (or matched?), + * passing pool_list (automatically calls handler for all + * existing devices). + * Dispatch zfs_boot_import_thread on system_taskq. + * + * In notification handler zfs_boot_probe_disk: + * Check provider IOMedia for: + * 1 Leaf node and whole disk. + * 2 Leaf node and type ZFS. + * 3 Leaf node and type FreeBSD-ZFS. + * Check IOMedia meets minimum size or bail. + * Allocate char* buffer. + * Call vdev_disk_read_rootlabel. + * XXX Alternately: + * Alloc and prep IOMemoryDescriptor. + * Open IOMedia device (read-only). + * Try to read vdev label from device. + * Close IOMedia device. + * Release IOMemoryDescriptor (data is in buffer). + * XXX + * If label was read, try to generate a config from label. + * Check pool name matches zfs-boot or bail. + * Check pool status. + * Update this vdev's path and set status. + * Set other vdevs to missing status. + * Check-in config in thread-safe manner: + * Take pool_list lock. + * If config not found, insert new config, or update existing. + * Unlock pool_list. + * If found config is complete, wake import thread. + * + * In vdev_disk_read_rootlabel: + * Use vdev_disk_physio to read label. + * If label was read, try to unpack. + * Return label or failure. + * + * In vdev_disk_physio: + * Open device (read-only) using vnop/VOP. + * Try to read vdev label from device. + * Close device using vnop/VOP. + * + * In zfs_boot_import_thread: + * Loop checking for work and sleeping on lock between loops. + * Take pool_list lock and check for work. + * Attempt to import root pool using spa_import_rootpool. + * If successful, remove notification handler (waits for + * all tasks). + * Empty and deallocate pool_list (and lock). + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); + +} /* extern "C" */ + +#include +#include +#include +#include + +#if defined(DEBUG) || defined(ZFS_DEBUG) +#define DSTATIC +#else +#define DSTATIC static +#endif + +#ifndef verify +#define verify(EX) (void)((EX) || \ + (printf("%s, %s, %d, %s\n", #EX, __FILE__, __LINE__, __func__), 0)) +#endif /* verify */ + +#ifndef dprintf +#if defined(DEBUG) || defined(ZFS_DEBUG) +#define dprintf(fmt, ...) do { \ + printf("%s " fmt, __func__, __VA_ARGS__); \ +_NOTE(CONSTCOND) } while (0) +#else +#define dprintf(fmt, ...) do { } while (0); +#endif /* if DEBUG or ZFS_DEBUG */ +#endif /* ifndef dprintf */ + +/* Most of this is only built when configured with --enable-boot */ + +/* block size is 512 B, count is 512 M blocks */ +#define ZFS_BOOT_DEV_BSIZE (UInt64)(1<<9) +#define ZFS_BOOT_DEV_BCOUNT (UInt64)(2<<29) +#define ZFS_BOOT_DATASET_NAME_KEY "zfs_dataset_name" +#define ZFS_BOOT_DATASET_UUID_KEY "zfs_dataset_uuid" +#define ZFS_BOOT_DATASET_RDONLY_KEY "zfs_dataset_rdonly" +#define ZFS_MOUNTROOT_RETRIES 50 +#define ZFS_BOOTLOG_DELAY 100 + +/* + * C functions for boot-time vdev discovery + */ + +/* + * Intermediate structures used to gather configuration information. 
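The flow sketched at the top of this file is a classic producer/consumer handshake: the IOKit notification handler queues newly published media under a mutex and signals a condition variable, and the import thread drains the queue and sleeps when it is empty. Below is a minimal user-space sketch of that pattern, with plain pthreads standing in for kmutex_t/kcondvar_t and an int standing in for the OSSet of disks; every name in it is invented for illustration.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int pending;        /* stands in for the pools->disks OSSet */
static int terminating;    /* stands in for pools->terminating */

/* Notification-handler side: queue one disk and wake the worker. */
static void
probe_media(void)
{
	pthread_mutex_lock(&lock);
	pending++;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&cv);
}

/* Import-thread side: drain the queue, then sleep until signalled again. */
static void *
import_thread(void *arg)
{
	(void) arg;
	pthread_mutex_lock(&lock);
	for (;;) {
		while (pending > 0) {
			pending--;
			pthread_mutex_unlock(&lock);
			printf("probe one disk, read labels\n");
			pthread_mutex_lock(&lock);
		}
		if (terminating)
			break;
		pthread_cond_wait(&cv, &lock);
	}
	pthread_mutex_unlock(&lock);
	return (NULL);
}

int
main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, import_thread, NULL);
	probe_media();
	probe_media();

	pthread_mutex_lock(&lock);
	terminating = 1;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&cv);

	pthread_join(tid, NULL);
	return (0);
}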
+ */ +typedef struct config_entry { + uint64_t ce_txg; + nvlist_t *ce_config; + struct config_entry *ce_next; +} config_entry_t; + +typedef struct vdev_entry { + uint64_t ve_guid; + config_entry_t *ve_configs; + struct vdev_entry *ve_next; +} vdev_entry_t; + +typedef struct pool_entry { + uint64_t pe_guid; + vdev_entry_t *pe_vdevs; + struct pool_entry *pe_next; + uint64_t complete; +} pool_entry_t; + +typedef struct name_entry { + char *ne_name; + uint64_t ne_guid; + uint64_t ne_order; + uint64_t ne_num_labels; + struct name_entry *ne_next; +} name_entry_t; + +typedef struct pool_list { + pool_entry_t *pools; + name_entry_t *names; + uint64_t pool_guid; + char *pool_name; + OSSet *new_disks; + OSSet *disks; + kmutex_t lock; + kcondvar_t cv; + IOService *zfs_hl; + IONotifier *notifier; + volatile UInt64 terminating; +} pool_list_t; + +#define ZFS_BOOT_ACTIVE 0x1 +#define ZFS_BOOT_TERMINATING 0x2 +#define ZFS_BOOT_INVALID 0x99 + +#define ZFS_BOOT_PREALLOC_SET 5 + +#if 0 +static ZFSBootDevice *bootdev = 0; +#endif +static pool_list_t *zfs_boot_pool_list = 0; + +DSTATIC char * +zfs_boot_get_devid(const char *path) +{ + /* + * XXX Unavailable interface + * + * If we implement one in spl, it could + * simplify import when device paths + * have changed (e.g. USB pools). + * + * Could use ldi DeviceTree path, or + * IOService path if not in DTPlane. + */ + return (NULL); +} + +/* + * Go through and fix up any path and/or devid information for the given vdev + * configuration. + * + * Copied from libzfs_import.c + */ +DSTATIC int +zfs_boot_fix_paths(nvlist_t *nv, name_entry_t *names) +{ + nvlist_t **child; + uint_t c, children; + uint64_t guid; + name_entry_t *ne, *best; + char *path, *devid; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (zfs_boot_fix_paths(child[c], names) != 0) + return (-1); + return (0); + } + + /* + * This is a leaf (file or disk) vdev. In either case, go through + * the name list and see if we find a matching guid. If so, replace + * the path and see if we can calculate a new devid. + * + * There may be multiple names associated with a particular guid, in + * which case we have overlapping partitions or multiple paths to the + * same disk. In this case we prefer to use the path name which + * matches the ZPOOL_CONFIG_PATH. If no matching entry is found we + * use the lowest order device which corresponds to the first match + * while traversing the ZPOOL_IMPORT_PATH search path. + */ + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) + path = NULL; + + best = NULL; + for (ne = names; ne != NULL; ne = ne->ne_next) { + if (ne->ne_guid == guid) { + + if (path == NULL) { + best = ne; + break; + } + + if ((strlen(path) == strlen(ne->ne_name)) && + strncmp(path, ne->ne_name, strlen(path)) == 0) { + best = ne; + break; + } + + if (best == NULL) { + best = ne; + continue; + } + + /* Prefer paths with more vdev labels. */ + if (ne->ne_num_labels > best->ne_num_labels) { + best = ne; + continue; + } + + /* Prefer paths earlier in the search order. 
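The preference rules spelled out above (an exact match on the config's ZPOOL_CONFIG_PATH wins outright, then the copy with more readable labels, then the earlier search order) amount to a three-key comparison. A stand-alone sketch follows; the struct mirrors name_entry_t, but all names are invented for illustration.

#include <stdint.h>
#include <string.h>

struct candidate {
	const char *name;       /* device path for this label copy */
	uint64_t num_labels;    /* how many valid labels were read */
	uint64_t order;         /* position in the search order */
};

/*
 * Preference between two candidates carrying the same vdev guid:
 * exact ZPOOL_CONFIG_PATH match first, then more labels, then lower order.
 */
const struct candidate *
better_candidate(const struct candidate *a, const struct candidate *b,
    const char *config_path)
{
	if (config_path != NULL) {
		if (strcmp(a->name, config_path) == 0)
			return (a);
		if (strcmp(b->name, config_path) == 0)
			return (b);
	}
	if (a->num_labels != b->num_labels)
		return (a->num_labels > b->num_labels ? a : b);
	return (a->order <= b->order ? a : b);
}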
*/ + if (ne->ne_num_labels == best->ne_num_labels && + ne->ne_order < best->ne_order) { + best = ne; + continue; + } + } + } + + if (best == NULL) + return (0); + + if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) + return (-1); + + if ((devid = zfs_boot_get_devid(best->ne_name)) == NULL) { + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); + } else { + if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) { + spa_strfree(devid); + return (-1); + } + spa_strfree(devid); + } + + return (0); +} + +/* + * Add the given configuration to the list of known devices. + * + * Copied from libzfs_import.c + * diffs: kmem_alloc, kmem_free with size + */ +DSTATIC int +zfs_boot_add_config(pool_list_t *pl, const char *path, + int order, int num_labels, nvlist_t *config) +{ + uint64_t pool_guid, vdev_guid, top_guid, txg, state; + pool_entry_t *pe; + vdev_entry_t *ve; + config_entry_t *ce; + name_entry_t *ne; + + dprintf("%s %p [%s] %d %d %p\n", __func__, + pl, path, order, num_labels, config); + + /* + * If this is a hot spare not currently in use or level 2 cache + * device, add it to the list of names to translate, but don't do + * anything else. + */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &state) == 0 && + (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && + nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { + if ((ne = (name_entry_t *) kmem_alloc( + sizeof (name_entry_t), KM_SLEEP)) == NULL) { + return (-1); + } + bzero(ne, sizeof (name_entry_t)); + + if ((ne->ne_name = spa_strdup(path)) == NULL) { + kmem_free(ne, sizeof (name_entry_t)); + return (-1); + } + ne->ne_guid = vdev_guid; + ne->ne_order = order; + ne->ne_num_labels = num_labels; + ne->ne_next = pl->names; + pl->names = ne; + return (0); + } + + /* + * If we have a valid config but cannot read any of these fields, then + * it means we have a half-initialized label. In vdev_label_init() + * we write a label with txg == 0 so that we can identify the device + * in case the user refers to the same disk later on. If we fail to + * create the pool, we'll be left with a label in this state + * which should not be considered part of a valid pool. + */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &pool_guid) != 0 || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, + &vdev_guid) != 0 || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, + &top_guid) != 0 || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0) { + nvlist_free(config); + return (0); + } + + /* + * First, see if we know about this pool. If not, then add it to the + * list of known pools. + */ + for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { + if (pe->pe_guid == pool_guid) + break; + } + + if (pe == NULL) { + if ((pe = (pool_entry_t *) kmem_alloc( + sizeof (pool_entry_t), KM_SLEEP)) == NULL) { + nvlist_free(config); + return (-1); + } + bzero(pe, sizeof (pool_entry_t)); + pe->pe_guid = pool_guid; + pe->pe_next = pl->pools; + pl->pools = pe; + } + + /* + * Second, see if we know about this toplevel vdev. Add it if its + * missing. 
+ */ + for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { + if (ve->ve_guid == top_guid) + break; + } + + if (ve == NULL) { + if ((ve = (vdev_entry_t *) kmem_alloc( + sizeof (vdev_entry_t), KM_SLEEP)) == NULL) { + nvlist_free(config); + return (-1); + } + bzero(ve, sizeof (vdev_entry_t)); + ve->ve_guid = top_guid; + ve->ve_next = pe->pe_vdevs; + pe->pe_vdevs = ve; + } + + /* + * Third, see if we have a config with a matching transaction group. If + * so, then we do nothing. Otherwise, add it to the list of known + * configs. + */ + for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { + if (ce->ce_txg == txg) + break; + } + + if (ce == NULL) { + if ((ce = (config_entry_t *) kmem_alloc( + sizeof (config_entry_t), KM_SLEEP)) == NULL) { + nvlist_free(config); + return (-1); + } + bzero(ce, sizeof (config_entry_t)); + ce->ce_txg = txg; + ce->ce_config = config; + ce->ce_next = ve->ve_configs; + ve->ve_configs = ce; + } else { + nvlist_free(config); + } + + /* + * At this point we've successfully added our config to the list of + * known configs. The last thing to do is add the vdev guid -> path + * mappings so that we can fix up the configuration as necessary before + * doing the import. + */ + if ((ne = (name_entry_t *) kmem_alloc( + sizeof (name_entry_t), KM_SLEEP)) == NULL) { + return (-1); + } + bzero(ne, sizeof (name_entry_t)); + + if ((ne->ne_name = spa_strdup(path)) == NULL) { + kmem_free(ne, sizeof (name_entry_t)); + return (-1); + } + + ne->ne_guid = vdev_guid; + ne->ne_order = order; + ne->ne_num_labels = num_labels; + ne->ne_next = pl->names; + pl->names = ne; + + return (0); +} + +/* + * libzfs_import used the libzfs handle and a zfs + * command to issue tryimport in-kernel via ioctl. + * This should leave config as-is, and return nvl. + * Since zfs_boot is already in-kernel, duplicate + * config into nvl, and call spa_tryimport on it. + */ +DSTATIC nvlist_t * +zfs_boot_refresh_config(nvlist_t *config) +{ + nvlist_t *nvl = 0; + + /* tryimport does not free config, and returns new nvl or null */ + nvl = spa_tryimport(config); + return (nvl); +} + +/* + * Determine if the vdev id is a hole in the namespace. + */ +DSTATIC boolean_t +zfs_boot_vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) +{ + int c; + + for (c = 0; c < holes; c++) { + /* Top-level is a hole */ + if (hole_array[c] == id) + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Convert our list of pools into the definitive set of configurations. We + * start by picking the best config for each toplevel vdev. Once that's done, + * we assemble the toplevel vdevs into a full config for the pool. We make a + * pass to fix up any incorrect paths, and then add it to the main list to + * return to the user. 
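As described above, the first step is picking, for each top-level vdev, the label copy written at the highest transaction group, since that copy carries the most current view of the vdev namespace. A minimal sketch of that selection (types invented for illustration, mirroring config_entry_t):

#include <stddef.h>
#include <stdint.h>

struct cfg {
	uint64_t txg;
	struct cfg *next;
};

/*
 * Among all label copies collected for one top-level vdev, return the
 * copy written at the highest transaction group.
 */
const struct cfg *
best_config(const struct cfg *list)
{
	const struct cfg *best = NULL;
	const struct cfg *c;

	for (c = list; c != NULL; c = c->next)
		if (best == NULL || c->txg > best->txg)
			best = c;
	return (best);
}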
+ */ +DSTATIC nvlist_t * +zfs_boot_get_configs(pool_list_t *pl, boolean_t active_ok) +{ + pool_entry_t *pe; + vdev_entry_t *ve; + config_entry_t *ce; + nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot; + nvlist_t **spares, **l2cache; + uint_t i, nspares, nl2cache; + boolean_t config_seen; + uint64_t best_txg; + char *name, *hostname = NULL; + uint64_t guid; + uint_t children = 0; + nvlist_t **child = NULL; + uint_t holes; + uint64_t *hole_array, max_id; + uint_t c; +#if 0 + boolean_t isactive; +#endif + uint64_t hostid; + nvlist_t *nvl; + boolean_t valid_top_config = B_FALSE; + + if (nvlist_alloc(&ret, 0, 0) != 0) + goto nomem; + + for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { + uint64_t id, max_txg = 0; + + if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) + goto nomem; + config_seen = B_FALSE; + + /* + * Iterate over all toplevel vdevs. Grab the pool configuration + * from the first one we find, and then go through the rest and + * add them as necessary to the 'vdevs' member of the config. + */ + for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { + + /* + * Determine the best configuration for this vdev by + * selecting the config with the latest transaction + * group. + */ + best_txg = 0; + for (ce = ve->ve_configs; ce != NULL; + ce = ce->ce_next) { + + if (ce->ce_txg > best_txg) { + tmp = ce->ce_config; + best_txg = ce->ce_txg; + } + } + + /* + * We rely on the fact that the max txg for the + * pool will contain the most up-to-date information + * about the valid top-levels in the vdev namespace. + */ + if (best_txg > max_txg) { + (void) nvlist_remove(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + DATA_TYPE_UINT64); + (void) nvlist_remove(config, + ZPOOL_CONFIG_HOLE_ARRAY, + DATA_TYPE_UINT64_ARRAY); + + max_txg = best_txg; + hole_array = NULL; + holes = 0; + max_id = 0; + valid_top_config = B_FALSE; + + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { + verify(nvlist_add_uint64(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + max_id) == 0); + valid_top_config = B_TRUE; + } + + if (nvlist_lookup_uint64_array(tmp, + ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, + &holes) == 0) { + verify(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_HOLE_ARRAY, + hole_array, holes) == 0); + } + } + + if (!config_seen) { + /* + * Copy the relevant pieces of data to the pool + * configuration: + * + * version + * pool guid + * name + * pool txg (if available) + * comment (if available) + * pool state + * hostid (if available) + * hostname (if available) + */ + uint64_t state, version, pool_txg; + char *comment = NULL; + + version = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VERSION); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_VERSION, version); + guid = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_GUID); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_GUID, guid); + name = fnvlist_lookup_string(tmp, + ZPOOL_CONFIG_POOL_NAME); + fnvlist_add_string(config, + ZPOOL_CONFIG_POOL_NAME, name); + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_TXG, &pool_txg) == 0) + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_TXG, pool_txg); + + if (nvlist_lookup_string(tmp, + ZPOOL_CONFIG_COMMENT, &comment) == 0) + fnvlist_add_string(config, + ZPOOL_CONFIG_COMMENT, comment); + + state = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_STATE); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_STATE, state); + + hostid = 0; + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_HOSTID, &hostid) == 0) { + fnvlist_add_uint64(config, + ZPOOL_CONFIG_HOSTID, hostid); + hostname = 
fnvlist_lookup_string(tmp, + ZPOOL_CONFIG_HOSTNAME); + fnvlist_add_string(config, + ZPOOL_CONFIG_HOSTNAME, hostname); + } + + config_seen = B_TRUE; + } + + /* + * Add this top-level vdev to the child array. + */ + verify(nvlist_lookup_nvlist(tmp, + ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); + verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, + &id) == 0); + + if (id >= children) { + nvlist_t **newchild; + + newchild = (nvlist_t **) kmem_alloc((id + 1) * + sizeof (nvlist_t *), KM_SLEEP); + if (newchild == NULL) + goto nomem; + + for (c = 0; c < children; c++) + newchild[c] = child[c]; + + kmem_free(child, children * + sizeof (nvlist_t *)); + child = newchild; + children = id + 1; + } + if (nvlist_dup(nvtop, &child[id], 0) != 0) + goto nomem; + + } + + /* + * If we have information about all the top-levels then + * clean up the nvlist which we've constructed. This + * means removing any extraneous devices that are + * beyond the valid range or adding devices to the end + * of our array which appear to be missing. + */ + if (valid_top_config) { + if (max_id < children) { + for (c = max_id; c < children; c++) + nvlist_free(child[c]); + children = max_id; + } else if (max_id > children) { + nvlist_t **newchild; + + newchild = (nvlist_t **) kmem_alloc((max_id) * + sizeof (nvlist_t *), KM_SLEEP); + if (newchild == NULL) + goto nomem; + + for (c = 0; c < children; c++) + newchild[c] = child[c]; + + kmem_free(child, children * + sizeof (nvlist_t *)); + child = newchild; + children = max_id; + } + } + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + + /* + * The vdev namespace may contain holes as a result of + * device removal. We must add them back into the vdev + * tree before we process any missing devices. + */ + if (holes > 0) { + ASSERT(valid_top_config); + + for (c = 0; c < children; c++) { + nvlist_t *holey; + + if (child[c] != NULL || + !zfs_boot_vdev_is_hole(hole_array, holes, + c)) + continue; + + if (nvlist_alloc(&holey, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + + /* + * Holes in the namespace are treated as + * "hole" top-level vdevs and have a + * special flag set on them. + */ + if (nvlist_add_string(holey, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_GUID, 0ULL) != 0) { + nvlist_free(holey); + goto nomem; + } + child[c] = holey; + } + } + + /* + * Look for any missing top-level vdevs. If this is the case, + * create a faked up 'missing' vdev as a placeholder. We cannot + * simply compress the child array, because the kernel performs + * certain checks to make sure the vdev IDs match their location + * in the configuration. + */ + for (c = 0; c < children; c++) { + if (child[c] == NULL) { + nvlist_t *missing; + if (nvlist_alloc(&missing, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + if (nvlist_add_string(missing, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_MISSING) != 0 || + nvlist_add_uint64(missing, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(missing, + ZPOOL_CONFIG_GUID, 0ULL) != 0) { + nvlist_free(missing); + goto nomem; + } + child[c] = missing; + } + } + + /* + * Put all of this pool's top-level vdevs into a root vdev. 
+ */ + if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) + goto nomem; + if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) != 0 || + nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || + nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || + nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + child, children) != 0) { + nvlist_free(nvroot); + goto nomem; + } + + for (c = 0; c < children; c++) + nvlist_free(child[c]); + kmem_free(child, children * sizeof (nvlist_t *)); + children = 0; + child = NULL; + + /* + * Go through and fix up any paths and/or devids based on our + * known list of vdev GUID -> path mappings. + */ + if (zfs_boot_fix_paths(nvroot, pl->names) != 0) { + nvlist_free(nvroot); + goto nomem; + } + + /* + * Add the root vdev to this pool's configuration. + */ + if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + nvroot) != 0) { + nvlist_free(nvroot); + goto nomem; + } + nvlist_free(nvroot); + + /* + * zdb uses this path to report on active pools that were + * imported or created using -R. + */ + if (active_ok) + goto add_pool; + +#if 0 +/* + * For root-pool import, no pools are active yet. + * Pool name and guid were looked up from the config and only used here. + * (Later we lookup the pool name for a separate test). + */ + /* + * Determine if this pool is currently active, in which case we + * can't actually import it. + */ + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + + if (zfs_boot_pool_active(name, guid, &isactive) != 0) + goto error; + + if (isactive) { + nvlist_free(config); + config = NULL; + continue; + } +#endif + + if ((nvl = zfs_boot_refresh_config(config)) == NULL) { + nvlist_free(config); + config = NULL; + continue; + } + + nvlist_free(config); + config = nvl; + + /* + * Go through and update the paths for spares, now that we have + * them. + */ + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + if (zfs_boot_fix_paths(spares[i], pl->names) != + 0) + goto nomem; + } + } + + /* + * Update the paths for l2cache devices. + */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + for (i = 0; i < nl2cache; i++) { + if (zfs_boot_fix_paths(l2cache[i], pl->names) != + 0) + goto nomem; + } + } + + /* + * Restore the original information read from the actual label. + */ + (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID, + DATA_TYPE_UINT64); + (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME, + DATA_TYPE_STRING); + if (hostid != 0) { + verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, + hostid) == 0); + verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, + hostname) == 0); + } + +add_pool: + /* + * Add this pool to the list of configs. 
+ */ + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + if (nvlist_add_nvlist(ret, name, config) != 0) + goto nomem; + + nvlist_free(config); + config = NULL; + } + + return (ret); + +nomem: +#ifdef DEBUG + printf("zfs_boot_get_configs failed to allocate memory\n"); +#endif + if (config) nvlist_free(config); + if (ret) nvlist_free(ret); + for (c = 0; c < children; c++) + nvlist_free(child[c]); + if (children > 0) { + kmem_free(child, children * sizeof (nvlist_t *)); + } + /* + * libzfs_import simply calls free(child), we need to + * pass kmem_free the size of the array. Array is + * allocated above as (children * sizeof nvlist_t*). + */ + + return (NULL); +} + +/* + * Return the offset of the given label. + */ +DSTATIC uint64_t +zfs_boot_label_offset(uint64_t size, int l) +{ + ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); + return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? + 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); +} + +/* + * Given an IOMedia, read the label information and return an nvlist + * describing the configuration, if there is one. The number of valid + * labels found will be returned in num_labels when non-NULL. + */ +DSTATIC int +zfs_boot_read_label(IOService *zfs_hl, IOMedia *media, + nvlist_t **config, int *num_labels) +{ + IOMemoryDescriptor *buffer = NULL; + uint64_t mediaSize; + uint64_t nread = 0; + vdev_label_t *label; + nvlist_t *expected_config = NULL; + uint64_t expected_guid = 0, size, labelsize; + int l, count = 0; + IOReturn ret; + + *config = NULL; + + /* Verify IOMedia pointer and device size */ + if (!media || (mediaSize = media->getSize()) == 0) { + dprintf("%s couldn't get media or size\n", __func__); + return (-1); + } + + /* Determine vdev label size and aligned vdev size */ + labelsize = sizeof (vdev_label_t); + size = P2ALIGN_TYPED(mediaSize, labelsize, uint64_t); + + /* Allocate a buffer to read labels into */ + label = (vdev_label_t *) kmem_alloc(labelsize, KM_SLEEP); + if (!label) { + dprintf("%s couldn't allocate label for read\n", __func__); + return (-1); + } + + /* Allocate a memory descriptor with the label pointer */ + buffer = IOMemoryDescriptor::withAddress((void*)label, labelsize, + kIODirectionIn); + + /* Verify buffer was allocated */ + if (!buffer || (buffer->getLength() != labelsize)) { + dprintf("%s couldn't allocate buffer for read\n", __func__); + goto error; + } + + /* Open the device for reads */ + if (false == media->IOMedia::open(zfs_hl, 0, + kIOStorageAccessReader)) { + dprintf("%s media open failed\n", __func__); + goto error; + } + + /* Read all four vdev labels */ + for (l = 0; l < VDEV_LABELS; l++) { + uint64_t state, guid, txg; + + /* Zero the label buffer */ + bzero(label, labelsize); + + /* Prepare the buffer for IO */ + buffer->prepare(kIODirectionIn); + + /* Read a label from the specified offset */ + ret = media->IOMedia::read(zfs_hl, + zfs_boot_label_offset(size, l), + buffer, 0, &nread); + + /* Call the buffer completion */ + buffer->complete(); + + /* Skip failed reads, try next label */ + if (ret != kIOReturnSuccess) { + dprintf("%s media->read failed\n", __func__); + continue; + } + + /* Skip incomplete reads, try next label */ + if (nread < labelsize) { + dprintf("%s nread %llu / %llu\n", + __func__, nread, labelsize); + continue; + } + + /* Skip invalid labels that can't be unpacked */ + if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, + sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) + continue; + + /* Verify GUID */ + if 
(nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID, + &guid) != 0 || guid == 0) { + dprintf("%s nvlist_lookup guid failed %llu\n", + __func__, guid); + nvlist_free(*config); + continue; + } + + /* Verify vdev state */ + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state > POOL_STATE_L2CACHE) { + dprintf("%s nvlist_lookup state failed %llu\n", + __func__, state); + nvlist_free(*config); + continue; + } + + /* Verify txg number */ + if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0)) { + dprintf("%s nvlist_lookup txg failed %llu\n", + __func__, txg); + nvlist_free(*config); + continue; + } + + /* Increment count for first match, or if guid matches */ + if (expected_guid) { + if (expected_guid == guid) + count++; + + nvlist_free(*config); + } else { + expected_config = *config; + expected_guid = guid; + count++; + } + } + + /* Close IOMedia */ + media->close(zfs_hl); + + /* Copy out the config and number of labels */ + if (num_labels != NULL) + *num_labels = count; + + kmem_free(label, labelsize); + buffer->release(); + *config = expected_config; + + return (0); + + +error: + /* Clean up */ + if (buffer) { + buffer->release(); + buffer = 0; + } + if (label) { + kmem_free(label, labelsize); + label = 0; + } + + return (-1); +} + +DSTATIC bool +zfs_boot_probe_media(void* target, void* refCon, + IOService* newService, __unused IONotifier* notifier) +{ + IOMedia *media = 0; + OSObject *isLeaf = 0; + OSString *ospath = 0; + uint64_t mediaSize = 0; + pool_list_t *pools = (pool_list_t *) refCon; + + /* Verify pool list can be cast */ + if (!pools) { + dprintf("%s invalid refCon\n", __func__); + return (false); + } + /* Should never happen */ + if (!newService) { + printf("%s %s\n", "zfs_boot_probe_media", + "called with null newService"); + return (false); + } + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 1\n", __func__); + return (false); + } + + /* Validate pool name */ + if (!pools->pool_name || strlen(pools->pool_name) == 0) { + dprintf("%s no pool name specified\n", __func__); + return (false); + } + + /* Get the parent IOMedia device */ + media = OSDynamicCast(IOMedia, newService->getProvider()); + + if (!media) { + dprintf("%s couldn't be cast as IOMedia\n", + __func__); + return (false); + } + + isLeaf = media->getProperty(kIOMediaLeafKey); + if (!isLeaf) { + dprintf("%s skipping non-leaf\n", __func__); + goto out; + } + + mediaSize = media->getSize(); + if (mediaSize < SPA_MINDEVSIZE) { + dprintf("%s skipping device with size %llu\n", + __func__, mediaSize); + goto out; + } + + ospath = OSDynamicCast(OSString, media->getProperty( + kIOBSDNameKey, gIOServicePlane, + kIORegistryIterateRecursively)); + if (!ospath || (ospath->getLength() == 0)) { + dprintf("%s skipping device with no bsd disk node\n", + __func__); + goto out; + } + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 2\n", __func__); + goto out; + } + + + /* Take pool_list lock */ + mutex_enter(&pools->lock); + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 3\n", __func__); + /* Unlock the pool list lock */ + mutex_exit(&pools->lock); + goto out; + } + + /* Add this IOMedia to the disk set */ + pools->disks->setObject(media); + + /* Unlock the pool list lock */ + mutex_exit(&pools->lock); + + /* Wakeup zfs_boot_import_thread */ + cv_signal(&pools->cv); + +out: + media = 
0; + return (true); +} + +DSTATIC bool +zfs_boot_probe_disk(pool_list_t *pools, IOMedia *media) +{ + OSString *ospath, *uuid; + char *path = 0, *pname; + const char prefix[] = "/private/var/run/disk/by-id/media-"; + uint64_t this_guid; + int num_labels, err, len = 0; + nvlist_t *config; + boolean_t matched = B_FALSE; + + dprintf("%s: with %s media\n", __func__, + (media ? "valid" : "missing")); + ASSERT3U(media, !=, NULL); + + /* Verify pool list can be cast */ + if (!pools) { + dprintf("%s missing pool_list\n", __func__); + return (false); + } + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 1\n", __func__); + return (false); + } + + /* Validate pool name */ + if (!pools->pool_name || strlen(pools->pool_name) == 0) { + dprintf("%s no pool name specified\n", __func__); + return (false); + } + + /* Try to get a UUID from the media */ + uuid = OSDynamicCast(OSString, media->getProperty(kIOMediaUUIDKey)); + if (uuid && uuid->getLength() != 0) { + /* Allocate room for prefix, UUID, and null terminator */ + len = (strlen(prefix) + uuid->getLength()) + 1; + + path = (char *) kmem_alloc(len, KM_SLEEP); + if (!path) { + dprintf("%s couldn't allocate path\n", __func__); + return (false); + } + + snprintf(path, len, "%s%s", prefix, uuid->getCStringNoCopy()); + uuid = 0; + } else { + /* Get the BSD name as a C string */ + ospath = OSDynamicCast(OSString, media->getProperty( + kIOBSDNameKey, gIOServicePlane, + kIORegistryIterateRecursively)); + if (!ospath || (ospath->getLength() == 0)) { + dprintf("%s skipping device with no bsd disk node\n", + __func__); + return (false); + } + + /* Allocate room for "/dev/" + "diskNsN" + '\0' */ + len = (strlen("/dev/") + ospath->getLength() + 1); + path = (char *) kmem_alloc(len, KM_SLEEP); + if (!path) { + dprintf("%s couldn't allocate path\n", __func__); + return (false); + } + + /* "/dev/" is 5 characters, plus null character */ + snprintf(path, len, "/dev/%s", ospath->getCStringNoCopy()); + ospath = 0; + } + dprintf("%s path [%s]\n", __func__, (path ? path : "")); + + /* Read vdev labels, if any */ + err = zfs_boot_read_label(pools->zfs_hl, media, + &config, &num_labels); + + /* Skip disks with no labels */ + if (err != 0 || num_labels == 0 || !config) { + goto out; + } + + /* Lookup pool name */ + if (pools->pool_name != NULL && + (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &pname) == 0)) { + /* Compare with pool_name */ + if (strlen(pools->pool_name) == strlen(pname) && + strncmp(pools->pool_name, pname, + strlen(pname)) == 0) { + printf("%s matched pool %s\n", + __func__, pname); + matched = B_TRUE; + } + /* Compare with pool_guid */ + } else if (pools->pool_guid != 0) { + matched = nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_GUID, + &this_guid) == 0 && + pools->pool_guid == this_guid; + } + + /* Skip non-matches */ + if (!matched) { + nvlist_free(config); + config = NULL; + goto out; + } + + /* + * Add this config to the pool list. + * Always assigns order 1 since all disks are + * referenced by /private/var/run/disk/by-id/ paths. 
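For reference, the label placement used by zfs_boot_read_label() and zfs_boot_label_offset() above puts labels 0 and 1 at the front of the device and labels 2 and 3 at the end of the label-aligned size. The stand-alone sketch below repeats the same arithmetic, assuming the usual 256 KiB on-disk label size; the 100 MiB device size is a made-up example.

#include <stdint.h>
#include <stdio.h>

#define	N_LABELS	4		/* VDEV_LABELS */
#define	LABEL_SIZE	(256ULL << 10)	/* sizeof (vdev_label_t), 256 KiB */

/* Same arithmetic as zfs_boot_label_offset() above. */
static uint64_t
label_offset(uint64_t aligned_size, int l)
{
	return (l * LABEL_SIZE + (l < N_LABELS / 2 ?
	    0 : aligned_size - N_LABELS * LABEL_SIZE));
}

int
main(void)
{
	/* A hypothetical 100 MiB vdev, already label-aligned. */
	uint64_t size = 100ULL << 20;
	int l;

	for (l = 0; l < N_LABELS; l++)
		printf("label %d at byte %llu\n", l,
		    (unsigned long long)label_offset(size, l));
	return (0);
}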
+ */ + dprintf("%s: add_config %s\n", __func__, path); + if (zfs_boot_add_config(pools, path, 1, + num_labels, config) != 0) { + printf("%s couldn't add config to pool list\n", + __func__); + } + +out: + /* Clean up */ + if (path && len > 0) { + kmem_free(path, len); + } + return (true); +} + +DSTATIC void +zfs_boot_free() +{ + pool_entry_t *pe, *penext; + vdev_entry_t *ve, *venext; + config_entry_t *ce, *cenext; + name_entry_t *ne, *nenext; + pool_list_t *pools = zfs_boot_pool_list; + + /* Verify pool list can be cast */ + if (!pools) { + dprintf("%s: no pool_list to clear\n", __func__); + return; + } + + /* Clear global ptr */ + zfs_boot_pool_list = 0; + + pools->terminating = ZFS_BOOT_TERMINATING; + + /* Remove IONotifier (waits for tasks to complete) */ + if (pools->notifier) { + pools->notifier->remove(); + pools->notifier = 0; + } + + /* Release the lock */ + mutex_destroy(&pools->lock); + + /* Release the disk set */ + if (pools->disks) { + pools->disks->flushCollection(); + pools->disks->release(); + pools->disks = 0; + } + + /* Clear the zfs IOService handle */ + if (pools->zfs_hl) { + pools->zfs_hl = 0; + } + + /* Free the pool_name string */ + if (pools->pool_name) { + kmem_free(pools->pool_name, strlen(pools->pool_name) + 1); + pools->pool_name = 0; + } + + /* Clear the pool config list */ + for (pe = pools->pools; pe != NULL; pe = penext) { + /* Clear the vdev list */ + penext = pe->pe_next; + for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { + /* Clear the vdev config list */ + venext = ve->ve_next; + for (ce = ve->ve_configs; ce != NULL; ce = cenext) { + cenext = ce->ce_next; + if (ce->ce_config) + nvlist_free(ce->ce_config); + kmem_free(ce, sizeof (config_entry_t)); + } + kmem_free(ve, sizeof (vdev_entry_t)); + } + kmem_free(pe, sizeof (pool_entry_t)); + } + pools->pools = 0; + + /* Clear the vdev name list */ + for (ne = pools->names; ne != NULL; ne = nenext) { + nenext = ne->ne_next; + if (ne->ne_name) + spa_strfree(ne->ne_name); + kmem_free(ne, sizeof (name_entry_t)); + } + pools->names = 0; + + /* Finally, free the pool list struct */ + kmem_free(pools, sizeof (pool_list_t)); + pools = 0; +} + +void +zfs_boot_fini() +{ + pool_list_t *pools = zfs_boot_pool_list; + + if (!pools) { + printf("%s no pool_list to clear\n", __func__); + return; + } + + /* Set terminating flag */ + if (false == OSCompareAndSwap64(ZFS_BOOT_ACTIVE, + ZFS_BOOT_TERMINATING, &(pools->terminating))) { + printf("%s already terminating? 
%llu\n", + __func__, pools->terminating); + } + + /* Wakeup zfs_boot_import_thread */ + cv_signal(&pools->cv); + + /* Clean up */ + pools = 0; +} + +#define kBootUUIDKey "boot-uuid" +#define kBootUUIDMediaKey "boot-uuid-media" + +DSTATIC int +zfs_boot_publish_bootfs(IOService *zfs_hl, pool_list_t *pools) +{ + ZFSDataset *dataset = NULL; + IOMedia *media; + IOService *resourceService = NULL; + OSDictionary *properties = NULL; + spa_t *spa = NULL; + char *zfs_bootfs = NULL; + uint64_t bootfs = 0ULL; + int error, len = ZFS_MAX_DATASET_NAME_LEN; + + dprintf("%s\n", __func__); + if (!zfs_hl || !pools) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + +#if 0 + ZFSPool *pool_proxy = NULL; + if (bootdev) { + dprintf("%s bootdev already set\n", __func__); + return (EBUSY); + } +#endif + + zfs_bootfs = (char *)kmem_alloc(len, KM_SLEEP); + if (!zfs_bootfs) { + printf("%s string alloc failed\n", __func__); + return (ENOMEM); + } + zfs_bootfs[0] = '\0'; + + mutex_enter(&spa_namespace_lock); + spa = spa_next(NULL); + if (spa) { + bootfs = spa_bootfs(spa); + } + if (bootfs == 0) { + mutex_exit(&spa_namespace_lock); + dprintf("%s no bootfs, nothing to do\n", __func__); + kmem_free(zfs_bootfs, len); + return (0); + } + +#if 0 + /* Get pool proxy */ + if (!spa->spa_iokit_proxy || + (pool_proxy = spa->spa_iokit_proxy->proxy) == NULL) { + mutex_exit(&spa_namespace_lock); + dprintf("%s no spa_pool_proxy\n", __func__); + kmem_free(zfs_bootfs, len); + return (0); + } +#endif + + error = dsl_dsobj_to_dsname(spa_name(spa), + spa_bootfs(spa), zfs_bootfs); + mutex_exit(&spa_namespace_lock); + + if (error != 0) { + dprintf("%s bootfs to name failed\n", __func__); + kmem_free(zfs_bootfs, len); + return (ENODEV); + } + + printf("%s: publishing bootfs [%s]\n", __func__, zfs_bootfs); + + /* Create prop dict for the proxy, with 6 or more keys */ + if ((properties = OSDictionary::withCapacity(6)) == NULL) { + dprintf("%s prop dict allocation failed\n", __func__); + kmem_free(zfs_bootfs, len); + return (ENOMEM); + } + + /* Set Content Hint and Content */ + do { + const OSSymbol *partUUID; + + /* ZFS (BF01) partition type */ + if ((partUUID = OSSymbol::withCString( + "6A898CC3-1DD2-11B2-99A6-080020736631")) == NULL) { + dprintf("%s couldn't make partUUID\n", __func__); + break; + // kmem_free(zfs_bootfs, len); + // return (ENOMEM); + } + + /* Assign ZFS partiton UUID to both */ + if (properties->setObject(kIOMediaContentKey, + partUUID) == false || + properties->setObject(kIOMediaContentHintKey, + partUUID) == false) { + dprintf("%s content hint failed\n", __func__); + // kmem_free(zfs_bootfs, len); + // return (ENOMEM); + } + partUUID->release(); + } while (0); + + /* XXX Set dataset name, rdonly, and UUID */ + do { + OSString *nameStr; + OSString *uuidStr; + char uuid_cstr[UUID_PRINTABLE_STRING_LENGTH]; + uuid_t uuid; + + bzero(uuid, sizeof (uuid_t)); + bzero(uuid_cstr, UUID_PRINTABLE_STRING_LENGTH); + + zfs_vfs_uuid_gen(zfs_bootfs, uuid); + zfs_vfs_uuid_unparse(uuid, uuid_cstr); + + nameStr = OSString::withCString(zfs_bootfs); + uuidStr = OSString::withCString(uuid_cstr); + + if (properties->setObject(ZFS_BOOT_DATASET_NAME_KEY, + nameStr) == false || + properties->setObject(ZFS_BOOT_DATASET_UUID_KEY, + uuidStr) == false || + properties->setObject(ZFS_BOOT_DATASET_RDONLY_KEY, + kOSBooleanFalse) == false) { + dprintf("ZFSBootDevice::%s couldn't setup" + "property dict\n", __func__); + nameStr->release(); + uuidStr->release(); + kmem_free(zfs_bootfs, len); + return (ENOMEM); + } + nameStr->release(); + 
uuidStr->release(); + } while (0); + + /* Create proxy device */ + error = zfs_osx_proxy_create(zfs_bootfs); + if (error == 0) { + dataset = zfs_osx_proxy_get(zfs_bootfs); + } + /* Done with this string */ + kmem_free(zfs_bootfs, len); + zfs_bootfs = 0; + + if (!dataset) { + printf("%s: couldn't create proxy device\n", + __func__); + return (ENXIO); + } + + media = OSDynamicCast(IOMedia, dataset); + if (!media) { + printf("%s: couldn't cast proxy media\n", + __func__); + dataset->release(); + return (ENXIO); + } + +#if 0 + bootdev = new ZFSBootDevice; + + if (!bootdev) { + printf("%s: couldn't create boot device\n", __func__); + return (ENOMEM); + } + + if (bootdev->init(properties) == false) { + printf("%s init failed\n", __func__); + properties->release(); + bootdev->release(); + bootdev = 0; + return (ENXIO); + } + properties->release(); + properties = 0; + + if (bootdev->attach(pool_proxy) == false) { + printf("%s attach failed\n", __func__); + bootdev->release(); + bootdev = 0; + return (ENXIO); + } + + /* Technically should start but this doesn't do much */ + if (bootdev->start(pool_proxy) == false) { + printf("%s start failed\n", __func__); + bootdev->detach(pool_proxy); + bootdev->release(); + bootdev = 0; + return (ENXIO); + } + + /* Get matching started */ + bootdev->registerService(kIOServiceAsynchronous); + // bootdev->registerService(kIOServiceSynchronous); + + do { + if (bootdev->getClient() != 0) { + media = OSDynamicCast(IOMedia, + bootdev->getClient()->getClient()); + if (media) { + media->retain(); + break; + } + } + + /* Sleep until media is available */ + /* + * XXX Should use waitForServiceMatching or IONotifier + */ + IOSleep(200); + } while (!media); + + if (!media) { + /* XXX currently unreachable */ + printf("%s couldn't get bootdev media\n", __func__); + return (ENXIO); + } +#endif + + resourceService = IOService::getResourceService(); + if (!resourceService) { + dprintf("%s missing resource service\n", __func__); + /* Handle error */ + media->release(); + return (ENXIO); + } + +#if 1 + /* XXX publish an IOMedia as the BootUUIDMedia resource */ + /* uses same method as AppleFileSystemDriver */ + + /* Set IOMedia UUID */ + /* XXX skip (moved get uuid below) */ + // media->setProperty(kIOMediaUUIDKey, uuid); + /* Publish this IOMedia as the boot-uuid-media */ + IOService::publishResource(kBootUUIDMediaKey, media); + + /* Drop retain from earlier */ + media->release(); + /* Remove boot-uuid key so AppleFileSystem stops matching */ + resourceService->removeProperty(kBootUUIDKey); +#else + OSString *uuid = 0; + /* Get the current boot-uuid string */ + uuid = OSDynamicCast(OSString, + resourceService->getProperty(kBootUUIDKey, gIOServicePlane)); + if (!uuid) { + dprintf("%s missing boot-uuid IOResource\n", __func__); + /* Handle error */ + return (ENXIO); + } + printf("%s: got boot-uuid %s\n", __func__, uuid->getCStringNoCopy()); + + /* XXX Or use below and let AppleFileSystemDriver match it */ + /* Leaves the Apple_Boot content hint (at least for now) */ + media->setProperty(kIOMediaContentHintKey, "Apple_Boot"); + media->setProperty(kIOMediaUUIDKey, uuid); + /* Register for notifications (not matching) */ + media->registerService(kIOServiceAsynchronous); + /* Drop retain from earlier */ + media->release(); +#endif + + printf("%s done\n", __func__); + return (0); +} + +DSTATIC void +zfs_boot_import_thread(void *arg) +{ + nvlist_t *configs, *nv, *newnv; + nvpair_t *elem; + IOService *zfs_hl = 0; + OSSet *disks, *new_set = 0; + OSCollectionIterator *iter = 0; + OSObject 
*next; + IOMedia *media; + pool_list_t *pools = (pool_list_t *)arg; + uint64_t pool_state; + boolean_t pool_imported = B_FALSE; + int error = EINVAL; + + /* Verify pool list coult be cast */ + ASSERT3U(pools, !=, 0); + if (!pools) { + printf("%s %p %s\n", "zfs_boot_import_thread", + arg, "couldn't be cast as pool_list_t*"); + return; + } + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 1\n", __func__); + goto out_unlocked; + } + + new_set = OSSet::withCapacity(1); + /* To swap with pools->disks while locked */ + if (!new_set) { + dprintf("%s couldn't allocate new_set\n", __func__); + goto out_unlocked; + } + + /* Take pool list lock */ + mutex_enter(&pools->lock); + + zfs_hl = pools->zfs_hl; + + /* Check for work, then sleep on the lock */ + do { + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 2\n", __func__); + goto out_locked; + } + + /* Check for work */ + if (pools->disks->getCount() == 0) { + dprintf("%s no disks to check\n", __func__); + goto next_locked; + } + + /* Swap full set with a new empty one */ + ASSERT3U(new_set, !=, 0); + disks = pools->disks; + pools->disks = new_set; + new_set = 0; + + /* Release pool list lock */ + mutex_exit(&pools->lock); + + /* Create an iterator over the objects in the set */ + iter = OSCollectionIterator::withCollection(disks); + + /* couldn't be initialized */ + if (!iter) { + dprintf("%s %s %d %s\n", "zfs_boot_import_thread", + "couldn't get iterator from collection", + disks->getCount(), "disks skipped"); + + /* Merge disks back into pools->disks */ + mutex_enter(&pools->lock); + pools->disks->merge(disks); + mutex_exit(&pools->lock); + + /* Swap 'disks' back to new_set */ + disks->flushCollection(); + new_set = disks; + disks = 0; + + continue; + } + + /* Iterate over all disks */ + while ((next = iter->getNextObject()) != NULL) { + /* Cast each IOMedia object */ + media = OSDynamicCast(IOMedia, next); + + if (!iter->isValid()) { + /* Oh gosh, need to start over */ + iter->reset(); + continue; + } + + if (!media) { + dprintf("%s couldn't cast IOMedia\n", + __func__); + continue; + } + + /* Check this IOMedia device for a vdev label */ + if (!zfs_boot_probe_disk(pools, media)) { + dprintf("%s couldn't probe disk\n", + __func__); + continue; + } + } + + /* Clean up */ + media = 0; + iter->release(); + iter = 0; + + /* Swap 'disks' back to new_set */ + disks->flushCollection(); + new_set = disks; + disks = 0; + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 3\n", __func__); + goto out_unlocked; + } + + mutex_enter(&pools->lock); + /* Check for work */ + if (pools->disks->getCount() != 0) { + dprintf("%s more disks available, looping\n", __func__); + continue; + } + /* Release pool list lock */ + mutex_exit(&pools->lock); + + /* Generate a list of pool configs to import */ + configs = zfs_boot_get_configs(pools, B_TRUE); + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 4\n", __func__); + goto out_unlocked; + } + + /* Iterate over the nvlists (stored as nvpairs in nvlist) */ + elem = NULL; + while ((elem = nvlist_next_nvpair(configs, + elem)) != NULL) { + /* Cast the nvpair back to nvlist */ + nv = NULL; + verify(nvpair_value_nvlist(elem, &nv) == 0); + + /* Check vdev state */ + verify(nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_POOL_STATE, + &pool_state) == 0); + if (pool_state == POOL_STATE_DESTROYED) { + dprintf("%s skipping destroyed pool\n", + __func__); + continue; + 
} + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 5\n", __func__); + goto out_unlocked; + } + + /* Try import */ + newnv = spa_tryimport(nv); + nvlist_free(nv); + nv = 0; + if (newnv) { + dprintf("%s newnv: %p\n", __func__, newnv); + + /* Stop probing disks */ + if (pools->notifier) + pools->notifier->disable(); + + /* Do import */ + pool_imported = (spa_import(pools->pool_name, + newnv, 0, 0) == 0); + nvlist_free(newnv); + newnv = 0; + // pool_imported = spa_import_rootpool(nv); + } else { + dprintf("%s no newnv returned\n", __func__); + } + + dprintf("%s spa_import returned %d\n", __func__, + pool_imported); + + if (pool_imported) { + /* Get bootfs and publish IOMedia */ + error = zfs_boot_publish_bootfs(zfs_hl, pools); + if (error != 0) { + dprintf("%s publish bootfs error %d\n", + __func__, error); + } + + goto out_unlocked; + } else { + /* Resume notifications */ + if (pools->notifier) + pools->notifier->enable(true); + } + } + + /* Retake pool list lock */ + mutex_enter(&pools->lock); + +next_locked: + /* Check for work */ + if (pools->disks->getCount() != 0) { + continue; + } + + /* Abort early */ + if (pools->terminating != ZFS_BOOT_ACTIVE) { + dprintf("%s terminating 6\n", __func__); + goto out_locked; + } + + dprintf("%s sleeping on lock\n", __func__); + /* Sleep on lock, thread is resumed with lock held */ + cv_timedwait_sig(&pools->cv, &pools->lock, + ddi_get_lbolt() + hz); + + /* Loop forever */ + } while (true); + +out_locked: + /* Unlock pool list lock */ + mutex_exit(&pools->lock); + +out_unlocked: + /* Cleanup new_set */ + if (new_set) { + new_set->flushCollection(); + new_set->release(); + new_set = 0; + } + + /* Teardown pool list, lock, etc */ + zfs_boot_free(); + + return; /* taskq_dispatch */ +#if 0 + thread_exit(); /* thread_create */ +#endif +} + +DSTATIC bool +zfs_boot_check_mountroot(char **pool_name, uint64_t *pool_guid) +{ + /* + * Check if the kext is loading during early boot + * and/or check if root is mounted (IORegistry?) + * Use PE Boot Args to determine the root pool name. + */ + char *zfs_boot; + char *split; + uint64_t len; + bool result = false; + uint64_t uptime = 0; + + + if (!pool_name || !pool_guid) { + dprintf("%s %s\n", __func__, + "invalid pool_name or pool_guid ptr"); + return (false); + } + + /* XXX Ugly hack to determine if this is early boot */ + /* + * Could just check if boot-uuid (or rd= or rootdev=) + * are set, and abort otherwise + * IOResource "boot-uuid" only published before root is + * mounted, or "boot-uuid-media" once discovered + */ + clock_get_uptime(&uptime); /* uptime since boot in nanoseconds */ + dprintf("%s uptime: %llu\n", __func__, uptime); + + /* 3 billion nanoseconds ~= 3 seconds */ + // if (uptime >= 3LLU<<30) { + /* 60 billion nanoseconds ~= 60 seconds */ + if (uptime >= 7LLU<<33) { + dprintf("%s %s\n", __func__, "Already booted"); + /* + * Start the getrootdir() from working, the vfs_start() call + * isn't called until first mount, which is too late for + * spa_async_dispatch(). 
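The shifted constants used for the uptime cut-off above are easier to sanity-check in isolation: 3ULL<<30 is 3,221,225,472 (about 3.2 seconds in nanoseconds) and 7ULL<<33 is 60,129,542,144 (about 60 seconds). A few purely illustrative lines of C:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* The cut-offs used above, spelled out in nanoseconds. */
	uint64_t three_seconds = 3ULL << 30;	/* 3,221,225,472 ns ~ 3.2 s */
	uint64_t sixty_seconds = 7ULL << 33;	/* 60,129,542,144 ns ~ 60.1 s */

	printf("%llu %llu\n", (unsigned long long)three_seconds,
	    (unsigned long long)sixty_seconds);
	return (0);
}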
+ */ + return (false); + } else { + dprintf("%s %s\n", __func__, "Boot time"); + } + + zfs_boot = (char *) kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + + if (!zfs_boot) { + dprintf("%s couldn't allocate zfs_boot\n", __func__); + return (false); + } + + result = PE_parse_boot_argn("zfs_boot", zfs_boot, + ZFS_MAX_DATASET_NAME_LEN); + // dprintf( "Raw zfs_boot: [%llu] {%s}\n", + // (uint64_t)strlen(zfs_boot), zfs_boot); + + result = (result && (zfs_boot != 0) && strlen(zfs_boot) > 0); + + if (!result) { + result = PE_parse_boot_argn("rd", zfs_boot, + MAXPATHLEN); + result = (result && (zfs_boot != 0) && + strlen(zfs_boot) > 0 && + strncmp(zfs_boot, "zfs:", 4)); + // dprintf("Raw rd: [%llu] {%s}\n", + // (uint64_t)strlen(zfs_boot), zfs_boot ); + } + if (!result) { + result = PE_parse_boot_argn("rootdev", zfs_boot, + MAXPATHLEN); + result = (result && (zfs_boot != 0) && + strlen(zfs_boot) > 0 && + strncmp(zfs_boot, "zfs:", 4)); + // dprintf("Raw rootdev: [%llu] {%s}\n", + // (uint64_t)strlen(zfs_boot), zfs_boot ); + } + + /* + * XXX To Do - parse zpool_guid boot arg + */ + *pool_guid = 0; + + if (result) { + /* Check for first slash in zfs_boot */ + split = strchr(zfs_boot, '/'); + if (split) { + /* copy pool name up to first slash */ + len = (split - zfs_boot); + } else { + /* or copy whole string */ + len = strlen(zfs_boot); + } + + *pool_name = (char *) kmem_alloc(len+1, KM_SLEEP); + snprintf(*pool_name, len+1, "%s", zfs_boot); + + dprintf("Got zfs_boot: [%llu] {%s}->{%s}\n", + *pool_guid, zfs_boot, *pool_name); + } else { + dprintf("%s\n", "No zfs_boot\n"); + pool_name = 0; + } + + kmem_free(zfs_boot, ZFS_MAX_DATASET_NAME_LEN); + zfs_boot = 0; + return (result); +} + +bool +zfs_boot_init(IOService *zfs_hl) +{ + IONotifier *notifier = 0; + pool_list_t *pools = 0; + char *pool_name = 0; + uint64_t pool_guid = 0; + + zfs_boot_pool_list = 0; + + if (!zfs_hl) { + dprintf("%s: No zfs_hl provided\n", __func__); + return (false); + } + + if (!zfs_boot_check_mountroot(&pool_name, &pool_guid) || + (!pool_name && pool_guid == 0)) { + /* + * kext is not being loaded during early-boot, + * or no pool is specified for import. 
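/*
 * Illustrative sketch (not part of the patch): the boot-argument handling
 * above copies everything up to the first '/' as the pool name, so a
 * zfs_boot value of "rpool/ROOT/default" yields "rpool".  The same string
 * handling as a small userspace function, assuming only the standard C
 * library; the function name is illustrative.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
pool_name_from_bootarg(const char *zfs_boot)
{
	const char *split = strchr(zfs_boot, '/');
	size_t len = split ? (size_t)(split - zfs_boot) : strlen(zfs_boot);
	char *pool = malloc(len + 1);

	if (pool == NULL)
		return (NULL);
	memcpy(pool, zfs_boot, len);
	pool[len] = '\0';
	return (pool);
}

int
main(void)
{
	char *name = pool_name_from_bootarg("rpool/ROOT/default");

	printf("pool: %s\n", name);	/* prints "pool: rpool" */
	free(name);
	return (0);
}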
+ */ + dprintf("%s: check failed\n", __func__); + return (true); + } + + pools = (pool_list_t *) kmem_alloc(sizeof (pool_list_t), + KM_SLEEP); + if (!pools) { + goto error; + } + bzero(pools, sizeof (pool_list_t)); + + if ((pools->disks = OSSet::withCapacity( + ZFS_BOOT_PREALLOC_SET)) == NULL) { + /* Fail if memory couldn't be allocated */ + goto error; + } + pools->terminating = ZFS_BOOT_ACTIVE; + pools->pools = 0; + pools->names = 0; + pools->pool_guid = pool_guid; + pools->pool_name = pool_name; + pools->zfs_hl = zfs_hl; + + notifier = IOService::addMatchingNotification( + gIOFirstPublishNotification, IOService::serviceMatching( + "IOMediaBSDClient"), zfs_boot_probe_media, + zfs_hl, pools, 0); + + if (!notifier) { + /* Fail if memory couldn't be allocated */ + goto error; + } + pools->notifier = notifier; + + mutex_init(&pools->lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&pools->cv, NULL, CV_DEFAULT, NULL); + + /* Finally, start the import thread */ + taskq_dispatch(system_taskq, zfs_boot_import_thread, + (void*)pools, TQ_SLEEP); +#if 0 +/* Alternate method of scheduling the import thread */ + (void) thread_create(NULL, 0, zfs_boot_import_thread, + pools, 0, &p0, + TS_RUN, minclsyspri); +#endif + + zfs_boot_pool_list = pools; + + return (true); + +error: + if (pools) { + if (pools->disks) { + pools->disks->flushCollection(); + pools->disks->release(); + pools->disks = 0; + } + kmem_free(pools, sizeof (pool_list_t)); + pools = 0; + } + return (false); +} + +/* Include these functions in all builds */ + +/* + * zfs_boot_update_bootinfo_vdev_leaf + * Inputs: spa: valid pool spa pointer. vd: valid vdev pointer. + * Return: 0 on success, positive integer errno on failure. + * Callers: zfs_boot_update_bootinfo_vdev + * + * called by bootinfo_vdev with each leaf vdev. 
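/*
 * Illustrative sketch (not part of the patch): zfs_boot_init() above uses
 * the common goto-error cleanup idiom, where each allocation is checked and
 * the error label releases only what was successfully set up.  A compact
 * standalone example of the same idiom; all names are illustrative.
 */
#include <stdlib.h>
#include <string.h>

struct pool_list_model {
	char *name;
	void *disks;
};

static struct pool_list_model *
pool_list_create(const char *name)
{
	struct pool_list_model *p = calloc(1, sizeof (*p));

	if (p == NULL)
		goto error;
	p->name = strdup(name);
	if (p->name == NULL)
		goto error;
	p->disks = calloc(16, sizeof (void *));
	if (p->disks == NULL)
		goto error;
	return (p);

error:
	if (p != NULL) {
		free(p->name);		/* free(NULL) is a no-op */
		free(p);
	}
	return (NULL);
}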
+ */ +DSTATIC int +zfs_boot_update_bootinfo_vdev_leaf(OSArray *array, vdev_t *vd) +{ + OSDictionary *dict; + OSString *dev_str; + OSNumber *dev_size; + vdev_disk_t *dvd; + struct io_bootinfo *info; + int error; + + /* Validate inputs */ + if (!array || !vd) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + + /* Should be called with leaf vdev */ + if (!vd->vdev_ops->vdev_op_leaf) { + dprintf("%s not a leaf vdev\n", __func__); + return (EINVAL); + } + + /* Skip hole vdevs */ + if (vd->vdev_ishole) { + dprintf("%s skipping hole in namespace\n", __func__); + return (0); + } + + /* No info available if missing */ + if (strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_MISSING) == 0) { + dprintf("%s skipping missing vdev\n", __func__); + return (0); + } + + /* Must be a disk, not a file */ + if (strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { + dprintf("%s skipping non-disk vdev\n", __func__); + return (0); + } + + /* Skip obviously non-bootable vdevs */ + if (vd->vdev_islog || + vd->vdev_isl2cache || vd->vdev_isspare) { + dprintf("%s skipping non-bootable\n", __func__); + return (0); + } + + /* Get vdev type-specific data */ + dvd = (vdev_disk_t *)vd->vdev_tsd; + if (!dvd || !dvd->vd_lh) { + dprintf("%s missing dvd or ldi handle\n", __func__); + return (0); + } + + /* Allocate an ldi io_bootinfo struct */ + info = (struct io_bootinfo *)kmem_alloc( + sizeof (struct io_bootinfo), KM_SLEEP); + if (!info) { + dprintf("%s info alloc failed\n", __func__); + return (ENOMEM); + } + bzero(info, sizeof (struct io_bootinfo)); + + /* Ask the vdev handle to fill in the info */ + error = ldi_ioctl(dvd->vd_lh, DKIOCGETBOOTINFO, + (intptr_t)info, 0, 0, 0); + if (error != 0) { + dprintf("%s ioctl error %d\n", __func__, error); + kmem_free(info, sizeof (struct io_bootinfo)); + return (EIO); + } + + /* Allocate dictionary to hold the keys */ + if ((dict = OSDictionary::withCapacity(2)) == NULL) { + dprintf("%s dictionary alloc failed\n", __func__); + kmem_free(info, sizeof (struct io_bootinfo)); + return (ENOMEM); + } + + /* Keys are path (string) and size (number) */ + dev_str = OSString::withCString(info->dev_path); + dev_size = OSNumber::withNumber(info->dev_size, + (8 * sizeof (info->dev_size))); + kmem_free(info, sizeof (struct io_bootinfo)); + info = 0; + + /* Add keys to dictionary or bail */ + if (!dev_str || !dev_size || + dict->setObject(kIOBootDevicePathKey, + dev_str) == false || + dict->setObject(kIOBootDeviceSizeKey, + dev_size) == false) { + dprintf("%s dictionary setup failed\n", __func__); + if (dev_str) dev_str->release(); + if (dev_size) dev_size->release(); + dict->release(); + dict = 0; + return (ENOMEM); + } + dev_str->release(); + dev_str = 0; + dev_size->release(); + dev_size = 0; + + /* Add dict to array */ + if (array->setObject(dict) == false) { + dprintf("%s couldn't set bootinfo\n", __func__); + dict->release(); + dict = 0; + return (ENOMEM); + } + dict->release(); + dict = 0; + + return (0); +} + +/* + * zfs_boot_update_bootinfo_vdev + * Inputs: spa: valid pool spa pointer. vd: valid vdev pointer. + * Return: 0 on success, positive integer errno on failure. + * Callers: zfs_boot_update_bootinfo + * + * called by bootinfo with root vdev, and recursively calls + * itself while iterating over children (vdevs only have a + * few levels of nesting at most). 
+ */ +DSTATIC int +zfs_boot_update_bootinfo_vdev(OSArray *array, vdev_t *vd) +{ + int c, error; + + /* Validate inputs */ + if (!array || !vd) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + + /* Skip obviously non-bootable vdevs */ + if (vd->vdev_islog || + vd->vdev_isl2cache || vd->vdev_isspare) { + dprintf("%s skipping non-bootable\n", __func__); + return (0); + } + + /* Process leaf vdevs */ + if (vd->vdev_ops->vdev_op_leaf) { + error = zfs_boot_update_bootinfo_vdev_leaf(array, vd); + if (error) + dprintf("%s bootinfo_vdev_leaf error %d\n", + __func__, error); + return (error); + } + + /* Iterate over child vdevs */ + for (c = 0; c < vd->vdev_children; c++) { + if (vd->vdev_child[c] == NULL) { + dprintf("%s hole in vdev namespace\n", __func__); + continue; + } + + /* Recursion */ + error = zfs_boot_update_bootinfo_vdev(array, + vd->vdev_child[c]); + if (error != 0) { + dprintf("%s bootinfo_vdev_leaf error %d\n", + __func__, error); + return (error); + } + } + + return (0); +} + +extern "C" { + +/* + * zfs_boot_update_bootinfo + * Inputs: spa: valid pool spa pointer. + * Return: 0 on success, positive integer errno on failure. + * Callers: spa_open_common, spa_vdev_add, spa_vdev_remove, + * spa_vdev_attach, spa_vdev_detach. + * + * Called from spa.c on changes to the vdev layout. This + * information is assigned to the pool proxy so all zvols + * and datasets will retrieve the property through IOKit + * since it is retrieved via recursion. + * (see bless-105/Misc/BLCreateBooterInformationDictionary.c). + * If IOBootDevice property is needed for each dataset and + * zvol, we can revisit this and assign/update on all of + * these (already implemented a prototype that worked fine). + * + * Note: bootinfo is only collected for data vdevs. + * XXX We only want boot helpers there, unless there is a + * compelling argument for log, cache, or spares having + * boot helpers. + */ +int +zfs_boot_update_bootinfo(spa_t *spa) +{ + ZFSPool *pool_proxy; + OSArray *array; + int error; + + if (!spa) { + dprintf("%s missing spa\n", __func__); + return (EINVAL); + } + + /* XXX Could count vdevs first? 
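/*
 * Illustrative sketch (not part of the patch): zfs_boot_update_bootinfo_vdev()
 * above is a depth-first walk that hands leaf vdevs to the leaf handler and
 * recurses over vdev_child[] otherwise, skipping holes and stopping on the
 * first error.  The same traversal shape over a generic tree node, assuming
 * only the standard C library; names are illustrative.
 */
#include <stddef.h>

struct tnode {
	int is_leaf;
	struct tnode **child;
	size_t children;
};

static int
walk_leaves(struct tnode *n, int (*leaf_cb)(struct tnode *))
{
	size_t c;
	int error;

	if (n == NULL)
		return (0);
	if (n->is_leaf)
		return (leaf_cb(n));
	for (c = 0; c < n->children; c++) {
		if (n->child[c] == NULL)
			continue;	/* hole in the namespace */
		if ((error = walk_leaves(n->child[c], leaf_cb)) != 0)
			return (error);	/* first failure stops the walk */
	}
	return (0);
}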
*/ + if ((array = OSArray::withCapacity(1)) == NULL) { + dprintf("%s allocation failed\n", __func__); + return (ENOMEM); + } + + /* Grab necessary locks */ + mutex_enter(&spa_namespace_lock); + spa_open_ref(spa, FTAG); + + /* Get pool proxy */ + if (!spa->spa_iokit_proxy || + (pool_proxy = spa->spa_iokit_proxy->proxy) == NULL) { + spa_close(spa, FTAG); + mutex_exit(&spa_namespace_lock); + dprintf("%s no spa_pool_proxy\n", __func__); + return (0); + } + /* Avoid it disappearing from under us */ + pool_proxy->retain(); + + /* Don't need to hold this throughout */ + mutex_exit(&spa_namespace_lock); + + /* vdev state lock only requires an spa open ref */ + spa_vdev_state_enter(spa, SCL_NONE); + + /* Iterate over all vdevs */ + if ((error = zfs_boot_update_bootinfo_vdev(array, + spa->spa_root_vdev)) != 0) { + dprintf("%s bootinfo_vdev error %d\n", + __func__, error); + + /* Drop locks */ + (void) spa_vdev_state_exit(spa, NULL, 0); + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + mutex_exit(&spa_namespace_lock); + array->release(); + pool_proxy->release(); + return (error); + } + + /* Release locks, passing NULL vd (no change) */ + error = spa_vdev_state_exit(spa, NULL, 0); + if (error != 0) { + dprintf("%s spa_vdev_state_exit error %d\n", + __func__, error); + } + + /* setProperty adds a retain */ + pool_proxy->setProperty(kIOBootDeviceKey, array); + pool_proxy->release(); + array->release(); + + /* Drop locks */ + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + mutex_exit(&spa_namespace_lock); + return (0); +} + +} /* extern "C" */ + +#if 0 +#ifdef ZFS_BOOT +/* Remainder only needed for boot */ + +#define DPRINTF_FUNC() dprintf("%s\n", __func__) + +#pragma mark - ZFSBootDevice + +OSDefineMetaClassAndStructors(ZFSBootDevice, IOBlockStorageDevice); +char ZFSBootDevice::vendorString[4] = "ZFS"; +char ZFSBootDevice::revisionString[4] = "0.1"; +char ZFSBootDevice::infoString[12] = "ZFS dataset"; + +#if 0 +int +zfs_boot_get_path(char *path, int len) +{ + OSString *disk = 0; + + if (!path || len == 0) { + dprintf("%s: invalid argument\n", __func__); + return (-1); + } + + if (bootdev) { + disk = OSDynamicCast(OSString, + bootdev->getProperty(kIOBSDNameKey, gIOServicePlane, + kIORegistryIterateRecursively)); + } + + if (disk) { + snprintf(path, len, "/dev/%s", disk->getCStringNoCopy()); + return (0); + } + + return (-1); +} +#endif + +bool +ZFSBootDevice::init(OSDictionary *properties) +{ + /* Allocate dictionaries and symbols */ + OSDictionary *pdict = OSDictionary::withCapacity(2); + OSDictionary *ddict = OSDictionary::withCapacity(4); + const OSSymbol *virtualSymbol = OSSymbol::withCString( + kIOPropertyPhysicalInterconnectTypeVirtual); + const OSSymbol *ramSymbol = OSSymbol::withCString( + kIOPropertyInterconnectRAMKey); + const OSSymbol *ssdSymbol = OSSymbol::withCString( + kIOPropertyMediumTypeSolidStateKey); + OSNumber *physSize = OSNumber::withNumber((uint32_t)4096, 32); + OSNumber *logSize = OSNumber::withNumber((uint32_t)512, 32); + const OSSymbol *vendorSymbol = 0; + const OSSymbol *revisionSymbol = 0; + const OSSymbol *blankSymbol = 0; + OSBoolean *rdonly = 0; + OSString *str = 0; + const char *cstr = 0; + bool ret = false; + + DPRINTF_FUNC(); + + /* Validate allocations */ + if (!pdict || !ddict || !virtualSymbol || !ramSymbol || + !ssdSymbol || !physSize || !logSize) { + dprintf("ZFSBootDevice::%s allocation failed\n", __func__); + goto error; + } + + /* Init class statics every time an instance inits */ + /* Shared across instances, but doesn't hurt to reprint 
*/ + snprintf(vendorString, strlen("ZFS")+1, "ZFS"); + snprintf(revisionString, strlen("0.1")+1, "0.1"); + snprintf(infoString, strlen("ZFS dataset")+1, "ZFS dataset"); + + /* For IORegistry keys, cache OSSymbols for class statics */ + /* Leverages OSSymbol cahce pool to reuse across instances */ + vendorSymbol = OSSymbol::withCString(vendorString); + revisionSymbol = OSSymbol::withCString(revisionString); + blankSymbol = OSSymbol::withCString(""); + if (!vendorSymbol || !revisionSymbol || !blankSymbol) { + dprintf("ZFSBootDevice::%s class symbols failed\n", __func__); + goto error; + } + + /* Call super init */ + if (IOBlockStorageDevice::init(properties) == false) { + dprintf("ZFSBootDevice::%s device init failed\n", __func__); + goto error; + } + + /* Set class private vars */ + productString = NULL; + isReadOnly = false; // XXX should really be true initially + + /* Set Protocol Characteristics */ + if (pdict->setObject(kIOPropertyPhysicalInterconnectLocationKey, + ramSymbol) == false || + pdict->setObject(kIOPropertyPhysicalInterconnectTypeKey, + virtualSymbol) == false) { + dprintf("%s pdict set properties failed\n", __func__); + goto error; + } + setProperty(kIOPropertyProtocolCharacteristicsKey, pdict); + + /* Set Device Characteristics */ + if (ddict->setObject(kIOPropertyVendorNameKey, + vendorSymbol) == false || + ddict->setObject(kIOPropertyProductRevisionLevelKey, + revisionSymbol) == false || + ddict->setObject(kIOPropertyProductSerialNumberKey, + blankSymbol) == false || + ddict->setObject(kIOPropertyPhysicalBlockSizeKey, + physSize) == false || + ddict->setObject(kIOPropertyLogicalBlockSizeKey, + logSize) == false || + ddict->setObject(kIOPropertyMediumTypeKey, + ssdSymbol) == false) { + dprintf("%s ddict set properties failed\n", __func__); + goto error; + } + setProperty(kIOPropertyDeviceCharacteristicsKey, ddict); + + /* Check for passed in readonly status */ + if (properties && (rdonly = OSDynamicCast(OSBoolean, + properties->getObject(ZFS_BOOT_DATASET_RDONLY_KEY))) != NULL) { + /* Got the boolean */ + isReadOnly = rdonly->getValue(); + dprintf("ZFSBootDevice %s set %s\n", __func__, + (isReadOnly ? 
"readonly" : "readwrite")); + } + + /* Check for passed in dataset UUID */ + if (properties && (str = OSDynamicCast(OSString, + properties->getObject(ZFS_BOOT_DATASET_UUID_KEY))) != NULL && + (cstr = str->getCStringNoCopy()) != NULL) { + /* Got the string, try to set UUID */ + str->retain(); + if (ddict->setObject("Dataset UUID", str) == false) { + dprintf("ZFSBootDevice::%s failed UUID [%s]\n", + __func__, cstr); + str->release(); + goto error; + } + dprintf("ZFSBootDevice::%s set UUID [%s]\n", + __func__, cstr); + str->release(); + } + + /* Check for passed in dataset name */ + if (properties && (str = OSDynamicCast(OSString, + properties->getObject(ZFS_BOOT_DATASET_NAME_KEY))) != NULL && + (cstr = str->getCStringNoCopy()) != NULL) { + /* Got the string, try to set name */ + str->retain(); + if (setDatasetName(cstr) == false) { + /* Unlikely */ + dprintf("ZFSBootDevice %s couldn't setup dataset" + " name property [%s]\n", __func__, cstr); + str->release(); + goto error; + } + + dprintf("ZFSBootDevice %s set dataset name [%s]\n", + __func__, cstr); + str->release(); + } else { + if (setDatasetName("invalid") == false) { + dprintf("ZFSBootDevice::%s setDatasetName failed\n", + __func__); + goto error; + } + dprintf("ZFSBootDevice %s set name [invalid]\n", __func__); + } + + /* Success */ + ret = true; + +error: + if (pdict) pdict->release(); + if (ddict) ddict->release(); + if (virtualSymbol) virtualSymbol->release(); + if (ramSymbol) ramSymbol->release(); + if (ssdSymbol) ssdSymbol->release(); + if (physSize) physSize->release(); + if (logSize) logSize->release(); + if (vendorSymbol) vendorSymbol->release(); + if (revisionSymbol) revisionSymbol->release(); + if (blankSymbol) blankSymbol->release(); + return (ret); +} + +void +ZFSBootDevice::free() +{ + char *pstring = (char *)productString; + productString = 0; + + if (pstring) kmem_free(pstring, strlen(pstring) + 1); + + IOBlockStorageDevice::free(); +} + +#if 0 +bool +ZFSBootDevice::attach(IOService *provider) +{ + DPRINTF_FUNC(); + // return (IOMedia::attach(provider)); + return (IOBlockStorageDevice::attach(provider)); +} + +void +ZFSBootDevice::detach(IOService *provider) +{ + DPRINTF_FUNC(); + // IOMedia::detach(provider); + IOBlockStorageDevice::detach(provider); +} + +bool +ZFSBootDevice::start(IOService *provider) +{ + DPRINTF_FUNC(); + // return (IOMedia::start(provider)); + return (IOBlockStorageDevice::start(provider)); +} + +void +ZFSBootDevice::stop(IOService *provider) +{ + DPRINTF_FUNC(); + // IOMedia::stop(provider); + IOBlockStorageDevice::stop(provider); +} + +IOService* +ZFSBootDevice::probe(IOService *provider, SInt32 *score) +{ + DPRINTF_FUNC(); + // return (IOMedia::probe(provider, score)); + return (IOBlockStorageDevice::probe(provider, score)); +} +#endif + +IOReturn +ZFSBootDevice::doSynchronizeCache(void) +{ + dprintf("ZFSBootDevice %s\n", __func__); + return (kIOReturnSuccess); +} + +IOReturn +ZFSBootDevice::doAsyncReadWrite(IOMemoryDescriptor *buffer, + UInt64 block, UInt64 nblks, + IOStorageAttributes *attributes, + IOStorageCompletion *completion) +{ + char zero[ZFS_BOOT_DEV_BSIZE]; + size_t len, cur, off = 0; + + DPRINTF_FUNC(); + + if (!buffer) { + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); + } + + /* Read vs. write */ + if (buffer->getDirection() == kIODirectionIn) { + /* Zero the read buffer */ + bzero(zero, ZFS_BOOT_DEV_BSIZE); + len = buffer->getLength(); + while (len > 0) { + cur = (len > ZFS_BOOT_DEV_BSIZE ? 
+ ZFS_BOOT_DEV_BSIZE : len); + buffer->writeBytes(/* offset */ off, + /* buf */ zero, /* length */ cur); + off += cur; + len -= cur; + } + // dprintf("%s: read: %llu %llu\n", + // __func__, block, nblks); + IOStorage::complete(completion, kIOReturnSuccess, + buffer->getLength()); + return (kIOReturnSuccess); + } + + if (buffer->getDirection() != kIODirectionOut) { + dprintf("%s invalid direction %d\n", __func__, + buffer->getDirection()); + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); + } + + /* + * XXX For now this just returns error for all writes. + * If it turns out that mountroot/bdevvp try to + * verify writable status by reading a block and writing + * it back to disk, lie and say it succeeded. + */ + dprintf("%s: write: %llu %llu\n", __func__, block, nblks); + IOStorage::complete(completion, kIOReturnError, 0); + return (kIOReturnSuccess); +} + +IOReturn +ZFSBootDevice::doEjectMedia() +{ + DPRINTF_FUNC(); + /* XXX Called at shutdown, maybe return success? */ + return (kIOReturnError); +} + +IOReturn +ZFSBootDevice::doFormatMedia(UInt64 byteCapacity) +{ + DPRINTF_FUNC(); + /* XXX shouldn't need it */ + return (kIOReturnError); + // return (kIOReturnSuccess); +} + +UInt32 +ZFSBootDevice::doGetFormatCapacities(UInt64 *capacities, + UInt32 capacitiesMaxCount) const +{ + DPRINTF_FUNC(); + if (capacities && capacitiesMaxCount > 0) { + capacities[0] = (ZFS_BOOT_DEV_BSIZE * ZFS_BOOT_DEV_BCOUNT); + dprintf("ZFSBootDevice %s: capacity %llu\n", + __func__, capacities[0]); + } + + /* Always inform caller of capacity count */ + return (1); +} + +/* Assign dataset name from null-terminated string */ +bool +ZFSBootDevice::setDatasetName(const char *dsname) +{ + OSDictionary *dict; + OSString *dsstr; + char *newname, *oldname; + size_t len; + + DPRINTF_FUNC(); + + /* Validate arguments */ + if (!dsname || (len = strnlen(dsname, + ZFS_MAX_DATASET_NAME_LEN)) == 0) { + dprintf("%s: missing argument\n", __func__); + return (false); + } + + /* Truncate too-long names (shouldn't happen) */ + if (len == ZFS_MAX_DATASET_NAME_LEN && + dsname[ZFS_MAX_DATASET_NAME_LEN] != '\0') { + dprintf("%s: dsname too long [%s]\n", + __func__, dsname); + /* XXX Just truncate the name */ + len--; + } + + /* Allocate room for name plus null char */ + newname = (char *)kmem_alloc(len+1, KM_SLEEP); + if (!newname) { + dprintf("ZFSBootDevice::%s string alloc failed\n", __func__); + return (false); + } + snprintf(newname, len+1, "%s", dsname); + newname[len] = '\0'; /* just in case */ + + /* Save an OSString copy for IORegistry */ + dsstr = OSString::withCString(newname); + if (!dsstr) { + dprintf("ZFSBootDevice::%s OSString failed\n", __func__); + kmem_free(newname, len+1); + return (false); + } + + /* Swap into class private var */ + oldname = productString; + productString = newname; + newname = 0; + if (oldname) { + kmem_free(oldname, strlen(oldname)+1); + oldname = 0; + } + + /* Get and clone device characteristics prop dict */ + if ((dict = OSDynamicCast(OSDictionary, + getProperty(kIOPropertyDeviceCharacteristicsKey))) == NULL || + (dict = OSDictionary::withDictionary(dict)) == NULL) { + dprintf("%s couldn't clone prop dict\n", __func__); + /* Should only happen during initialization */ + } + + if (dict) { + /* Copy string, add to dictionary, and replace prop dict */ + if (dict->setObject(kIOPropertyProductNameKey, + dsstr) == false || + setProperty(kIOPropertyDeviceCharacteristicsKey, + dict) == false) { + dprintf("%s couldn't set name\n", __func__); + dsstr->release(); + 
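/*
 * Illustrative sketch (not part of the patch): the read path above satisfies
 * reads from a 512-byte zero buffer, copying at most one block per iteration
 * until the request length is exhausted.  The same loop against a plain
 * memory buffer, assuming the 512-byte block size used by the patch; the
 * function name is illustrative.
 */
#include <string.h>
#include <stddef.h>

#define	BOOT_DEV_BSIZE	512

static void
zero_fill(char *dst, size_t len)
{
	char zero[BOOT_DEV_BSIZE];
	size_t off = 0, cur;

	memset(zero, 0, sizeof (zero));
	while (len > 0) {
		cur = (len > BOOT_DEV_BSIZE) ? BOOT_DEV_BSIZE : len;
		memcpy(dst + off, zero, cur);	/* writeBytes() analogue */
		off += cur;
		len -= cur;
	}
}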
dict->release(); + return (false); + } + dict->release(); + dict = 0; + } + + /* Finally, set the IORegistryEntry/IOService name */ + setName(dsstr->getCStringNoCopy()); + dsstr->release(); + + return (true); +} + +/* Returns full dataset name from instance private var */ +char * +ZFSBootDevice::getProductString() +{ + dprintf("ZFSBootDevice %s [%s]\n", productString); + /* Return class private string */ + return (productString); +} + +/* Returns readonly status from instance private var */ +IOReturn +ZFSBootDevice::reportWriteProtection(bool *isWriteProtected) +{ + DPRINTF_FUNC(); + if (isWriteProtected) *isWriteProtected = isReadOnly; + return (kIOReturnSuccess); +} + +/* These return class static string for all instances */ +char * +ZFSBootDevice::getVendorString() +{ + dprintf("ZFSBootDevice %s [%s]\n", vendorString); + /* Return class static string */ + return (vendorString); +} +char * +ZFSBootDevice::getRevisionString() +{ + dprintf("ZFSBootDevice %s [%s]\n", revisionString); + /* Return class static string */ + return (revisionString); +} +char * +ZFSBootDevice::getAdditionalDeviceInfoString() +{ + dprintf("ZFSBootDevice %s [%s]\n", infoString); + /* Return class static string */ + return (infoString); +} + +/* Always return media present and unchanged */ +IOReturn +ZFSBootDevice::reportMediaState(bool *mediaPresent, + bool *changedState) +{ + DPRINTF_FUNC(); + if (mediaPresent) *mediaPresent = true; + if (changedState) *changedState = false; + return (kIOReturnSuccess); +} + +/* Always report nonremovable and nonejectable */ +IOReturn +ZFSBootDevice::reportRemovability(bool *isRemoveable) +{ + DPRINTF_FUNC(); + if (isRemoveable) *isRemoveable = false; + return (kIOReturnSuccess); +} +IOReturn +ZFSBootDevice::reportEjectability(bool *isEjectable) +{ + DPRINTF_FUNC(); + if (isEjectable) *isEjectable = false; + return (kIOReturnSuccess); +} + +/* Always report 512b blocksize */ +IOReturn +ZFSBootDevice::reportBlockSize(UInt64 *blockSize) +{ + DPRINTF_FUNC(); + if (!blockSize) + return (kIOReturnError); + + *blockSize = ZFS_BOOT_DEV_BSIZE; + return (kIOReturnSuccess); +} + +/* XXX Calculate from dev_bcount, should get size from objset */ +/* XXX Can issue message kIOMessageMediaParametersHaveChanged to update */ +IOReturn +ZFSBootDevice::reportMaxValidBlock(UInt64 *maxBlock) +{ + DPRINTF_FUNC(); + if (!maxBlock) + return (kIOReturnError); + + // *maxBlock = 0; + *maxBlock = ZFS_BOOT_DEV_BCOUNT - 1; + dprintf("ZFSBootDevice %s: maxBlock %llu\n", __func__, *maxBlock); + + return (kIOReturnSuccess); +} +#endif /* ZFS_BOOT */ +#endif /* 0 */ diff --git a/module/os/macos/zfs/zfs_ctldir.c b/module/os/macos/zfs/zfs_ctldir.c new file mode 100644 index 0000000000..57f8dc2e28 --- /dev/null +++ b/module/os/macos/zfs/zfs_ctldir.c @@ -0,0 +1,1519 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (C) 2011 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * LLNL-CODE-403049. + * Rewritten for Linux by: + * Rohan Puri + * Brian Behlendorf + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. + * Copyright (c) 2018 George Melikov. All Rights Reserved. + * Copyright (c) 2019 Datto, Inc. All rights reserved. + * Copyright (c) 2020 Jorgen Lundman. All rights reserved. + */ + +/* + * ZFS control directory (a.k.a. ".zfs") + * + * This directory provides a common location for all ZFS meta-objects. + * Currently, this is only the 'snapshot' and 'shares' directory, but this may + * expand in the future. The elements are built dynamically, as the hierarchy + * does not actually exist on disk. + * + * For 'snapshot', we don't want to have all snapshots always mounted, because + * this would take up a huge amount of space in /etc/mnttab. We have three + * types of objects: + * + * ctldir ------> snapshotdir -------> snapshot + * | + * | + * V + * mounted fs + * + * The 'snapshot' node contains just enough information to lookup '..' and act + * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we + * perform an automount of the underlying filesystem and return the + * corresponding vnode. + * + * All mounts are handled automatically by an user mode helper which invokes + * the mount procedure. Unmounts are handled by allowing the mount + * point to expire so the kernel may automatically unmount it. + * + * The '.zfs', '.zfs/snapshot', and all directories created under + * '.zfs/snapshot' (ie: '.zfs/snapshot/') all share the same + * zfsvfs_t as the head filesystem (what '.zfs' lives under). + * + * File systems mounted on top of the '.zfs/snapshot/' paths + * (ie: snapshots) are complete ZFS filesystems and have their own unique + * zfsvfs_t. However, the fsid reported by these mounts will be the same + * as that used by the parent zfsvfs_t to make NFS happy. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zfs_namecheck.h" + +extern kmem_cache_t *znode_cache; +extern uint64_t vnop_num_vnodes; + +/* + * Apple differences; + * + * We don't have 'shares' directory, so only 'snapshot' is relevant. + * + * We can not issue mount from kernel, so involve zed. + * - see zfs_ctldir_snapdir.c + * + * All vnodes point to znode_t, no special case nodes. 
+ */ + +/* List of zfsctl mounts waiting to be mounted */ +static kmutex_t zfsctl_mounts_lock; +static list_t zfsctl_mounts_list; +struct zfsctl_mounts_waiting { + kmutex_t zcm_lock; + kcondvar_t zcm_cv; + list_node_t zcm_node; + char zcm_name[ZFS_MAX_DATASET_NAME_LEN]; +}; +typedef struct zfsctl_mounts_waiting zfsctl_mounts_waiting_t; + + +/* + * Control Directory Tunables (.zfs) + */ +int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; +int zfs_admin_snapshot = 1; +int zfs_auto_snapshot = 1; + +static kmutex_t zfsctl_unmount_lock; +static kcondvar_t zfsctl_unmount_cv; +static boolean_t zfsctl_unmount_thread_exit; + +static kmutex_t zfsctl_unmount_list_lock; +static list_t zfsctl_unmount_list; + +struct zfsctl_unmount_delay { + char *se_name; /* full snapshot name */ + spa_t *se_spa; /* pool spa */ + uint64_t se_objsetid; /* snapshot objset id */ + time_t se_time; + list_node_t se_nodelink; +}; +typedef struct zfsctl_unmount_delay zfsctl_unmount_delay_t; + + +/* + * Check if the given vnode is a part of the virtual .zfs directory. + */ +boolean_t +zfsctl_is_node(struct vnode *ip) +{ + return (ITOZ(ip)->z_is_ctldir); +} + +typedef int (**vnode_operations)(void *); + + +/* + * Allocate a new vnode with the passed id and ops. + */ +static struct vnode * +zfsctl_vnode_alloc(zfsvfs_t *zfsvfs, uint64_t id, + char *name) +{ + timestruc_t now; + struct vnode *vp = NULL; + znode_t *zp = NULL; + struct vnode_fsparam vfsp; + + printf("%s\n", __func__); + + zp = kmem_cache_alloc(znode_cache, KM_SLEEP); + + gethrestime(&now); + ASSERT3P(zp->z_dirlocks, ==, NULL); + ASSERT3P(zp->z_acl_cached, ==, NULL); + ASSERT3P(zp->z_xattr_cached, ==, NULL); + zp->z_zfsvfs = zfsvfs; + zp->z_id = id; + zp->z_unlinked = B_FALSE; + zp->z_atime_dirty = B_FALSE; + zp->z_zn_prefetch = B_FALSE; + zp->z_moved = B_FALSE; + zp->z_is_sa = B_FALSE; + zp->z_is_mapped = B_FALSE; + zp->z_is_ctldir = B_TRUE; + zp->z_is_stale = B_FALSE; + zp->z_sa_hdl = NULL; + zp->z_blksz = 0; + zp->z_seq = 0; + zp->z_mapcnt = 0; + zp->z_size = 0; + zp->z_pflags = 0; + zp->z_mode = 0; + zp->z_sync_cnt = 0; + zp->z_gen = 0; + zp->z_mode = (S_IFDIR | (S_IRWXU|S_IRWXG|S_IRWXO)); + zp->z_uid = 0; + zp->z_gid = 0; + ZFS_TIME_ENCODE(&now, zp->z_atime); + + zp->z_snap_mount_time = 0; /* Allow automount attempt */ + + strlcpy(zp->z_name_cache, name, sizeof (zp->z_name_cache)); + + dprintf("%s zp %p with vp %p zfsvfs %p vfs %p\n", __func__, + zp, vp, zfsvfs, zfsvfs->z_vfs); + + bzero(&vfsp, sizeof (vfsp)); + vfsp.vnfs_str = "zfs"; + vfsp.vnfs_mp = zfsvfs->z_vfs; + vfsp.vnfs_vtype = IFTOVT((mode_t)zp->z_mode); + vfsp.vnfs_fsnode = zp; + vfsp.vnfs_flags = VNFS_ADDFSREF; + + /* Tag root directory */ + if (id == zfsvfs->z_root) + vfsp.vnfs_markroot = 1; + + /* + * This creates a vnode with VSYSTEM set, this is so that unmount's + * vflush() (called before our vfs_unmount) will pass (and not block + * waiting for the usercount ref to be released). We then release the + * VROOT vnode in zfsctl_destroy, and release the usercount ref. 
+ * Because of this, we need to call vnode_recycle() ourselves in destroy + */ + if (id == ZFSCTL_INO_ROOT) + vfsp.vnfs_marksystem = 1; + + vfsp.vnfs_vops = zfs_ctldirops; + + while (vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp) != 0) { + kpreempt(KPREEMPT_SYNC); + } + atomic_inc_64(&vnop_num_vnodes); + + printf("Assigned zp %p with vp %p zfsvfs %p\n", zp, vp, zp->z_zfsvfs); + + vnode_settag(vp, VT_ZFS); + + zp->z_vid = vnode_vid(vp); + zp->z_vnode = vp; + + mutex_enter(&zfsvfs->z_znodes_lock); + list_insert_tail(&zfsvfs->z_all_znodes, zp); + membar_producer(); + if (id < zfsvfs->z_ctldir_startid) + zfsvfs->z_ctldir_startid = id; + mutex_exit(&zfsvfs->z_znodes_lock); + + return (vp); +} + +/* + * Lookup the vnode with given id, it will be allocated if needed. + */ +static struct vnode * +zfsctl_vnode_lookup(zfsvfs_t *zfsvfs, uint64_t id, + char *name) +{ + struct vnode *ip = NULL; + int error = 0; + + printf("%s\n", __func__); + + while (ip == NULL) { + + error = zfs_vfs_vget(zfsvfs->z_vfs, id, &ip, NULL); + if (error == 0 && ip != NULL) + break; + + /* May fail due to concurrent zfsctl_vnode_alloc() */ + ip = zfsctl_vnode_alloc(zfsvfs, id, name); + } + + return (ip); +} + +/* + * Create the '.zfs' directory. This directory is cached as part of the VFS + * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() + * therefore checks against a vfs_count of 2 instead of 1. This reference + * is removed when the ctldir is destroyed in the unmount. All other entities + * under the '.zfs' directory are created dynamically as needed. + * + * Because the dynamically created '.zfs' directory entries assume the use + * of 64-bit vnode numbers this support must be disabled on 32-bit systems. + */ +int +zfsctl_create(zfsvfs_t *zfsvfs) +{ + ASSERT(zfsvfs->z_ctldir == NULL); + + printf("%s\n", __func__); + + /* Create root node, tagged with VSYSTEM - see above */ + zfsvfs->z_ctldir = zfsctl_vnode_alloc(zfsvfs, ZFSCTL_INO_ROOT, + ZFS_CTLDIR_NAME); + if (zfsvfs->z_ctldir == NULL) + return (SET_ERROR(ENOENT)); + + vnode_ref(zfsvfs->z_ctldir); + VN_RELE(zfsvfs->z_ctldir); + + printf("%s: done %p\n", __func__, zfsvfs->z_ctldir); + + return (0); +} + +/* + * Destroy the '.zfs' directory or remove a snapshot from + * zfs_snapshots_by_name. Only called when the filesystem is unmounted. + */ +void +zfsctl_destroy(zfsvfs_t *zfsvfs) +{ + if (zfsvfs->z_ctldir) { + if (VN_HOLD(zfsvfs->z_ctldir) == 0) { + vnode_rele(zfsvfs->z_ctldir); + /* Because tagged VSYSTEM, we manually call recycle */ + vnode_recycle(zfsvfs->z_ctldir); + VN_RELE(zfsvfs->z_ctldir); + } + zfsvfs->z_ctldir = NULL; + } +} + +/* + * Given a root znode, retrieve the associated .zfs directory. + * Add a hold to the vnode and return it. + */ +struct vnode * +zfsctl_root(znode_t *zp) +{ + ASSERT(zfs_has_ctldir(zp)); + VN_HOLD(ZTOZSB(zp)->z_ctldir); + return (ZTOZSB(zp)->z_ctldir); +} + + +struct vnode * +zfs_root_dotdot(struct vnode *vp) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + znode_t *rootzp = NULL; + struct vnode *retvp = NULL; + + printf("%s: for id %llu\n", __func__, zp->z_id); + + if (zp->z_id == ZFSCTL_INO_ROOT) + zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); + else if (zp->z_id == ZFSCTL_INO_SNAPDIR) + retvp = zfsctl_root(zp); + else + retvp = zfsctl_vnode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, + "snapshot"); + + if (rootzp != NULL) + retvp = ZTOV(rootzp); + + return (retvp); +} + +/* + * Special case the handling of "..". 
+ */ +int +zfsctl_root_lookup(struct vnode *dvp, char *name, struct vnode **vpp, + int flags, cred_t *cr, int *direntflags, struct componentname *realpnp) +{ + znode_t *dzp = VTOZ(dvp); + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + int error = 0; + uint64_t id; + + printf("%s: '%s'\n", __func__, name); + + ZFS_ENTER(zfsvfs); + + if (strcmp(name, "..") == 0) { + *vpp = zfs_root_dotdot(dvp); + } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { + *vpp = zfsctl_vnode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, + name); + } else { + error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); + if (error != 0) + goto out; + *vpp = zfsctl_vnode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, + name); + } + + if (*vpp == NULL) { + error = SET_ERROR(ENOENT); + } + +out: + ZFS_EXIT(zfsvfs); + + return (error); +} + +int +zfsctl_vnop_lookup(struct vnop_lookup_args *ap) +#if 0 + struct vnop_lookup_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + vfs_context_t a_context; + }; +#endif +{ + int direntflags = 0; + int error; + struct componentname *cnp = ap->a_cnp; + char *filename = NULL; + int filename_num_bytes = 0; + cred_t *cr = (cred_t *)vfs_context_ucred((ap)->a_context); + + /* + * Darwin uses namelen as an optimisation, for example it can be + * set to 5 for the string "alpha/beta" to look up "alpha". In this + * case we need to copy it out to null-terminate. + */ + if (cnp->cn_nameptr[cnp->cn_namelen] != 0) { + filename_num_bytes = cnp->cn_namelen + 1; + filename = (char *)kmem_alloc(filename_num_bytes, KM_SLEEP); + bcopy(cnp->cn_nameptr, filename, cnp->cn_namelen); + filename[cnp->cn_namelen] = '\0'; + } + + error = zfsctl_root_lookup(ap->a_dvp, + filename ? filename : cnp->cn_nameptr, + ap->a_vpp, /* flags */ 0, cr, &direntflags, NULL); + + /* If we are to create a directory, change error code for XNU */ + if ((error == ENOENT) && + (cnp->cn_flags & ISLASTCN)) { + if ((cnp->cn_nameiop == CREATE) || + (cnp->cn_nameiop == RENAME)) + error = EJUSTRETURN; + } + + if (filename != NULL) + kmem_free(filename, filename_num_bytes); + + return (error); +} + +/* Quick output function for readdir */ +#define DIRENT_RECLEN(namelen, ext) \ + ((ext) ? 
\ + ((sizeof (struct direntry) + (namelen) - (MAXPATHLEN-1) + 7) & ~7) \ + : \ + ((sizeof (struct dirent) - (NAME_MAX+1)) + (((namelen)+1 + 7) &~ 7))) + +static int zfsctl_dir_emit(const char *name, uint64_t id, enum vtype type, + struct vnop_readdir_args *ap, uint64_t **next) +{ + struct uio *uio = ap->a_uio; + boolean_t extended = (ap->a_flags & VNODE_READDIR_EXTENDED); + struct direntry *eodp; /* Extended */ + struct dirent *odp; /* Standard */ + int namelen; + void *buf; + int error = 0; + ushort_t reclen; + + printf("%s '%s'\n", __func__, name); + + namelen = strlen(name); + reclen = DIRENT_RECLEN(namelen, extended); + + if (reclen > uio_resid(uio)) + return (EINVAL); + + buf = kmem_zalloc(reclen, KM_SLEEP); + + if (extended) { + eodp = buf; + + /* + * NOTE: d_seekoff is the offset for the *next* entry - + * so poke in the previous struct with this id + */ + eodp->d_seekoff = uio_offset(uio) + 1; + + eodp->d_ino = id; + eodp->d_type = type; + + (void) bcopy(name, eodp->d_name, namelen + 1); + eodp->d_namlen = namelen; + eodp->d_reclen = reclen; + + } else { + odp = buf; + + odp->d_ino = id; + odp->d_type = type; + (void) bcopy(name, odp->d_name, namelen + 1); + odp->d_namlen = namelen; + odp->d_reclen = reclen; + + } + + /* Copyout this entry */ + error = uiomove(buf, (long)reclen, UIO_READ, uio); + + kmem_free(buf, reclen); + return (error); +} + +int +zfsctl_vnop_readdir_root(struct vnop_readdir_args *ap) +#if 0 + struct vnop_readdir_args { + struct vnode a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; + }; +#endif +{ + int error = 0; + uint64_t *next = NULL; + int entries = 0; + uint64_t offset; + struct uio *uio = ap->a_uio; + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + printf("%s\n", __func__); + + ZFS_ENTER(zfsvfs); + + *ap->a_numdirent = 0; + + offset = uio_offset(uio); + + while (offset < 3 && error == 0) { + + switch (offset) { + case 0: /* "." */ + error = zfsctl_dir_emit(".", ZFSCTL_INO_ROOT, + DT_DIR, ap, &next); + break; + + case 1: /* ".." */ + error = zfsctl_dir_emit("..", 2, + DT_DIR, ap, &next); + break; + + case 2: + error = zfsctl_dir_emit(ZFS_SNAPDIR_NAME, + ZFSCTL_INO_SNAPDIR, DT_DIR, ap, &next); + break; + } + + if (error == ENOENT) { + printf("end of snapshots reached\n"); + break; + } + + if (error != 0) { + printf("emit error\n"); + break; + } + + entries++; + offset++; + uio_setoffset(uio, offset); + } + + uio_setoffset(uio, offset); + + /* Finished without error? Set EOF */ + if (offset >= 3 && error == 0) { + *ap->a_eofflag = 1; + printf("Setting eof\n"); + } + + *ap->a_numdirent = entries; + printf("Returning %d entries\n", entries); + + ZFS_EXIT(zfsvfs); + + return (error); +} + +int +zfsctl_vnop_readdir_snapdir(struct vnop_readdir_args *ap) +#if 0 + struct vnop_readdir_args { + struct vnode a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; + }; +#endif +{ + int error = 0; + uint64_t *next = NULL; + int entries = 0; + uint64_t offset; + struct uio *uio = ap->a_uio; + boolean_t case_conflict; + uint64_t id; + char snapname[MAXNAMELEN]; + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + printf("%s\n", __func__); + + ZFS_ENTER(zfsvfs); + + *ap->a_numdirent = 0; + + offset = uio_offset(uio); + + while (error == 0) { + + switch (offset) { + case 0: /* "." */ + error = zfsctl_dir_emit(".", ZFSCTL_INO_SNAPDIR, + DT_DIR, ap, &next); + break; + + case 1: /* ".." 
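/*
 * Illustrative check (not part of the patch): DIRENT_RECLEN above rounds the
 * per-entry record length up to an 8-byte boundary with the usual
 * (x + 7) & ~7 trick.  A standalone illustration of that rounding, using
 * only the standard C library; the header size in the example is arbitrary.
 */
#include <stdio.h>

static unsigned int
roundup8(unsigned int x)
{
	return ((x + 7) & ~7u);	/* next multiple of 8 (or x if aligned) */
}

int
main(void)
{
	/* e.g. an 11-byte fixed header plus a 6-character name and NUL */
	printf("%u\n", roundup8(11 + 6 + 1));	/* prints 24 */
	return (0);
}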
*/ + error = zfsctl_dir_emit("..", ZFSCTL_INO_ROOT, + DT_DIR, ap, &next); + break; + + default: + dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), + FTAG); + error = dmu_snapshot_list_next(zfsvfs->z_os, + MAXNAMELEN, snapname, &id, &offset, &case_conflict); + dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), + FTAG); + if (error) + break; + + error = zfsctl_dir_emit(snapname, + ZFSCTL_INO_SHARES - id, DT_DIR, ap, &next); + break; + } + + if (error != 0) { + printf("emit error\n"); + break; + } + + entries++; + offset++; + uio_setoffset(uio, offset); + } + + uio_setoffset(uio, offset); + + /* Finished without error? Set EOF */ + if (error == ENOENT) { + *ap->a_eofflag = 1; + printf("Setting eof\n"); + error = 0; + } + + *ap->a_numdirent = entries; + printf("Returning %d entries\n", entries); + + ZFS_EXIT(zfsvfs); + + return (error); +} + + +/* We need to spit out a valid "." ".." entries for mount to work */ +int +zfsctl_vnop_readdir_snapdirs(struct vnop_readdir_args *ap) +#if 0 + struct vnop_readdir_args { + struct vnode a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; + }; +#endif +{ + int error = 0; + uint64_t *next = NULL; + int entries = 0; + uint64_t offset; + struct uio *uio = ap->a_uio; + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + ZFS_ENTER(zfsvfs); + + *ap->a_numdirent = 0; + + offset = uio_offset(uio); + + printf("%s: for id %llu: offset %llu\n", __func__, + zp->z_id, offset); + + while (error == 0) { + + switch (offset) { + case 0: /* "." */ + error = zfsctl_dir_emit(".", ZFSCTL_INO_SNAPDIR, + DT_DIR, ap, &next); + break; + + case 1: /* ".." */ + error = zfsctl_dir_emit("..", ZFSCTL_INO_ROOT, + DT_DIR, ap, &next); + break; + + default: + error = ENOENT; + break; + } + + if (error != 0) { + printf("emit error\n"); + break; + } + + entries++; + offset++; + uio_setoffset(uio, offset); + } + + uio_setoffset(uio, offset); + + /* Finished without error? Set EOF */ + if (error == ENOENT) { + *ap->a_eofflag = 1; + printf("Setting eof\n"); + error = 0; + } + + *ap->a_numdirent = entries; + printf("Returning %d entries\n", entries); + + ZFS_EXIT(zfsvfs); + + return (error); +} + +int +zfsctl_vnop_readdir(struct vnop_readdir_args *ap) +#if 0 + struct vnop_readdir_args { + struct vnode a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; + }; +#endif +{ + znode_t *zp = VTOZ(ap->a_vp); + + printf("%s\n", __func__); + + /* Which directory are we to output? */ + switch (zp->z_id) { + case ZFSCTL_INO_ROOT: + return (zfsctl_vnop_readdir_root(ap)); + case ZFSCTL_INO_SNAPDIR: + return (zfsctl_vnop_readdir_snapdir(ap)); + default: + return (zfsctl_vnop_readdir_snapdirs(ap)); + } + return (EINVAL); +} + +int +zfsctl_vnop_getattr(struct vnop_getattr_args *ap) +#if 0 + struct vnop_getattr_args { + struct vnode *a_vp; + struct vnode_vattr *a_vap; + vfs_context_t a_context; + }; +#endif +{ + vattr_t *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + timestruc_t now; + + printf("%s: active x%llx\n", __func__, vap->va_active); + + ZFS_ENTER(zfsvfs); + + gethrestime(&now); + + if (VATTR_IS_ACTIVE(vap, va_rdev)) + VATTR_RETURN(vap, va_rdev, zfsvfs->z_rdev); + if (VATTR_IS_ACTIVE(vap, va_nlink)) + VATTR_RETURN(vap, va_nlink, + vnode_isdir(vp) ? 
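/*
 * Illustrative sketch (not part of the patch): each readdir handler above is
 * an offset-driven loop; offset 0 emits ".", offset 1 emits "..", and higher
 * offsets walk the real entries until the emitter reports ENOENT or an error.
 * A simplified standalone version of that control flow; names and the sample
 * snapshot list are illustrative.
 */
#include <errno.h>
#include <stdio.h>

static const char *snaps[] = { "monday", "tuesday" };
#define	NSNAPS	(sizeof (snaps) / sizeof (snaps[0]))

static int
emit(const char *name, unsigned long long off)
{
	printf("%llu: %s\n", off, name);
	return (0);	/* 0 = copied out to the caller's buffer */
}

static int
readdir_loop(unsigned long long *offset)
{
	int error = 0;

	while (error == 0) {
		if (*offset == 0)
			error = emit(".", *offset);
		else if (*offset == 1)
			error = emit("..", *offset);
		else if (*offset - 2 < NSNAPS)
			error = emit(snaps[*offset - 2], *offset);
		else
			error = ENOENT;		/* end of listing */
		if (error != 0)
			break;
		(*offset)++;
	}
	return (error == ENOENT ? 0 : error);	/* ENOENT just means EOF */
}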
zp->z_size : zp->z_links); + if (VATTR_IS_ACTIVE(vap, va_total_size)) + VATTR_RETURN(vap, va_total_size, 512); + if (VATTR_IS_ACTIVE(vap, va_total_alloc)) + VATTR_RETURN(vap, va_total_alloc, 512); + if (VATTR_IS_ACTIVE(vap, va_data_size)) + VATTR_RETURN(vap, va_data_size, 0); + if (VATTR_IS_ACTIVE(vap, va_data_alloc)) + VATTR_RETURN(vap, va_data_alloc, 0); + if (VATTR_IS_ACTIVE(vap, va_iosize)) + VATTR_RETURN(vap, va_iosize, 512); + if (VATTR_IS_ACTIVE(vap, va_uid)) + VATTR_RETURN(vap, va_uid, 0); + if (VATTR_IS_ACTIVE(vap, va_gid)) + VATTR_RETURN(vap, va_gid, 0); + if (VATTR_IS_ACTIVE(vap, va_mode)) + VATTR_RETURN(vap, va_mode, S_IFDIR | + S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); + if (VATTR_IS_ACTIVE(vap, va_flags)) + VATTR_RETURN(vap, va_flags, zfs_getbsdflags(zp)); + + if (VATTR_IS_ACTIVE(vap, va_acl)) { + VATTR_RETURN(vap, va_uuuid, kauth_null_guid); + VATTR_RETURN(vap, va_guuid, kauth_null_guid); + VATTR_RETURN(vap, va_acl, NULL); + } + + // crtime, atime, mtime, ctime, btime + uint64_t timez[2]; + timez[0] = zfsvfs->z_mount_time; + timez[1] = 0; + + if (VATTR_IS_ACTIVE(vap, va_create_time)) { + ZFS_TIME_DECODE(&vap->va_create_time, timez); + VATTR_SET_SUPPORTED(vap, va_create_time); + } + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + ZFS_TIME_DECODE(&vap->va_access_time, timez); + VATTR_SET_SUPPORTED(vap, va_access_time); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + ZFS_TIME_DECODE(&vap->va_modify_time, timez); + VATTR_SET_SUPPORTED(vap, va_modify_time); + } + if (VATTR_IS_ACTIVE(vap, va_change_time)) { + ZFS_TIME_DECODE(&vap->va_change_time, timez); + VATTR_SET_SUPPORTED(vap, va_change_time); + } + if (VATTR_IS_ACTIVE(vap, va_backup_time)) { + ZFS_TIME_DECODE(&vap->va_backup_time, timez); + VATTR_SET_SUPPORTED(vap, va_backup_time); + } + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + ZFS_TIME_DECODE(&vap->va_addedtime, timez); + VATTR_SET_SUPPORTED(vap, va_addedtime); + } + + if (VATTR_IS_ACTIVE(vap, va_fileid)) + VATTR_RETURN(vap, va_fileid, zp->z_id); + if (VATTR_IS_ACTIVE(vap, va_linkid)) + VATTR_RETURN(vap, va_linkid, zp->z_id); + if (VATTR_IS_ACTIVE(vap, va_parentid)) { + switch (zp->z_id) { + case ZFSCTL_INO_ROOT: + // ".zfs" parent is mount, 2 on osx + VATTR_RETURN(vap, va_linkid, 2); + break; + case ZFSCTL_INO_SNAPDIR: + // ".zfs/snapshot" parent is ".zfs" + VATTR_RETURN(vap, va_linkid, ZFSCTL_INO_ROOT); + break; + default: + // ".zfs/snapshot/$name" parent ".zfs/snapshot" + VATTR_RETURN(vap, va_linkid, + ZFSCTL_INO_SNAPDIR); + break; + } + } + if (VATTR_IS_ACTIVE(vap, va_fsid)) + VATTR_RETURN(vap, va_fsid, zfsvfs->z_rdev); + + if (VATTR_IS_ACTIVE(vap, va_filerev)) + VATTR_RETURN(vap, va_filerev, 0); + if (VATTR_IS_ACTIVE(vap, va_gen)) + VATTR_RETURN(vap, va_gen, zp->z_gen); + if (VATTR_IS_ACTIVE(vap, va_type)) + VATTR_RETURN(vap, va_type, vnode_vtype(ZTOV(zp))); + if (VATTR_IS_ACTIVE(vap, va_name)) { + strlcpy(vap->va_name, zp->z_name_cache, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + } + + /* Don't include '.' and '..' in the number of entries */ + if (VATTR_IS_ACTIVE(vap, va_nchildren) && vnode_isdir(vp)) { + VATTR_RETURN(vap, va_nchildren, + zp->z_links > 3 ? 
zp->z_links-2 : 1); + } + if (VATTR_IS_ACTIVE(vap, va_dirlinkcount) && vnode_isdir(vp)) + VATTR_RETURN(vap, va_dirlinkcount, 1); + +#ifdef VNODE_ATTR_va_fsid64 + if (VATTR_IS_ACTIVE(vap, va_fsid64)) { + vap->va_fsid64.val[0] = + vfs_statfs(zfsvfs->z_vfs)->f_fsid.val[0]; + vap->va_fsid64.val[1] = vfs_typenum(zfsvfs->z_vfs); + VATTR_SET_SUPPORTED(vap, va_fsid64); + } +#endif + + ZFS_EXIT(zfsvfs); + + printf("%s: returned x%llx missed: x%llx\n", __func__, + vap->va_supported, vap->va_active &= ~vap->va_supported); + return (0); +} + +int +zfsctl_vnop_access(struct vnop_access_args *ap) +{ + int accmode = ap->a_action; + dprintf("zfsctl_access\n"); + + if (accmode & VWRITE) + return (EACCES); + return (0); +} + +int +zfsctl_vnop_open(struct vnop_open_args *ap) +{ + int flags = ap->a_mode; + + if (flags & FWRITE) + return (EACCES); + + return (zfsctl_snapshot_mount(ap->a_vp, 0)); +} + +int +zfsctl_vnop_close(struct vnop_close_args *ap) +{ + printf("%s\n", __func__); + return (0); +} + +int +zfsctl_vnop_inactive(struct vnop_inactive_args *ap) +{ + printf("%s\n", __func__); + return (0); +} + +int +zfsctl_vnop_reclaim(struct vnop_reclaim_args *ap) +{ + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + printf("%s vp %p\n", __func__, vp); + vnode_removefsref(vp); /* ADDREF from vnode_create */ + vnode_clearfsnode(vp); /* vp->v_data = NULL */ + + mutex_enter(&zfsvfs->z_znodes_lock); + if (list_link_active(&zp->z_link_node)) { + list_remove(&zfsvfs->z_all_znodes, zp); + } + mutex_exit(&zfsvfs->z_znodes_lock); + + zp->z_vnode = NULL; + kmem_cache_free(znode_cache, zp); + + return (0); +} + +/* + * Construct a full dataset name in full_name: "pool/dataset@snap_name" + */ +static int +zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, + char *full_name) +{ + objset_t *os = zfsvfs->z_os; + + if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) + return (SET_ERROR(EILSEQ)); + + dmu_objset_name(os, full_name); + if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) + return (SET_ERROR(ENAMETOOLONG)); + + (void) strcat(full_name, "@"); + (void) strcat(full_name, snap_name); + + return (0); +} + +int +zfsctl_snapshot_mount(struct vnode *vp, int flags) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int ret = 0; + /* + * If we are here for a snapdirs directory, attempt to get zed + * to mount the snapshot for the user. If successful, forward the + * vnop_open() to them (ourselves). + * Use a timeout in case zed is not running. + */ + + if (zfs_auto_snapshot == 0) + return (0); + + ZFS_ENTER(zfsvfs); + if (((zp->z_id >= zfsvfs->z_ctldir_startid) && + (zp->z_id <= ZFSCTL_INO_SNAPDIRS))) { + hrtime_t now; + now = gethrtime(); + + /* + * If z_snap_mount_time is set, check if it is old enough to + * retry, if so, set z_snap_mount_time to zero. + */ + if (now - zp->z_snap_mount_time > SEC2NSEC(60)) + atomic_cas_64((uint64_t *)&zp->z_snap_mount_time, + (uint64_t)zp->z_snap_mount_time, + 0ULL); + + /* + * Attempt mount, make sure only to issue one request, by + * attempting to CAS in current time in place of zero. + */ + if (atomic_cas_64((uint64_t *)&zp->z_snap_mount_time, 0ULL, + (uint64_t)now) == 0ULL) { + char full_name[ZFS_MAX_DATASET_NAME_LEN]; + + /* First! 
*/ + ret = zfsctl_snapshot_name(zfsvfs, zp->z_name_cache, + ZFS_MAX_DATASET_NAME_LEN, full_name); + + if (ret == 0) { + zfsctl_mounts_waiting_t *zcm; + + /* Create condvar to wait for mount to happen */ + + zcm = kmem_alloc( + sizeof (zfsctl_mounts_waiting_t), KM_SLEEP); + mutex_init(&zcm->zcm_lock, NULL, MUTEX_DEFAULT, + NULL); + cv_init(&zcm->zcm_cv, NULL, CV_DEFAULT, NULL); + strlcpy(zcm->zcm_name, full_name, + sizeof (zcm->zcm_name)); + + printf("%s: requesting mount for '%s'\n", + __func__, full_name); + + mutex_enter(&zfsctl_mounts_lock); + list_insert_tail(&zfsctl_mounts_list, zcm); + mutex_exit(&zfsctl_mounts_lock); + + mutex_enter(&zcm->zcm_lock); + zfs_ereport_snapshot_post( + FM_EREPORT_ZFS_SNAPSHOT_MOUNT, + dmu_objset_spa(zfsvfs->z_os), full_name); + + /* Now we wait hoping zed comes back to us */ + ret = cv_timedwait(&zcm->zcm_cv, &zcm->zcm_lock, + ddi_get_lbolt() + (hz * 3)); + + printf("%s: finished waiting %d\n", + __func__, ret); + + mutex_exit(&zcm->zcm_lock); + + mutex_enter(&zfsctl_mounts_lock); + list_remove(&zfsctl_mounts_list, zcm); + mutex_exit(&zfsctl_mounts_lock); + + mutex_destroy(&zcm->zcm_lock); + cv_destroy(&zcm->zcm_cv); + + kmem_free(zcm, + sizeof (zfsctl_mounts_waiting_t)); + + /* + * If we mounted, make it re-open it so + * the process that issued the access will + * see the mounted content + */ + if (ret >= 0) { + /* Remove the cache entry */ + cache_purge(vp); + cache_purge_negatives(vp); + ret = ERESTART; + } + } + } + } + + ZFS_EXIT(zfsvfs); + + return (ret); +} + +/* Called whenever zfs_vfs_mount() is called with a snapshot */ +void +zfsctl_mount_signal(char *osname, boolean_t mounting) +{ + zfsctl_mounts_waiting_t *zcm; + + printf("%s: looking for snapshot '%s'\n", __func__, osname); + + mutex_enter(&zfsctl_mounts_lock); + for (zcm = list_head(&zfsctl_mounts_list); + zcm; + zcm = list_next(&zfsctl_mounts_list, zcm)) { + if (strncmp(zcm->zcm_name, osname, sizeof (zcm->zcm_name)) == 0) + break; + } + mutex_exit(&zfsctl_mounts_lock); + + /* Is there someone to wake up? */ + if (zcm != NULL) { + mutex_enter(&zcm->zcm_lock); + cv_signal(&zcm->zcm_cv); + mutex_exit(&zcm->zcm_lock); + printf("%s: mount waiter found and signalled\n", __func__); + } + + zfsctl_unmount_delay_t *zcu; + + /* Add or remove mount to/from list of active mounts */ + + if (mounting) { + /* Add active mounts to the list */ + zcu = kmem_alloc(sizeof (zfsctl_unmount_delay_t), KM_SLEEP); + zcu->se_name = kmem_strdup(osname); + zcu->se_time = gethrestime_sec(); + list_link_init(&zcu->se_nodelink); + + mutex_enter(&zfsctl_unmount_list_lock); + list_insert_tail(&zfsctl_unmount_list, zcu); + mutex_exit(&zfsctl_unmount_list_lock); + + } else { + /* Unmounting */ + mutex_enter(&zfsctl_unmount_list_lock); + for (zcu = list_head(&zfsctl_unmount_list); + zcu != NULL; + zcu = list_next(&zfsctl_unmount_list, zcu)) { + if (strcmp(osname, zcu->se_name) == 0) { + list_remove(&zfsctl_unmount_list, zcu); + kmem_strfree(zcu->se_name); + kmem_free(zcu, sizeof (zfsctl_unmount_delay_t)); + break; + } + } + mutex_exit(&zfsctl_unmount_list_lock); + } +} + +int +zfsctl_snapshot_unmount_node(struct vnode *vp, const char *full_name, + int flags) +{ + znode_t *zp = VTOZ(vp); + + printf("%s\n", __func__); + + if (zp == NULL) + return (ENOENT); + + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int ret = ENOENT; + /* + * If we are here for a snapdirs directory, attempt to get zed + * to mount the snapshot for the user. If successful, forward the + * vnop_open() to them (ourselves). + * Use a timeout in case zed is not running. 
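/*
 * Illustrative sketch (not part of the patch): the atomic_cas_64() dance on
 * z_snap_mount_time above lets only one thread issue a mount (or unmount)
 * request at a time, and lets a stale request be retried after 60 seconds.
 * The same gate written with C11 atomics, assuming a monotonic nanosecond
 * clock; names are illustrative.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>

#define	RETRY_NSEC	(60ULL * 1000000000ULL)

static _Atomic uint64_t snap_mount_time;	/* 0 = no request in flight */

static bool
try_claim_mount_request(void)
{
	struct timespec ts;
	uint64_t now, seen, expected = 0;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	now = (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;

	/* If an old request has expired, allow the gate to reset to zero */
	seen = atomic_load(&snap_mount_time);
	if (seen != 0 && now - seen > RETRY_NSEC)
		atomic_compare_exchange_strong(&snap_mount_time, &seen, 0);

	/* Only the thread that swaps 0 -> now gets to issue the request */
	return (atomic_compare_exchange_strong(&snap_mount_time,
	    &expected, now));
}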
+ */ + + ZFS_ENTER(zfsvfs); + + if (zp->z_id == zfsvfs->z_root) { + hrtime_t now; + now = gethrtime(); + + /* + * If z_snap_mount_time is set, check if it is old enough to + * retry, if so, set z_snap_mount_time to zero. + */ + if (now - zp->z_snap_mount_time > SEC2NSEC(60)) + atomic_cas_64((uint64_t *)&zp->z_snap_mount_time, + (uint64_t)zp->z_snap_mount_time, + 0ULL); + + /* + * Attempt unmount, make sure only to issue one request, by + * attempting to CAS in current time in place of zero. + */ + if (atomic_cas_64((uint64_t *)&zp->z_snap_mount_time, 0ULL, + (uint64_t)now) == 0ULL) { + + ret = 0; + + /* First! */ + + if (ret == 0) { + zfsctl_mounts_waiting_t *zcm; + + /* Create condvar to wait for mount to happen */ + + zcm = kmem_alloc( + sizeof (zfsctl_mounts_waiting_t), KM_SLEEP); + mutex_init(&zcm->zcm_lock, NULL, MUTEX_DEFAULT, + NULL); + cv_init(&zcm->zcm_cv, NULL, CV_DEFAULT, NULL); + strlcpy(zcm->zcm_name, full_name, + sizeof (zcm->zcm_name)); + + dprintf("%s: requesting unmount for '%s'\n", + __func__, full_name); + + mutex_enter(&zfsctl_mounts_lock); + list_insert_tail(&zfsctl_mounts_list, zcm); + mutex_exit(&zfsctl_mounts_lock); + + mutex_enter(&zcm->zcm_lock); + zfs_ereport_snapshot_post( + FM_EREPORT_ZFS_SNAPSHOT_UNMOUNT, + dmu_objset_spa(zfsvfs->z_os), full_name); + + /* Now we wait hoping zed comes back to us */ + ret = cv_timedwait(&zcm->zcm_cv, &zcm->zcm_lock, + ddi_get_lbolt() + (hz * 3)); + + dprintf("%s: finished waiting %d\n", + __func__, ret); + + mutex_exit(&zcm->zcm_lock); + + mutex_enter(&zfsctl_mounts_lock); + list_remove(&zfsctl_mounts_list, zcm); + mutex_exit(&zfsctl_mounts_lock); + + kmem_free(zcm, + sizeof (zfsctl_mounts_waiting_t)); + + /* Allow mounts to happen immediately */ + zp->z_snap_mount_time = 0; + + /* + * If we unmounted, alert caller + */ + if (ret >= 0) + ret = 0; + + } + } + } + + ZFS_EXIT(zfsvfs); + + return (ret); +} + +int +zfsctl_snapshot_unmount(const char *snapname, int flags) +{ + znode_t *rootzp; + zfsvfs_t *zfsvfs; + + printf("%s\n", __func__); + + if (strchr(snapname, '@') == NULL) + return (0); + + int err = getzfsvfs(snapname, &zfsvfs); + if (err != 0) { + ASSERT3P(zfsvfs, ==, NULL); + return (0); + } + ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os))); + + err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); + if (err == 0) { + zfsctl_snapshot_unmount_node(ZTOV(rootzp), snapname, flags); + VN_RELE(ZTOV(rootzp)); + } + + vfs_unbusy(zfsvfs->z_vfs); + return (0); +} + +int +zfsctl_vnop_mkdir(struct vnop_mkdir_args *ap) +#if 0 + struct vnop_mkdir_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vnode_vattr *a_vap; + vfs_context_t a_context; + }; +#endif +{ + cred_t *cr = (cred_t *)vfs_context_ucred((ap)->a_context); + znode_t *dzp = VTOZ(ap->a_dvp); + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + char *dsname; + int error; + + if (zfs_admin_snapshot == 0) + return (SET_ERROR(EACCES)); + + dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + + if (zfs_component_namecheck(ap->a_cnp->cn_nameptr, NULL, NULL) != 0) { + error = SET_ERROR(EILSEQ); + goto out; + } + + dmu_objset_name(zfsvfs->z_os, dsname); + + error = zfs_secpolicy_snapshot_perms(dsname, cr); + if (error != 0) + goto out; + + if (error == 0) { + error = dmu_objset_snapshot_one(dsname, ap->a_cnp->cn_nameptr); + if (error != 0) + goto out; + + error = zfsctl_root_lookup(ap->a_dvp, ap->a_cnp->cn_nameptr, + ap->a_vpp, 0, cr, NULL, NULL); + } + +out: + kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); + + return (error); +} + +int 
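/*
 * Illustrative sketch (not part of the patch): the cv_timedwait()/cv_signal()
 * pairing above parks the requesting vnop on a condition variable for a few
 * seconds while zed performs the (un)mount, and zfsctl_mount_signal() wakes
 * the waiter that registered the matching snapshot name.  A minimal userspace
 * analogue of that handshake, assuming POSIX threads; structure and names
 * are illustrative.
 */
#include <pthread.h>
#include <string.h>
#include <time.h>

struct mount_waiter {
	pthread_mutex_t lock;
	pthread_cond_t cv;
	char name[256];
	int done;
};

/* Requester side: wait up to 'secs' seconds for the helper to respond */
static int
wait_for_mount(struct mount_waiter *w, int secs)
{
	struct timespec deadline;
	int rc = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += secs;

	pthread_mutex_lock(&w->lock);
	while (!w->done && rc == 0)
		rc = pthread_cond_timedwait(&w->cv, &w->lock, &deadline);
	pthread_mutex_unlock(&w->lock);
	return (rc);	/* 0 on signal, ETIMEDOUT if the helper never came */
}

/* Helper side: signal the waiter whose registered name matches */
static void
signal_mount(struct mount_waiter *w, const char *osname)
{
	if (strcmp(w->name, osname) != 0)
		return;
	pthread_mutex_lock(&w->lock);
	w->done = 1;
	pthread_cond_signal(&w->cv);
	pthread_mutex_unlock(&w->lock);
}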
+zfsctl_vnop_rmdir(struct vnop_rmdir_args *ap) +#if 0 + struct vnop_rmdir_args { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + vfs_context_t a_context; + }; +#endif +{ + cred_t *cr = (cred_t *)vfs_context_ucred((ap)->a_context); + znode_t *dzp = VTOZ(ap->a_dvp); + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + char *snapname, *real; + char *name = ap->a_cnp->cn_nameptr; + int error; + + printf("%s: '%s'\n", __func__, name); + + if (zfs_admin_snapshot == 0) + return (SET_ERROR(EACCES)); + + ZFS_ENTER(zfsvfs); + + snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + + if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { + error = dmu_snapshot_realname(zfsvfs->z_os, name, + real, ZFS_MAX_DATASET_NAME_LEN, NULL); + if (error == 0) { + name = real; + } else if (error != ENOTSUP) { + goto out; + } + } + + error = zfsctl_snapshot_name(zfsvfs, name, + ZFS_MAX_DATASET_NAME_LEN, snapname); + if (error == 0) + error = zfs_secpolicy_destroy_perms(snapname, cr); + if (error != 0) + goto out; + + error = zfsctl_snapshot_unmount_node(ap->a_vp, snapname, MNT_FORCE); + if ((error == 0) || (error == ENOENT)) { + error = dsl_destroy_snapshot(snapname, B_FALSE); + + /* Destroy the vnode */ + if (ap->a_vp != NULL) { + dprintf("%s: releasing vp\n", __func__); + vnode_recycle(ap->a_vp); + } + } + +out: + kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); + + ZFS_EXIT(zfsvfs); + return (error); +} + +static void +zfsctl_unmount_thread(void *notused) +{ + callb_cpr_t cpr; + zfsctl_unmount_delay_t *zcu; + time_t now; + CALLB_CPR_INIT(&cpr, &zfsctl_unmount_lock, callb_generic_cpr, FTAG); + + dprintf("%s is alive\n", __func__); + + mutex_enter(&zfsctl_unmount_lock); + while (!zfsctl_unmount_thread_exit) { + + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait(&zfsctl_unmount_cv, + &zfsctl_unmount_lock, ddi_get_lbolt() + (hz<<6)); + CALLB_CPR_SAFE_END(&cpr, &zfsctl_unmount_lock); + + if (!zfsctl_unmount_thread_exit) { + /* + * Loop all active mounts, if any are older + * than ZFSCTL_EXPIRE_SNAPSHOT, then we update + * their timestamp and attempt unmount. + */ + now = gethrestime_sec(); + mutex_enter(&zfsctl_unmount_list_lock); + for (zcu = list_head(&zfsctl_unmount_list); + zcu != NULL; + zcu = list_next(&zfsctl_unmount_list, zcu)) { + if ((now > zcu->se_time) && + ((now - zcu->se_time) > + zfs_expire_snapshot)) { + zcu->se_time = now; + zfsctl_snapshot_unmount(zcu->se_name, + 0); + } + } + mutex_exit(&zfsctl_unmount_list_lock); + } + } + + zfsctl_unmount_thread_exit = FALSE; + cv_broadcast(&zfsctl_unmount_cv); + CALLB_CPR_EXIT(&cpr); + dprintf("ZFS: zfsctl_unmount thread exit\n"); + thread_exit(); +} + +/* + * Initialize the various pieces we'll need to create and manipulate .zfs + * directories. Currently this is unused but available. 
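+ * zfsctl_init() also starts zfsctl_unmount_thread(), which periodically
+ * scans the snapshot mounts recorded by zfsctl_mount_signal() and requests
+ * an unmount once an entry is older than zfs_expire_snapshot seconds.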
+ */ +void +zfsctl_init(void) +{ + mutex_init(&zfsctl_mounts_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsctl_mounts_list, sizeof (zfsctl_mounts_waiting_t), + offsetof(zfsctl_mounts_waiting_t, zcm_node)); + + mutex_init(&zfsctl_unmount_list_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsctl_unmount_list, sizeof (zfsctl_unmount_delay_t), + offsetof(zfsctl_unmount_delay_t, se_nodelink)); + + mutex_init(&zfsctl_unmount_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zfsctl_unmount_cv, NULL, CV_DEFAULT, NULL); + zfsctl_unmount_thread_exit = FALSE; + + (void) thread_create(NULL, 0, zfsctl_unmount_thread, NULL, 0, &p0, + TS_RUN, minclsyspri); +} + +/* + * Cleanup the various pieces we needed for .zfs directories. In particular + * ensure the expiry timer is canceled safely. + */ +void +zfsctl_fini(void) +{ + mutex_destroy(&zfsctl_mounts_lock); + list_destroy(&zfsctl_mounts_list); + + mutex_destroy(&zfsctl_unmount_list_lock); + list_destroy(&zfsctl_unmount_list); + + mutex_enter(&zfsctl_unmount_lock); + zfsctl_unmount_thread_exit = TRUE; + while (zfsctl_unmount_thread_exit) { + cv_signal(&zfsctl_unmount_cv); + cv_wait(&zfsctl_unmount_cv, &zfsctl_unmount_lock); + } + mutex_exit(&zfsctl_unmount_lock); + + mutex_destroy(&zfsctl_unmount_lock); + cv_destroy(&zfsctl_unmount_cv); +} + +module_param(zfs_admin_snapshot, int, 0644); +MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); + +module_param(zfs_expire_snapshot, int, 0644); +MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); diff --git a/module/os/macos/zfs/zfs_debug.c b/module/os/macos/zfs/zfs_debug.c new file mode 100644 index 0000000000..68cd17eeef --- /dev/null +++ b/module/os/macos/zfs/zfs_debug.c @@ -0,0 +1,186 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + */ + +#include + +typedef struct zfs_dbgmsg { + list_node_t zdm_node; + time_t zdm_timestamp; + char zdm_msg[1]; /* variable length allocation */ +} zfs_dbgmsg_t; + +list_t zfs_dbgmsgs; +int zfs_dbgmsg_size; +kmutex_t zfs_dbgmsgs_lock; +int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */ + +int zfs_dbgmsg_enable = 1; + +/* + * Debug logging is enabled by default for production kernel builds. + * The overhead for this is negligible and the logs can be valuable when + * debugging. For non-production user space builds all debugging except + * logging is enabled since performance is no longer a concern. 
+ */ +void +zfs_dbgmsg_init(void) +{ + list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t), + offsetof(zfs_dbgmsg_t, zdm_node)); + mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +zfs_dbgmsg_fini(void) +{ + zfs_dbgmsg_t *zdm; + + while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) { + int size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg); + kmem_free(zdm, size); + zfs_dbgmsg_size -= size; + } + mutex_destroy(&zfs_dbgmsgs_lock); + ASSERT0(zfs_dbgmsg_size); +} + +void +__set_error(const char *file, const char *func, int line, int err) +{ + /* + * To enable this: + * + * $ echo 512 >/sys/module/zfs/parameters/zfs_flags + */ + if (zfs_flags & ZFS_DEBUG_SET_ERROR) + __dprintf(B_FALSE, file, func, line, "error %lu", err); +} + +/* + * Print these messages by running: + * echo ::zfs_dbgmsg | mdb -k + * + * Monitor these messages by running: + * dtrace -qn 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}' + * + * When used with libzpool, monitor with: + * dtrace -qn 'zfs$pid::zfs_dbgmsg:probe1{printf("%s\n", copyinstr(arg1))}' + */ + +/* + * MacOS X's dtrace doesn't handle the PROBEs, so + * we have a utility function that we can watch with + * sudo dtrace -qn '__zfs_dbgmsg:entry{printf("%s\n", stringof(arg0));}' + */ +noinline void +__zfs_dbgmsg(char *buf) +{ + int size = sizeof (zfs_dbgmsg_t) + strlen(buf); + zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP); + zdm->zdm_timestamp = gethrestime_sec(); + strlcpy(zdm->zdm_msg, buf, size); + + mutex_enter(&zfs_dbgmsgs_lock); + list_insert_tail(&zfs_dbgmsgs, zdm); + zfs_dbgmsg_size += sizeof (zfs_dbgmsg_t) + size; + while (zfs_dbgmsg_size > zfs_dbgmsg_maxsize) { + zdm = list_remove_head(&zfs_dbgmsgs); + size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg); + kmem_free(zdm, size); + zfs_dbgmsg_size -= size; + } + mutex_exit(&zfs_dbgmsgs_lock); +} + +void +__dprintf(boolean_t dprint, const char *file, const char *func, + int line, const char *fmt, ...) +{ + int size, i; + va_list adx; + char *buf, *nl; + char *prefix = (dprint) ? "dprintf: " : ""; + const char *newfile; + + /* + * Get rid of annoying prefix to filename. + */ + newfile = strrchr(file, '/'); + if (newfile != NULL) { + newfile = newfile + 1; /* Get rid of leading / */ + } else { + newfile = file; + } + + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx); + va_end(adx); + + size += snprintf(NULL, 0, "%s%s:%d:%s(): ", prefix, newfile, line, + func); + + size++; /* null byte in the "buf" string */ + + /* + * There is one byte of string in sizeof (zfs_dbgmsg_t), used + * for the terminating null. + */ + buf = kmem_alloc(size, KM_SLEEP); + int roger = 0; + + va_start(adx, fmt); + i = snprintf(buf, size + 1, "%s%s:%d:%s(): ", + prefix, newfile, line, func); + roger = vsnprintf(buf + i, size -i + 1, fmt, adx); + va_end(adx); + + /* + * Get rid of trailing newline for dprintf logs. 
+ */ + if (dprint && buf[0] != '\0') { + nl = &buf[strlen(buf) - 1]; + if (*nl == '\n') + *nl = '\0'; + } + + DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg); + + __zfs_dbgmsg(buf); + + kmem_free(buf, size); +} + +void +zfs_dbgmsg_print(const char *tag) +{ + zfs_dbgmsg_t *zdm; + + (void) printf("ZFS_DBGMSG(%s):\n", tag); + mutex_enter(&zfs_dbgmsgs_lock); + for (zdm = list_head(&zfs_dbgmsgs); zdm; + zdm = list_next(&zfs_dbgmsgs, zdm)) + (void) printf("%s\n", zdm->zdm_msg); + mutex_exit(&zfs_dbgmsgs_lock); +} diff --git a/module/os/macos/zfs/zfs_dir.c b/module/os/macos/zfs/zfs_dir.c new file mode 100644 index 0000000000..0de0590572 --- /dev/null +++ b/module/os/macos/zfs/zfs_dir.c @@ -0,0 +1,1213 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2016 by Delphix. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups + * of names after deciding which is the appropriate lookup interface. + */ +static int +zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, matchtype_t mt, + boolean_t update, int *deflags, struct componentname *rpnp, uint64_t *zoid) +{ + boolean_t conflict = B_FALSE; + int error; + + if (zfsvfs->z_norm) { + size_t bufsz = 0; + char *buf = NULL; + + if (rpnp) { + buf = rpnp->cn_nameptr; + bufsz = rpnp->cn_namelen; + } + + /* + * In the non-mixed case we only expect there would ever + * be one match, but we need to use the normalizing lookup. + */ + error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, + zoid, mt, buf, bufsz, &conflict); + } else { + error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); + } + + /* + * Allow multiple entries provided the first entry is + * the object id. Non-zpl consumers may safely make + * use of the additional space. + * + * XXX: This should be a feature flag for compatibility + */ + if (error == EOVERFLOW) + error = 0; + + if (zfsvfs->z_norm && !error && deflags) + *deflags = conflict ? ED_CASE_CONFLICT : 0; + + *zoid = ZFS_DIRENT_OBJ(*zoid); + + return (error); +} + +/* + * Lock a directory entry. A dirlock on protects that name + * in dzp's directory zap object. As long as you hold a dirlock, you can + * assume two things: (1) dzp cannot be reaped, and (2) no other thread + * can change the zap entry for (i.e. link or unlink) this name. 
+ * + * Input arguments: + * dzp - znode for directory + * name - name of entry to lock + * flag - ZNEW: if the entry already exists, fail with EEXIST. + * ZEXISTS: if the entry does not exist, fail with ENOENT. + * ZSHARED: allow concurrent access with other ZSHARED callers. + * ZXATTR: we want dzp's xattr directory + * ZCILOOK: On a mixed sensitivity file system, + * this lookup should be case-insensitive. + * ZCIEXACT: On a purely case-insensitive file system, + * this lookup should be case-sensitive. + * ZRENAMING: we are locking for renaming, force narrow locks + * ZHAVELOCK: Don't grab the z_name_lock for this call. The + * current thread already holds it. + * + * Output arguments: + * zpp - pointer to the znode for the entry (NULL if there isn't one) + * dlpp - pointer to the dirlock for this entry (NULL on error) + * direntflags - (case-insensitive lookup only) + * flags if multiple case-sensitive matches exist in directory + * realpnp - (case-insensitive lookup only) + * actual name matched within the directory + * + * Return value: 0 on success or errno on failure. + * + * NOTE: Always checks for, and rejects, '.' and '..'. + * NOTE: For case-insensitive file systems we take wide locks (see below), + * but return znode pointers to a single match. + */ +int +zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, + int flag, int *direntflags, struct componentname *realpnp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zfs_dirlock_t *dl; + boolean_t update; + matchtype_t mt = 0; + uint64_t zoid; + int error = 0; + int cmpflags; + + *zpp = NULL; + *dlpp = NULL; + + /* + * Verify that we are not trying to lock '.', '..', or '.zfs' + */ + if ((name[0] == '.' && + (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) || + (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)) + return (SET_ERROR(EEXIST)); + + /* + * Case sensitivity and normalization preferences are set when + * the file system is created. These are stored in the + * zfsvfs->z_case and zfsvfs->z_norm fields. These choices + * affect what vnodes can be cached in the DNLC, how we + * perform zap lookups, and the "width" of our dirlocks. + * + * A normal dirlock locks a single name. Note that with + * normalization a name can be composed multiple ways, but + * when normalized, these names all compare equal. A wide + * dirlock locks multiple names. We need these when the file + * system is supporting mixed-mode access. It is sometimes + * necessary to lock all case permutations of file name at + * once so that simultaneous case-insensitive/case-sensitive + * behaves as rationally as possible. + */ + + /* + * When matching we may need to normalize & change case according to + * FS settings. + * + * Note that a normalized match is necessary for a case insensitive + * filesystem when the lookup request is not exact because normalization + * can fold case independent of normalizing code point sequences. + * + * See the table above zfs_dropname(). + */ + if (zfsvfs->z_norm != 0) { + mt = MT_NORMALIZE; + + /* + * Determine if the match needs to honor the case specified in + * lookup, and if so keep track of that so that during + * normalization we don't fold case. + */ + if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE && + (flag & ZCIEXACT)) || + (zfsvfs->z_case == ZFS_CASE_MIXED && !(flag & ZCILOOK))) { + mt |= MT_MATCH_CASE; + } + } + + /* + * Only look in or update the DNLC if we are looking for the + * name on a file system that does not require normalization + * or case folding. 
We can also look there if we happen to be + * on a non-normalizing, mixed sensitivity file system IF we + * are looking for the exact name. + * + * Maybe can add TO-UPPERed version of name to dnlc in ci-only + * case for performance improvement? + */ + update = !zfsvfs->z_norm || + (zfsvfs->z_case == ZFS_CASE_MIXED && + !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); + + /* + * ZRENAMING indicates we are in a situation where we should + * take narrow locks regardless of the file system's + * preferences for normalizing and case folding. This will + * prevent us deadlocking trying to grab the same wide lock + * twice if the two names happen to be case-insensitive + * matches. + */ + if (flag & ZRENAMING) + cmpflags = 0; + else + cmpflags = zfsvfs->z_norm; + + /* + * Wait until there are no locks on this name. + * + * Don't grab the lock if it is already held. However, cannot + * have both ZSHARED and ZHAVELOCK together. + */ + ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); + if (!(flag & ZHAVELOCK)) + rw_enter(&dzp->z_name_lock, RW_READER); + + mutex_enter(&dzp->z_lock); + for (;;) { + if (dzp->z_unlinked && !(flag & ZXATTR)) { + mutex_exit(&dzp->z_lock); + if (!(flag & ZHAVELOCK)) + rw_exit(&dzp->z_name_lock); + return (SET_ERROR(ENOENT)); + } + for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { + if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, + U8_UNICODE_LATEST, &error) == 0) || error != 0) + break; + } + if (error != 0) { + mutex_exit(&dzp->z_lock); + if (!(flag & ZHAVELOCK)) + rw_exit(&dzp->z_name_lock); + return (SET_ERROR(ENOENT)); + } + if (dl == NULL) { + /* + * Allocate a new dirlock and add it to the list. + */ + dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); + cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); + dl->dl_name = name; + dl->dl_sharecnt = 0; + dl->dl_namelock = 0; + dl->dl_namesize = 0; + dl->dl_dzp = dzp; + dl->dl_next = dzp->z_dirlocks; + dzp->z_dirlocks = dl; + break; + } + if ((flag & ZSHARED) && dl->dl_sharecnt != 0) + break; + cv_wait(&dl->dl_cv, &dzp->z_lock); + } + + /* + * If the z_name_lock was NOT held for this dirlock record it. + */ + if (flag & ZHAVELOCK) + dl->dl_namelock = 1; + + if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { + /* + * We're the second shared reference to dl. Make a copy of + * dl_name in case the first thread goes away before we do. + * Note that we initialize the new name before storing its + * pointer into dl_name, because the first thread may load + * dl->dl_name at any time. It'll either see the old value, + * which belongs to it, or the new shared copy; either is OK. + */ + dl->dl_namesize = strlen(dl->dl_name) + 1; + name = kmem_alloc(dl->dl_namesize, KM_SLEEP); + bcopy(dl->dl_name, name, dl->dl_namesize); + dl->dl_name = name; + } + + mutex_exit(&dzp->z_lock); + + /* + * We have a dirlock on the name. (Note that it is the dirlock, + * not the dzp's z_lock, that protects the name in the zap object.) + * See if there's an object by this name; if so, put a hold on it. + */ + if (flag & ZXATTR) { + error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, + sizeof (zoid)); + if (error == 0) + error = (zoid == 0 ? 
SET_ERROR(ENOENT) : 0); + } else { + error = zfs_match_find(zfsvfs, dzp, name, mt, + update, direntflags, realpnp, &zoid); + } + if (error) { + if (error != ENOENT || (flag & ZEXISTS)) { + zfs_dirent_unlock(dl); + return (error); + } + } else { + if (flag & ZNEW) { + zfs_dirent_unlock(dl); + return (SET_ERROR(EEXIST)); + } + error = zfs_zget(zfsvfs, zoid, zpp); + if (error) { + zfs_dirent_unlock(dl); + return (error); + } + } + + *dlpp = dl; + + return (0); +} + +/* + * Unlock this directory entry and wake anyone who was waiting for it. + */ +void +zfs_dirent_unlock(zfs_dirlock_t *dl) +{ + znode_t *dzp = dl->dl_dzp; + zfs_dirlock_t **prev_dl, *cur_dl; + + mutex_enter(&dzp->z_lock); + + if (!dl->dl_namelock) + rw_exit(&dzp->z_name_lock); + + if (dl->dl_sharecnt > 1) { + dl->dl_sharecnt--; + mutex_exit(&dzp->z_lock); + return; + } + prev_dl = &dzp->z_dirlocks; + while ((cur_dl = *prev_dl) != dl) + prev_dl = &cur_dl->dl_next; + *prev_dl = dl->dl_next; + cv_broadcast(&dl->dl_cv); + mutex_exit(&dzp->z_lock); + + if (dl->dl_namesize != 0) + kmem_free(dl->dl_name, dl->dl_namesize); + cv_destroy(&dl->dl_cv); + kmem_free(dl, sizeof (*dl)); +} + +/* + * Look up an entry in a directory. + * + * NOTE: '.' and '..' are handled as special cases because + * no directory entries are actually stored for them. If this is + * the root of a filesystem, then '.zfs' is also treated as a + * special pseudo-directory. + */ +int +zfs_dirlook(znode_t *dzp, char *name, znode_t **zpp, int flags, + int *deflg, struct componentname *rpnp) +{ + zfs_dirlock_t *dl; + znode_t *zp; + struct vnode *vp; + int error = 0; + uint64_t parent; + + if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { + *zpp = dzp; + zhold(*zpp); + } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + + /* + * If we are a snapshot mounted under .zfs, return + * the inode pointer for the snapshot directory. + */ + if ((error = sa_lookup(dzp->z_sa_hdl, + SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) + return (error); + + if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { + error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, + "snapshot", &vp, 0, kcred, NULL, NULL); + if (error == 0) + *zpp = VTOZ(vp); + return (error); + } + rw_enter(&dzp->z_parent_lock, RW_READER); + error = zfs_zget(zfsvfs, parent, &zp); + if (error == 0) + *zpp = zp; + rw_exit(&dzp->z_parent_lock); + } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { + vp = zfsctl_root(dzp); + if (vp != NULL) + *zpp = VTOZ(vp); + else + error = ENOENT; + } else { + int zf; + + zf = ZEXISTS | ZSHARED; + if (flags & FIGNORECASE) + zf |= ZCILOOK; + + error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); + if (error == 0) { + *zpp = zp; + zfs_dirent_unlock(dl); + dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ + } + rpnp = NULL; + } + + if ((flags & FIGNORECASE) && rpnp && !error) + (void) strlcpy(rpnp->cn_nameptr, name, rpnp->cn_namelen); + + return (error); +} + +/* + * unlinked Set (formerly known as the "delete queue") Error Handling + * + * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we + * don't specify the name of the entry that we will be manipulating. We + * also fib and say that we won't be adding any new entries to the + * unlinked set, even though we might (this is to lower the minimum file + * size that can be deleted in a full filesystem). So on the small + * chance that the nlink list is using a fat zap (ie. 
has more than + * 2000 entries), we *may* not pre-read a block that's needed. + * Therefore it is remotely possible for some of the assertions + * regarding the unlinked set below to fail due to i/o error. On a + * nondebug system, this will result in the space being leaked. + */ +void +zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + + ASSERT(zp->z_unlinked); + ASSERT(ZTOI(zp)->i_nlink == 0); + + VERIFY3U(0, ==, + zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); + +} + +/* + * Clean up any znodes that had no links when we either crashed or + * (force) umounted the file system. + */ +static void +zfs_unlinked_drain_task(void *arg) +{ + zfsvfs_t *zfsvfs = arg; + zap_cursor_t zc; + zap_attribute_t zap; + dmu_object_info_t doi; + znode_t *zp; + int error; + + ASSERT3B(zfsvfs->z_draining, ==, B_TRUE); + + /* + * Iterate over the contents of the unlinked set. + */ + for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); + zap_cursor_retrieve(&zc, &zap) == 0 && + zfsvfs->z_drain_state == ZFS_DRAIN_RUNNING; + zap_cursor_advance(&zc)) { + + /* + * See what kind of object we have in list + */ + + error = dmu_object_info(zfsvfs->z_os, + zap.za_first_integer, &doi); + if (error != 0) + continue; + + ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || + (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); + /* + * We need to re-mark these list entries for deletion, + * so we pull them back into core and set zp->z_unlinked. + */ + error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); + + /* + * We may pick up znodes that are already marked for deletion. + * This could happen during the purge of an extended attribute + * directory. All we need to do is skip over them, since they + * are already in the system marked z_unlinked. + */ + if (error != 0) + continue; + + zp->z_unlinked = B_TRUE; + + /* + * zrele() decrements the znode's ref count and may cause + * it to be synchronously freed. We interrupt freeing + * of this znode by checking the return value of + * dmu_objset_zfs_unmounting() in dmu_free_long_range() + * when an unmount is requested. + */ + zrele(zp); + ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE); + } + zap_cursor_fini(&zc); + + mutex_enter(&zfsvfs->z_drain_lock); + zfsvfs->z_drain_state = ZFS_DRAIN_SHUTDOWN; + cv_broadcast(&zfsvfs->z_drain_cv); + mutex_exit(&zfsvfs->z_drain_lock); +} + +/* + * Sets z_draining then tries to dispatch async unlinked drain. + * If that fails executes synchronous unlinked drain. + */ +void +zfs_unlinked_drain(zfsvfs_t *zfsvfs) +{ + ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE); + + mutex_enter(&zfsvfs->z_drain_lock); + ASSERT(zfsvfs->z_drain_state == ZFS_DRAIN_SHUTDOWN); + zfsvfs->z_drain_state = ZFS_DRAIN_RUNNING; + mutex_exit(&zfsvfs->z_drain_lock); + + if (taskq_dispatch( + dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)), + zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP) == 0) { + zfs_dbgmsg("async zfs_unlinked_drain dispatch failed"); + zfs_unlinked_drain_task(zfsvfs); + } +} + +/* + * Wait for the unlinked drain taskq task to stop. This will interrupt the + * unlinked set processing if it is in progress. 
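+ * The stop is requested by setting z_drain_state to ZFS_DRAIN_SHUTDOWN_REQ
+ * and waiting on z_drain_cv until the drain task moves the state to
+ * ZFS_DRAIN_SHUTDOWN.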
+ */ +void +zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs) +{ + ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE); + + mutex_enter(&zfsvfs->z_drain_lock); + while (zfsvfs->z_drain_state != ZFS_DRAIN_SHUTDOWN) { + zfsvfs->z_drain_state = ZFS_DRAIN_SHUTDOWN_REQ; + cv_wait(&zfsvfs->z_drain_cv, &zfsvfs->z_drain_lock); + } + mutex_exit(&zfsvfs->z_drain_lock); +} + +/* + * Delete the entire contents of a directory. Return a count + * of the number of entries that could not be deleted. If we encounter + * an error, return a count of at least one so that the directory stays + * in the unlinked set. + * + * NOTE: this function assumes that the directory is inactive, + * so there is no need to lock its entries before deletion. + * Also, it assumes the directory contents is *only* regular + * files. + */ +static int +zfs_purgedir(znode_t *dzp) +{ + zap_cursor_t zc; + zap_attribute_t zap; + znode_t *xzp; + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zfs_dirlock_t dl; + int skipped = 0; + int error; + + for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); + (error = zap_cursor_retrieve(&zc, &zap)) == 0; + zap_cursor_advance(&zc)) { + error = zfs_zget(zfsvfs, + ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); + if (error) { + skipped += 1; + continue; + } + + ASSERT(S_ISREG(ZTOI(xzp)->i_mode) || + S_ISLNK(ZTOI(xzp)->i_mode)); + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); + dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + /* Is this really needed ? */ + zfs_sa_upgrade_txholds(tx, xzp); + dmu_tx_mark_netfree(tx); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + zfs_zrele_async(xzp); + skipped += 1; + continue; + } + bzero(&dl, sizeof (dl)); + dl.dl_dzp = dzp; + dl.dl_name = zap.za_name; + + error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); + if (error) + skipped += 1; + dmu_tx_commit(tx); + + zfs_zrele_async(xzp); + } + zap_cursor_fini(&zc); + if (error != ENOENT) + skipped += 1; + return (skipped); +} + +void +zfs_rmnode(znode_t *zp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + objset_t *os = zfsvfs->z_os; + znode_t *xzp = NULL; + dmu_tx_t *tx; + uint64_t acl_obj; + uint64_t xattr_obj; + int error; + + /* + * If this is an attribute directory, purge its contents. + */ + if (S_ISDIR(zp->z_mode) && (zp->z_pflags & ZFS_XATTR)) { + if (zfs_purgedir(zp) != 0) { + /* + * Not enough space to delete some xattrs. + * Leave it in the unlinked set. + */ + zfs_znode_dmu_fini(zp); + + return; + } + } + + /* + * Free up all the data in the file. We don't do this for directories + * because we need truncate and remove to be in the same tx, like in + * zfs_znode_delete(). Otherwise, if we crash here we'll end up with + * an inconsistent truncated zap object in the delete queue. Note a + * truncated file is harmless since it only contains user data. + */ + if (S_ISREG(zp->z_mode)) { + error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); + if (error) { + /* + * Not enough space or we were interrupted by unmount. + * Leave the file in the unlinked set. + */ + zfs_znode_dmu_fini(zp); + return; + } + } + + /* + * If the file has extended attributes, we're going to unlink + * the xattr dir. 
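+	 * The xattr directory znode is looked up here so it can be marked
+	 * unlinked and added to the unlinked set in the same transaction
+	 * below.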
+ */ + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr_obj, sizeof (xattr_obj)); + if (error == 0 && xattr_obj) { + error = zfs_zget(zfsvfs, xattr_obj, &xzp); + ASSERT(error == 0); + } + + acl_obj = zfs_external_acl(zp); + + /* + * Set up the final transaction. + */ + tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + if (xzp) { + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); + dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); + } + if (acl_obj) + dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); + + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + /* + * Not enough space to delete the file. Leave it in the + * unlinked set, leaking it until the fs is remounted (at + * which point we'll call zfs_unlinked_drain() to process it). + */ + dmu_tx_abort(tx); + zfs_znode_dmu_fini(zp); + goto out; + } + + if (xzp) { + ASSERT(error == 0); + mutex_enter(&xzp->z_lock); + xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ + xzp->z_links = 0; /* no more links to it */ + VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + &xzp->z_links, sizeof (xzp->z_links), tx)); + mutex_exit(&xzp->z_lock); + zfs_unlinked_add(xzp, tx); + } + + mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock); + + /* + * Remove this znode from the unlinked set. If a has rollback has + * occurred while a file is open and unlinked. Then when the file + * is closed post rollback it will not exist in the rolled back + * version of the unlinked object. + */ + error = zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, + zp->z_id, tx); + VERIFY(error == 0 || error == ENOENT); + + uint64_t count; + if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) { + cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv); + } + + mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock); + + zfs_znode_delete(zp, tx); + + dmu_tx_commit(tx); +out: + if (xzp) + zfs_zrele_async(xzp); +} + +static uint64_t +zfs_dirent(znode_t *zp, uint64_t mode) +{ + uint64_t de = zp->z_id; + + if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE) + de |= IFTODT(mode) << 60; + return (de); +} + +/* + * Link zp into dl. Can fail in the following cases : + * - if zp has been unlinked. + * - if the number of entries with the same hash (aka. colliding entries) + * exceed the capacity of a leaf-block of fatzap and splitting of the + * leaf-block does not help. + */ +int +zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) +{ + znode_t *dzp = dl->dl_dzp; + zfsvfs_t *zfsvfs = ZTOZSB(zp); + uint64_t value; + int zp_is_dir = S_ISDIR(zp->z_mode); + sa_bulk_attr_t bulk[5]; + uint64_t mtime[2], ctime[2]; + int count = 0; + int error; + + mutex_enter(&zp->z_lock); + + if (!(flag & ZRENAMING)) { + if (zp->z_unlinked) { /* no new links to unlinked zp */ + ASSERT(!(flag & (ZNEW | ZEXISTS))); + mutex_exit(&zp->z_lock); + return (SET_ERROR(ENOENT)); + } + zp->z_links++; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &zp->z_links, sizeof (zp->z_links)); + } + + value = zfs_dirent(zp, zp->z_mode); + error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1, + &value, tx); + + /* + * zap_add could fail to add the entry if it exceeds the capacity of the + * leaf-block and zap_leaf_split() failed to help. + * The caller of this routine is responsible for failing the transaction + * which will rollback the SA updates done above. 
+ */ + if (error != 0) { + mutex_exit(&zp->z_lock); + return (error); + } + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, + &dzp->z_id, sizeof (dzp->z_id)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + + if (!(flag & ZNEW)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + ctime, sizeof (ctime)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, + ctime); + } + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + + mutex_exit(&zp->z_lock); + + mutex_enter(&dzp->z_lock); + dzp->z_size++; + if (zp_is_dir) + dzp->z_links++; + count = 0; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &dzp->z_size, sizeof (dzp->z_size)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &dzp->z_links, sizeof (dzp->z_links)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + mtime, sizeof (mtime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &dzp->z_pflags, sizeof (dzp->z_pflags)); + zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); + error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + mutex_exit(&dzp->z_lock); + + return (0); +} + +/* + * The match type in the code for this function should conform to: + * + * ------------------------------------------------------------------------ + * fs type | z_norm | lookup type | match type + * ---------|-------------|-------------|---------------------------------- + * CS !norm | 0 | 0 | 0 (exact) + * CS norm | formX | 0 | MT_NORMALIZE + * CI !norm | upper | !ZCIEXACT | MT_NORMALIZE + * CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE + * CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE + * CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE + * CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE + * CM !norm | upper | ZCILOOK | MT_NORMALIZE + * CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE + * CM norm | upper|formX | ZCILOOK | MT_NORMALIZE + * + * Abbreviations: + * CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed + * upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER) + * formX = unicode normalization form set on fs creation + */ +static int +zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, + int flag) +{ + int error; + + if (ZTOZSB(zp)->z_norm) { + matchtype_t mt = MT_NORMALIZE; + + if ((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE && + (flag & ZCIEXACT)) || + (ZTOZSB(zp)->z_case == ZFS_CASE_MIXED && + !(flag & ZCILOOK))) { + mt |= MT_MATCH_CASE; + } + + error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id, + dl->dl_name, mt, tx); + } else { + error = zap_remove(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, + tx); + } + + return (error); +} + +/* + * Unlink zp from dl, and mark zp for deletion if this was the last link. Can + * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY). + * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. + * If it's non-NULL, we use it to indicate whether the znode needs deletion, + * and it's the caller's job to do it. 
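+ * When 'unlinkedp' is supplied, the caller is expected to call
+ * zfs_unlinked_add() itself if it comes back B_TRUE.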
+ */ +int +zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, + boolean_t *unlinkedp) +{ + znode_t *dzp = dl->dl_dzp; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + int zp_is_dir = S_ISDIR(zp->z_mode); + boolean_t unlinked = B_FALSE; + sa_bulk_attr_t bulk[5]; + uint64_t mtime[2], ctime[2]; + int count = 0; + int error; + + if (!(flag & ZRENAMING)) { + mutex_enter(&zp->z_lock); + + if (zp_is_dir && !zfs_dirempty(zp)) { + mutex_exit(&zp->z_lock); + return (SET_ERROR(ENOTEMPTY)); + } + + /* + * If we get here, we are going to try to remove the object. + * First try removing the name from the directory; if that + * fails, return the error. + */ + error = zfs_dropname(dl, zp, dzp, tx, flag); + if (error != 0) { + mutex_exit(&zp->z_lock); + return (error); + } + + if (zp->z_links <= zp_is_dir) { + zfs_panic_recover("zfs: link count on %lu is %u, " + "should be at least %u", zp->z_id, + (int)zp->z_links, zp_is_dir + 1); + zp->z_links = zp_is_dir + 1; + } + if (--zp->z_links == zp_is_dir) { + zp->z_unlinked = B_TRUE; + zp->z_links = 0; + unlinked = B_TRUE; + } else { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, sizeof (zp->z_pflags)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, + ctime); + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &zp->z_links, sizeof (zp->z_links)); + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + count = 0; + ASSERT(error == 0); + mutex_exit(&zp->z_lock); + } else { + error = zfs_dropname(dl, zp, dzp, tx, flag); + if (error != 0) + return (error); + } + + mutex_enter(&dzp->z_lock); + dzp->z_size--; /* one dirent removed */ + if (zp_is_dir) + dzp->z_links--; /* ".." link from zp */ + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &dzp->z_links, sizeof (dzp->z_links)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + NULL, &dzp->z_size, sizeof (dzp->z_size)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + NULL, ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), + NULL, mtime, sizeof (mtime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); + zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); + error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + mutex_exit(&dzp->z_lock); + + if (unlinkedp != NULL) + *unlinkedp = unlinked; + else if (unlinked) + zfs_unlinked_add(zp, tx); + + return (0); +} + +/* + * Indicate whether the directory is empty. Works with or without z_lock + * held, but can only be consider a hint in the latter case. Returns true + * if only "." and ".." remain and there's no work in progress. + * + * The internal ZAP size, rather than zp->z_size, needs to be checked since + * some consumers (Lustre) do not strictly maintain an accurate SA_ZPL_SIZE. 
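+ * An outstanding dirlock (dzp->z_dirlocks != NULL) also counts as work
+ * in progress, so the check below returns B_FALSE in that case.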
+ */ +boolean_t +zfs_dirempty(znode_t *dzp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + uint64_t count; + int error; + + if (dzp->z_dirlocks != NULL) + return (B_FALSE); + + error = zap_count(zfsvfs->z_os, dzp->z_id, &count); + if (error != 0 || count != 0) + return (B_FALSE); + + return (B_TRUE); +} + +int +zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + znode_t *xzp; + dmu_tx_t *tx; + int error; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; +#ifdef DEBUG + uint64_t parent; +#endif + + *xzpp = NULL; + + if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))) + return (error); + + if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, + &acl_ids)) != 0) + return (error); + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) { + zfs_acl_ids_free(&acl_ids); + return (SET_ERROR(EDQUOT)); + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + return (error); + } + zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + +#ifdef DEBUG + error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parent, sizeof (parent)); + ASSERT(error == 0 && parent == zp->z_id); +#endif + + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, + sizeof (xzp->z_id), tx)); + + if (!zp->z_unlinked) + (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, + xzp, "", NULL, acl_ids.z_fuidp, vap); + + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); + +#ifdef __APPLE__ + /* + * OS X - attach the vnode _after_ committing the transaction + */ + zfs_znode_getvnode(xzp, zfsvfs); +#endif + + *xzpp = xzp; + + return (0); +} + +/* + * Return a znode for the extended attribute directory for zp. + * ** If the directory does not already exist, it is created ** + * + * IN: zp - znode to obtain attribute directory from + * cr - credentials of caller + * flags - flags from the VOP_LOOKUP call + * + * OUT: xipp - pointer to extended attribute znode + * + * RETURN: 0 on success + * error number on failure + */ +int +zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + znode_t *xzp; + zfs_dirlock_t *dl; + vattr_t va; + int error; +top: + error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); + if (error) + return (error); + + if (xzp != NULL) { + *xzpp = xzp; + zfs_dirent_unlock(dl); + return (0); + } + + if (!(flags & CREATE_XATTR_DIR)) { + zfs_dirent_unlock(dl); + return (SET_ERROR(ENOENT)); + } + + if (zfs_is_readonly(zfsvfs)) { + zfs_dirent_unlock(dl); + return (SET_ERROR(EROFS)); + } + + /* + * The ability to 'create' files in an attribute + * directory comes from the write_xattr permission on the base file. + * + * The ability to 'search' an attribute directory requires + * read_xattr permission on the base file. + * + * Once in a directory the ability to read/write attributes + * is controlled by the permissions on the attribute file. 
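+	 * The attribute directory itself is created below as a sticky
+	 * directory with mode 0777, owned by the base file's owner and group.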
+ */ + va.va_mask = ATTR_TYPE | ATTR_MODE | ATTR_UID | ATTR_GID; + va.va_type = VDIR; + va.va_mode = S_IFDIR | S_ISVTX | 0777; + zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); + + error = zfs_make_xattrdir(zp, &va, xzpp, cr); + zfs_dirent_unlock(dl); + + if (error == ERESTART) { + /* NB: we already did dmu_tx_wait() if necessary */ + goto top; + } + + return (error); +} + +/* + * Decide whether it is okay to remove within a sticky directory. + * + * In sticky directories, write access is not sufficient; + * you can remove entries from a directory only if: + * + * you own the directory, + * you own the entry, + * you have write access to the entry, + * or you are privileged (checked in secpolicy...). + * + * The function returns 0 if remove access is granted. + */ +int +zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) +{ + uid_t uid; + uid_t downer; + uid_t fowner; + zfsvfs_t *zfsvfs = ZTOZSB(zdp); + + if (zfsvfs->z_replay) + return (0); + + if ((zdp->z_mode & S_ISVTX) == 0) + return (0); + + downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); + fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); + + if ((uid = crgetuid(cr)) == downer || uid == fowner || + (vnode_isreg(ZTOV(zp)) && + zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) + return (0); + else + return (secpolicy_vnode_remove(ZTOV(zp), cr)); +} diff --git a/module/os/macos/zfs/zfs_file_os.c b/module/os/macos/zfs/zfs_file_os.c new file mode 100644 index 0000000000..46ddcf033e --- /dev/null +++ b/module/os/macos/zfs/zfs_file_os.c @@ -0,0 +1,405 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include + +#define FILE_FD_NOTUSED -1 + +/* + * Open file + * + * path - fully qualified path to file + * flags - file attributes O_READ / O_WRITE / O_EXCL + * fpp - pointer to return file pointer + * + * Returns 0 on success underlying error on failure. + */ +int +zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) +{ + struct vnode *vp = NULL; + vfs_context_t vctx; + int error; + + if (!(flags & O_CREAT) && (flags & O_WRONLY)) + flags |= O_EXCL; + + vctx = vfs_context_create((vfs_context_t)0); + error = vnode_open(path, flags, mode, 0, &vp, vctx); + (void) vfs_context_rele(vctx); + if (error == 0 && + vp != NULL) { + zfs_file_t *zf; + zf = (zfs_file_t *)kmem_zalloc(sizeof (zfs_file_t), KM_SLEEP); + zf->f_vnode = vp; + zf->f_fd = FILE_FD_NOTUSED; + *fpp = zf; + } + + /* Optional, implemented O_APPEND: set offset to file size. */ + VERIFY0(flags & O_APPEND); + + return (error); +} + +void +zfs_file_close(zfs_file_t *fp) +{ + vfs_context_t vctx; + vctx = vfs_context_create((vfs_context_t)0); + vnode_close(fp->f_vnode, fp->f_writes ? 
FWRITE : 0, vctx); + (void) vfs_context_rele(vctx); + + kmem_free(fp, sizeof (zfs_file_t)); +} + +static int +zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, + loff_t *off, ssize_t *resid) +{ + int error; + ssize_t local_resid = count; + + /* If we came with a 'fd' use it, as it can handle pipes. */ + if (fp->f_fd == FILE_FD_NOTUSED) + error = zfs_vn_rdwr(UIO_WRITE, fp->f_vnode, (caddr_t)buf, count, + *off, UIO_SYSSPACE, 0, RLIM64_INFINITY, + kcred, &local_resid); + else + error = spl_vn_rdwr(UIO_WRITE, fp, (caddr_t)buf, count, + *off, UIO_SYSSPACE, 0, RLIM64_INFINITY, + kcred, &local_resid); + + if (error != 0) + return (SET_ERROR(error)); + + fp->f_writes = 1; + + if (resid != NULL) + *resid = local_resid; + else if (local_resid != 0) + return (SET_ERROR(EIO)); + + *off += count - local_resid; + + return (0); +} + +/* + * Stateful write - use os internal file pointer to determine where to + * write and update on successful completion. + * + * fp - pointer to file (pipe, socket, etc) to write to + * buf - buffer to write + * count - # of bytes to write + * resid - pointer to count of unwritten bytes (if short write) + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) +{ + loff_t off = fp->f_offset; + ssize_t rc; + + rc = zfs_file_write_impl(fp, buf, count, &off, resid); + if (rc == 0) + fp->f_offset = off; + + return (SET_ERROR(rc)); +} + +/* + * Stateless write - os internal file pointer is not updated. + * + * fp - pointer to file (pipe, socket, etc) to write to + * buf - buffer to write + * count - # of bytes to write + * off - file offset to write to (only valid for seekable types) + * resid - pointer to count of unwritten bytes + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, + ssize_t *resid) +{ + return (zfs_file_write_impl(fp, buf, count, &off, resid)); +} + +static ssize_t +zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off, + ssize_t *resid) +{ + int error; + ssize_t local_resid = count; + + /* If we have realvp, it's faster to call its spl_vn_rdwr */ + if (fp->f_fd == FILE_FD_NOTUSED) + error = zfs_vn_rdwr(UIO_READ, fp->f_vnode, buf, count, + *off, UIO_SYSSPACE, 0, RLIM64_INFINITY, + kcred, &local_resid); + else + error = spl_vn_rdwr(UIO_READ, fp, buf, count, + *off, UIO_SYSSPACE, 0, RLIM64_INFINITY, + kcred, &local_resid); + + if (error) + return (SET_ERROR(error)); + + *off += count - local_resid; + if (resid != NULL) + *resid = local_resid; + + return (SET_ERROR(0)); +} + +/* + * Stateful read - use os internal file pointer to determine where to + * read and update on successful completion. + * + * fp - pointer to file (pipe, socket, etc) to read from + * buf - buffer to write + * count - # of bytes to read + * resid - pointer to count of unread bytes (if short read) + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid) +{ + loff_t off = fp->f_offset; + int rc; + + rc = zfs_file_read_impl(fp, buf, count, &off, resid); + if (rc == 0) + fp->f_offset = off; + return (rc); +} + +/* + * Stateless read - os internal file pointer is not updated. 
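+ * The read position comes solely from the 'off' argument; fp->f_offset
+ * is left untouched, unlike zfs_file_read() above.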
+ * + * fp - pointer to file (pipe, socket, etc) to read from + * buf - buffer to write + * count - # of bytes to write + * off - file offset to read from (only valid for seekable types) + * resid - pointer to count of unwritten bytes (if short read) + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off, + ssize_t *resid) +{ + return (zfs_file_read_impl(fp, buf, count, &off, resid)); +} + +/* + * lseek - set / get file pointer + * + * fp - pointer to file (pipe, socket, etc) to read from + * offp - value to seek to, returns current value plus passed offset + * whence - see man pages for standard lseek whence values + * + * Returns 0 on success errno on failure (ESPIPE for non seekable types) + */ +int +zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence) +{ + if (*offp < 0 || *offp > MAXOFFSET_T) + return (EINVAL); + + switch (whence) { + case SEEK_SET: + fp->f_offset = *offp; + break; + case SEEK_CUR: + fp->f_offset += *offp; + *offp = fp->f_offset; + break; + case SEEK_END: + /* Implement this if eventually needed: get filesize */ + VERIFY0(whence == SEEK_END); + break; + } + + return (0); +} + +/* + * Get file attributes + * + * filp - file pointer + * zfattr - pointer to file attr structure + * + * Currently only used for fetching size and file mode. + * + * Returns 0 on success or error code of underlying getattr call on failure. + */ +int +zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr) +{ + vfs_context_t vctx; + int rc; + vattr_t vap; + + VATTR_INIT(&vap); + VATTR_WANTED(&vap, va_size); + VATTR_WANTED(&vap, va_mode); + + vctx = vfs_context_create((vfs_context_t)0); + rc = vnode_getattr(filp->f_vnode, &vap, vctx); + (void) vfs_context_rele(vctx); + + if (rc) + return (rc); + + zfattr->zfa_size = vap.va_size; + zfattr->zfa_mode = vap.va_mode; + + return (0); +} + +/* + * Sync file to disk + * + * filp - file pointer + * flags - O_SYNC and or O_DSYNC + * + * Returns 0 on success or error code of underlying sync call on failure. + */ +int +zfs_file_fsync(zfs_file_t *filp, int flags) +{ + vfs_context_t vctx; + int rc; + + vctx = vfs_context_create((vfs_context_t)0); + rc = VNOP_FSYNC(filp->f_vnode, (flags == FSYNC), vctx); + (void) vfs_context_rele(vctx); + return (rc); +} + +/* + * fallocate - allocate or free space on disk + * + * fp - file pointer + * mode (non-standard options for hole punching etc) + * offset - offset to start allocating or freeing from + * len - length to free / allocate + * + * OPTIONAL + */ +int +zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) +{ + return (0); +} + +/* + * Request current file pointer offset + * + * fp - pointer to file + * + * Returns current file offset. + */ +loff_t +zfs_file_off(zfs_file_t *fp) +{ + return (fp->f_offset); +} + +/* + * Request file pointer private data + * + * fp - pointer to file + * + * Returns pointer to file private data. + */ +extern kmutex_t zfsdev_state_lock; +dev_t zfsdev_get_dev(void); + +void * +zfs_file_private(zfs_file_t *fp) +{ + dev_t dev; + void *zs; + + dev = zfsdev_get_dev(); + printf("%s: fetching dev x%x\n", __func__, dev); + if (dev == 0) + return (NULL); + + mutex_enter(&zfsdev_state_lock); + zs = zfsdev_get_state(minor(dev), ZST_ALL); + mutex_exit(&zfsdev_state_lock); + printf("%s: searching minor %d %p\n", __func__, minor(dev), zs); + + return (zs); +} + +/* + * unlink file + * + * path - fully qualified file path + * + * Returns 0 on success. 
+ * + * OPTIONAL + */ +int +zfs_file_unlink(const char *path) +{ + return (EOPNOTSUPP); +} + +/* + * Get reference to file pointer + * + * fd - input file descriptor + * fpp - pointer to file pointer + * + * Returns 0 on success EBADF on failure. + */ +int +zfs_file_get(int fd, zfs_file_t **fpp) +{ + *fpp = getf(fd); + if (*fpp == NULL) + return (EBADF); + return (0); +} + +/* + * Drop reference to file pointer + * + * fd - input file descriptor + */ +void +zfs_file_put(int fd) +{ + releasef(fd); +} diff --git a/module/os/macos/zfs/zfs_fuid_os.c b/module/os/macos/zfs/zfs_fuid_os.c new file mode 100644 index 0000000000..ebd09abd65 --- /dev/null +++ b/module/os/macos/zfs/zfs_fuid_os.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#ifdef _KERNEL +#include +#include +#include +#endif +#include + +uint64_t +zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type, + cred_t *cr, zfs_fuid_info_t **fuidp) +{ + uid_t id; + + VERIFY(type == ZFS_OWNER || type == ZFS_GROUP); + + id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr); + + if (IS_EPHEMERAL(id)) + return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY); + + return ((uint64_t)id); +} diff --git a/module/os/macos/zfs/zfs_ioctl_os.c b/module/os/macos/zfs/zfs_ioctl_os.c new file mode 100644 index 0000000000..57ab905bd2 --- /dev/null +++ b/module/os/macos/zfs/zfs_ioctl_os.c @@ -0,0 +1,392 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013, 2020 Jorgen Lundman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +int zfs_major = 0; +int zfs_bmajor = 0; +static void *zfs_devnode = NULL; +#define ZFS_MAJOR -24 + +boolean_t +zfs_vfs_held(zfsvfs_t *zfsvfs) +{ + return (zfsvfs->z_vfs != NULL); +} + +int +zfs_vfs_ref(zfsvfs_t **zfvp) +{ + int error = 0; + + if (*zfvp == NULL || (*zfvp)->z_vfs == NULL) + return (SET_ERROR(ESRCH)); + + error = vfs_busy((*zfvp)->z_vfs, LK_NOWAIT); + if (error != 0) { + *zfvp = NULL; + error = SET_ERROR(ESRCH); + } + return (error); +} + +void +zfs_vfs_rele(zfsvfs_t *zfsvfs) +{ + vfs_unbusy(zfsvfs->z_vfs); +} + +static uint_t zfsdev_private_tsd; + +static int +zfsdev_state_init(dev_t dev) +{ + zfsdev_state_t *zs, *zsprev = NULL; + minor_t minor; + boolean_t newzs = B_FALSE; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + minor = minor(dev); + if (minor == 0) + return (SET_ERROR(ENXIO)); + + for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { + if (zs->zs_minor == -1) + break; + zsprev = zs; + } + + if (!zs) { + zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP); + newzs = B_TRUE; + } + + /* Store this dev_t in tsd, so zfs_get_private() can retrieve it */ + tsd_set(zfsdev_private_tsd, (void *)(uintptr_t)dev); + + zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); + zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); + + /* + * In order to provide for lock-free concurrent read access + * to the minor list in zfsdev_get_state_impl(), new entries + * must be completely written before linking them into the + * list whereas existing entries are already linked; the last + * operation must be updating zs_minor (from -1 to the new + * value). 
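+	 * For a freshly allocated entry this means filling in every field
+	 * before linking it in via zsprev->zs_next; a recycled entry only
+	 * needs zs_minor stored last.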
+ */ + if (newzs) { + zs->zs_minor = minor; + zsprev->zs_next = zs; + } else { + zs->zs_minor = minor; + } + + return (0); +} + +dev_t +zfsdev_get_dev(void) +{ + return ((dev_t)tsd_get(zfsdev_private_tsd)); +} + +static int +zfsdev_state_destroy(dev_t dev) +{ + zfsdev_state_t *zs; + + ASSERT(MUTEX_HELD(&zfsdev_state_lock)); + + tsd_set(zfsdev_private_tsd, NULL); + + zs = zfsdev_get_state(minor(dev), ZST_ALL); + + if (!zs) { + printf("%s: no cleanup for minor x%x\n", __func__, + minor(dev)); + return (0); + } + + ASSERT(zs != NULL); + if (zs->zs_minor != -1) { + zs->zs_minor = -1; + zfs_onexit_destroy(zs->zs_onexit); + zfs_zevent_destroy(zs->zs_zevent); + } + return (0); +} + +static int +zfsdev_open(dev_t dev, int flags, int devtype, struct proc *p) +{ + int error; + + mutex_enter(&zfsdev_state_lock); + if (zfsdev_get_state(minor(dev), ZST_ALL)) { + mutex_exit(&zfsdev_state_lock); + return (0); + } + error = zfsdev_state_init(dev); + mutex_exit(&zfsdev_state_lock); + + return (-error); +} + +static int +zfsdev_release(dev_t dev, int flags, int devtype, struct proc *p) +{ + int error; + + mutex_enter(&zfsdev_state_lock); + error = zfsdev_state_destroy(dev); + mutex_exit(&zfsdev_state_lock); + + return (-error); +} + +static int +zfsdev_ioctl(dev_t dev, ulong_t cmd, caddr_t arg, __unused int xflag, + struct proc *p) +{ + uint_t len, vecnum; + zfs_iocparm_t *zit; + zfs_cmd_t *zc; + int error, rc; + user_addr_t uaddr; + + /* Translate XNU ioctl to enum table: */ + len = IOCPARM_LEN(cmd); + vecnum = cmd - _IOWR('Z', ZFS_IOC_FIRST, zfs_iocparm_t); + zit = (void *)arg; + uaddr = (user_addr_t)zit->zfs_cmd; + + if (len != sizeof (zfs_iocparm_t)) { + /* + * printf("len %d vecnum: %d sizeof (zfs_cmd_t) %lu\n", + * len, vecnum, sizeof (zfs_cmd_t)); + */ + /* + * We can get plenty raw ioctl()s here, for exaple open() will + * cause spec_open() to issue DKIOCGETTHROTTLEMASK. + */ + return (EINVAL); + } + + zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); + + if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) { + error = SET_ERROR(EFAULT); + goto out; + } + + error = zfsdev_ioctl_common(vecnum, zc, 0); + + rc = copyout(zc, uaddr, sizeof (*zc)); + + if (error == 0 && rc != 0) + error = -SET_ERROR(EFAULT); + + /* + * OSX must return(0) or XNU doesn't copyout(). Save the real + * rc to userland + */ + zit->zfs_ioc_error = error; + error = 0; + +out: + kmem_free(zc, sizeof (zfs_cmd_t)); + return (error); + +} + +/* for spa_iokit_dataset_proxy_create */ +#include +#include + +static int +zfs_ioc_osx_proxy_dataset(zfs_cmd_t *zc) +{ + int error; + const char *osname; + + /* XXX Get osname */ + osname = zc->zc_name; + + /* Create new virtual disk, and return /dev/disk name */ + error = zfs_osx_proxy_create(osname); + + if (!error) + error = zfs_osx_proxy_get_bsdname(osname, + zc->zc_value, sizeof (zc->zc_value)); + if (error) + printf("%s: Created virtual disk '%s' for '%s'\n", __func__, + zc->zc_value, osname); + + return (error); +} + +void +zfs_ioctl_init_os(void) +{ + /* APPLE Specific ioctls */ + zfs_ioctl_register_pool(ZFS_IOC_PROXY_DATASET, + zfs_ioc_osx_proxy_dataset, zfs_secpolicy_config, + B_FALSE, POOL_CHECK_NONE); +} + +/* ioctl handler for block device. 
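zfsdev_ioctl() above expects XNU to hand it a zfs_iocparm_t whose zfs_cmd member points at the real zfs_cmd_t left in user memory, and it always returns 0 so that XNU copies the parameter block back out; the real status travels in zfs_ioc_error. A hedged userland sketch of driving that protocol (field names follow the diff; the header path and exact zfs_iocparm_t layout may differ):

#include <sys/ioctl.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/zfs_ioctl.h>	/* zfs_cmd_t, zfs_iocparm_t, ZFS_IOC_FIRST; path may differ */

static int
example_zfs_ioctl(int zfs_fd, unsigned vecnum, zfs_cmd_t *zc)
{
	zfs_iocparm_t parm;
	/* must be the inverse of the vecnum translation in zfsdev_ioctl() */
	unsigned long req = _IOWR('Z', ZFS_IOC_FIRST + vecnum, zfs_iocparm_t);

	memset(&parm, 0, sizeof (parm));
	parm.zfs_cmd = (uint64_t)(uintptr_t)zc;	/* kernel copyin()s from here */

	if (ioctl(zfs_fd, req, &parm) != 0)
		return (errno);			/* transport-level failure only */

	return (parm.zfs_ioc_error);		/* real status saved by the handler */
}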
Relay to zvol */ +static int +zfsdev_bioctl(dev_t dev, ulong_t cmd, caddr_t data, + __unused int flag, struct proc *p) +{ + return (zvol_os_ioctl(dev, cmd, data, 1, NULL, NULL)); +} + +static struct bdevsw zfs_bdevsw = { + .d_open = zvol_os_open, + .d_close = zvol_os_close, + .d_strategy = zvol_os_strategy, + .d_ioctl = zfsdev_bioctl, /* block ioctl handler */ + .d_dump = eno_dump, + .d_psize = zvol_os_get_volume_blocksize, + .d_type = D_DISK, +}; + +static struct cdevsw zfs_cdevsw = { + .d_open = zfsdev_open, + .d_close = zfsdev_release, + .d_read = zvol_os_read, + .d_write = zvol_os_write, + .d_ioctl = zfsdev_ioctl, + .d_stop = eno_stop, + .d_reset = eno_reset, + .d_ttys = NULL, + .d_select = eno_select, + .d_mmap = eno_mmap, + .d_strategy = eno_strat, + .d_reserved_1 = eno_getc, + .d_reserved_2 = eno_putc, + .d_type = D_DISK +}; + +/* Callback to create a unique minor for each open */ +static int +zfs_devfs_clone(__unused dev_t dev, int action) +{ + static minor_t minorx; + + if (action == DEVFS_CLONE_ALLOC) { + mutex_enter(&zfsdev_state_lock); + minorx = zfsdev_minor_alloc(); + mutex_exit(&zfsdev_state_lock); + return (minorx); + } + return (-1); +} + +int +zfsdev_attach(void) +{ + dev_t dev; + + zfs_bmajor = bdevsw_add(-1, &zfs_bdevsw); + zfs_major = cdevsw_add_with_bdev(-1, &zfs_cdevsw, zfs_bmajor); + + if (zfs_major < 0) { + printf("ZFS: zfs_attach() failed to allocate a major number\n"); + return (-1); + } + + dev = makedev(zfs_major, 0); /* Get the device number */ + zfs_devnode = devfs_make_node_clone(dev, DEVFS_CHAR, UID_ROOT, + GID_WHEEL, 0666, zfs_devfs_clone, "zfs", 0); + if (!zfs_devnode) { + printf("ZFS: devfs_make_node() failed\n"); + return (-1); + } + + wrap_avl_init(); + wrap_unicode_init(); + wrap_nvpair_init(); + wrap_zcommon_init(); + wrap_icp_init(); + wrap_lua_init(); + + tsd_create(&zfsdev_private_tsd, NULL); + + kstat_osx_init(); + return (0); +} + +void +zfsdev_detach(void) +{ + kstat_osx_fini(); + + tsd_destroy(&zfsdev_private_tsd); + + wrap_lua_fini(); + wrap_icp_fini(); + wrap_zcommon_fini(); + wrap_nvpair_fini(); + wrap_unicode_fini(); + wrap_avl_fini(); + + if (zfs_devnode) { + devfs_remove(zfs_devnode); + zfs_devnode = NULL; + } + if (zfs_major) { + (void) cdevsw_remove(zfs_major, &zfs_cdevsw); + zfs_major = 0; + } +} diff --git a/module/os/macos/zfs/zfs_kstat_osx.c b/module/os/macos/zfs/zfs_kstat_osx.c new file mode 100644 index 0000000000..ea37f32963 --- /dev/null +++ b/module/os/macos/zfs/zfs_kstat_osx.c @@ -0,0 +1,883 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
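devfs_make_node_clone() above registers /dev/zfs as a cloning node, so zfs_devfs_clone() hands every open() a fresh minor and zfsdev_open() builds a private zfsdev_state entry (onexit and zevent cursors) keyed by that minor. An illustrative userland view of why that matters; nothing here is specific to the patch beyond the /dev/zfs node name:

#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	int a = open("/dev/zfs", O_RDWR);
	int b = open("/dev/zfs", O_RDWR);	/* distinct minor, distinct state */

	/* each descriptor can now hold its own zevent position, onexit list */

	close(b);	/* zfsdev_release() tears down only b's minor */
	close(a);
	return (0);
}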
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2014, 2020 Jorgen Lundman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _KERNEL +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +/* + * In Solaris the tunable are set via /etc/system. Until we have a load + * time configuration, we add them to writable kstat tunables. + * + * This table is more or less populated from IllumOS mdb zfs_params sources + * https://github.com/illumos/illumos-gate/blob/master/ + * usr/src/cmd/mdb/common/modules/zfs/zfs.c#L336-L392 + * + */ + + + +osx_kstat_t osx_kstat = { + { "spa_version", KSTAT_DATA_UINT64 }, + { "zpl_version", KSTAT_DATA_UINT64 }, + + { "active_vnodes", KSTAT_DATA_UINT64 }, + { "vnop_debug", KSTAT_DATA_UINT64 }, + { "reclaim_nodes", KSTAT_DATA_UINT64 }, + { "ignore_negatives", KSTAT_DATA_UINT64 }, + { "ignore_positives", KSTAT_DATA_UINT64 }, + { "create_negatives", KSTAT_DATA_UINT64 }, + { "force_formd_normalized", KSTAT_DATA_UINT64 }, + { "skip_unlinked_drain", KSTAT_DATA_UINT64 }, + { "use_system_sync", KSTAT_DATA_UINT64 }, + + { "zfs_arc_max", KSTAT_DATA_UINT64 }, + { "zfs_arc_min", KSTAT_DATA_UINT64 }, + { "zfs_arc_meta_limit", KSTAT_DATA_UINT64 }, + { "zfs_arc_meta_min", KSTAT_DATA_UINT64 }, + { "zfs_arc_grow_retry", KSTAT_DATA_UINT64 }, + { "zfs_arc_shrink_shift", KSTAT_DATA_UINT64 }, + { "zfs_arc_p_min_shift", KSTAT_DATA_UINT64 }, + { "zfs_arc_average_blocksize", KSTAT_DATA_UINT64 }, + + { "l2arc_write_max", KSTAT_DATA_UINT64 }, + { "l2arc_write_boost", KSTAT_DATA_UINT64 }, + { "l2arc_headroom", KSTAT_DATA_UINT64 }, + { "l2arc_headroom_boost", KSTAT_DATA_UINT64 }, + { "l2arc_feed_secs", KSTAT_DATA_UINT64 }, + { "l2arc_feed_min_ms", KSTAT_DATA_UINT64 }, + + { "max_active", KSTAT_DATA_UINT64 }, + { "sync_read_min_active", KSTAT_DATA_UINT64 }, + { "sync_read_max_active", KSTAT_DATA_UINT64 }, + { "sync_write_min_active", KSTAT_DATA_UINT64 }, + { "sync_write_max_active", KSTAT_DATA_UINT64 }, + { "async_read_min_active", KSTAT_DATA_UINT64 }, + { "async_read_max_active", KSTAT_DATA_UINT64 }, + { "async_write_min_active", KSTAT_DATA_UINT64 }, + { "async_write_max_active", KSTAT_DATA_UINT64 }, + { "scrub_min_active", KSTAT_DATA_UINT64 }, + { "scrub_max_active", KSTAT_DATA_UINT64 }, + { "async_write_min_dirty_pct", KSTAT_DATA_INT64 }, + { "async_write_max_dirty_pct", KSTAT_DATA_INT64 }, + { "aggregation_limit", KSTAT_DATA_INT64 }, + { "read_gap_limit", KSTAT_DATA_INT64 }, + { "write_gap_limit", KSTAT_DATA_INT64 }, + + {"arc_lotsfree_percent", KSTAT_DATA_INT64 }, + {"zfs_dirty_data_max", KSTAT_DATA_INT64 }, + {"zfs_delay_max_ns", KSTAT_DATA_INT64 }, + {"zfs_delay_min_dirty_percent", KSTAT_DATA_INT64 }, + {"zfs_delay_scale", KSTAT_DATA_INT64 }, + {"spa_asize_inflation", KSTAT_DATA_INT64 }, + {"zfs_prefetch_disable", KSTAT_DATA_INT64 }, + {"zfetch_max_streams", KSTAT_DATA_INT64 }, + {"zfetch_min_sec_reap", KSTAT_DATA_INT64 }, + {"zfetch_array_rd_sz", KSTAT_DATA_INT64 }, + {"zfs_default_bs", KSTAT_DATA_INT64 }, + {"zfs_default_ibs", KSTAT_DATA_INT64 }, + {"metaslab_aliquot", KSTAT_DATA_INT64 }, + {"spa_max_replication_override", KSTAT_DATA_INT64 }, + {"spa_mode_global", KSTAT_DATA_INT64 }, + {"zfs_flags", KSTAT_DATA_INT64 }, + {"zfs_txg_timeout", KSTAT_DATA_INT64 }, + {"zfs_vdev_cache_max", 
KSTAT_DATA_INT64 }, + {"zfs_vdev_cache_size", KSTAT_DATA_INT64 }, + {"zfs_vdev_cache_bshift", KSTAT_DATA_INT64 }, + {"vdev_mirror_shift", KSTAT_DATA_INT64 }, + {"zfs_scrub_limit", KSTAT_DATA_INT64 }, + {"zfs_no_scrub_io", KSTAT_DATA_INT64 }, + {"zfs_no_scrub_prefetch", KSTAT_DATA_INT64 }, + {"fzap_default_block_shift", KSTAT_DATA_INT64 }, + {"zfs_immediate_write_sz", KSTAT_DATA_INT64 }, + {"zfs_read_chunk_size", KSTAT_DATA_INT64 }, + {"zfs_nocacheflush", KSTAT_DATA_INT64 }, + {"zil_replay_disable", KSTAT_DATA_INT64 }, + {"metaslab_df_alloc_threshold", KSTAT_DATA_INT64 }, + {"metaslab_df_free_pct", KSTAT_DATA_INT64 }, + {"zio_injection_enabled", KSTAT_DATA_INT64 }, + {"zvol_immediate_write_sz", KSTAT_DATA_INT64 }, + + { "l2arc_noprefetch", KSTAT_DATA_INT64 }, + { "l2arc_feed_again", KSTAT_DATA_INT64 }, + { "l2arc_norw", KSTAT_DATA_INT64 }, + + {"zfs_recover", KSTAT_DATA_INT64 }, + + {"zfs_free_bpobj_enabled", KSTAT_DATA_INT64 }, + + {"zfs_send_corrupt_data", KSTAT_DATA_UINT64 }, + {"zfs_send_queue_length", KSTAT_DATA_UINT64 }, + {"zfs_recv_queue_length", KSTAT_DATA_UINT64 }, + + {"zvol_inhibit_dev", KSTAT_DATA_UINT64 }, + {"zfs_send_set_freerecords_bit", KSTAT_DATA_UINT64 }, + + {"zfs_write_implies_delete_child", KSTAT_DATA_UINT64 }, + {"zfs_send_holes_without_birth_time", KSTAT_DATA_UINT64 }, + + {"dbuf_cache_max_bytes", KSTAT_DATA_UINT64 }, + + {"zfs_vdev_queue_depth_pct", KSTAT_DATA_UINT64 }, + {"zio_dva_throttle_enabled", KSTAT_DATA_UINT64 }, + + {"zfs_lua_max_instrlimit", KSTAT_DATA_UINT64 }, + {"zfs_lua_max_memlimit", KSTAT_DATA_UINT64 }, + + {"zfs_trim_extent_bytes_max", KSTAT_DATA_UINT64 }, + {"zfs_trim_extent_bytes_min", KSTAT_DATA_UINT64 }, + {"zfs_trim_metaslab_skip", KSTAT_DATA_UINT64 }, + {"zfs_trim_txg_batch", KSTAT_DATA_UINT64 }, + {"zfs_trim_queue_limit", KSTAT_DATA_UINT64 }, + + {"zfs_send_unmodified_spill_blocks", KSTAT_DATA_UINT64 }, + {"zfs_special_class_metadata_reserve_pct", KSTAT_DATA_UINT64 }, + + {"zfs_vdev_raidz_impl", KSTAT_DATA_STRING }, + {"icp_gcm_impl", KSTAT_DATA_STRING }, + {"icp_aes_impl", KSTAT_DATA_STRING }, + {"zfs_fletcher_4_impl", KSTAT_DATA_STRING }, + + {"zfs_expire_snapshot", KSTAT_DATA_UINT64 }, + {"zfs_admin_snapshot", KSTAT_DATA_UINT64 }, + {"zfs_auto_snapshot", KSTAT_DATA_UINT64 }, + + {"zfs_spa_discard_memory_limit", KSTAT_DATA_UINT64 }, + {"zfs_async_block_max_blocks", KSTAT_DATA_UINT64 }, + {"zfs_initialize_chunk_size", KSTAT_DATA_UINT64 }, + {"zfs_scan_suspend_progress", KSTAT_DATA_UINT64 }, + {"zfs_removal_suspend_progress", KSTAT_DATA_UINT64 }, + {"zfs_livelist_max_entries", KSTAT_DATA_UINT64 }, + + {"zfs_allow_redacted_dataset_mount", KSTAT_DATA_UINT64 }, + {"zfs_checksum_events_per_second", KSTAT_DATA_UINT64 }, + {"zfs_commit_timeout_pct", KSTAT_DATA_UINT64 }, + {"zfs_compressed_arc_enabled", KSTAT_DATA_UINT64 }, + {"zfs_condense_indirect_commit_entry_delay_ms", KSTAT_DATA_UINT64 }, + {"zfs_condense_min_mapping_bytes", KSTAT_DATA_UINT64 }, + {"zfs_deadman_checktime_ms", KSTAT_DATA_UINT64 }, + {"zfs_deadman_failmode", KSTAT_DATA_STRING }, + {"zfs_deadman_synctime_ms", KSTAT_DATA_UINT64 }, + {"zfs_deadman_ziotime_ms", KSTAT_DATA_UINT64 }, + {"zfs_disable_ivset_guid_check", KSTAT_DATA_UINT64 }, + {"zfs_initialize_value", KSTAT_DATA_UINT64 }, + {"zfs_keep_log_spacemaps_at_export", KSTAT_DATA_UINT64 }, + {"l2arc_rebuild_blocks_min_l2size", KSTAT_DATA_UINT64 }, + {"l2arc_rebuild_enabled", KSTAT_DATA_UINT64 }, + {"l2arc_trim_ahead", KSTAT_DATA_UINT64 }, + {"zfs_livelist_condense_new_alloc", KSTAT_DATA_UINT64 }, + 
{"zfs_livelist_condense_sync_cancel", KSTAT_DATA_UINT64 }, + {"zfs_livelist_condense_sync_pause", KSTAT_DATA_UINT64 }, + {"zfs_livelist_condense_zthr_cancel", KSTAT_DATA_UINT64 }, + {"zfs_livelist_condense_zthr_pause", KSTAT_DATA_UINT64 }, + {"zfs_livelist_min_percent_shared", KSTAT_DATA_UINT64 }, + {"zfs_max_dataset_nesting", KSTAT_DATA_UINT64 }, + {"zfs_max_missing_tvds", KSTAT_DATA_UINT64 }, + {"metaslab_debug_load", KSTAT_DATA_UINT64 }, + {"metaslab_force_ganging", KSTAT_DATA_UINT64 }, + {"zfs_multihost_fail_intervals", KSTAT_DATA_UINT64 }, + {"zfs_multihost_import_intervals", KSTAT_DATA_UINT64 }, + {"zfs_multihost_interval", KSTAT_DATA_UINT64 }, + {"zfs_override_estimate_recordsize", KSTAT_DATA_UINT64 }, + {"zfs_remove_max_segment", KSTAT_DATA_UINT64 }, + {"zfs_resilver_min_time_ms", KSTAT_DATA_UINT64 }, + {"zfs_scan_legacy", KSTAT_DATA_UINT64 }, + {"zfs_scan_vdev_limit", KSTAT_DATA_UINT64 }, + {"zfs_slow_io_events_per_second", KSTAT_DATA_UINT64 }, + {"spa_load_verify_data", KSTAT_DATA_UINT64 }, + {"spa_load_verify_metadata", KSTAT_DATA_UINT64 }, + {"zfs_unlink_suspend_progress", KSTAT_DATA_UINT64 }, + {"zfs_vdev_min_ms_count", KSTAT_DATA_UINT64 }, + {"vdev_validate_skip", KSTAT_DATA_UINT64 }, + {"zfs_zevent_len_max", KSTAT_DATA_UINT64 }, + {"zio_slow_io_ms", KSTAT_DATA_UINT64 }, + +}; + + +extern void kstat_named_setstr(kstat_named_t *knp, const char *src); +extern int zfs_vdev_raidz_impl_set(const char *val); +extern int zfs_vdev_raidz_impl_get(char *buffer, int max); +extern int icp_gcm_impl_set(const char *val); +extern int icp_gcm_impl_get(char *buffer, int max); +extern int icp_aes_impl_set(const char *val); +extern int icp_aes_impl_get(char *buffer, int max); +extern int zfs_fletcher_4_impl_set(const char *val); +extern int zfs_fletcher_4_impl_get(char *buffer, int max); + +static char vdev_raidz_string[80] = { 0 }; +static char icp_gcm_string[80] = { 0 }; +static char icp_aes_string[80] = { 0 }; +static char zfs_fletcher_4_string[80] = { 0 }; + +static kstat_t *osx_kstat_ksp; + +#if !defined(__OPTIMIZE__) +#pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif + +extern kstat_t *arc_ksp; + +static int osx_kstat_update(kstat_t *ksp, int rw) +{ + osx_kstat_t *ks = ksp->ks_data; + + if (rw == KSTAT_WRITE) { + + /* Darwin */ + + debug_vnop_osx_printf = ks->darwin_debug.value.ui64; + if (ks->darwin_debug.value.ui64 == 9119) + panic("ZFS: User requested panic\n"); + zfs_vnop_ignore_negatives = + ks->darwin_ignore_negatives.value.ui64; + zfs_vnop_ignore_positives = + ks->darwin_ignore_positives.value.ui64; + zfs_vnop_create_negatives = + ks->darwin_create_negatives.value.ui64; + zfs_vnop_force_formd_normalized_output = + ks->darwin_force_formd_normalized.value.ui64; + zfs_vnop_skip_unlinked_drain = + ks->darwin_skip_unlinked_drain.value.ui64; + zfs_vfs_sync_paranoia = + ks->darwin_use_system_sync.value.ui64; + + /* ARC */ + /* Upstream has this static, but we can find another way ... 
*/ + /* arc_kstat_update(ksp, rw); */ + if (arc_ksp != NULL && arc_ksp->ks_update != NULL) + arc_ksp->ks_update(ksp, rw); + arc_kstat_update_osx(ksp, rw); + + /* L2ARC */ + l2arc_write_max = ks->l2arc_write_max.value.ui64; + l2arc_write_boost = ks->l2arc_write_boost.value.ui64; + l2arc_headroom = ks->l2arc_headroom.value.ui64; + l2arc_headroom_boost = ks->l2arc_headroom_boost.value.ui64; + l2arc_feed_secs = ks->l2arc_feed_secs.value.ui64; + l2arc_feed_min_ms = ks->l2arc_feed_min_ms.value.ui64; + + l2arc_noprefetch = ks->l2arc_noprefetch.value.i64; + l2arc_feed_again = ks->l2arc_feed_again.value.i64; + l2arc_norw = ks->l2arc_norw.value.i64; + + /* vdev_queue */ + + zfs_vdev_max_active = + ks->zfs_vdev_max_active.value.ui64; + zfs_vdev_sync_read_min_active = + ks->zfs_vdev_sync_read_min_active.value.ui64; + zfs_vdev_sync_read_max_active = + ks->zfs_vdev_sync_read_max_active.value.ui64; + zfs_vdev_sync_write_min_active = + ks->zfs_vdev_sync_write_min_active.value.ui64; + zfs_vdev_sync_write_max_active = + ks->zfs_vdev_sync_write_max_active.value.ui64; + zfs_vdev_async_read_min_active = + ks->zfs_vdev_async_read_min_active.value.ui64; + zfs_vdev_async_read_max_active = + ks->zfs_vdev_async_read_max_active.value.ui64; + zfs_vdev_async_write_min_active = + ks->zfs_vdev_async_write_min_active.value.ui64; + zfs_vdev_async_write_max_active = + ks->zfs_vdev_async_write_max_active.value.ui64; + zfs_vdev_scrub_min_active = + ks->zfs_vdev_scrub_min_active.value.ui64; + zfs_vdev_scrub_max_active = + ks->zfs_vdev_scrub_max_active.value.ui64; + zfs_vdev_async_write_active_min_dirty_percent = + ks->zfs_vdev_async_write_active_min_dirty_percent.value.i64; + zfs_vdev_async_write_active_max_dirty_percent = + ks->zfs_vdev_async_write_active_max_dirty_percent.value.i64; + zfs_vdev_aggregation_limit = + ks->zfs_vdev_aggregation_limit.value.i64; + zfs_vdev_read_gap_limit = + ks->zfs_vdev_read_gap_limit.value.i64; + zfs_vdev_write_gap_limit = + ks->zfs_vdev_write_gap_limit.value.i64; + + arc_lotsfree_percent = + ks->arc_lotsfree_percent.value.i64; + zfs_dirty_data_max = + ks->zfs_dirty_data_max.value.i64; + zfs_delay_max_ns = + ks->zfs_delay_max_ns.value.i64; + zfs_delay_min_dirty_percent = + ks->zfs_delay_min_dirty_percent.value.i64; + zfs_delay_scale = + ks->zfs_delay_scale.value.i64; + spa_asize_inflation = + ks->spa_asize_inflation.value.i64; + zfs_prefetch_disable = + ks->zfs_prefetch_disable.value.i64; + zfetch_max_streams = + ks->zfetch_max_streams.value.i64; + zfetch_min_sec_reap = + ks->zfetch_min_sec_reap.value.i64; + zfetch_array_rd_sz = + ks->zfetch_array_rd_sz.value.i64; + zfs_default_bs = + ks->zfs_default_bs.value.i64; + zfs_default_ibs = + ks->zfs_default_ibs.value.i64; + metaslab_aliquot = + ks->metaslab_aliquot.value.i64; + spa_max_replication_override = + ks->spa_max_replication_override.value.i64; + spa_mode_global = + ks->spa_mode_global.value.i64; + zfs_flags = + ks->zfs_flags.value.i64; + zfs_txg_timeout = + ks->zfs_txg_timeout.value.i64; + zfs_vdev_cache_max = + ks->zfs_vdev_cache_max.value.i64; + zfs_vdev_cache_size = + ks->zfs_vdev_cache_size.value.i64; + zfs_no_scrub_io = + ks->zfs_no_scrub_io.value.i64; + zfs_no_scrub_prefetch = + ks->zfs_no_scrub_prefetch.value.i64; + fzap_default_block_shift = + ks->fzap_default_block_shift.value.i64; + zfs_immediate_write_sz = + ks->zfs_immediate_write_sz.value.i64; + zfs_read_chunk_size = + ks->zfs_read_chunk_size.value.i64; + zfs_nocacheflush = + ks->zfs_nocacheflush.value.i64; + zil_replay_disable = + ks->zil_replay_disable.value.i64; + 
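The KSTAT_WRITE handling above is what makes these tunables adjustable at runtime: each write copies the kstat value back into the matching kernel variable. On macOS the SPL publishes named kstats through sysctl, so from userland this surfaces roughly as below; the kstat.zfs.darwin.tunable prefix is an assumption about the kstat-to-sysctl bridge, derived from the kstat_create("zfs", 0, "tunable", "darwin", ...) call later in this file:

#include <sys/sysctl.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* name assumed: module "zfs", instance 0, class "darwin", name "tunable" */
	const char *name = "kstat.zfs.darwin.tunable.zfs_arc_max";
	uint64_t val = 0;
	size_t len = sizeof (val);

	if (sysctlbyname(name, &val, &len, NULL, 0) == 0)
		printf("zfs_arc_max = %llu\n", (unsigned long long)val);

	/* writing triggers the KSTAT_WRITE path above (needs root) */
	val = 4ULL * 1024 * 1024 * 1024;
	(void) sysctlbyname(name, NULL, NULL, &val, sizeof (val));
	return (0);
}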
metaslab_df_alloc_threshold = + ks->metaslab_df_alloc_threshold.value.i64; + metaslab_df_free_pct = + ks->metaslab_df_free_pct.value.i64; + zio_injection_enabled = + ks->zio_injection_enabled.value.i64; + zvol_immediate_write_sz = + ks->zvol_immediate_write_sz.value.i64; + + zfs_recover = + ks->zfs_recover.value.i64; + + zfs_free_bpobj_enabled = + ks->zfs_free_bpobj_enabled.value.i64; + + zfs_send_corrupt_data = + ks->zfs_send_corrupt_data.value.ui64; + zfs_send_queue_length = + ks->zfs_send_queue_length.value.ui64; + zfs_recv_queue_length = + ks->zfs_recv_queue_length.value.ui64; + + zvol_inhibit_dev = + ks->zvol_inhibit_dev.value.ui64; + zfs_send_set_freerecords_bit = + ks->zfs_send_set_freerecords_bit.value.ui64; + + zfs_write_implies_delete_child = + ks->zfs_write_implies_delete_child.value.ui64; + send_holes_without_birth_time = + ks->zfs_send_holes_without_birth_time.value.ui64; + + dbuf_cache_max_bytes = + ks->dbuf_cache_max_bytes.value.ui64; + + zfs_vdev_queue_depth_pct = + ks->zfs_vdev_queue_depth_pct.value.ui64; + + zio_dva_throttle_enabled = + (boolean_t)ks->zio_dva_throttle_enabled.value.ui64; + + zfs_lua_max_instrlimit = + ks->zfs_lua_max_instrlimit.value.ui64; + zfs_lua_max_memlimit = + ks->zfs_lua_max_memlimit.value.ui64; + + zfs_trim_extent_bytes_max = + ks->zfs_trim_extent_bytes_max.value.ui64; + zfs_trim_extent_bytes_min = + ks->zfs_trim_extent_bytes_min.value.ui64; + zfs_trim_metaslab_skip = + ks->zfs_trim_metaslab_skip.value.ui64; + zfs_trim_txg_batch = + ks->zfs_trim_txg_batch.value.ui64; + zfs_trim_queue_limit = + ks->zfs_trim_queue_limit.value.ui64; + + zfs_send_unmodified_spill_blocks = + ks->zfs_send_unmodified_spill_blocks.value.ui64; + zfs_special_class_metadata_reserve_pct = + ks->zfs_special_class_metadata_reserve_pct.value.ui64; + + // Check if string has changed (from KREAD), if so, update. 
+#if 0 + if (strcmp(vdev_raidz_string, + ks->zfs_vdev_raidz_impl.value.string.addr.ptr) != 0) + zfs_vdev_raidz_impl_set( + ks->zfs_vdev_raidz_impl.value.string.addr.ptr); + + if (strcmp(icp_gcm_string, + ks->icp_gcm_impl.value.string.addr.ptr) != 0) + icp_gcm_impl_set(ks->icp_gcm_impl.value.string.addr.ptr); + + if (strcmp(icp_aes_string, + ks->icp_aes_impl.value.string.addr.ptr) != 0) + icp_aes_impl_set(ks->icp_aes_impl.value.string.addr.ptr); + + if (strcmp(zfs_fletcher_4_string, + ks->zfs_fletcher_4_impl.value.string.addr.ptr) != 0) + zfs_fletcher_4_impl_set( + ks->zfs_fletcher_4_impl.value.string.addr.ptr); +#endif + + zfs_expire_snapshot = + ks->zfs_expire_snapshot.value.ui64; + zfs_admin_snapshot = + ks->zfs_admin_snapshot.value.ui64; + zfs_auto_snapshot = + ks->zfs_auto_snapshot.value.ui64; + + zfs_spa_discard_memory_limit = + ks->zfs_spa_discard_memory_limit.value.ui64; + zfs_async_block_max_blocks = + ks->zfs_async_block_max_blocks.value.ui64; + zfs_initialize_chunk_size = + ks->zfs_initialize_chunk_size.value.ui64; + zfs_scan_suspend_progress = + ks->zfs_scan_suspend_progress.value.ui64; + zfs_removal_suspend_progress = + ks->zfs_removal_suspend_progress.value.ui64; + zfs_livelist_max_entries = + ks->zfs_livelist_max_entries.value.ui64; + + zfs_allow_redacted_dataset_mount = + ks->zfs_allow_redacted_dataset_mount.value.ui64; + zfs_checksum_events_per_second = + ks->zfs_checksum_events_per_second.value.ui64; + zfs_commit_timeout_pct = + ks->zfs_commit_timeout_pct.value.ui64; + zfs_compressed_arc_enabled = + ks->zfs_compressed_arc_enabled.value.ui64; + zfs_condense_indirect_commit_entry_delay_ms = + ks->zfs_condense_indirect_commit_entry_delay_ms.value.ui64; + zfs_condense_min_mapping_bytes = + ks->zfs_condense_min_mapping_bytes.value.ui64; + zfs_deadman_checktime_ms = + ks->zfs_deadman_checktime_ms.value.ui64; + // string zfs_deadman_failmode = + // ks->zfs_deadman_failmode.value.ui64; + zfs_deadman_synctime_ms = + ks->zfs_deadman_synctime_ms.value.ui64; + zfs_deadman_ziotime_ms = + ks->zfs_deadman_ziotime_ms.value.ui64; + zfs_disable_ivset_guid_check = + ks->zfs_disable_ivset_guid_check.value.ui64; + zfs_initialize_value = + ks->zfs_initialize_value.value.ui64; + zfs_keep_log_spacemaps_at_export = + ks->zfs_keep_log_spacemaps_at_export.value.ui64; + l2arc_rebuild_blocks_min_l2size = + ks->l2arc_rebuild_blocks_min_l2size.value.ui64; + l2arc_rebuild_enabled = + ks->l2arc_rebuild_enabled.value.ui64; + l2arc_trim_ahead = ks->l2arc_trim_ahead.value.ui64; + zfs_livelist_condense_new_alloc = + ks->zfs_livelist_condense_new_alloc.value.ui64; + zfs_livelist_condense_sync_cancel = + ks->zfs_livelist_condense_sync_cancel.value.ui64; + zfs_livelist_condense_sync_pause = + ks->zfs_livelist_condense_sync_pause.value.ui64; + zfs_livelist_condense_zthr_cancel = + ks->zfs_livelist_condense_zthr_cancel.value.ui64; + zfs_livelist_condense_zthr_pause = + ks->zfs_livelist_condense_zthr_pause.value.ui64; + zfs_livelist_min_percent_shared = + ks->zfs_livelist_min_percent_shared.value.ui64; + zfs_max_dataset_nesting = + ks->zfs_max_dataset_nesting.value.ui64; + zfs_max_missing_tvds = + ks->zfs_max_missing_tvds.value.ui64; + metaslab_debug_load = ks->metaslab_debug_load.value.ui64; + metaslab_force_ganging = + ks->metaslab_force_ganging.value.ui64; + zfs_multihost_fail_intervals = + ks->zfs_multihost_fail_intervals.value.ui64; + zfs_multihost_import_intervals = + ks->zfs_multihost_import_intervals.value.ui64; + zfs_multihost_interval = + ks->zfs_multihost_interval.value.ui64; + 
zfs_override_estimate_recordsize = + ks->zfs_override_estimate_recordsize.value.ui64; + zfs_remove_max_segment = + ks->zfs_remove_max_segment.value.ui64; + zfs_resilver_min_time_ms = + ks->zfs_resilver_min_time_ms.value.ui64; + zfs_scan_legacy = ks->zfs_scan_legacy.value.ui64; + zfs_scan_vdev_limit = + ks->zfs_scan_vdev_limit.value.ui64; + zfs_slow_io_events_per_second = + ks->zfs_slow_io_events_per_second.value.ui64; + spa_load_verify_data = + ks->spa_load_verify_data.value.ui64; + spa_load_verify_metadata = + ks->spa_load_verify_metadata.value.ui64; + zfs_unlink_suspend_progress = + ks->zfs_unlink_suspend_progress.value.ui64; + zfs_vdev_min_ms_count = ks->zfs_vdev_min_ms_count.value.ui64; + vdev_validate_skip = ks->vdev_validate_skip.value.ui64; + zfs_zevent_len_max = ks->zfs_zevent_len_max.value.ui64; + zio_slow_io_ms = ks->zio_slow_io_ms.value.ui64; + + + } else { + + /* kstat READ */ + ks->spa_version.value.ui64 = SPA_VERSION; + ks->zpl_version.value.ui64 = ZPL_VERSION; + + /* Darwin */ + ks->darwin_active_vnodes.value.ui64 = vnop_num_vnodes; + ks->darwin_reclaim_nodes.value.ui64 = vnop_num_reclaims; + ks->darwin_debug.value.ui64 = debug_vnop_osx_printf; + ks->darwin_ignore_negatives.value.ui64 = + zfs_vnop_ignore_negatives; + ks->darwin_ignore_positives.value.ui64 = + zfs_vnop_ignore_positives; + ks->darwin_create_negatives.value.ui64 = + zfs_vnop_create_negatives; + ks->darwin_force_formd_normalized.value.ui64 = + zfs_vnop_force_formd_normalized_output; + ks->darwin_skip_unlinked_drain.value.ui64 = + zfs_vnop_skip_unlinked_drain; + ks->darwin_use_system_sync.value.ui64 = zfs_vfs_sync_paranoia; + + /* ARC */ + if (arc_ksp != NULL && arc_ksp->ks_update != NULL) + arc_ksp->ks_update(ksp, rw); + arc_kstat_update_osx(ksp, rw); + + /* L2ARC */ + ks->l2arc_write_max.value.ui64 = l2arc_write_max; + ks->l2arc_write_boost.value.ui64 = l2arc_write_boost; + ks->l2arc_headroom.value.ui64 = l2arc_headroom; + ks->l2arc_headroom_boost.value.ui64 = l2arc_headroom_boost; + ks->l2arc_feed_secs.value.ui64 = l2arc_feed_secs; + ks->l2arc_feed_min_ms.value.ui64 = l2arc_feed_min_ms; + + ks->l2arc_noprefetch.value.i64 = l2arc_noprefetch; + ks->l2arc_feed_again.value.i64 = l2arc_feed_again; + ks->l2arc_norw.value.i64 = l2arc_norw; + + /* vdev_queue */ + ks->zfs_vdev_max_active.value.ui64 = + zfs_vdev_max_active; + ks->zfs_vdev_sync_read_min_active.value.ui64 = + zfs_vdev_sync_read_min_active; + ks->zfs_vdev_sync_read_max_active.value.ui64 = + zfs_vdev_sync_read_max_active; + ks->zfs_vdev_sync_write_min_active.value.ui64 = + zfs_vdev_sync_write_min_active; + ks->zfs_vdev_sync_write_max_active.value.ui64 = + zfs_vdev_sync_write_max_active; + ks->zfs_vdev_async_read_min_active.value.ui64 = + zfs_vdev_async_read_min_active; + ks->zfs_vdev_async_read_max_active.value.ui64 = + zfs_vdev_async_read_max_active; + ks->zfs_vdev_async_write_min_active.value.ui64 = + zfs_vdev_async_write_min_active; + ks->zfs_vdev_async_write_max_active.value.ui64 = + zfs_vdev_async_write_max_active; + ks->zfs_vdev_scrub_min_active.value.ui64 = + zfs_vdev_scrub_min_active; + ks->zfs_vdev_scrub_max_active.value.ui64 = + zfs_vdev_scrub_max_active; + ks->zfs_vdev_async_write_active_min_dirty_percent.value.i64 = + zfs_vdev_async_write_active_min_dirty_percent; + ks->zfs_vdev_async_write_active_max_dirty_percent.value.i64 = + zfs_vdev_async_write_active_max_dirty_percent; + ks->zfs_vdev_aggregation_limit.value.i64 = + zfs_vdev_aggregation_limit; + ks->zfs_vdev_read_gap_limit.value.i64 = + zfs_vdev_read_gap_limit; + 
ks->zfs_vdev_write_gap_limit.value.i64 = + zfs_vdev_write_gap_limit; + + ks->arc_lotsfree_percent.value.i64 = + arc_lotsfree_percent; + ks->zfs_dirty_data_max.value.i64 = + zfs_dirty_data_max; + ks->zfs_delay_max_ns.value.i64 = + zfs_delay_max_ns; + ks->zfs_delay_min_dirty_percent.value.i64 = + zfs_delay_min_dirty_percent; + ks->zfs_delay_scale.value.i64 = + zfs_delay_scale; + ks->spa_asize_inflation.value.i64 = + spa_asize_inflation; + ks->zfs_prefetch_disable.value.i64 = + zfs_prefetch_disable; + ks->zfetch_max_streams.value.i64 = + zfetch_max_streams; + ks->zfetch_min_sec_reap.value.i64 = + zfetch_min_sec_reap; + ks->zfetch_array_rd_sz.value.i64 = + zfetch_array_rd_sz; + ks->zfs_default_bs.value.i64 = + zfs_default_bs; + ks->zfs_default_ibs.value.i64 = + zfs_default_ibs; + ks->metaslab_aliquot.value.i64 = + metaslab_aliquot; + ks->spa_max_replication_override.value.i64 = + spa_max_replication_override; + ks->spa_mode_global.value.i64 = + spa_mode_global; + ks->zfs_flags.value.i64 = + zfs_flags; + ks->zfs_txg_timeout.value.i64 = + zfs_txg_timeout; + ks->zfs_vdev_cache_max.value.i64 = + zfs_vdev_cache_max; + ks->zfs_vdev_cache_size.value.i64 = + zfs_vdev_cache_size; + ks->zfs_no_scrub_io.value.i64 = + zfs_no_scrub_io; + ks->zfs_no_scrub_prefetch.value.i64 = + zfs_no_scrub_prefetch; + ks->fzap_default_block_shift.value.i64 = + fzap_default_block_shift; + ks->zfs_immediate_write_sz.value.i64 = + zfs_immediate_write_sz; + ks->zfs_read_chunk_size.value.i64 = + zfs_read_chunk_size; + ks->zfs_nocacheflush.value.i64 = + zfs_nocacheflush; + ks->zil_replay_disable.value.i64 = + zil_replay_disable; + ks->metaslab_df_alloc_threshold.value.i64 = + metaslab_df_alloc_threshold; + ks->metaslab_df_free_pct.value.i64 = + metaslab_df_free_pct; + ks->zio_injection_enabled.value.i64 = + zio_injection_enabled; + ks->zvol_immediate_write_sz.value.i64 = + zvol_immediate_write_sz; + + ks->zfs_recover.value.i64 = + zfs_recover; + + ks->zfs_free_bpobj_enabled.value.i64 = + zfs_free_bpobj_enabled; + + ks->zfs_send_corrupt_data.value.ui64 = + zfs_send_corrupt_data; + ks->zfs_send_queue_length.value.ui64 = + zfs_send_queue_length; + ks->zfs_recv_queue_length.value.ui64 = + zfs_recv_queue_length; + + ks->zvol_inhibit_dev.value.ui64 = + zvol_inhibit_dev; + ks->zfs_send_set_freerecords_bit.value.ui64 = + zfs_send_set_freerecords_bit; + + ks->zfs_write_implies_delete_child.value.ui64 = + zfs_write_implies_delete_child; + ks->zfs_send_holes_without_birth_time.value.ui64 = + send_holes_without_birth_time; + + ks->dbuf_cache_max_bytes.value.ui64 = dbuf_cache_max_bytes; + + ks->zfs_vdev_queue_depth_pct.value.ui64 = + zfs_vdev_queue_depth_pct; + ks->zio_dva_throttle_enabled.value.ui64 = + (uint64_t)zio_dva_throttle_enabled; + + ks->zfs_lua_max_instrlimit.value.ui64 = zfs_lua_max_instrlimit; + ks->zfs_lua_max_memlimit.value.ui64 = zfs_lua_max_memlimit; + + ks->zfs_trim_extent_bytes_max.value.ui64 = + zfs_trim_extent_bytes_max; + ks->zfs_trim_extent_bytes_min.value.ui64 = + zfs_trim_extent_bytes_min; + ks->zfs_trim_metaslab_skip.value.ui64 = + zfs_trim_metaslab_skip; + ks->zfs_trim_txg_batch.value.ui64 = + zfs_trim_txg_batch; + ks->zfs_trim_queue_limit.value.ui64 = + zfs_trim_queue_limit; + + ks->zfs_send_unmodified_spill_blocks.value.ui64 = + zfs_send_unmodified_spill_blocks; + ks->zfs_special_class_metadata_reserve_pct.value.ui64 = + zfs_special_class_metadata_reserve_pct; +#if 0 + zfs_vdev_raidz_impl_get(vdev_raidz_string, + sizeof (vdev_raidz_string)); + kstat_named_setstr(&ks->zfs_vdev_raidz_impl, vdev_raidz_string); + + 
icp_gcm_impl_get(icp_gcm_string, sizeof (icp_gcm_string)); + kstat_named_setstr(&ks->icp_gcm_impl, icp_gcm_string); + + icp_aes_impl_get(icp_aes_string, sizeof (icp_aes_string)); + kstat_named_setstr(&ks->icp_aes_impl, icp_aes_string); + + zfs_fletcher_4_impl_get(zfs_fletcher_4_string, + sizeof (zfs_fletcher_4_string)); + kstat_named_setstr(&ks->zfs_fletcher_4_impl, + zfs_fletcher_4_string); +#endif + + ks->zfs_expire_snapshot.value.ui64 = zfs_expire_snapshot; + ks->zfs_admin_snapshot.value.ui64 = zfs_admin_snapshot; + ks->zfs_auto_snapshot.value.ui64 = zfs_auto_snapshot; + + ks->zfs_spa_discard_memory_limit.value.ui64 = + zfs_spa_discard_memory_limit; + ks->zfs_async_block_max_blocks.value.ui64 = + zfs_async_block_max_blocks; + ks->zfs_initialize_chunk_size.value.ui64 = + zfs_initialize_chunk_size; + ks->zfs_scan_suspend_progress.value.ui64 = + zfs_scan_suspend_progress; + ks->zfs_livelist_max_entries.value.ui64 = + zfs_livelist_max_entries; + + ks->zfs_allow_redacted_dataset_mount.value.ui64 = + zfs_allow_redacted_dataset_mount; + ks->zfs_checksum_events_per_second.value.ui64 = + zfs_checksum_events_per_second; + ks->zfs_commit_timeout_pct.value.ui64 = zfs_commit_timeout_pct; + ks->zfs_compressed_arc_enabled.value.ui64 = + zfs_compressed_arc_enabled; + ks->zfs_condense_indirect_commit_entry_delay_ms.value.ui64 = + zfs_condense_indirect_commit_entry_delay_ms; + ks->zfs_condense_min_mapping_bytes.value.ui64 = + zfs_condense_min_mapping_bytes; + ks->zfs_deadman_checktime_ms.value.ui64 = + zfs_deadman_checktime_ms; + + kstat_named_setstr(&ks->zfs_deadman_failmode, + zfs_deadman_failmode); + + ks->zfs_deadman_synctime_ms.value.ui64 = + zfs_deadman_synctime_ms; + ks->zfs_deadman_ziotime_ms.value.ui64 = zfs_deadman_ziotime_ms; + ks->zfs_disable_ivset_guid_check.value.ui64 = + zfs_disable_ivset_guid_check; + ks->zfs_initialize_value.value.ui64 = zfs_initialize_value; + ks->zfs_keep_log_spacemaps_at_export.value.ui64 = + zfs_keep_log_spacemaps_at_export; + ks->l2arc_rebuild_blocks_min_l2size.value.ui64 = + l2arc_rebuild_blocks_min_l2size; + ks->l2arc_rebuild_enabled.value.ui64 = l2arc_rebuild_enabled; + ks->l2arc_trim_ahead.value.ui64 = l2arc_trim_ahead; + ks->zfs_livelist_condense_new_alloc.value.ui64 = + zfs_livelist_condense_new_alloc; + ks->zfs_livelist_condense_sync_cancel.value.ui64 = + zfs_livelist_condense_sync_cancel; + ks->zfs_livelist_condense_sync_pause.value.ui64 = + zfs_livelist_condense_sync_pause; + ks->zfs_livelist_condense_zthr_cancel.value.ui64 = + zfs_livelist_condense_zthr_cancel; + ks->zfs_livelist_condense_zthr_pause.value.ui64 = + zfs_livelist_condense_zthr_pause; + ks->zfs_livelist_min_percent_shared.value.ui64 = + zfs_livelist_min_percent_shared; + ks->zfs_max_dataset_nesting.value.ui64 = + zfs_max_dataset_nesting; + ks->zfs_max_missing_tvds.value.ui64 = zfs_max_missing_tvds; + ks->metaslab_debug_load.value.ui64 = metaslab_debug_load; + ks->metaslab_force_ganging.value.ui64 = metaslab_force_ganging; + ks->zfs_multihost_fail_intervals.value.ui64 = + zfs_multihost_fail_intervals; + ks->zfs_multihost_import_intervals.value.ui64 = + zfs_multihost_import_intervals; + ks->zfs_multihost_interval.value.ui64 = zfs_multihost_interval; + ks->zfs_override_estimate_recordsize.value.ui64 = + zfs_override_estimate_recordsize; + ks->zfs_remove_max_segment.value.ui64 = zfs_remove_max_segment; + ks->zfs_resilver_min_time_ms.value.ui64 = + zfs_resilver_min_time_ms; + ks->zfs_scan_legacy.value.ui64 = zfs_scan_legacy; + ks->zfs_scan_vdev_limit.value.ui64 = zfs_scan_vdev_limit; + 
ks->zfs_slow_io_events_per_second.value.ui64 = + zfs_slow_io_events_per_second; + ks->spa_load_verify_data.value.ui64 = spa_load_verify_data; + ks->spa_load_verify_metadata.value.ui64 = + spa_load_verify_metadata; + ks->zfs_unlink_suspend_progress.value.ui64 = + zfs_unlink_suspend_progress; + ks->zfs_vdev_min_ms_count.value.ui64 = zfs_vdev_min_ms_count; + ks->vdev_validate_skip.value.ui64 = vdev_validate_skip; + ks->zfs_zevent_len_max.value.ui64 = zfs_zevent_len_max; + ks->zio_slow_io_ms.value.ui64 = zio_slow_io_ms; + } + + return (0); +} + + + +int +kstat_osx_init(void) +{ + osx_kstat_ksp = kstat_create("zfs", 0, "tunable", "darwin", + KSTAT_TYPE_NAMED, sizeof (osx_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE); + + if (osx_kstat_ksp != NULL) { + osx_kstat_ksp->ks_data = &osx_kstat; + osx_kstat_ksp->ks_update = osx_kstat_update; + kstat_install(osx_kstat_ksp); + } + + return (0); +} + +void +kstat_osx_fini(void) +{ + if (osx_kstat_ksp != NULL) { + kstat_delete(osx_kstat_ksp); + osx_kstat_ksp = NULL; + } +} diff --git a/module/os/macos/zfs/zfs_osx.cpp b/module/os/macos/zfs/zfs_osx.cpp new file mode 100644 index 0000000000..898ff897ec --- /dev/null +++ b/module/os/macos/zfs/zfs_osx.cpp @@ -0,0 +1,310 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013-2020, Jorgen Lundman. All rights reserved. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +// Define the superclass. 
+#define super IOService + +OSDefineMetaClassAndStructors(net_lundman_zfs_zvol, IOService) + +extern "C" { + +#include +#include +#include + +extern SInt32 zfs_active_fs_count; + +#ifdef DEBUG +#define ZFS_DEBUG_STR " (DEBUG mode)" +#else +#define ZFS_DEBUG_STR "" +#endif + +static char spl_gitrev[64] = ZFS_META_GITREV; + +SYSCTL_DECL(_zfs); +SYSCTL_NODE(, OID_AUTO, zfs, CTLFLAG_RD, 0, ""); +SYSCTL_STRING(_zfs, OID_AUTO, kext_version, + CTLFLAG_RD | CTLFLAG_LOCKED, + spl_gitrev, 0, "ZFS KEXT Version"); + + +extern kern_return_t _start(kmod_info_t *ki, void *data); +extern kern_return_t _stop(kmod_info_t *ki, void *data); + +__attribute__((visibility("default"))) KMOD_EXPLICIT_DECL(net.lundman.zfs, + "1.0.0", _start, _stop) +kmod_start_func_t *_realmain = 0; +kmod_stop_func_t *_antimain = 0; +int _kext_apple_cc = __APPLE_CC__; + +} // Extern "C" + +bool +net_lundman_zfs_zvol::init(OSDictionary* dict) +{ + bool res; + + /* Need an OSSet for open clients */ + _openClients = OSSet::withCapacity(1); + if (_openClients == NULL) { + dprintf("client OSSet failed"); + return (false); + } + + res = super::init(dict); + + // IOLog("ZFS::init\n"); + return (res); +} + +void +net_lundman_zfs_zvol::free(void) +{ + OSSafeReleaseNULL(_openClients); + + // IOLog("ZFS::free\n"); + super::free(); +} + +bool +net_lundman_zfs_zvol::isOpen(const IOService *forClient) const +{ + bool ret; + ret = IOService::isOpen(forClient); + return (ret); +} + +bool +net_lundman_zfs_zvol::handleOpen(IOService *client, + IOOptionBits options, void *arg) +{ + bool ret = true; + + dprintf(""); + + _openClients->setObject(client); + ret = _openClients->containsObject(client); + + return (ret); +} + +bool +net_lundman_zfs_zvol::handleIsOpen(const IOService *client) const +{ + bool ret; + + dprintf(""); + + ret = _openClients->containsObject(client); + + return (ret); +} + +void +net_lundman_zfs_zvol::handleClose(IOService *client, + IOOptionBits options) +{ + dprintf(""); + + if (_openClients->containsObject(client) == false) { + dprintf("not open"); + } + + _openClients->removeObject(client); +} + +IOService* +net_lundman_zfs_zvol::probe(IOService *provider, SInt32 *score) +{ + IOService *res = super::probe(provider, score); + return (res); +} + + +/* + * + * ************************************************************************ + * + * Kernel Module Load + * + * ************************************************************************ + * + */ + +bool +net_lundman_zfs_zvol::start(IOService *provider) +{ + bool res = super::start(provider); + + IOLog("ZFS: Loading module ... \n"); + + if (!res) + return (res); + + /* Fire up all SPL modules and threads */ + spl_start(NULL, NULL); + + /* registerService() allows zconfigd to match against the service */ + this->registerService(); + + /* + * hostid is left as 0 on OSX, and left to be set if developers wish to + * use it. If it is 0, we will hash the hardware.uuid into a 32 bit + * value and set the hostid. 
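The comment above describes how a stable hostid is derived when none is configured: hash the IOPlatformUUID string with 32-bit FNV-1a and store the result via the kern.hostid sysctl. A small userspace sketch of the same hash, useful for predicting the value a given machine ends up with (the UUID literal is only an example; fnv_32a_str()/FNV1_32A_INIT in the kext use the same construction):

#include <stdint.h>
#include <stdio.h>

/* 32-bit FNV-1a */
static uint32_t
fnv_32a_hash(const char *s)
{
	uint32_t h = 2166136261u;		/* FNV1_32A_INIT */
	while (*s != '\0') {
		h ^= (uint32_t)(unsigned char)*s++;
		h *= 16777619u;			/* 32-bit FNV prime */
	}
	return (h);
}

int
main(void)
{
	printf("hostid %08x\n",
	    fnv_32a_hash("01234567-89AB-CDEF-0123-456789ABCDEF"));
	return (0);
}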
+ */ + if (!zone_get_hostid(NULL)) { + uint32_t myhostid = 0; + IORegistryEntry *ioregroot = + IORegistryEntry::getRegistryRoot(); + if (ioregroot) { + IORegistryEntry *macmodel = + ioregroot->getChildEntry(gIOServicePlane); + + if (macmodel) { + OSObject *ioplatformuuidobj; + ioplatformuuidobj = + macmodel->getProperty(kIOPlatformUUIDKey); + if (ioplatformuuidobj) { + OSString *ioplatformuuidstr = + OSDynamicCast(OSString, + ioplatformuuidobj); + + myhostid = fnv_32a_str( + ioplatformuuidstr-> + getCStringNoCopy(), + FNV1_32A_INIT); + + sysctlbyname("kern.hostid", NULL, NULL, + &myhostid, sizeof (myhostid)); + printf("ZFS: hostid set to %08x from " + "UUID '%s'\n", myhostid, + ioplatformuuidstr-> + getCStringNoCopy()); + } + } + } + } + + /* Register ZFS KEXT Version sysctl - separate to kstats */ + sysctl_register_oid(&sysctl__zfs); + sysctl_register_oid(&sysctl__zfs_kext_version); + + /* Init LDI */ + int error = 0; + error = ldi_init(NULL); + if (error) { + IOLog("%s ldi_init error %d\n", __func__, error); + goto failure; + } + + /* Start ZFS itself */ + zfs_kmod_init(); + + /* Register fs with XNU */ + zfs_vfsops_init(); + + /* + * When is the best time to start the system_taskq? It is strictly + * speaking not used by SPL, but by ZFS. ZFS should really start it? + */ + system_taskq_init(); + + res = zfs_boot_init((IOService *)this); + + printf("ZFS: Loaded module v%s-%s%s, " + "ZFS pool version %s, ZFS filesystem version %s\n", + ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, + SPA_VERSION_STRING, ZPL_VERSION_STRING); + + return (true); + +failure: + spl_stop(NULL, NULL); + sysctl_unregister_oid(&sysctl__zfs_kext_version); + sysctl_unregister_oid(&sysctl__zfs); + return (false); +} + +/* Here we are, at the end of all things */ +void +net_lundman_zfs_zvol::stop(IOService *provider) +{ + + zfs_boot_fini(); + + IOLog("ZFS: Attempting to unload ...\n"); + + super::stop(provider); + + system_taskq_fini(); + + zfs_vfsops_fini(); + + zfs_kmod_fini(); + + ldi_fini(); + + sysctl_unregister_oid(&sysctl__zfs_kext_version); + sysctl_unregister_oid(&sysctl__zfs); + + spl_stop(NULL, NULL); + + printf("ZFS: Unloaded module v%s-%s%s\n", + ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR); + + /* + * There is no way to ensure all threads have actually got to the + * thread_exit() call, before we exit here (and XNU unloads all + * memory for the KEXT). So we increase the odds of that happening + * by delaying a little bit before we return to XNU. Quite possibly + * the worst "solution" but Apple has not given any good options. + */ + delay(hz*5); +} diff --git a/module/os/macos/zfs/zfs_vfsops.c b/module/os/macos/zfs/zfs_vfsops.c new file mode 100644 index 0000000000..0487ebf870 --- /dev/null +++ b/module/os/macos/zfs/zfs_vfsops.c @@ -0,0 +1,2988 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Pawel Jakub Dawidek . + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + */ + +/* Portions Copyright 2010 Robert Milkowski */ +/* Portions Copyright 2013,2020 Jorgen Lundman */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_comutil.h" + +#include +#include +#include +#include +#include +#include + +// #define dprintf kprintf + +unsigned int zfs_vnop_skip_unlinked_drain = 0; + +int zfs_module_start(kmod_info_t *ki, void *data); +int zfs_module_stop(kmod_info_t *ki, void *data); +extern int getzfsvfs(const char *dsname, zfsvfs_t **zfvp); + +void arc_os_init(void); +void arc_os_fini(void); + +/* + * AVL tree of hardlink entries, which we need to map for Finder. The va_linkid + * needs to be unique for each hardlink target, as well as, return the znode + * in vget(va_linkid). Unfortunately, the va_linkid is 32bit (lost in the + * syscall translation to userland struct). We sort the AVL tree by + * -> directory id + * -> z_id + * -> name + * + */ +static int hardlinks_compare(const void *arg1, const void *arg2) +{ + const hardlinks_t *node1 = arg1; + const hardlinks_t *node2 = arg2; + int value; + if (node1->hl_parent > node2->hl_parent) + return (1); + if (node1->hl_parent < node2->hl_parent) + return (-1); + if (node1->hl_fileid > node2->hl_fileid) + return (1); + if (node1->hl_fileid < node2->hl_fileid) + return (-1); + + value = strncmp(node1->hl_name, node2->hl_name, PATH_MAX); + if (value < 0) + return (-1); + if (value > 0) + return (1); + return (0); +} + +/* + * Lookup same information from linkid, to get at parentid, objid and name + */ +static int hardlinks_compare_linkid(const void *arg1, const void *arg2) +{ + const hardlinks_t *node1 = arg1; + const hardlinks_t *node2 = arg2; + if (node1->hl_linkid > node2->hl_linkid) + return (1); + if (node1->hl_linkid < node2->hl_linkid) + return (-1); + return (0); +} + +extern int +zfs_obtain_xattr(znode_t *, const char *, mode_t, cred_t *, vnode_t **, int); + + +/* + * We need to keep a count of active fs's. + * This is necessary to prevent our kext + * from being unloaded after a umount -f + */ +uint32_t zfs_active_fs_count = 0; + +extern void zfs_ioctl_init(void); +extern void zfs_ioctl_fini(void); + +static int +zfsvfs_parse_option(char *option, char *value, vfs_t *vfsp) +{ + if (!option || !*option) + return (0); + dprintf("parse '%s' '%s'\n", option?option:"", + value?value:""); + if (!strcasecmp(option, "readonly")) { + if (value && *value && + strcasecmp(value, "off") == 0) + vfs_clearflags(vfsp, (uint64_t)MNT_RDONLY); + else + vfs_setflags(vfsp, (uint64_t)MNT_RDONLY); + } + return (0); +} + +/* + * Parse the raw mntopts and return a vfs_t describing the options. 
+ */ +static int +zfsvfs_parse_options(char *mntopts, vfs_t *vfsp) +{ + int error = 0; + + if (mntopts != NULL) { + char *p, *t, *v; + char *keep; + + int len = strlen(mntopts) + 1; + keep = kmem_alloc(len, KM_SLEEP); + t = keep; + memcpy(t, mntopts, len); + + while (1) { + while (t && *t == ' ') t++; + + p = strpbrk(t, ","); + if (p) *p = 0; + + // find "=" + v = strpbrk(t, "="); + if (v) { + *v = 0; + v++; + while (*v == ' ') v++; + } + error = zfsvfs_parse_option(t, v, vfsp); + if (error) break; + if (!p) break; + t = &p[1]; + } + kmem_free(keep, len); + } + + return (error); +} + +int +zfs_is_readonly(zfsvfs_t *zfsvfs) +{ + return (!!(vfs_isrdonly(zfsvfs->z_vfs))); +} + +/* + * The OS sync ignored by default, as ZFS handles internal periodic + * syncs. (As per illumos) Unfortunately, we can not tell the difference + * of when users run "sync" by hand. Sync is called on umount though. + */ +uint64_t zfs_vfs_sync_paranoia = 0; + +int +zfs_vfs_sync(struct mount *vfsp, __unused int waitfor, + __unused vfs_context_t context) +{ + /* + * Data integrity is job one. We don't want a compromised kernel + * writing to the storage pool, so we never sync during panic. + */ + if (spl_panicstr()) + return (0); + + /* Check if sysctl setting wants sync - and we are not unmounting */ + if (zfs_vfs_sync_paranoia == 0 && + !vfs_isunmount(vfsp)) + return (0); + + if (vfsp != NULL) { + /* + * Sync a specific filesystem. + */ + zfsvfs_t *zfsvfs = vfs_fsprivate(vfsp); + dsl_pool_t *dp; + + ZFS_ENTER(zfsvfs); + dp = dmu_objset_pool(zfsvfs->z_os); + + /* + * If the system is shutting down, then skip any + * filesystems which may exist on a suspended pool. + */ + if (spl_system_inshutdown() && spa_suspended(dp->dp_spa)) { + ZFS_EXIT(zfsvfs); + return (0); + } + + if (zfsvfs->z_log != NULL) + zil_commit(zfsvfs->z_log, 0); + + ZFS_EXIT(zfsvfs); + + } else { + /* + * Sync all ZFS filesystems. This is what happens when you + * run sync(1M). Unlike other filesystems, ZFS honors the + * request by waiting for all pools to commit all dirty data. + */ + spa_sync_allpools(); + } + + return (0); +} + +static void +atime_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + if (newval == B_TRUE) { + zfsvfs->z_atime = B_TRUE; + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_NOATIME); + } else { + zfsvfs->z_atime = B_FALSE; + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_NOATIME); + } +} + +static void +xattr_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + /* + * Apple does have MNT_NOUSERXATTR mount option, but unfortunately + * the VFS layer returns EACCESS if xattr access is attempted. + * Finder etc, will do so, even if filesystem capabilities is set + * without xattr, rendering the mount option useless. We no longer + * set it, and handle xattrs being disabled internally. 
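zfsvfs_parse_options() above splits the raw mntopts string on commas, then on '=', trimming leading spaces before handing each key/value pair to zfsvfs_parse_option(), which currently only acts on "readonly". A standalone sketch of that tokenizer run over a sample string, purely to show how the input is carved up:

#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[] = "readonly=on, atime=off,unknown";
	char *t = buf, *p, *v;

	while (1) {
		while (t && *t == ' ')
			t++;
		p = strpbrk(t, ",");		/* end of this option */
		if (p)
			*p = '\0';
		v = strpbrk(t, "=");		/* optional value */
		if (v) {
			*v++ = '\0';
			while (*v == ' ')
				v++;
		}
		printf("option '%s' value '%s'\n", t, v ? v : "");
		if (p == NULL)
			break;
		t = p + 1;
	}
	return (0);
}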
+ */ + + if (newval == ZFS_XATTR_OFF) { + zfsvfs->z_xattr = B_FALSE; + // vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_NOUSERXATTR); + } else { + zfsvfs->z_xattr = B_TRUE; + // vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_NOUSERXATTR); + + if (newval == ZFS_XATTR_SA) + zfsvfs->z_xattr_sa = B_TRUE; + else + zfsvfs->z_xattr_sa = B_FALSE; + } +} + +static void +blksz_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); + ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); + ASSERT(ISP2(newval)); + + zfsvfs->z_max_blksz = newval; + // zfsvfs->z_vfs->mnt_stat.f_iosize = newval; +} + +static void +readonly_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == B_TRUE) { + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_RDONLY); + } else { + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_RDONLY); + } +} + +static void +devices_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == B_FALSE) { + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_NODEV); + } else { + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_NODEV); + } +} + +static void +setuid_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == B_FALSE) { + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_NOSUID); + } else { + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_NOSUID); + } +} + +static void +exec_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == B_FALSE) { + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_NOEXEC); + } else { + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_NOEXEC); + } +} + +static void +snapdir_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + zfsvfs->z_show_ctldir = newval; + cache_purgevfs(zfsvfs->z_vfs); +} + +static void +vscan_changed_cb(void *arg, uint64_t newval) +{ + // zfsvfs_t *zfsvfs = arg; + // zfsvfs->z_vscan = newval; +} + +static void +acl_mode_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + zfsvfs->z_acl_mode = newval; +} + +static void +acl_inherit_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + + zfsvfs->z_acl_inherit = newval; +} + +static void +finderbrowse_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == B_FALSE) { + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_DONTBROWSE); + } else { + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_DONTBROWSE); + } +} +static void +ignoreowner_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + if (newval == B_FALSE) { + vfs_clearflags(zfsvfs->z_vfs, (uint64_t)MNT_IGNORE_OWNERSHIP); + } else { + vfs_setflags(zfsvfs->z_vfs, (uint64_t)MNT_IGNORE_OWNERSHIP); + } +} + +static void +mimic_changed_cb(void *arg, uint64_t newval) +{ + zfsvfs_t *zfsvfs = arg; + struct vfsstatfs *vfsstatfs; + vfsstatfs = vfs_statfs(zfsvfs->z_vfs); + + if (newval == 0) { + strlcpy(vfsstatfs->f_fstypename, "zfs", MFSTYPENAMELEN); + } else { + strlcpy(vfsstatfs->f_fstypename, "hfs", MFSTYPENAMELEN); + } +} + +static int +zfs_register_callbacks(struct mount *vfsp) +{ + struct dsl_dataset *ds = NULL; + + objset_t *os = NULL; + zfsvfs_t *zfsvfs = NULL; + boolean_t readonly = B_FALSE; + boolean_t do_readonly = B_FALSE; + boolean_t setuid = B_FALSE; + boolean_t do_setuid = B_FALSE; + boolean_t exec = B_FALSE; + boolean_t do_exec = B_FALSE; + boolean_t devices = B_FALSE; + boolean_t do_devices = B_FALSE; + boolean_t xattr = B_FALSE; + boolean_t do_xattr = B_FALSE; + boolean_t atime = B_FALSE; + boolean_t do_atime = B_FALSE; + 
boolean_t finderbrowse = B_FALSE; + boolean_t do_finderbrowse = B_FALSE; + boolean_t ignoreowner = B_FALSE; + boolean_t do_ignoreowner = B_FALSE; + int error = 0; + + ASSERT(vfsp); + zfsvfs = vfs_fsprivate(vfsp); + ASSERT(zfsvfs); + os = zfsvfs->z_os; + + /* + * This function can be called for a snapshot when we update snapshot's + * mount point, which isn't really supported. + */ + if (dmu_objset_is_snapshot(os)) + return (EOPNOTSUPP); + + /* + * The act of registering our callbacks will destroy any mount + * options we may have. In order to enable temporary overrides + * of mount options, we stash away the current values and + * restore them after we register the callbacks. + */ +#define vfs_optionisset(X, Y, Z) (vfs_flags(X)&(Y)) + + if (vfs_optionisset(vfsp, MNT_RDONLY, NULL) || + !spa_writeable(dmu_objset_spa(os))) { + readonly = B_TRUE; + do_readonly = B_TRUE; + } + if (vfs_optionisset(vfsp, MNT_NODEV, NULL)) { + devices = B_FALSE; + do_devices = B_TRUE; + } + /* xnu SETUID, not IllumOS SUID */ + if (vfs_optionisset(vfsp, MNT_NOSUID, NULL)) { + setuid = B_FALSE; + do_setuid = B_TRUE; + } + if (vfs_optionisset(vfsp, MNT_NOEXEC, NULL)) { + exec = B_FALSE; + do_exec = B_TRUE; + } + if (vfs_optionisset(vfsp, MNT_NOUSERXATTR, NULL)) { + xattr = B_FALSE; + do_xattr = B_TRUE; + } + if (vfs_optionisset(vfsp, MNT_NOATIME, NULL)) { + atime = B_FALSE; + do_atime = B_TRUE; + } + if (vfs_optionisset(vfsp, MNT_DONTBROWSE, NULL)) { + finderbrowse = B_FALSE; + do_finderbrowse = B_TRUE; + } + if (vfs_optionisset(vfsp, MNT_IGNORE_OWNERSHIP, NULL)) { + ignoreowner = B_TRUE; + do_ignoreowner = B_TRUE; + } + + /* + * nbmand is a special property. It can only be changed at + * mount time. + * + * This is weird, but it is documented to only be changeable + * at mount time. + */ + + /* + * Register property callbacks. + * + * It would probably be fine to just check for i/o error from + * the first prop_register(), but I guess I like to go + * overboard... + */ + ds = dmu_objset_ds(os); + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); + error = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); + // This appears to be PROP_PRIVATE, investigate if we want this + // ZOL calls this ACLTYPE + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, + zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); + + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_BROWSE), finderbrowse_changed_cb, zfsvfs); + error = error ? 
error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_IGNOREOWNER), + ignoreowner_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_MIMIC), mimic_changed_cb, zfsvfs); + + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); + if (error) + goto unregister; + + /* + * Invoke our callbacks to restore temporary mount options. + */ + if (do_readonly) + readonly_changed_cb(zfsvfs, readonly); + if (do_setuid) + setuid_changed_cb(zfsvfs, setuid); + if (do_exec) + exec_changed_cb(zfsvfs, exec); + if (do_devices) + devices_changed_cb(zfsvfs, devices); + if (do_xattr) + xattr_changed_cb(zfsvfs, xattr); + if (do_atime) + atime_changed_cb(zfsvfs, atime); + + if (do_finderbrowse) + finderbrowse_changed_cb(zfsvfs, finderbrowse); + if (do_ignoreowner) + ignoreowner_changed_cb(zfsvfs, ignoreowner); + + return (0); + +unregister: + dsl_prop_unregister_all(ds, zfsvfs); + return (error); +} + +/* + * Takes a dataset, a property, a value and that value's setpoint as + * found in the ZAP. Checks if the property has been changed in the vfs. + * If so, val and setpoint will be overwritten with updated content. + * Otherwise, they are left unchanged. + */ +int +zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, + char *setpoint) +{ + int error; + zfsvfs_t *zfvp; + mount_t vfsp; + objset_t *os; + uint64_t tmp = *val; + + error = dmu_objset_from_ds(ds, &os); + if (error != 0) + return (error); + + if (dmu_objset_type(os) != DMU_OST_ZFS) + return (EINVAL); + + mutex_enter(&os->os_user_ptr_lock); + zfvp = dmu_objset_get_user(os); + mutex_exit(&os->os_user_ptr_lock); + if (zfvp == NULL) + return (ESRCH); + + vfsp = zfvp->z_vfs; + + switch (zfs_prop) { + case ZFS_PROP_ATIME: +// if (vfsp->vfs_do_atime) +// tmp = vfsp->vfs_atime; + break; + case ZFS_PROP_RELATIME: +// if (vfsp->vfs_do_relatime) +// tmp = vfsp->vfs_relatime; + break; + case ZFS_PROP_DEVICES: +// if (vfsp->vfs_do_devices) +// tmp = vfsp->vfs_devices; + break; + case ZFS_PROP_EXEC: +// if (vfsp->vfs_do_exec) +// tmp = vfsp->vfs_exec; + break; + case ZFS_PROP_SETUID: +// if (vfsp->vfs_do_setuid) +// tmp = vfsp->vfs_setuid; + break; + case ZFS_PROP_READONLY: +// if (vfsp->vfs_do_readonly) +// tmp = vfsp->vfs_readonly; + break; + case ZFS_PROP_XATTR: +// if (vfsp->vfs_do_xattr) +// tmp = vfsp->vfs_xattr; + break; + case ZFS_PROP_NBMAND: +// if (vfsp->vfs_do_nbmand) +// tmp = vfsp->vfs_nbmand; + break; + default: + return (ENOENT); + } + + if (tmp != *val) { + (void) strlcpy(setpoint, "temporary", ZFS_MAX_DATASET_NAME_LEN); + *val = tmp; + } + return (0); +} + + +/* + * Associate this zfsvfs with the given objset, which must be owned. + * This will cache a bunch of on-disk state from the objset in the + * zfsvfs. + */ +static int +zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) +{ + int error; + uint64_t val; + + zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; + zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; + zfsvfs->z_os = os; + + /* Volume status "all ok" */ + zfsvfs->z_notification_conditions = 0; + zfsvfs->z_freespace_notify_warninglimit = 0; + zfsvfs->z_freespace_notify_dangerlimit = 0; + zfsvfs->z_freespace_notify_desiredlevel = 0; + + error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); + if (error != 0) + return (error); + if (zfsvfs->z_version > + zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { + (void) printf("Can't mount a version %lld file system " + "on a version %lld pool\n. 
Pool must be upgraded to mount " + "this file system.\n", (u_longlong_t)zfsvfs->z_version, + (u_longlong_t)spa_version(dmu_objset_spa(os))); + return (SET_ERROR(ENOTSUP)); + } + error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); + if (error != 0) + return (error); + zfsvfs->z_norm = (int)val; + + error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); + if (error != 0) + return (error); + zfsvfs->z_utf8 = (val != 0); + + error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); + if (error != 0) + return (error); + zfsvfs->z_case = (uint_t)val; + + error = zfs_get_zplprop(os, ZFS_PROP_ACLMODE, &val); + if (error != 0) + return (error); + zfsvfs->z_acl_mode = (uint_t)val; + + zfs_get_zplprop(os, ZFS_PROP_LASTUNMOUNT, &val); + zfsvfs->z_last_unmount_time = val; + + /* + * Fold case on file systems that are always or sometimes case + * insensitive. + */ + if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || + zfsvfs->z_case == ZFS_CASE_MIXED) + zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; + + zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); + + uint64_t sa_obj = 0; + if (zfsvfs->z_use_sa) { + /* should either have both of these objects or none */ + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, + &sa_obj); + + if (error != 0) + return (error); + + error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); + if ((error == 0) && (val == ZFS_XATTR_SA)) + zfsvfs->z_xattr_sa = B_TRUE; + } + + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zfsvfs->z_attr_table); + if (error != 0) + return (error); + + if (zfsvfs->z_version >= ZPL_VERSION_SA) + sa_register_update_callback(os, zfs_sa_upgrade); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, + &zfsvfs->z_root); + if (error != 0) + return (error); + ASSERT(zfsvfs->z_root != 0); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, + &zfsvfs->z_unlinkedobj); + if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], + 8, 1, &zfsvfs->z_userquota_obj); + if (error == ENOENT) + zfsvfs->z_userquota_obj = 0; + else if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], + 8, 1, &zfsvfs->z_groupquota_obj); + if (error == ENOENT) + zfsvfs->z_groupquota_obj = 0; + else if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, + &zfsvfs->z_fuid_obj); + if (error == ENOENT) + zfsvfs->z_fuid_obj = 0; + else if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, + &zfsvfs->z_shares_dir); + if (error == ENOENT) + zfsvfs->z_shares_dir = 0; + else if (error != 0) + return (error); + + return (0); +} + +int +zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) +{ + objset_t *os; + zfsvfs_t *zfsvfs; + int error; + + zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); + + /* + * We claim to always be readonly so we can open snapshots; + * other ZPL code will prevent us from writing to snapshots. 
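+ *
+ * Concretely, the dmu_objset_own() call below always asks for a
+ * read-only hold on the objset; whether the resulting mount is
+ * actually writable is decided later from the readonly property
+ * and mount flags via readonly_changed_cb().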
+ */ + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE, + zfsvfs, &os); + if (error != 0) { + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + return (error); + } + + error = zfsvfs_create_impl(zfvp, zfsvfs, os); + if (error != 0) { + dmu_objset_disown(os, B_TRUE, zfsvfs); + } + return (error); +} + +int +zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) +{ + int error; + + zfsvfs->z_vfs = NULL; + zfsvfs->z_parent = zfsvfs; + + mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), + offsetof(znode_t, z_link_node)); + + zfsvfs->z_ctldir_startid = ZFSCTL_INO_SNAPDIRS; + + rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); + + rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); + + int size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1), + ZFS_OBJ_MTX_MAX); + zfsvfs->z_hold_size = size; + zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, + KM_SLEEP); + zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP); + for (int i = 0; i != size; i++) { + avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare, + sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node)); + mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); + } + + rw_init(&zfsvfs->z_hardlinks_lock, NULL, RW_DEFAULT, NULL); + avl_create(&zfsvfs->z_hardlinks, hardlinks_compare, + sizeof (hardlinks_t), offsetof(hardlinks_t, hl_node)); + avl_create(&zfsvfs->z_hardlinks_linkid, hardlinks_compare_linkid, + sizeof (hardlinks_t), offsetof(hardlinks_t, hl_node_linkid)); + zfsvfs->z_rdonly = 0; + + mutex_init(&zfsvfs->z_drain_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zfsvfs->z_drain_cv, NULL, CV_DEFAULT, NULL); + + error = zfsvfs_init(zfsvfs, os); + if (error != 0) { + *zfvp = NULL; + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + return (error); + } + + *zfvp = zfsvfs; + return (0); +} + +static int +zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) +{ + int error; + boolean_t readonly = vfs_isrdonly(zfsvfs->z_vfs); + + error = zfs_register_callbacks(zfsvfs->z_vfs); + if (error) + return (error); + + zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); + + /* + * If we are not mounting (ie: online recv), then we don't + * have to worry about replaying the log as we blocked all + * operations out since we closed the ZIL. + */ + if (mounting) { + + /* + * During replay we remove the read only flag to + * allow replays to succeed. + */ + + if (readonly != 0) + readonly_changed_cb(zfsvfs, B_FALSE); + else + if (!zfs_vnop_skip_unlinked_drain) + zfs_unlinked_drain(zfsvfs); + + /* + * Parse and replay the intent log. + * + * Because of ziltest, this must be done after + * zfs_unlinked_drain(). (Further note: ziltest + * doesn't use readonly mounts, where + * zfs_unlinked_drain() isn't called.) This is because + * ziltest causes spa_sync() to think it's committed, + * but actually it is not, so the intent log contains + * many txg's worth of changes. + * + * In particular, if object N is in the unlinked set in + * the last txg to actually sync, then it could be + * actually freed in a later txg and then reallocated + * in a yet later txg. This would write a "create + * object N" record to the intent log. Normally, this + * would be fine because the spa_sync() would have + * written out the fact that object N is free, before + * we could write the "create object N" intent log + * record. 
+ * + * But when we are in ziltest mode, we advance the "open + * txg" without actually spa_sync()-ing the changes to + * disk. So we would see that object N is still + * allocated and in the unlinked set, and there is an + * intent log record saying to allocate it. + */ + if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { + if (zil_replay_disable) { + zil_destroy(zfsvfs->z_log, B_FALSE); + } else { + zfsvfs->z_replay = B_TRUE; + zil_replay(zfsvfs->z_os, zfsvfs, + zfs_replay_vector); + zfsvfs->z_replay = B_FALSE; + } + } + + /* restore readonly bit */ + if (readonly != 0) + readonly_changed_cb(zfsvfs, B_TRUE); + } + + /* + * Set the objset user_ptr to track its zfsvfs. + */ + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); + dmu_objset_set_user(zfsvfs->z_os, zfsvfs); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); + + return (0); +} + +extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ + +void +zfsvfs_free(zfsvfs_t *zfsvfs) +{ + int i, size = zfsvfs->z_hold_size; + + dprintf("+zfsvfs_free\n"); + + zfs_fuid_destroy(zfsvfs); + + cv_destroy(&zfsvfs->z_drain_cv); + mutex_destroy(&zfsvfs->z_drain_lock); + mutex_destroy(&zfsvfs->z_znodes_lock); + mutex_destroy(&zfsvfs->z_lock); + list_destroy(&zfsvfs->z_all_znodes); + rrm_destroy(&zfsvfs->z_teardown_lock); + rw_destroy(&zfsvfs->z_teardown_inactive_lock); + rw_destroy(&zfsvfs->z_fuid_lock); + + for (i = 0; i != size; i++) { + avl_destroy(&zfsvfs->z_hold_trees[i]); + mutex_destroy(&zfsvfs->z_hold_locks[i]); + } + kmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size); + kmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size); + + dprintf("ZFS: Unloading hardlink AVLtree: %lu\n", + avl_numnodes(&zfsvfs->z_hardlinks)); + void *cookie = NULL; + hardlinks_t *hardlink; + rw_destroy(&zfsvfs->z_hardlinks_lock); + while ((hardlink = avl_destroy_nodes(&zfsvfs->z_hardlinks_linkid, + &cookie))) { + } + cookie = NULL; + while ((hardlink = avl_destroy_nodes(&zfsvfs->z_hardlinks, &cookie))) { + kmem_free(hardlink, sizeof (*hardlink)); + } + avl_destroy(&zfsvfs->z_hardlinks); + avl_destroy(&zfsvfs->z_hardlinks_linkid); + + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + dprintf("-zfsvfs_free\n"); +} + +static void +zfs_set_fuid_feature(zfsvfs_t *zfsvfs) +{ + zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); + if (zfsvfs->z_vfs) { +#if 0 + if (zfsvfs->z_use_fuids) { + vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); + vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); + } else { + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); + vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE); + } +#endif + } + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); +} + + +static int +zfs_domount(struct mount *vfsp, dev_t mount_dev, char *osname, char *options, + vfs_context_t ctx) +{ + int error = 0; + zfsvfs_t *zfsvfs; + uint64_t mimic = 0; + struct timeval tv; + + ASSERT(vfsp); + ASSERT(osname); + + error = zfsvfs_create(osname, B_FALSE, &zfsvfs); + if (error) + return (error); + zfsvfs->z_vfs = vfsp; + + error = zfsvfs_parse_options(options, zfsvfs->z_vfs); + if (error) + goto out; + + zfsvfs->z_rdev = mount_dev; + + /* HFS 
sets this prior to mounting */ + vfs_setflags(vfsp, (uint64_t)((unsigned int)MNT_DOVOLFS)); + /* Advisory locking should be handled at the VFS layer */ + vfs_setlocklocal(vfsp); + + /* + * Record the mount time (for Spotlight) + */ + microtime(&tv); + zfsvfs->z_mount_time = tv.tv_sec; + + vfs_setfsprivate(vfsp, zfsvfs); + + /* + * The fsid is 64 bits, composed of an 8-bit fs type, which + * separates our fsid from any other filesystem types, and a + * 56-bit objset unique ID. The objset unique ID is unique to + * all objsets open on this system, provided by unique_create(). + * The 8-bit fs type must be put in the low bits of fsid[1] + * because that's where other Solaris filesystems put it. + */ + + error = dsl_prop_get_integer(osname, "com.apple.mimic", &mimic, NULL); + if (zfsvfs->z_rdev) { + struct vfsstatfs *vfsstatfs; + vfsstatfs = vfs_statfs(vfsp); + vfsstatfs->f_fsid.val[0] = zfsvfs->z_rdev; + vfsstatfs->f_fsid.val[1] = vfs_typenum(vfsp); + } else { + // Otherwise, ask VFS to give us a random unique one. + vfs_getnewfsid(vfsp); + struct vfsstatfs *vfsstatfs; + vfsstatfs = vfs_statfs(vfsp); + zfsvfs->z_rdev = vfsstatfs->f_fsid.val[0]; + } + + /* + * If we are readonly (ie, waiting for rootmount) we need to reply + * honestly, so launchd runs fsck_zfs and mount_zfs + */ + if (mimic) { + struct vfsstatfs *vfsstatfs; + vfsstatfs = vfs_statfs(vfsp); + strlcpy(vfsstatfs->f_fstypename, "hfs", MFSTYPENAMELEN); + } + + /* + * Set features for file system. + */ + zfs_set_fuid_feature(zfsvfs); + + if (dmu_objset_is_snapshot(zfsvfs->z_os)) { + uint64_t pval; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; + zfsvfs_t *fs_zfsvfs; + + dmu_fsname(osname, fsname); + error = getzfsvfs(fsname, &fs_zfsvfs); + if (error == 0) { + if (fs_zfsvfs->z_unmounted) + error = SET_ERROR(EINVAL); + vfs_unbusy(fs_zfsvfs->z_vfs); + } + if (error) { + printf("file system '%s' is unmounted : error %d\n", + fsname, + error); + goto out; + } + + atime_changed_cb(zfsvfs, B_FALSE); + readonly_changed_cb(zfsvfs, B_TRUE); + if ((error = dsl_prop_get_integer(osname, "xattr", &pval, + NULL))) + goto out; + xattr_changed_cb(zfsvfs, pval); + zfsvfs->z_issnap = B_TRUE; + zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; + + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); + dmu_objset_set_user(zfsvfs->z_os, zfsvfs); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); + + zfsctl_mount_signal(osname, B_TRUE); + + } else { + if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) + goto out; + } + + vfs_setflags(vfsp, (uint64_t)((unsigned int)MNT_JOURNALED)); + + if ((vfs_flags(vfsp) & MNT_ROOTFS) != 0) { + /* Root FS */ + vfs_clearflags(vfsp, + (uint64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); + vfs_clearflags(vfsp, + (uint64_t)((unsigned int)MNT_IGNORE_OWNERSHIP)); + } + +#if 1 // Want .zfs or not + if (!zfsvfs->z_issnap) { + zfsctl_create(zfsvfs); + } +#endif + +out: + if (error) { + vfs_setfsprivate(vfsp, NULL); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); + zfsvfs_free(zfsvfs); + } else { + atomic_inc_32(&zfs_active_fs_count); + } + + return (error); +} + +void +zfs_unregister_callbacks(zfsvfs_t *zfsvfs) +{ + objset_t *os = zfsvfs->z_os; + + /* + * Unregister properties. + */ + if (!dmu_objset_is_snapshot(os)) { + dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); + } +} + +/* + * zfs_vfs_mountroot + * Given a device vnode created by vfs_mountroot bdevvp, + * and with the root pool already imported, root mount the + * dataset specified in the pool's bootfs property. 
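+ *
+ * In outline: take the first imported pool from the namespace
+ * (spa_next), translate its bootfs object number to a dataset
+ * name with dsl_dsobj_to_dsname(), then zfs_domount() that
+ * dataset read-only; it is remounted read-write later once
+ * userland is up.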
+ * + * Inputs: + * mp: VFS mount struct + * devvp: device vnode, currently only used to retrieve the + * dev_t for the fsid. Could vnode_get, vnode_ref, vnode_put, + * with matching get/rele/put in zfs_vfs_umount, but this is + * already done by XNU as well. + * ctx: VFS context, unused. + * + * Return: + * 0 on success, positive int on failure. + */ +int +zfs_vfs_mountroot(struct mount *mp, struct vnode *devvp, vfs_context_t ctx) +{ + /* + * static int zfsrootdone = 0; + */ + zfsvfs_t *zfsvfs = NULL; + spa_t *spa = 0; + char *zfs_bootfs = 0; + dev_t dev = 0; + int error = EINVAL; + + printf("ZFS: %s\n", __func__); + ASSERT(mp); + ASSERT(devvp); + ASSERT(ctx); + if (!mp || !devvp || !ctx) { + cmn_err(CE_NOTE, "%s: missing one of mp %p devvp %p" + " or ctx %p", __func__, mp, devvp, ctx); + return (EINVAL); + } + + /* Look up bootfs variable from pool here */ + zfs_bootfs = kmem_alloc(MAXPATHLEN, KM_SLEEP); + if (!zfs_bootfs) { + cmn_err(CE_NOTE, "%s: bootfs alloc failed", + __func__); + return (ENOMEM); + } + + mutex_enter(&spa_namespace_lock); + spa = spa_next(NULL); + if (!spa) { + mutex_exit(&spa_namespace_lock); + cmn_err(CE_NOTE, "%s: no pool available", + __func__); + goto out; + } + + error = dsl_dsobj_to_dsname(spa_name(spa), + spa_bootfs(spa), zfs_bootfs); + if (error != 0) { + mutex_exit(&spa_namespace_lock); + cmn_err(CE_NOTE, "%s: bootfs to name error %d", + __func__, error); + goto out; + } + mutex_exit(&spa_namespace_lock); + + /* + * By setting the dev_t value in the mount vfsp, + * mount_zfs will be called with the /dev/diskN + * proxy, but we can leave the dataset name in + * the mountedfrom field + */ + dev = vnode_specrdev(devvp); + + printf("Setting readonly\n"); + + if ((error = zfs_domount(mp, dev, zfs_bootfs, NULL, ctx)) != 0) { + printf("zfs_domount: error %d\n", error); + goto out; + } + + zfsvfs = (zfsvfs_t *)vfs_fsprivate(mp); + ASSERT(zfsvfs); + if (!zfsvfs) { + cmn_err(CE_NOTE, "missing zfsvfs"); + goto out; + } + + /* Set this mount to read-only */ + zfsvfs->z_rdonly = 1; + + /* + * Due to XNU mount flags, readonly gets set off for a short + * while, which means mimic will kick in if enabled. But we need + * to reply with true "zfs" until root has been remounted RW, so + * that launchd tries to run mount_zfs instead of mount_hfs + */ + mimic_changed_cb(zfsvfs, B_FALSE); + + /* + * Leave rootvp held. The root file system is never unmounted. + * + * XXX APPLE + * xnu will in fact call vfs_unmount on the root filesystem + * during shutdown/reboot. + */ + +out: + + if (zfs_bootfs) { + kmem_free(zfs_bootfs, MAXPATHLEN); + } + return (error); + +} + +/*ARGSUSED*/ +int +zfs_vfs_mount(struct mount *vfsp, vnode_t *mvp /* devvp */, + user_addr_t data, vfs_context_t context) +{ + char *osname = NULL; + char *options = NULL; + uint64_t flags = vfs_flags(vfsp); + int error = 0; + int rdonly = 0; + int mflag = 0; + char *proxy = NULL; + struct zfs_mount_args mnt_args; + size_t osnamelen = 0; + uint32_t cmdflags = 0; + + cmdflags = (uint32_t)vfs_flags(vfsp) & MNT_CMDFLAGS; + rdonly = vfs_isrdonly(vfsp); + + if (!data) { + /* + * From 10.12, if you set VFS_TBLCANMOUNTROOT, XNU will + * call vfs_mountroot if set (and we can not set it), OR + * call vfs_mount if not set. Since data is always passed NULL + * in this case, we know we are supposed to call mountroot. + */ + dprintf("ZFS: vfs_mount -> vfs_mountroot\n"); + return (zfs_vfs_mountroot(vfsp, mvp, context)); + } + + /* + * Get the objset name (the "special" mount argument). 
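+ *
+ * The fspec string from struct zfs_mount_args is copied in from
+ * userland (via a user32_addr_t when the caller is 32-bit), and
+ * if it names a /dev/diskN proxy device it is translated back to
+ * the dataset name with zfs_osx_proxy_get_osname() before the
+ * mount proceeds.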
+ */ + if (data) { + + // Clear the struct, so that "flags" is null if only given path. + bzero(&mnt_args, sizeof (mnt_args)); + + osname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + if (vfs_context_is64bit(context)) { + if ((error = ddi_copyin((void *)data, + (caddr_t)&mnt_args, sizeof (mnt_args), 0))) { + printf("%s: error on mnt_args copyin %d\n", + __func__, error); + goto out; + } + } else { + user32_addr_t tmp; + if ((error = ddi_copyin((void *)data, + (caddr_t)&tmp, sizeof (tmp), 0))) { + printf("%s: error on mnt_args copyin32 %d\n", + __func__, error); + goto out; + } + /* munge into LP64 addr */ + mnt_args.fspec = (char *)CAST_USER_ADDR_T(tmp); + } + + // Copy over the string + if ((error = ddi_copyinstr((const void *)mnt_args.fspec, osname, + MAXPATHLEN, &osnamelen))) { + printf("%s: error on osname copyin %d\n", + __func__, error); + if (!mvp) + goto out; + } + } + + proxy = kmem_alloc(MAXPATHLEN, KM_SLEEP); + if (!proxy) { + dprintf("%s proxy string alloc failed\n", __func__); + goto out; + } + *proxy = 0; + + /* + * Translate /dev/disk path into dataset name + * After this; + * "proxy" will have "/dev/disk" (IF given) + * "osname" has the dataset name as usual + */ + if (strncmp(osname, "/dev/disk", 9) == 0) { + strlcpy(proxy, osname, MAXPATHLEN); + error = zfs_osx_proxy_get_osname(osname, + osname, MAXPATHLEN); + if (error != 0) { + printf("%s couldn't get dataset from %s\n", + __func__, osname); + error = ENOENT; + goto out; + } + dprintf("%s got new osname %s\n", __func__, osname); + } + + if (mnt_args.struct_size == sizeof (mnt_args)) { + mflag = mnt_args.mflag; + options = kmem_alloc(mnt_args.optlen, KM_SLEEP); + error = ddi_copyin((const void *)mnt_args.optptr, + (caddr_t)options, mnt_args.optlen, 0); + } + + if (mflag & MS_RDONLY) { + dprintf("%s: adding MNT_RDONLY\n", __func__); + flags |= MNT_RDONLY; + } + + if (mflag & MS_OVERLAY) { + dprintf("%s: adding MNT_UNION\n", __func__); + flags |= MNT_UNION; + } + + if (mflag & MS_FORCE) { + dprintf("%s: adding MNT_FORCE\n", __func__); + flags |= MNT_FORCE; + } + + if (mflag & MS_REMOUNT) { + dprintf("%s: adding MNT_UPDATE on MS_REMOUNT\n", __func__); + flags |= MNT_UPDATE; + } + + vfs_setflags(vfsp, (uint64_t)flags); + + /* + * When doing a remount, we simply refresh our temporary properties + * according to those options set in the current VFS options. 
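+ *
+ * Three remount cases are handled below: MNT_RELOAD after fsck
+ * (nothing to do), a downgrade to read-only, and an upgrade back
+ * to read-write; the last two toggle readonly_changed_cb() and
+ * re-register the property callbacks.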
+ */ + if (cmdflags & MNT_UPDATE) { + + if (cmdflags & MNT_RELOAD) { + printf("%s: reload after fsck\n", __func__); + error = 0; + goto out; + } + + /* refresh mount options */ + zfsvfs_t *zfsvfs = vfs_fsprivate(vfsp); + ASSERT(zfsvfs); + + if (zfsvfs->z_rdonly == 0 && (flags & MNT_RDONLY || + vfs_isrdonly(vfsp))) { + /* downgrade */ + dprintf("%s: downgrade requested\n", __func__); + zfsvfs->z_rdonly = 1; + readonly_changed_cb(zfsvfs, B_TRUE); + zfs_unregister_callbacks(zfsvfs); + error = zfs_register_callbacks(vfsp); + if (error) { + printf("%s: remount returned %d", + __func__, error); + } + } + + // if (zfsvfs->z_rdonly != 0 && vfs_iswriteupgrade(vfsp)) { + if (vfs_iswriteupgrade(vfsp)) { + /* upgrade */ + dprintf("%s: upgrade requested\n", __func__); + zfsvfs->z_rdonly = 0; + readonly_changed_cb(zfsvfs, B_FALSE); + zfs_unregister_callbacks(zfsvfs); + error = zfs_register_callbacks(vfsp); + if (error) { + printf("%s: remount returned %d", + __func__, error); + } + } + + goto out; + } + + if (vfs_fsprivate(vfsp) != NULL) { + printf("already mounted\n"); + error = 0; + goto out; + } + + error = zfs_domount(vfsp, 0, osname, options, context); + if (error) { + printf("%s: zfs_domount returned %d\n", + __func__, error); + goto out; + } + + +out: + + if (error == 0) { + + /* Indicate to VFS that we support ACLs. */ + vfs_setextendedsecurity(vfsp); + + // Set /dev/disk name if we have one, otherwise, datasetname + vfs_mountedfrom(vfsp, proxy && *proxy ? proxy : osname); + + } + + if (error) + printf("zfs_vfs_mount: error %d\n", error); + + if (osname) + kmem_free(osname, MAXPATHLEN); + + if (proxy) + kmem_free(proxy, MAXPATHLEN); + + if (options) + kmem_free(options, mnt_args.optlen); + + return (error); +} + + +int +zfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, + __unused vfs_context_t context) +{ + zfsvfs_t *zfsvfs = vfs_fsprivate(mp); + uint64_t refdbytes, availbytes, usedobjs, availobjs; + uint64_t log_blksize; + uint64_t log_blkcnt; + + // dprintf("vfs_getattr\n"); + + ZFS_ENTER(zfsvfs); + + /* + * Finder will show the old/incorrect size, we can force a sync of the + * pool to make it correct, but that has side effects which are + * undesirable. + */ + /* txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); */ + + dmu_objset_space(zfsvfs->z_os, + &refdbytes, &availbytes, &usedobjs, &availobjs); + + VFSATTR_RETURN(fsap, f_objcount, usedobjs); + VFSATTR_RETURN(fsap, f_maxobjcount, 0x7fffffffffffffff); + /* + * Carbon depends on f_filecount and f_dircount so + * make up some values based on total objects. + */ + VFSATTR_RETURN(fsap, f_filecount, usedobjs - (usedobjs / 4)); + VFSATTR_RETURN(fsap, f_dircount, usedobjs / 4); + + /* + * Model after HFS in working out if we should use the legacy size + * 512, or go to 4096. Note that XNU only likes those two + * blocksizes, so we don't use the ZFS recordsize + */ + log_blkcnt = (u_int64_t)((refdbytes + availbytes) >> SPA_MINBLOCKSHIFT); + log_blksize = (log_blkcnt > 0x000000007fffffff) ? + 4096 : (1 << SPA_MINBLOCKSHIFT); + + /* + * The underlying storage pool actually uses multiple block sizes. + * We report the fragsize as the smallest block size we support, + * and we report our blocksize as the filesystem's maximum blocksize. + */ + VFSATTR_RETURN(fsap, f_bsize, log_blksize); + VFSATTR_RETURN(fsap, f_iosize, zfsvfs->z_max_blksz); + + /* + * The following report "total" blocks of various kinds in the + * file system, but reported in terms of f_frsize - the + * "fragment" size. 
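+ *
+ * log_blksize was chosen above the way HFS does it: with more
+ * than 0x7fffffff 512-byte blocks (roughly 1 TiB of space) the
+ * counts below are expressed in 4096-byte units, otherwise in
+ * the legacy 512-byte units.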
+ */ + VFSATTR_RETURN(fsap, f_blocks, + (u_int64_t)((refdbytes + availbytes) / log_blksize)); + VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)(availbytes / log_blksize)); + VFSATTR_RETURN(fsap, f_bavail, fsap->f_bfree); + VFSATTR_RETURN(fsap, f_bused, fsap->f_blocks - fsap->f_bfree); + + /* + * statvfs() should really be called statufs(), because it assumes + * static metadata. ZFS doesn't preallocate files, so the best + * we can do is report the max that could possibly fit in f_files, + * and that minus the number actually used in f_ffree. + * For f_ffree, report the smaller of the number of object available + * and the number of blocks (each object will take at least a block). + */ + VFSATTR_RETURN(fsap, f_ffree, (u_int64_t)MIN(availobjs, fsap->f_bfree)); + VFSATTR_RETURN(fsap, f_files, fsap->f_ffree + usedobjs); + + if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) { + fsap->f_fsid.val[0] = zfsvfs->z_rdev; + fsap->f_fsid.val[1] = vfs_typenum(mp); + VFSATTR_SET_SUPPORTED(fsap, f_fsid); + } + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_HARDLINKS | // ZFS + VOL_CAP_FMT_SPARSE_FILES | // ZFS + VOL_CAP_FMT_2TB_FILESIZE | // ZFS + VOL_CAP_FMT_JOURNAL | VOL_CAP_FMT_JOURNAL_ACTIVE | // ZFS + VOL_CAP_FMT_SYMBOLICLINKS | // msdos.. + // ZFS has root times just fine + /* VOL_CAP_FMT_NO_ROOT_TIMES | */ + // Ask XNU to remember zero-runs, instead of writing + // zeros to it. + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_64BIT_OBJECT_IDS | + /* VOL_CAP_FMT_DECMPFS_COMPRESSION | */ + VOL_CAP_FMT_HIDDEN_FILES; + + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_ATTRLIST | // ZFS + VOL_CAP_INT_NFSEXPORT | // ZFS + VOL_CAP_INT_EXTENDED_SECURITY | // ZFS +#if NAMEDSTREAMS + VOL_CAP_INT_NAMEDSTREAMS | // ZFS +#endif + VOL_CAP_INT_EXTENDED_ATTR | // ZFS + VOL_CAP_INT_VOL_RENAME | // msdos.. + VOL_CAP_INT_ADVLOCK | + // ZFS does not yet have exchangedata (it's in a branch) + /* VOL_CAP_INT_EXCHANGEDATA| */ + // ZFS does not yet have copyfile + /* VOL_CAP_INT_COPYFILE| */ + // ZFS does not yet have allocate + /* VOL_CAP_INT_ALLOCATE| */ + VOL_CAP_INT_FLOCK; + + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = + 0; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = + 0; + + /* + * This is the list of valid capabilities at time of + * compile. 
The valid list should have them all defined + * and the "capability" list above should enable only + * those we have implemented + */ + fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_JOURNAL_ACTIVE | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_OPENDENYMODES | + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_64BIT_OBJECT_IDS | + VOL_CAP_FMT_NO_VOLUME_SIZES | + VOL_CAP_FMT_DECMPFS_COMPRESSION | + VOL_CAP_FMT_HIDDEN_FILES; + fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK | + VOL_CAP_INT_EXTENDED_ATTR | + VOL_CAP_INT_USERACCESS | +#if NAMEDSTREAMS + VOL_CAP_INT_NAMEDSTREAMS | +#endif + VOL_CAP_INT_MANLOCK; + + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; + + /* Check if we are case-sensitive */ + if (zfsvfs->z_case == ZFS_CASE_SENSITIVE) + fsap->f_capabilities.capabilities[ + VOL_CAPABILITIES_FORMAT] |= + VOL_CAP_FMT_CASE_SENSITIVE; + + /* Check if xattr is enabled */ + if (zfsvfs->z_xattr == B_TRUE) { + fsap->f_capabilities.capabilities[ + VOL_CAPABILITIES_INTERFACES] |= + VOL_CAP_INT_EXTENDED_ATTR; + } + + // Check if mimic is on + struct vfsstatfs *vfsstatfs; + vfsstatfs = vfs_statfs(zfsvfs->z_vfs); + if (strcmp(vfsstatfs->f_fstypename, "hfs") == 0) { + fsap->f_capabilities.capabilities[ + VOL_CAPABILITIES_FORMAT] |= + VOL_CAP_FMT_DECMPFS_COMPRESSION; + } + + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + fsap->f_attributes.validattr.commonattr = + ATTR_CMN_NAME | + ATTR_CMN_DEVID | + ATTR_CMN_FSID | + ATTR_CMN_OBJTYPE | + ATTR_CMN_OBJTAG | + ATTR_CMN_OBJID | + ATTR_CMN_OBJPERMANENTID | + ATTR_CMN_PAROBJID | + /* ATTR_CMN_SCRIPT | */ + ATTR_CMN_CRTIME | + ATTR_CMN_MODTIME | + ATTR_CMN_CHGTIME | + ATTR_CMN_ACCTIME | + /* ATTR_CMN_BKUPTIME | */ + ATTR_CMN_FNDRINFO | + ATTR_CMN_OWNERID | + ATTR_CMN_GRPID | + ATTR_CMN_ACCESSMASK | + ATTR_CMN_FLAGS | + ATTR_CMN_USERACCESS | + ATTR_CMN_EXTENDED_SECURITY | + ATTR_CMN_UUID | + ATTR_CMN_GRPUUID | +#ifdef ATTR_CMN_DOCUMENT_ID + ATTR_CMN_DOCUMENT_ID | +#endif +#ifdef ATTR_CMN_GEN_COUNT + ATTR_CMN_GEN_COUNT | +#endif + 0; + fsap->f_attributes.validattr.volattr = + ATTR_VOL_FSTYPE | + ATTR_VOL_SIGNATURE | + ATTR_VOL_SIZE | + ATTR_VOL_SPACEFREE | + ATTR_VOL_SPACEAVAIL | + ATTR_VOL_MINALLOCATION | + ATTR_VOL_ALLOCATIONCLUMP | + ATTR_VOL_IOBLOCKSIZE | + ATTR_VOL_OBJCOUNT | + ATTR_VOL_FILECOUNT | + ATTR_VOL_DIRCOUNT | + ATTR_VOL_MAXOBJCOUNT | + /* ATTR_VOL_MOUNTPOINT | */ + ATTR_VOL_NAME | + ATTR_VOL_MOUNTFLAGS | + /* ATTR_VOL_MOUNTEDDEVICE | */ + /* ATTR_VOL_ENCODINGSUSED | */ + ATTR_VOL_CAPABILITIES | + ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.validattr.dirattr = + ATTR_DIR_LINKCOUNT | + ATTR_DIR_ENTRYCOUNT | + ATTR_DIR_MOUNTSTATUS; + fsap->f_attributes.validattr.fileattr = + ATTR_FILE_LINKCOUNT | + ATTR_FILE_TOTALSIZE | + ATTR_FILE_ALLOCSIZE | + /* ATTR_FILE_IOBLOCKSIZE */ + ATTR_FILE_DEVTYPE | + /* ATTR_FILE_FORKCOUNT */ + /* ATTR_FILE_FORKLIST */ + 
ATTR_FILE_DATALENGTH | + ATTR_FILE_DATAALLOCSIZE | + ATTR_FILE_RSRCLENGTH | + ATTR_FILE_RSRCALLOCSIZE; + fsap->f_attributes.validattr.forkattr = 0; + fsap->f_attributes.nativeattr.commonattr = + ATTR_CMN_NAME | + ATTR_CMN_DEVID | + ATTR_CMN_FSID | + ATTR_CMN_OBJTYPE | + ATTR_CMN_OBJTAG | + ATTR_CMN_OBJID | + ATTR_CMN_OBJPERMANENTID | + ATTR_CMN_PAROBJID | + /* ATTR_CMN_SCRIPT | */ + ATTR_CMN_CRTIME | + ATTR_CMN_MODTIME | + /* ATTR_CMN_CHGTIME | */ /* Supported but not native */ + ATTR_CMN_ACCTIME | + /* ATTR_CMN_BKUPTIME | */ + /* ATTR_CMN_FNDRINFO | */ + ATTR_CMN_OWNERID | /* Supported but not native */ + ATTR_CMN_GRPID | /* Supported but not native */ + ATTR_CMN_ACCESSMASK | /* Supported but not native */ + ATTR_CMN_FLAGS | + ATTR_CMN_USERACCESS | + ATTR_CMN_EXTENDED_SECURITY | + ATTR_CMN_UUID | + ATTR_CMN_GRPUUID | +#ifdef ATTR_CMN_DOCUMENT_ID + ATTR_CMN_DOCUMENT_ID | +#endif +#ifdef ATTR_CMN_GEN_COUNT + ATTR_CMN_GEN_COUNT | +#endif + 0; + fsap->f_attributes.nativeattr.volattr = + ATTR_VOL_FSTYPE | + ATTR_VOL_SIGNATURE | + ATTR_VOL_SIZE | + ATTR_VOL_SPACEFREE | + ATTR_VOL_SPACEAVAIL | + ATTR_VOL_MINALLOCATION | + ATTR_VOL_ALLOCATIONCLUMP | + ATTR_VOL_IOBLOCKSIZE | + ATTR_VOL_OBJCOUNT | + ATTR_VOL_FILECOUNT | + ATTR_VOL_DIRCOUNT | + ATTR_VOL_MAXOBJCOUNT | + /* ATTR_VOL_MOUNTPOINT | */ + ATTR_VOL_NAME | + ATTR_VOL_MOUNTFLAGS | + /* ATTR_VOL_MOUNTEDDEVICE | */ + /* ATTR_VOL_ENCODINGSUSED */ + ATTR_VOL_CAPABILITIES | + ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.nativeattr.dirattr = 0; + fsap->f_attributes.nativeattr.fileattr = + /* ATTR_FILE_LINKCOUNT | */ /* Supported but not native */ + ATTR_FILE_TOTALSIZE | + ATTR_FILE_ALLOCSIZE | + /* ATTR_FILE_IOBLOCKSIZE */ + ATTR_FILE_DEVTYPE | + /* ATTR_FILE_FORKCOUNT */ + /* ATTR_FILE_FORKLIST */ + ATTR_FILE_DATALENGTH | + ATTR_FILE_DATAALLOCSIZE | + ATTR_FILE_RSRCLENGTH | + ATTR_FILE_RSRCALLOCSIZE; + fsap->f_attributes.nativeattr.forkattr = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + if (VFSATTR_IS_ACTIVE(fsap, f_create_time)) { + char osname[MAXNAMELEN]; + uint64_t value; + + // Get dataset name + dmu_objset_name(zfsvfs->z_os, osname); + dsl_prop_get_integer(osname, "CREATION", + &value, NULL); + fsap->f_create_time.tv_sec = value; + fsap->f_create_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_create_time); + } + if (VFSATTR_IS_ACTIVE(fsap, f_modify_time)) { + timestruc_t now; + uint64_t mtime[2]; + + gethrestime(&now); + ZFS_TIME_ENCODE(&now, mtime); + // fsap->f_modify_time = mtime; + ZFS_TIME_DECODE(&fsap->f_modify_time, mtime); + + VFSATTR_SET_SUPPORTED(fsap, f_modify_time); + } + /* + * For Carbon compatibility, pretend to support this legacy/unused + * attribute + */ + if (VFSATTR_IS_ACTIVE(fsap, f_backup_time)) { + fsap->f_backup_time.tv_sec = 0; + fsap->f_backup_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_backup_time); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { + char osname[MAXNAMELEN], *slash; + dmu_objset_name(zfsvfs->z_os, osname); + + slash = strrchr(osname, '/'); + if (slash) { + /* Advance past last slash */ + slash += 1; + } else { + /* Copy whole osname (pool root) */ + slash = osname; + } + strlcpy(fsap->f_vol_name, slash, MAXPATHLEN); + + VFSATTR_SET_SUPPORTED(fsap, f_vol_name); + dprintf("vfs_getattr: volume name '%s'\n", fsap->f_vol_name); + } + + /* If we are mimicking, we need userland know we are really ZFS */ + VFSATTR_RETURN(fsap, f_fssubtype, MNTTYPE_ZFS_SUBTYPE); + + /* + * According to joshade over at + * https://github.com/joshado/liberate-applefileserver/blob/ + * 
master/liberate.m + * the following values need to be returned for it to be considered + * by Apple's AFS. + */ + VFSATTR_RETURN(fsap, f_signature, 0x482b); /* "H+" in ascii */ + VFSATTR_RETURN(fsap, f_carbon_fsid, 0); + // Make up a UUID here, based on the name + if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) { + + char osname[MAXNAMELEN]; + int error; + // Get dataset name + dmu_objset_name(zfsvfs->z_os, osname); + dprintf("%s: osname [%s]\n", __func__, osname); + + if ((error = zfs_vfs_uuid_gen(osname, + fsap->f_uuid)) != 0) { + dprintf("%s uuid_gen error %d\n", __func__, error); + } else { + /* return f_uuid in fsap */ + VFSATTR_SET_SUPPORTED(fsap, f_uuid); + } + } + + uint64_t missing = 0; + missing = (fsap->f_active ^ (fsap->f_active & fsap->f_supported)); + if (missing != 0) { + dprintf("%s: asked %08llx reply %08llx missing %08llx\n", + __func__, fsap->f_active, fsap->f_supported, + missing); + } + + ZFS_EXIT(zfsvfs); + + return (0); +} + +int +zfs_vnode_lock(vnode_t *vp, int flags) +{ + int error; + + ASSERT(vp != NULL); + + error = vn_lock(vp, flags); + return (error); +} + +/* + * The ARC has requested that the filesystem drop entries from the dentry + * and inode caches. This can occur when the ARC needs to free meta data + * blocks but can't because they are all pinned by entries in these caches. + */ + +/* Get vnode for the root object of this mount */ +int +zfs_vfs_root(struct mount *mp, vnode_t **vpp, __unused vfs_context_t context) +{ + zfsvfs_t *zfsvfs = vfs_fsprivate(mp); + znode_t *rootzp = NULL; + int error; + + if (!zfsvfs) { + struct vfsstatfs *stat = 0; + if (mp) stat = vfs_statfs(mp); + if (stat) + printf("%s mp on %s from %s\n", __func__, + stat->f_mntonname, stat->f_mntfromname); + printf("%s no zfsvfs yet for mp\n", __func__); + return (EINVAL); + } + + ZFS_ENTER(zfsvfs); + + error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); + if (error == 0) + *vpp = ZTOV(rootzp); + else + *vpp = NULL; + + ZFS_EXIT(zfsvfs); + + if (error == 0 && *vpp != NULL) + if (vnode_vtype(*vpp) != VDIR) { + panic("%s: not a directory\n", __func__); + } + + return (error); +} + +/* + * Teardown the zfsvfs::z_os. + * + * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' + * and 'z_teardown_inactive_lock' held. + */ +static int +zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) +{ + znode_t *zp; + /* + * We have experienced deadlocks with dmu_recv_end happening between + * suspend_fs() and resume_fs(). Clearly something is not quite ready + * so we will wait for pools to be synced first. + * This is considered a temporary solution until we can work out + * the full issue. + */ + + zfs_unlinked_drain_stop_wait(zfsvfs); + + /* + * If someone has not already unmounted this file system, + * drain the iput_taskq to ensure all active references to the + * zfs_sb_t have been handled only then can it be safely destroyed. + */ + if (zfsvfs->z_os) { + /* + * If we're unmounting we have to wait for the list to + * drain completely. + * + * If we're not unmounting there's no guarantee the list + * will drain completely, but iputs run from the taskq + * may add the parents of dir-based xattrs to the taskq + * so we want to wait for these. + * + * We can safely read z_nr_znodes without locking because the + * VFS has already blocked operations which add to the + * z_all_znodes list and thus increment z_nr_znodes. 
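+ *
+ * On macOS the wait below is deliberately limited to a single
+ * pass over the zrele taskq, since looping until the list is
+ * empty has been seen to hang the unmount.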
+ */ + int round = 0; + while (!list_empty(&zfsvfs->z_all_znodes)) { + taskq_wait_outstanding(dsl_pool_zrele_taskq( + dmu_objset_pool(zfsvfs->z_os)), 0); + if (++round > 1 && !unmounting) + break; + break; /* Only loop once - osx can get stuck */ + } + } + + rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); + + if (!unmounting) { + /* + * We purge the parent filesystem's vfsp as the parent + * filesystem and all of its snapshots have their vnode's + * v_vfsp set to the parent's filesystem's vfsp. Note, + * 'z_parent' is self referential for non-snapshots. + */ + cache_purgevfs(zfsvfs->z_parent->z_vfs); + } + + /* + * Close the zil. NB: Can't close the zil while zfs_inactive + * threads are blocked as zil_close can call zfs_inactive. + */ + if (zfsvfs->z_log) { + zil_close(zfsvfs->z_log); + zfsvfs->z_log = NULL; + } + + rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); + + /* + * If we are not unmounting (ie: online recv) and someone already + * unmounted this file system while we were doing the switcheroo, + * or a reopen of z_os failed then just bail out now. + */ + if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { + rw_exit(&zfsvfs->z_teardown_inactive_lock); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); + return (SET_ERROR(EIO)); + } + /* + * At this point there are no VFS ops active, and any new VFS ops + * will fail with EIO since we have z_teardown_lock for writer (only + * relevant for forced unmount). + * + * Release all holds on dbufs. We also grab an extra reference to all + * the remaining inodes so that the kernel does not attempt to free + * any inodes of a suspended fs. This can cause deadlocks since the + * zfs_resume_fs() process may involve starting threads, which might + * attempt to free unreferenced inodes to free up memory for the new + * thread. + */ + if (!unmounting) { + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; + zp = list_next(&zfsvfs->z_all_znodes, zp)) { + if (zp->z_sa_hdl) + zfs_znode_dmu_fini(zp); + if (VN_HOLD(ZTOV(zp)) == 0) { + vnode_ref(ZTOV(zp)); + zp->z_suspended = B_TRUE; + VN_RELE(ZTOV(zp)); + } + } + mutex_exit(&zfsvfs->z_znodes_lock); + } + + /* + * If we are unmounting, set the unmounted flag and let new VFS ops + * unblock. zfs_inactive will have the unmounted behavior, and all + * other VFS ops will fail with EIO. + */ + if (unmounting) { + zfsvfs->z_unmounted = B_TRUE; + rw_exit(&zfsvfs->z_teardown_inactive_lock); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); + } + + /* + * z_os will be NULL if there was an error in attempting to reopen + * zfsvfs, so just return as the properties had already been + * unregistered and cached data had been evicted before. + */ + if (zfsvfs->z_os == NULL) + return (0); + + /* + * Unregister properties. + */ + zfs_unregister_callbacks(zfsvfs); + + /* + * Evict cached data + */ + /* + * Evict cached data. We must write out any dirty data before + * disowning the dataset. 
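+ *
+ * The code below checks every open txg for dirty data, syncs the
+ * pool if the filesystem is writable and anything is pending,
+ * then evicts the dbufs and cancels any waiters on the dsl_dir.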
+ */ + objset_t *os = zfsvfs->z_os; + boolean_t os_dirty = B_FALSE; + for (int t = 0; t < TXG_SIZE; t++) { + if (dmu_objset_is_dirty(os, t)) { + os_dirty = B_TRUE; + break; + } + } + if (!zfs_is_readonly(zfsvfs) && os_dirty) { + txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + } + dmu_objset_evict_dbufs(zfsvfs->z_os); + dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; + dsl_dir_cancel_waiters(dd); + + return (0); +} + +int +zfs_vfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) +{ + zfsvfs_t *zfsvfs = vfs_fsprivate(mp); + objset_t *os; + char osname[MAXNAMELEN]; + int ret; + /* cred_t *cr = (cred_t *)vfs_context_ucred(context); */ + int destroyed_zfsctl = 0; + + dprintf("%s\n", __func__); + + zfs_unlinked_drain_stop_wait(zfsvfs); + + /* Save osname for later */ + dmu_objset_name(zfsvfs->z_os, osname); + + /* + * We might skip the sync called in the unmount path, since + * zfs_vfs_sync() is generally ignoring xnu's calls, and alas, + * mount_isforce() is set AFTER that sync call, so we can not + * detect unmount is inflight. But why not just sync now, it + * is safe. Optionally, sync if (mount_isforce()); + */ + spa_sync_allpools(); + + /* + * We purge the parent filesystem's vfsp as the parent filesystem + * and all of its snapshots have their vnode's v_vfsp set to the + * parent's filesystem's vfsp. Note, 'z_parent' is self + * referential for non-snapshots. + */ + cache_purgevfs(zfsvfs->z_parent->z_vfs); + + /* + * Unmount any snapshots mounted under .zfs before unmounting the + * dataset itself. + * + * Unfortunately, XNU will check for mounts in preflight, and + * simply not call us at all if snapshots are mounted. + * We expect userland to unmount snapshots now. + */ + + ret = vflush(mp, NULLVP, SKIPSYSTEM); + + if (mntflags & MNT_FORCE) { + /* + * Mark file system as unmounted before calling + * vflush(FORCECLOSE). This way we ensure no future vnops + * will be called and risk operating on DOOMED vnodes. + */ + rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); + zfsvfs->z_unmounted = B_TRUE; + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); + } + + /* + * We must release ctldir before vflush on osx. + */ + if (zfsvfs->z_ctldir != NULL) { + destroyed_zfsctl = 1; + zfsctl_destroy(zfsvfs); + } + + /* + * Flush all the files. + */ + ret = vflush(mp, NULLVP, + (mntflags & MNT_FORCE) ? 
FORCECLOSE|SKIPSYSTEM : SKIPSYSTEM); + + if ((ret != 0) && !(mntflags & MNT_FORCE)) { + if (destroyed_zfsctl) + zfsctl_create(zfsvfs); + return (ret); + } + + /* If we are ourselves a snapshot */ + if (dmu_objset_is_snapshot(zfsvfs->z_os)) { + /* Wake up anyone waiting for unmount */ + zfsctl_mount_signal(osname, B_FALSE); + } + + if (!vfs_isrdonly(zfsvfs->z_vfs) && + spa_writeable(dmu_objset_spa(zfsvfs->z_os)) && + !(mntflags & MNT_FORCE)) { + /* Update the last-unmount time for Spotlight's next mount */ + timestruc_t now; + dmu_tx_t *tx; + int error; + uint64_t value; + + dprintf("ZFS: '%s' Updating spotlight LASTUNMOUNT property\n", + osname); + + gethrestime(&now); + zfsvfs->z_last_unmount_time = now.tv_sec; + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + value = zfsvfs->z_last_unmount_time; + error = zap_update(zfsvfs->z_os, MASTER_NODE_OBJ, + zfs_prop_to_name(ZFS_PROP_LASTUNMOUNT), + 8, 1, + &value, tx); + dmu_tx_commit(tx); + } + dprintf("ZFS: '%s' set lastunmount to 0x%lx (%d)\n", + osname, zfsvfs->z_last_unmount_time, error); + } + + /* + * Last chance to dump unreferenced system files. + */ + (void) vflush(mp, NULLVP, FORCECLOSE); + + VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); + os = zfsvfs->z_os; + + /* + * z_os will be NULL if there was an error in + * attempting to reopen zfsvfs. + */ + if (os != NULL) { + /* + * Unset the objset user_ptr. + */ + mutex_enter(&os->os_user_ptr_lock); + dmu_objset_set_user(os, NULL); + mutex_exit(&os->os_user_ptr_lock); + + /* + * Finally release the objset + */ + dmu_objset_disown(os, B_TRUE, zfsvfs); + } + + zfs_freevfs(zfsvfs->z_vfs); + + dprintf("zfs_osx_proxy_remove"); + zfs_osx_proxy_remove(osname); + + return (0); +} + +static int +zfs_vget_internal(zfsvfs_t *zfsvfs, ino64_t ino, vnode_t **vpp) +{ + znode_t *zp; + int err; + + printf("vget get %llu\n", ino); + + /* + * Check to see if we expect to find this in the hardlink avl tree of + * hashes. Use the MSB set high as indicator. + */ + hardlinks_t *findnode = NULL; + if ((1ULL<<31) & ino) { + hardlinks_t *searchnode; + avl_index_t loc; + + searchnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + + dprintf("ZFS: vget looking for (%llx,%llu)\n", ino, ino); + + searchnode->hl_linkid = ino; + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_READER); + findnode = avl_find(&zfsvfs->z_hardlinks_linkid, searchnode, + &loc); + rw_exit(&zfsvfs->z_hardlinks_lock); + + kmem_free(searchnode, sizeof (hardlinks_t)); + + if (findnode) { + dprintf("ZFS: vget found (%llu, %llu, %u): '%s'\n", + findnode->hl_parent, + findnode->hl_fileid, findnode->hl_linkid, + findnode->hl_name); + // Lookup the actual zp instead + ino = findnode->hl_fileid; + } // findnode + } // MSB set + + + /* We can not be locked during zget. */ + if (!ino) { + dprintf("%s: setting ino from %lld to 2\n", __func__, ino); + ino = 2; + } + + err = zfs_zget(zfsvfs, ino, &zp); + + if (err) { + dprintf("zget failed %d\n", err); + return (err); + } + + /* Don't expose EA objects! */ + if (zp->z_pflags & ZFS_XATTR) { + err = ENOENT; + goto out; + } + if (zp->z_unlinked) { + err = EINVAL; + goto out; + } + + *vpp = ZTOV(zp); + + err = zfs_vnode_lock(*vpp, 0 /* flags */); + + /* + * Spotlight requires that vap->va_name() is set when returning + * from vfs_vget, so that vfs_getrealpath() can succeed in returning + * a path to mds. 
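+ *
+ * The name is taken from the first of these that applies: the
+ * dataset name for the root znode, the hardlink AVL entry found
+ * above, the znode's cached z_name_cache, or a reverse ZAP
+ * lookup of the parent directory via zap_value_search().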
+ */ + char *name = kmem_alloc(MAXPATHLEN + 2, KM_SLEEP); + + /* Root can't lookup in ZAP */ + if (zp->z_id == zfsvfs->z_root) { + + dmu_objset_name(zfsvfs->z_os, name); + dprintf("vget: set root '%s'\n", name); + vnode_update_identity(*vpp, NULL, name, + strlen(name), 0, VNODE_UPDATE_NAME); + + } else { + uint64_t parent; + + // if its a hardlink cache + if (findnode) { + + dprintf("vget: updating vnode to '%s' parent %llu\n", + findnode->hl_name, findnode->hl_parent); + + vnode_update_identity(*vpp, + NULL, findnode->hl_name, + strlen(findnode->hl_name), 0, + VNODE_UPDATE_NAME|VNODE_UPDATE_PARENT); + mutex_enter(&zp->z_lock); + strlcpy(zp->z_name_cache, findnode->hl_name, PATH_MAX); + zp->z_finder_parentid = findnode->hl_parent; + mutex_exit(&zp->z_lock); + + + // If we already have the name, cached in zfs_vnop_lookup + } else if (zp->z_name_cache[0]) { + dprintf("vget: cached name '%s'\n", zp->z_name_cache); + vnode_update_identity(*vpp, NULL, zp->z_name_cache, + strlen(zp->z_name_cache), 0, + VNODE_UPDATE_NAME); + + /* If needed, if findnode is set, update the parentid */ + + } else { + + /* Lookup name from ID, grab parent */ + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parent, sizeof (parent)) == 0); + + if (zap_value_search(zfsvfs->z_os, parent, zp->z_id, + ZFS_DIRENT_OBJ(-1ULL), name) == 0) { + + dprintf("vget: set name '%s'\n", name); + vnode_update_identity(*vpp, NULL, name, + strlen(name), 0, + VNODE_UPDATE_NAME); + } else { + dprintf("vget: unable to get name for %llu\n", + zp->z_id); + } // !zap_search + } + } // rootid + + kmem_free(name, MAXPATHLEN + 2); + +out: + + if (err != 0) { + VN_RELE(ZTOV(zp)); + *vpp = NULL; + } + + dprintf("vget return %d\n", err); + return (err); +} + +/* + * Get a vnode from a file id (ignoring the generation) + * + * Use by NFS Server (readdirplus) and VFS (build_path) + */ +int +zfs_vfs_vget(struct mount *mp, ino64_t ino, vnode_t **vpp, + __unused vfs_context_t context) +{ + zfsvfs_t *zfsvfs = vfs_fsprivate(mp); + int error; + + printf("%s: %llu\n", __func__, ino); + + ZFS_ENTER(zfsvfs); + + /* We also need to handle (.zfs) and (.zfs/snapshot). */ + if ((ino == ZFSCTL_INO_ROOT) && (zfsvfs->z_ctldir != NULL)) { + if (VN_HOLD(zfsvfs->z_ctldir) == 0) { + *vpp = zfsvfs->z_ctldir; + error = 0; + } else { + error = ENOENT; + } + ZFS_EXIT(zfsvfs); + return (error); + } + + /* + * This one is trickier, we have no reference to it, but it is + * in the all list. A little expensive to search list, but at + * least "snapshot" is infrequently accessed + * We also need to check if it is a ".zfs/snapshot/$name" entry - + * luckily we keep the "lowest" ID seen, so we only need to check + * when it is in the range. + */ + if (zfsvfs->z_ctldir != NULL) { + + /* + * Either it is the snapdir itself, or one of the snapshot + * directories inside it + */ + if ((ino == ZFSCTL_INO_SNAPDIR) || + ((ino >= zfsvfs->z_ctldir_startid) && + (ino <= ZFSCTL_INO_SNAPDIRS))) { + znode_t *zp; + + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp; + zp = list_next(&zfsvfs->z_all_znodes, zp)) { + if (zp->z_id == ino) + break; + if (zp->z_id == ZFSCTL_INO_SHARES - ino) + break; + } + mutex_exit(&zfsvfs->z_znodes_lock); + + error = ENOENT; + if (zp != NULL) { + if (VN_HOLD(ZTOV(zp)) == 0) { + *vpp = ZTOV(zp); + error = 0; + } + } + + ZFS_EXIT(zfsvfs); + return (error); + } + } + + /* + * On Mac OS X we always export the root directory id as 2. 
+ * So we don't expect to see the real root directory id + * from zfs_vfs_vget KPI (unless of course the real id was + * already 2). + */ + ino = INO_XNUTOZFS(ino, zfsvfs->z_root); + + error = zfs_vget_internal(zfsvfs, ino, vpp); + + ZFS_EXIT(zfsvfs); + return (error); +} + +int +zfs_vfs_setattr(__unused struct mount *mp, __unused struct vfs_attr *fsap, + __unused vfs_context_t context) +{ + // 10a286 bits has an implementation of this: to set volume name. + return (ENOTSUP); +} + +/* + * NFS Server File Handle File ID + */ +typedef struct zfs_zfid { + uint8_t zf_object[8]; /* obj[i] = obj >> (8 * i) */ + uint8_t zf_gen[8]; /* gen[i] = gen >> (8 * i) */ +} zfs_zfid_t; + +/* + * File handle to vnode pointer + */ +int +zfs_vfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, + vnode_t **vpp, __unused vfs_context_t context) +{ + dprintf("%s\n", __func__); + zfsvfs_t *zfsvfs = vfs_fsprivate(mp); + zfs_zfid_t *zfid = (zfs_zfid_t *)fhp; + znode_t *zp; + uint64_t obj_num = 0; + uint64_t fid_gen = 0; + uint64_t zp_gen; + int i; + int error; + + *vpp = NULL; + + ZFS_ENTER(zfsvfs); + + if (fhlen < sizeof (zfs_zfid_t)) { + error = EINVAL; + goto out; + } + + /* + * Grab the object and gen numbers in an endian neutral manner + */ + for (i = 0; i < sizeof (zfid->zf_object); i++) + obj_num |= ((uint64_t)zfid->zf_object[i]) << (8 * i); + + for (i = 0; i < sizeof (zfid->zf_gen); i++) + fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); + + if ((error = zfs_zget(zfsvfs, obj_num, &zp))) { + goto out; + } + + zp_gen = zp->z_gen; + if (zp_gen == 0) + zp_gen = 1; + + if (zp->z_unlinked || zp_gen != fid_gen) { + vnode_put(ZTOV(zp)); + error = EINVAL; + goto out; + } + *vpp = ZTOV(zp); +out: + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Vnode pointer to File handle + * + * XXX Do we want to check the DSL sharenfs property? + */ +int +zfs_vfs_vptofh(vnode_t *vp, int *fhlenp, unsigned char *fhp, + __unused vfs_context_t context) +{ + dprintf("%s\n", __func__); + zfsvfs_t *zfsvfs = vfs_fsprivate(vnode_mount(vp)); + zfs_zfid_t *zfid = (zfs_zfid_t *)fhp; + znode_t *zp = VTOZ(vp); + uint64_t obj_num; + uint64_t zp_gen; + int i; + + if (*fhlenp < sizeof (zfs_zfid_t)) { + return (EOVERFLOW); + } + + ZFS_ENTER(zfsvfs); + + obj_num = zp->z_id; + zp_gen = zp->z_gen; + if (zp_gen == 0) + zp_gen = 1; + + /* + * Store the object and gen numbers in an endian neutral manner + */ + for (i = 0; i < sizeof (zfid->zf_object); i++) + zfid->zf_object[i] = (uint8_t)(obj_num >> (8 * i)); + + for (i = 0; i < sizeof (zfid->zf_gen); i++) + zfid->zf_gen[i] = (uint8_t)(zp_gen >> (8 * i)); + + *fhlenp = sizeof (zfs_zfid_t); + + ZFS_EXIT(zfsvfs); + return (0); +} + +/* + * Block out VOPs and close zfsvfs_t::z_os + * + * Note, if successful, then we return with the 'z_teardown_lock' and + * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying + * dataset and objset intact so that they can be atomically handed off during + * a subsequent rollback or recv operation and the resume thereafter. + */ +int +zfs_suspend_fs(zfsvfs_t *zfsvfs) +{ + int error; + + if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) + return (error); + + return (0); +} + +/* + * Reopen zfsvfs_t::z_os and release VOPs. + */ +int +zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) +{ + int err, err2; + znode_t *zp; + + ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); + ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); + + /* + * We already own this, so just update the objset_t, as the one we + * had before may have been evicted. 
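+ *
+ * The dataset is re-resolved to its objset under the pool
+ * config lock, and zfsvfs_init() then re-caches the on-disk
+ * state (SA layout, root object, quota objects) from it.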
+ */ + objset_t *os; + VERIFY3P(ds->ds_owner, ==, zfsvfs); + VERIFY(dsl_dataset_long_held(ds)); + dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); + dsl_pool_config_enter(dp, FTAG); + VERIFY0(dmu_objset_from_ds(ds, &os)); + dsl_pool_config_exit(dp, FTAG); + + err = zfsvfs_init(zfsvfs, os); + if (err != 0) + goto bail; + + ds->ds_dir->dd_activity_cancelled = B_FALSE; + VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); + + zfs_set_fuid_feature(zfsvfs); + + /* + * Attempt to re-establish all the active inodes with their + * dbufs. If a zfs_rezget() fails, then we unhash the inode + * and mark it stale. This prevents a collision if a new + * inode/object is created which must use the same inode + * number. The stale inode will be be released when the + * VFS prunes the dentry holding the remaining references + * on the stale inode. + */ + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp; + zp = list_next(&zfsvfs->z_all_znodes, zp)) { + err2 = zfs_rezget(zp); + if (err2) { + zp->z_is_stale = B_TRUE; + } + + /* see comment in zfs_suspend_fs() */ + if (zp->z_suspended) { + if (vnode_getwithref(ZTOV(zp)) == 0) { + vnode_rele(ZTOV(zp)); + zfs_zrele_async(zp); + zp->z_suspended = B_FALSE; + } + } + } + mutex_exit(&zfsvfs->z_znodes_lock); + + if (!vfs_isrdonly(zfsvfs->z_vfs) && !zfsvfs->z_unmounted) { + /* + * zfs_suspend_fs() could have interrupted freeing + * of dnodes. We need to restart this freeing so + * that we don't "leak" the space. + */ + zfs_unlinked_drain(zfsvfs); + } + + cache_purgevfs(zfsvfs->z_parent->z_vfs); + +bail: + /* release the VFS ops */ + rw_exit(&zfsvfs->z_teardown_inactive_lock); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); + + if (err) { + /* + * Since we couldn't setup the sa framework, try to force + * unmount this file system. + */ + if (zfsvfs->z_os) + zfs_vfs_unmount(zfsvfs->z_vfs, 0, NULL); + } + return (err); +} + + +void +zfs_freevfs(struct mount *vfsp) +{ + zfsvfs_t *zfsvfs = vfs_fsprivate(vfsp); + + dprintf("+freevfs\n"); + + vfs_setfsprivate(vfsp, NULL); + + zfsvfs_free(zfsvfs); + + atomic_dec_32(&zfs_active_fs_count); + dprintf("-freevfs\n"); +} + +void +zfs_init(void) +{ + + printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); + + /* + * Initialize .zfs directory structures + */ + zfsctl_init(); + + /* + * Initialize znode cache, vnode ops, etc... + */ + zfs_znode_init(); + + dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); + + /* Start arc_os - reclaim thread */ + arc_os_init(); + +} + +void +zfs_fini(void) +{ + arc_os_fini(); + zfsctl_fini(); + zfs_znode_fini(); +} + +int +zfs_busy(void) +{ + return (zfs_active_fs_count != 0); +} + +/* + * Release VOPs and unmount a suspended filesystem. + */ +int +zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) +{ + ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); + ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); + + /* + * We already own this, so just hold and rele it to update the + * objset_t, as the one we had before may have been evicted. + */ + objset_t *os; + VERIFY3P(ds->ds_owner, ==, zfsvfs); + VERIFY(dsl_dataset_long_held(ds)); + dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); + dsl_pool_config_enter(dp, FTAG); + VERIFY0(dmu_objset_from_ds(ds, &os)); + dsl_pool_config_exit(dp, FTAG); + zfsvfs->z_os = os; + + /* release the VOPs */ + rw_exit(&zfsvfs->z_teardown_inactive_lock); + rrm_exit(&zfsvfs->z_teardown_lock, FTAG); + + /* + * Try to force unmount this file system. 
+ */ + zfs_vfs_unmount(zfsvfs->z_vfs, 0, NULL); + zfsvfs->z_unmounted = B_TRUE; + return (0); +} + +int +zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) +{ + int error; + objset_t *os = zfsvfs->z_os; + dmu_tx_t *tx; + + if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) + return (SET_ERROR(EINVAL)); + + if (newvers < zfsvfs->z_version) + return (SET_ERROR(EINVAL)); + + if (zfs_spa_version_map(newvers) > + spa_version(dmu_objset_spa(zfsvfs->z_os))) + return (SET_ERROR(ENOTSUP)); + + tx = dmu_tx_create(os); + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); + if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, + ZFS_SA_ATTRS); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + } + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, + 8, 1, &newvers, tx); + + if (error) { + dmu_tx_commit(tx); + return (error); + } + + if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + uint64_t sa_obj; + + ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, + SPA_VERSION_SA); + sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + + error = zap_add(os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT(error == 0); + + VERIFY(0 == sa_set_sa_object(os, sa_obj)); + sa_register_update_callback(os, zfs_sa_upgrade); + } + + spa_history_log_internal(dmu_objset_spa(os), "upgrade", tx, + "oldver=%llu newver=%llu dataset = %llu", zfsvfs->z_version, + newvers, dmu_objset_id(os)); + + dmu_tx_commit(tx); + + zfsvfs->z_version = newvers; + os->os_version = newvers; + + zfs_set_fuid_feature(zfsvfs); + + return (0); +} + +/* + * Read a property stored within the master node. + */ +int +zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) +{ + uint64_t *cached_copy = NULL; + + /* + * Figure out where in the objset_t the cached copy would live, if it + * is available for the requested property. + */ + if (os != NULL) { + switch (prop) { + case ZFS_PROP_VERSION: + cached_copy = &os->os_version; + break; + case ZFS_PROP_NORMALIZE: + cached_copy = &os->os_normalization; + break; + case ZFS_PROP_UTF8ONLY: + cached_copy = &os->os_utf8only; + break; + case ZFS_PROP_CASE: + cached_copy = &os->os_casesensitivity; + break; + default: + break; + } + } + if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { + *value = *cached_copy; + return (0); + } + + /* + * If the property wasn't cached, look up the file system's value for + * the property. For the version property, we look up a slightly + * different string. + */ + const char *pname; + int error = ENOENT; + if (prop == ZFS_PROP_VERSION) { + pname = ZPL_VERSION_STR; + } else { + pname = zfs_prop_to_name(prop); + } + + if (os != NULL) { + ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); + error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); + } + + if (error == ENOENT) { + /* No value set, use the default value */ + switch (prop) { + case ZFS_PROP_VERSION: + *value = ZPL_VERSION; + break; + case ZFS_PROP_NORMALIZE: + case ZFS_PROP_UTF8ONLY: + *value = 0; + break; + case ZFS_PROP_CASE: + *value = ZFS_CASE_SENSITIVE; + break; + case ZFS_PROP_ACLMODE: + *value = ZFS_ACLTYPE_OFF; + break; + default: + return (error); + } + error = 0; + } + + /* + * If one of the methods for getting the property value above worked, + * copy it into the objset_t's cache. 
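zfs_get_zplprop() resolves a property in three steps: a per-objset cached copy, then the value stored in the master node, then a built-in default. A user-space sketch of that fallback chain (struct prop_store and PROP_UNINITIALIZED are stand-ins for the objset fields, not names from the patch):

#include <stdint.h>
#include <stdio.h>

#define PROP_UNINITIALIZED  UINT64_MAX  /* stand-in for OBJSET_PROP_UNINITIALIZED */

struct prop_store {
    uint64_t cached;    /* per-objset cached copy */
    int has_stored;     /* does the master node hold a value? */
    uint64_t stored;    /* value stored on disk */
    uint64_t def;       /* default when nothing is set */
};

static uint64_t
get_prop(struct prop_store *ps)
{
    if (ps->cached != PROP_UNINITIALIZED)
        return (ps->cached);        /* fast path */
    uint64_t v = ps->has_stored ? ps->stored : ps->def;
    ps->cached = v;                 /* warm the cache for next time */
    return (v);
}

int
main(void)
{
    struct prop_store ver = { PROP_UNINITIALIZED, 0, 0, 5 /* default version */ };
    printf("version=%llu\n", (unsigned long long)get_prop(&ver));
    return (0);
}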
+ */ + if (error == 0 && cached_copy != NULL) { + *cached_copy = *value; + } + + return (error); +} + +/* + * Return true if the coresponding vfs's unmounted flag is set. + * Otherwise return false. + * If this function returns true we know VFS unmount has been initiated. + */ +boolean_t +zfs_get_vfs_flag_unmounted(objset_t *os) +{ + zfsvfs_t *zfvp; + boolean_t unmounted = B_FALSE; + + ASSERT(dmu_objset_type(os) == DMU_OST_ZFS); + + mutex_enter(&os->os_user_ptr_lock); + zfvp = dmu_objset_get_user(os); + if (zfvp != NULL && zfvp->z_vfs != NULL && + (vfs_isunmount(zfvp->z_vfs))) + unmounted = B_TRUE; + mutex_exit(&os->os_user_ptr_lock); + + return (unmounted); +} diff --git a/module/os/macos/zfs/zfs_vnops.c b/module/os/macos/zfs/zfs_vnops.c new file mode 100644 index 0000000000..de33bbaeae --- /dev/null +++ b/module/os/macos/zfs/zfs_vnops.c @@ -0,0 +1,4563 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2015 by Chunwei Chen. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. + * Copyright (c) 2013, 2020 Jorgen Lundman + */ + +/* Portions Copyright 2007 Jeremy Teo */ +/* Portions Copyright 2010 Robert Milkowski */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int zfs_vnop_force_formd_normalized_output = 0; /* disabled by default */ + +#undef dprintf +#define dprintf printf + +/* + * Programming rules. + * + * Each vnode op performs some logical unit of work. To do this, the ZPL must + * properly lock its in-core state, create a DMU transaction, do the work, + * record this work in the intent log (ZIL), commit the DMU transaction, + * and wait for the intent log to commit if it is a synchronous operation. + * Moreover, the vnode ops must work in both normal and log replay context. + * The ordering of events is important to avoid deadlocks and references + * to freed memory. The example below illustrates the following Big Rules: + * + * (1) A check must be made in each zfs thread for a mounted file system. + * This is done avoiding races using ZFS_ENTER(zfsvfs). + * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes + * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros + * can return EIO from the calling function. 
+ * + * (2) zrele() should always be the last thing except for zil_commit() + * (if necessary) and ZFS_EXIT(). This is for 3 reasons: + * First, if it's the last reference, the vnode/znode + * can be freed, so the zp may point to freed memory. Second, the last + * reference will call zfs_zinactive(), which may induce a lot of work -- + * pushing cached pages (which acquires range locks) and syncing out + * cached atime changes. Third, zfs_zinactive() may require a new tx, + * which could deadlock the system if you were already holding one. + * If you must call zrele() within a tx then use zfs_zrele_async(). + * + * (3) All range locks must be grabbed before calling dmu_tx_assign(), + * as they can span dmu_tx_assign() calls. + * + * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to + * dmu_tx_assign(). This is critical because we don't want to block + * while holding locks. + * + * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This + * reduces lock contention and CPU usage when we must wait (note that if + * throughput is constrained by the storage, nearly every transaction + * must wait). + * + * Note, in particular, that if a lock is sometimes acquired before + * the tx assigns, and sometimes after (e.g. z_lock), then failing + * to use a non-blocking assign can deadlock the system. The scenario: + * + * Thread A has grabbed a lock before calling dmu_tx_assign(). + * Thread B is in an already-assigned tx, and blocks for this lock. + * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() + * forever, because the previous txg can't quiesce until B's tx commits. + * + * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, + * then drop all locks, call dmu_tx_wait(), and try again. On subsequent + * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, + * to indicate that this operation has already called dmu_tx_wait(). + * This will ensure that we don't retry forever, waiting a short bit + * each time. + * + * (5) If the operation succeeded, generate the intent log entry for it + * before dropping locks. This ensures that the ordering of events + * in the intent log matches the order in which they actually occurred. + * During ZIL replay the zfs_log_* functions will update the sequence + * number to indicate the zil transaction has replayed. + * + * (6) At the end of each vnode op, the DMU tx must always commit, + * regardless of whether there were any errors. + * + * (7) After dropping all locks, invoke zil_commit(zilog, foid) + * to ensure that synchronous semantics are provided when necessary. + * + * In general, this is how things should be ordered in each vnode op: + * + * ZFS_ENTER(zfsvfs); // exit if unmounted + * top: + * zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab()) + * rw_enter(...); // grab any other locks you need + * tx = dmu_tx_create(...); // get DMU tx + * dmu_tx_hold_*(); // hold each object you might modify + * error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + * if (error) { + * rw_exit(...); // drop locks + * zfs_dirent_unlock(dl); // unlock directory entry + * zrele(...); // release held znodes + * if (error == ERESTART) { + * waited = B_TRUE; + * dmu_tx_wait(tx); + * dmu_tx_abort(tx); + * goto top; + * } + * dmu_tx_abort(tx); // abort DMU tx + * ZFS_EXIT(zfsvfs); // finished in zfs + * return (error); // really out of space + * } + * error = do_real_work(); // do whatever this VOP does + * if (error == 0) + * zfs_log_*(...); // on success, make ZIL entry + * dmu_tx_commit(tx); // commit DMU tx -- error or not + * rw_exit(...); // drop locks + * zfs_dirent_unlock(dl); // unlock directory entry + * zrele(...); // release held znodes + * zil_commit(zilog, foid); // synchronous when necessary + * ZFS_EXIT(zfsvfs); // finished in zfs + * return (error); // done, report error + */ + +/* + * Virus scanning is unsupported. It would be possible to add a hook + * here to performance the required virus scan. This could be done + * entirely in the kernel or potentially as an update to invoke a + * scanning utility. + */ +static int +zfs_vscan(struct vnode *vp, cred_t *cr, int async) +{ + return (0); +} + +/* ARGSUSED */ +int +zfs_open(struct vnode *vp, int mode, int flag, cred_t *cr) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ITOZSB(vp); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + /* Honor ZFS_APPENDONLY file attribute */ + if ((mode & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && + ((flag & O_APPEND) == 0)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EPERM)); + } + + /* Virus scan eligible files on open */ + if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(zp->z_mode) && + !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { + if (zfs_vscan(vp, cr, 0) != 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EACCES)); + } + } + + /* Keep a count of the synchronous opens in the znode */ + if (flag & (FSYNC | FDSYNC)) + atomic_inc_32(&zp->z_sync_cnt); + + ZFS_EXIT(zfsvfs); + return (0); +} + +/* ARGSUSED */ +int +zfs_close(struct vnode *vp, int flag, cred_t *cr) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ITOZSB(vp); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + /* Decrement the synchronous opens in the znode */ + if (flag & (FSYNC | FDSYNC)) + atomic_dec_32(&zp->z_sync_cnt); + + if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(zp->z_mode) && + !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) + VERIFY(zfs_vscan(vp, cr, 1) == 0); + + ZFS_EXIT(zfsvfs); + return (0); +} + +#if defined(SEEK_HOLE) && defined(SEEK_DATA) +/* + * Lseek support for finding holes (cmd == SEEK_HOLE) and + * data (cmd == SEEK_DATA). "off" is an in/out parameter. + */ +static int +zfs_holey_common(struct vnode *vp, int cmd, loff_t *off) +{ + znode_t *zp = VTOZ(vp); + uint64_t noff = (uint64_t)*off; /* new offset */ + uint64_t file_sz; + int error; + boolean_t hole; + + file_sz = zp->z_size; + if (noff >= file_sz) { + return (SET_ERROR(ENXIO)); + } + + if (cmd == SEEK_HOLE) + hole = B_TRUE; + else + hole = B_FALSE; + + error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); + + if (error == ESRCH) + return (SET_ERROR(ENXIO)); + + /* file was dirty, so fall back to using generic logic */ + if (error == EBUSY) { + if (hole) + *off = file_sz; + + return (0); + } + + /* + * We could find a hole that begins after the logical end-of-file, + * because dmu_offset_next() only works on whole blocks. 
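From user space this hole/data lookup is driven through lseek(2) with the SEEK_HOLE and SEEK_DATA whence values, where the platform defines them. A small sketch:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
    if (argc < 2)
        return (1);
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0)
        return (1);
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
    off_t data = lseek(fd, 0, SEEK_DATA);   /* first data at or after offset 0 */
    off_t hole = lseek(fd, 0, SEEK_HOLE);   /* first hole at or after offset 0 */
    printf("data@%lld hole@%lld\n", (long long)data, (long long)hole);
#endif
    close(fd);
    return (0);
}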
If the + * EOF falls mid-block, then indicate that the "virtual hole" + * at the end of the file begins at the logical EOF, rather than + * at the end of the last block. + */ + if (noff > file_sz) { + ASSERT(hole); + noff = file_sz; + } + + if (noff < *off) + return (error); + *off = noff; + return (error); +} + +int +zfs_holey(struct vnode *vp, int cmd, loff_t *off) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + error = zfs_holey_common(vp, cmd, off); + + ZFS_EXIT(zfsvfs); + return (error); +} +#endif /* SEEK_HOLE && SEEK_DATA */ + +#if defined(_KERNEL) +/* + * When a file is memory mapped, we must keep the IO data synchronized + * between the DMU cache and the memory mapped pages. What this means: + * + * On Write: If we find a memory mapped page, we write to *both* + * the page and the dmu buffer. + */ +static void +update_pages(vnode_t *vp, int64_t start, int64_t len, + objset_t *os, uint64_t oid) +{ + znode_t *zp = VTOZ(vp); + int error = 0; + vm_offset_t vaddr = 0; + upl_t upl; + upl_page_info_t *pl = NULL; + int upl_size; + int upl_page; + off_t off; + + off = start & (PAGE_SIZE - 1); + start &= ~PAGE_MASK; + + upl_size = (off + len + (PAGE_SIZE - 1)) & ~PAGE_MASK; + + // dprintf("update_pages: start 0x%llx len 0x%llx: 1st off x%llx\n", + // start, len, off); + /* + * Create a UPL for the current range and map its + * page list into the kernel virtual address space. + */ + error = ubc_create_upl(vp, start, upl_size, &upl, &pl, + UPL_FILE_IO | UPL_SET_LITE); + if ((error != KERN_SUCCESS) || !upl) { + printf("ZFS: update_pages failed to ubc_create_upl: %d\n", + error); + return; + } + + if (ubc_upl_map(upl, &vaddr) != KERN_SUCCESS) { + printf("ZFS: update_pages failed to ubc_upl_map: %d\n", + error); + (void) ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY); + return; + } + for (upl_page = 0; len > 0; ++upl_page) { + uint64_t nbytes = MIN(PAGESIZE - off, len); + /* + * We don't want a new page to "appear" in the middle of + * the file update (because it may not get the write + * update data), so we grab a lock to block + * zfs_getpage(). + */ + rw_enter(&zp->z_map_lock, RW_WRITER); + if (pl && upl_valid_page(pl, upl_page)) { + rw_exit(&zp->z_map_lock); + (void) dmu_read(os, oid, start+off, nbytes, + (void *)(vaddr+off), DMU_READ_PREFETCH); + + } else { // !upl_valid_page + rw_exit(&zp->z_map_lock); + } + vaddr += PAGE_SIZE; + start += PAGE_SIZE; + len -= nbytes; + off = 0; + } + + /* + * Unmap the page list and free the UPL. + */ + (void) ubc_upl_unmap(upl); + /* + * We want to abort here since due to dmu_write() + * we effectively didn't dirty any pages. + */ + (void) ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY); +} + +/* + * When a file is memory mapped, we must keep the IO data synchronized + * between the DMU cache and the memory mapped pages. What this means: + * + * On Read: We "read" preferentially from memory mapped pages, + * else we default from the dmu buffer. + * + * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when + * the file is memory mapped. 
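Both update_pages() above and mappedread() below round the requested byte range to page boundaries before creating the UPL, then walk it one page at a time. The arithmetic on its own, as a runnable user-space sketch (MY_PAGE_SIZE/MY_PAGE_MASK are local stand-ins, using the XNU convention that PAGE_MASK is the low-bit mask):

#include <stdio.h>
#include <stdint.h>

#define MY_PAGE_SIZE    4096ULL
#define MY_PAGE_MASK    (MY_PAGE_SIZE - 1)  /* XNU-style low-bit mask */

int
main(void)
{
    uint64_t start = 10000, len = 9000;

    uint64_t off = start & MY_PAGE_MASK;        /* offset within first page */
    uint64_t upl_start = start & ~MY_PAGE_MASK; /* round start down */
    uint64_t upl_size = (off + len + MY_PAGE_MASK) & ~MY_PAGE_MASK; /* round size up */

    printf("upl_start=%llu off=%llu upl_size=%llu\n",
        (unsigned long long)upl_start, (unsigned long long)off,
        (unsigned long long)upl_size);

    /* Walk the range one page at a time, as the copy loops do. */
    for (uint64_t left = len, o = off; left > 0; o = 0) {
        uint64_t nbytes = (MY_PAGE_SIZE - o < left) ? MY_PAGE_SIZE - o : left;
        printf("  chunk of %llu bytes\n", (unsigned long long)nbytes);
        left -= nbytes;
    }
    return (0);
}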
+ */ +static int +mappedread(struct vnode *vp, int nbytes, uio_t *uio) +{ + znode_t *zp = VTOZ(vp); + objset_t *os = zp->z_zfsvfs->z_os; + int len = nbytes; + int error = 0; + vm_offset_t vaddr = 0; + upl_t upl; + upl_page_info_t *pl = NULL; + off_t upl_start; + int upl_size; + int upl_page; + off_t off; + + upl_start = uio_offset(uio); + off = upl_start & PAGE_MASK; + upl_start &= ~PAGE_MASK; + upl_size = (off + nbytes + (PAGE_SIZE - 1)) & ~PAGE_MASK; + + /* + * Create a UPL for the current range and map its + * page list into the kernel virtual address space. + */ + error = ubc_create_upl(vp, upl_start, upl_size, &upl, &pl, + UPL_FILE_IO | UPL_SET_LITE); + if ((error != KERN_SUCCESS) || !upl) { + return (EIO); + } + + if (ubc_upl_map(upl, &vaddr) != KERN_SUCCESS) { + (void) ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY); + return (ENOMEM); + } + + for (upl_page = 0; len > 0; ++upl_page) { + uint64_t bytes = MIN(PAGE_SIZE - off, len); + if (pl && upl_valid_page(pl, upl_page)) { + uio_setrw(uio, UIO_READ); + error = uiomove((caddr_t)vaddr + off, bytes, UIO_READ, + uio); + } else { + error = dmu_read_uio(os, zp->z_id, uio, bytes); + } + + vaddr += PAGE_SIZE; + len -= bytes; + off = 0; + if (error) + break; + } + + /* + * Unmap the page list and free the UPL. + */ + (void) ubc_upl_unmap(upl); + (void) ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY); + + return (error); +} +#endif /* _KERNEL */ + +unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */ +unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT; + +/* + * Read bytes from specified file into supplied buffer. + * + * IN: ip - inode of file to be read from. + * uio - structure supplying read location, range info, + * and return buffer. + * ioflag - O_SYNC flags; used to provide FRSYNC semantics. + * O_DIRECT flag; used to bypass page cache. + * cr - credentials of caller. + * + * OUT: uio - updated offset and range, buffer filled. + * + * RETURN: 0 on success, error code on failure. + * + * Side Effects: + * inode - atime updated if byte count > 0 + */ +/* ARGSUSED */ +int +zfs_read(struct vnode *vp, uio_t *uio, int ioflag, cred_t *cr) +{ + int error = 0; + boolean_t frsync = B_FALSE; + + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ITOZSB(vp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + if (zp->z_pflags & ZFS_AV_QUARANTINED) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EACCES)); + } + + /* + * Validate file offset + */ + if (uio_offset(uio) < (offset_t)0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + /* + * Fasttrack empty reads + */ + if (uio_resid(uio) == 0) { + ZFS_EXIT(zfsvfs); + return (0); + } + +#ifdef FRSYNC + /* + * If we're in FRSYNC mode, sync out this znode before reading it. + * Only do this for non-snapshots. + * + * Some platforms do not support FRSYNC and instead map it + * to O_SYNC, which results in unnecessary calls to zil_commit. We + * only honor FRSYNC requests on platforms which support it. + */ + frsync = !!(ioflag & FRSYNC); +#endif + if (zfsvfs->z_log && + (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) + zil_commit(zfsvfs->z_log, zp->z_id); + + /* + * Lock the range against changes. + */ + zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, + uio_offset(uio), uio_resid(uio), RL_READER); + + /* + * If we are reading past end-of-file we can skip + * to the end; but we might still need to set atime. 
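The read loop that follows never crosses a zfs_read_chunk_size boundary in a single DMU call; P2PHASE() gives the distance already consumed inside the current chunk, so the first piece is trimmed to end exactly on a chunk boundary. A runnable sketch of that carving:

#include <stdio.h>
#include <stdint.h>

#define P2PHASE(x, align)   ((x) & ((align) - 1))  /* align must be a power of two */

int
main(void)
{
    const uint64_t chunk = 1024 * 1024; /* matches the zfs_read_chunk_size default */
    uint64_t off = 3 * chunk - 1000;    /* request straddles a chunk boundary */
    uint64_t resid = 5000;

    while (resid > 0) {
        uint64_t nbytes = chunk - P2PHASE(off, chunk);
        if (nbytes > resid)
            nbytes = resid;
        printf("read %llu bytes at %llu\n",
            (unsigned long long)nbytes, (unsigned long long)off);
        off += nbytes;
        resid -= nbytes;
    }
    return (0);
}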
+ */ + if (uio_offset(uio) >= zp->z_size) { + error = 0; + goto out; + } + + ASSERT(uio_offset(uio) < zp->z_size); + ssize_t n = MIN(uio_resid(uio), zp->z_size - uio_offset(uio)); + + while (n > 0) { + ssize_t nbytes = MIN(n, zfs_read_chunk_size - + P2PHASE(uio_offset(uio), zfs_read_chunk_size)); + + if (zp->z_is_mapped && !(ioflag & O_DIRECT)) { + error = mappedread(vp, nbytes, uio); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes); + } + + if (error) { + /* convert checksum errors into IO errors */ + if (error == ECKSUM) + error = SET_ERROR(EIO); + break; + } + + n -= nbytes; + } + +out: + zfs_rangelock_exit(lr); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Write the bytes to a file. + * + * IN: ip - inode of file to be written to. + * uio - structure supplying write location, range info, + * and data buffer. + * ioflag - O_APPEND flag set if in append mode. + * O_DIRECT flag; used to bypass page cache. + * cr - credentials of caller. + * + * OUT: uio - updated offset and range. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * ip - ctime|mtime updated if byte count > 0 + */ + +/* ARGSUSED */ +int +zfs_write(struct vnode *vp, uio_t *uio, int ioflag, cred_t *cr) +{ + int error = 0; + ssize_t start_resid = uio_resid(uio); + rlim64_t limit = MAXOFFSET_T; + const iovec_t *aiov = NULL; + arc_buf_t *abuf = NULL; + int write_eof; + + /* + * Fasttrack empty write + */ + ssize_t n = start_resid; + if (n == 0) + return (0); + + if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) + limit = MAXOFFSET_T; + + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + sa_bulk_attr_t bulk[4]; + int count = 0; + uint64_t mtime[2], ctime[2]; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &zp->z_size, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + + /* + * Callers might not be able to detect properly that we are read-only, + * so check it explicitly here. + */ + if (zfs_is_readonly(zfsvfs)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EROFS)); + } + + /* + * If immutable or not appending then return EPERM + */ + if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || + ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) && + (uio_offset(uio) < zp->z_size))) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EPERM)); + } + + /* + * Validate file offset + */ + offset_t woff = ioflag & O_APPEND ? zp->z_size : uio_offset(uio); + if (woff < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + int max_blksz = zfsvfs->z_max_blksz; + xuio_t *xuio = NULL; + + /* + * Pre-fault the pages to ensure slow (eg NFS) pages + * don't hold up txg. + * Skip this if uio contains loaned arc_buf. + */ + if (uio_prefaultpages(MIN(n, max_blksz), uio)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EFAULT)); + } + + /* + * If in append mode, set the io offset pointer to eof. + */ + zfs_locked_range_t *lr; + if (ioflag & O_APPEND) { + /* + * Obtain an appending range lock to guarantee file append + * semantics. We reset the write offset once we have the lock. + */ + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); + woff = lr->lr_offset; + if (lr->lr_length == UINT64_MAX) { + /* + * We overlocked the file because this write will cause + * the file block size to increase. 
+ * Note that zp_size cannot change with this lock held. + */ + woff = zp->z_size; + } + uio_setoffset(uio, woff); + } else { + /* + * Note that if the file block size will change as a result of + * this write, then this range lock will lock the entire file + * so that we can re-write the block safely. + */ + lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); + } + + if (woff >= limit) { + zfs_rangelock_exit(lr); + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EFBIG)); + } + + if ((woff + n) > limit || woff > (limit - n)) + n = limit - woff; + + /* Will this write extend the file length? */ + write_eof = (woff + n > zp->z_size); + uint64_t end_size = MAX(zp->z_size, woff + n); + zilog_t *zilog = zfsvfs->z_log; + + /* + * Write the file in reasonable size chunks. Each chunk is written + * in a separate transaction; this keeps the intent log records small + * and allows us to do more fine-grained space accounting. + */ + while (n > 0) { + woff = uio_offset(uio); + + if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, + zp->z_uid) || + zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, + zp->z_gid) || + (zp->z_projid != ZFS_DEFAULT_PROJID && + zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, + zp->z_projid))) { + error = SET_ERROR(EDQUOT); + break; + } + + abuf = NULL; + if (xuio) { + + } else if (n >= max_blksz && woff >= zp->z_size && + P2PHASE(woff, max_blksz) == 0 && + zp->z_blksz == max_blksz) { + /* + * This write covers a full block. "Borrow" a buffer + * from the dmu so that we can fill it before we enter + * a transaction. This avoids the possibility of + * holding up the transaction if the data copy hangs + * up on a pagefault (e.g., from an NFS server mapping). + */ + size_t cbytes; + + abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), + max_blksz); + ASSERT(abuf != NULL); + ASSERT(arc_buf_size(abuf) == max_blksz); + if ((error = uiocopy(abuf->b_data, max_blksz, + UIO_WRITE, uio, &cbytes))) { + dmu_return_arcbuf(abuf); + break; + } + ASSERT(cbytes == max_blksz); + } + + /* + * Start a transaction. + */ + dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); + DB_DNODE_ENTER(db); + dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, + MIN(n, max_blksz)); + DB_DNODE_EXIT(db); + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + if (abuf != NULL) + dmu_return_arcbuf(abuf); + break; + } + + /* + * If rangelock_enter() over-locked we grow the blocksize + * and then reduce the lock range. This will only happen + * on the first iteration since rangelock_reduce() will + * shrink down lr_length to the appropriate size. + */ + if (lr->lr_length == UINT64_MAX) { + uint64_t new_blksz; + + if (zp->z_blksz > max_blksz) { + /* + * File's blocksize is already larger than the + * "recordsize" property. Only let it grow to + * the next power of 2. + */ + ASSERT(!ISP2(zp->z_blksz)); + new_blksz = MIN(end_size, + 1 << highbit64(zp->z_blksz)); + } else { + new_blksz = MIN(end_size, max_blksz); + } + zfs_grow_blocksize(zp, new_blksz, tx); + zfs_rangelock_reduce(lr, woff, n); + } + + /* + * XXX - should we really limit each write to z_max_blksz? + * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
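Two small pieces of arithmetic from the write loop above, pulled into a standalone sketch: the test that decides whether a whole ARC buffer can be borrowed and filled outside the transaction, and the next-power-of-two block-size growth computed with highbit64() (reimplemented here only for illustration):

#include <stdint.h>
#include <stdio.h>

#define P2PHASE(x, align)   ((x) & ((align) - 1))

/* Index (1-based) of the highest set bit, as illumos highbit64() returns. */
static int
highbit64(uint64_t x)
{
    int h = 0;
    while (x != 0) {
        h++;
        x >>= 1;
    }
    return (h);
}

/* Can we borrow a whole ARC buffer and fill it before entering the tx? */
static int
is_full_block_write(uint64_t n, uint64_t woff, uint64_t fsize,
    uint64_t blksz, uint64_t max_blksz)
{
    return (n >= max_blksz && woff >= fsize &&
        P2PHASE(woff, max_blksz) == 0 && blksz == max_blksz);
}

int
main(void)
{
    /* 1 << highbit64(x) rounds a non-power-of-two block size up. */
    uint64_t blksz = 132096;
    printf("grown blksz = %llu\n",
        (unsigned long long)(1ULL << highbit64(blksz)));
    printf("full block? %d\n",
        is_full_block_write(131072, 262144, 262144, 131072, 131072));
    return (0);
}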
+ */ + ssize_t nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); + + ssize_t tx_bytes = 0; + + if (woff + nbytes > zp->z_size) + vnode_pager_setsize(vp, woff + nbytes); + + /* + * This conditional is always true in OSX, it is kept so + * the sources look familiar to other platforms + */ + if (abuf == NULL) { + tx_bytes = uio_resid(uio); + error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes, tx); + tx_bytes -= uio_resid(uio); + } else { + tx_bytes = nbytes; + /* + * If this is not a full block write, but we are + * extending the file past EOF and this data starts + * block-aligned, use assign_arcbuf(). Otherwise, + * write via dmu_write(). + */ + if (tx_bytes < max_blksz && (!write_eof || + aiov->iov_base != abuf->b_data)) { + ASSERT(xuio); + dmu_write(zfsvfs->z_os, zp->z_id, woff, + aiov->iov_len, aiov->iov_base, tx); + dmu_return_arcbuf(abuf); + xuio_stat_wbuf_copied(); + } else { + ASSERT(xuio || tx_bytes == max_blksz); + error = dmu_assign_arcbuf_by_dbuf( + sa_get_db(zp->z_sa_hdl), woff, abuf, tx); + if (error != 0) { + dmu_return_arcbuf(abuf); + dmu_tx_commit(tx); + break; + } + } + ASSERT(tx_bytes <= uio_resid(uio)); + uioskip(uio, tx_bytes); + } + if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) { + update_pages(vp, woff, tx_bytes, zfsvfs->z_os, + zp->z_id); + } + + /* + * If we made no progress, we're done. If we made even + * partial progress, update the znode and ZIL accordingly. + */ + if (tx_bytes == 0) { + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void *)&zp->z_size, sizeof (uint64_t), tx); + dmu_tx_commit(tx); + ASSERT(error != 0); + break; + } + + /* + * Clear Set-UID/Set-GID bits on successful write if not + * privileged and at least one of the execute bits is set. + * + * It would be nice to do this after all writes have + * been done, but that would still expose the ISUID/ISGID + * to another app after the partial write is committed. + * + * Note: we don't call zfs_fuid_map_id() here because + * user 0 is not an ephemeral uid. + */ + mutex_enter(&zp->z_acl_lock); + uint32_t uid = KUID_TO_SUID(zp->z_uid); + if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | + (S_IXUSR >> 6))) != 0 && + (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && + secpolicy_vnode_setid_retain(cr, + ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { + uint64_t newmode; + zp->z_mode &= ~(S_ISUID | S_ISGID); + zp->z_mode = newmode = zp->z_mode; + (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), + (void *)&newmode, sizeof (uint64_t), tx); + } + mutex_exit(&zp->z_acl_lock); + + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); + + /* + * Update the file size (zp_size) if it has changed; + * account for possible concurrent updates. + */ + while ((end_size = zp->z_size) < uio_offset(uio)) { + (void) atomic_cas_64(&zp->z_size, end_size, + uio_offset(uio)); + ASSERT(error == 0); + } + /* + * If we are replaying and eof is non zero then force + * the file size to the specified eof. Note, there's no + * concurrency during replay. + */ + if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) + zp->z_size = zfsvfs->z_replay_eof; + + if (error == 0) + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + else + (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + + zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, + NULL, NULL); + dmu_tx_commit(tx); + + if (error != 0) + break; + + ASSERT(tx_bytes == nbytes); + n -= nbytes; + } + + zfs_rangelock_exit(lr); + + /* + * If we're in replay mode, or we made no progress, return error. 
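The size update above never takes a lock: it re-reads z_size and retries a compare-and-swap until the recorded size is at least the end of this write, so concurrent writers can only move it forward. The same idea expressed with portable C11 atomics:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t file_size;

static void
grow_size(uint64_t new_end)
{
    uint64_t cur = atomic_load(&file_size);
    /* Only move the size forward; lose the race gracefully. */
    while (cur < new_end &&
        !atomic_compare_exchange_weak(&file_size, &cur, new_end))
        ;   /* cur was reloaded by the failed CAS; retry */
}

int
main(void)
{
    atomic_store(&file_size, 1000);
    grow_size(4096);
    grow_size(2048);    /* no effect: another writer already went further */
    printf("size=%llu\n", (unsigned long long)atomic_load(&file_size));
    return (0);
}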
+ * Otherwise, it's at least a partial write, so it's successful. + */ + if (zfsvfs->z_replay || uio_resid(uio) == start_resid) { + dprintf("%s: error resid %llu\n", __func__, uio_resid(uio)); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (ioflag & (O_SYNC | O_DSYNC) || + zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, zp->z_id); + + ZFS_EXIT(zfsvfs); + + return (0); +} + +/* + * Write the bytes to a file. + * + * IN: zp - znode of file to be written to + * data - bytes to write + * len - number of bytes to write + * pos - offset to start writing at + * + * OUT: resid - remaining bytes to write + * + * RETURN: 0 if success + * positive error code if failure + * + * Timestamps: + * zp - ctime|mtime updated if byte count > 0 + */ +int +zfs_write_simple(znode_t *zp, const void *data, size_t len, + loff_t pos, size_t *presid) +{ + int error = 0; + ssize_t resid; + + error = zfs_vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, + pos, UIO_SYSSPACE, IO_SYNC, RLIM64_INFINITY, NOCRED, &resid); + + if (error) { + return (SET_ERROR(error)); + } else if (presid == NULL) { + if (resid != 0) { + error = SET_ERROR(EIO); + } + } else { + *presid = resid; + } + return (error); +} + +/* + * Drop a reference on the passed inode asynchronously. This ensures + * that the caller will never drop the last reference on an inode in + * the current context. Doing so while holding open a tx could result + * in a deadlock if iput_final() re-enters the filesystem code. + */ +void +zfs_zrele_async(znode_t *zp) +{ + struct vnode *vp = ZTOV(zp); + objset_t *os = ITOZSB(vp)->z_os; + + ASSERT(os != NULL); + + if (vnode_iocount(vp) == 1) + VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)), + (task_func_t *)vnode_put, vp, TQ_SLEEP) != TASKQID_INVALID); + else + zrele(zp); +} + +/* ARGSUSED */ +void +zfs_get_done(zgd_t *zgd, int error) +{ + znode_t *zp = zgd->zgd_private; + + if (zgd->zgd_db) + dmu_buf_rele(zgd->zgd_db, zgd); + + zfs_rangelock_exit(zgd->zgd_lr); + + /* + * Release the vnode asynchronously as we currently have the + * txg stopped from syncing. + */ + zfs_zrele_async(zp); + + kmem_free(zgd, sizeof (zgd_t)); +} + +#ifdef DEBUG +static int zil_fault_io = 0; +#endif + +/* + * Get data to generate a TX_WRITE intent log record. + */ +int +zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) +{ + zfsvfs_t *zfsvfs = arg; + objset_t *os = zfsvfs->z_os; + znode_t *zp; + uint64_t object = lr->lr_foid; + uint64_t offset = lr->lr_offset; + uint64_t size = lr->lr_length; + dmu_buf_t *db; + zgd_t *zgd; + int error = 0; + + ASSERT3P(lwb, !=, NULL); + ASSERT3P(zio, !=, NULL); + ASSERT3U(size, !=, 0); + + /* + * Nothing to do if the file has been removed + */ + if (zfs_zget(zfsvfs, object, &zp) != 0) + return (SET_ERROR(ENOENT)); + if (zp->z_unlinked) { + /* + * Release the vnode asynchronously as we currently have the + * txg stopped from syncing. + */ + zfs_zrele_async(zp); + return (SET_ERROR(ENOENT)); + } + + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd->zgd_lwb = lwb; + zgd->zgd_private = zp; + + /* + * Write records come in two flavors: immediate and indirect. + * For small writes it's cheaper to store the data with the + * log record (immediate); for large writes it's cheaper to + * sync the data and get a pointer to it (indirect) so that + * we don't have to write the data twice. 
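A toy illustration of that immediate-versus-indirect choice. The real cutoff is decided in the ZIL code and depends on more than payload length (log bias, slog devices), so the enum and threshold below are purely illustrative:

#include <stdint.h>
#include <stdio.h>

enum wr_state { WR_COPIED, WR_INDIRECT };

/* Hypothetical cutoff -- not a value taken from the patch. */
#define IMMEDIATE_WRITE_SZ  (32 * 1024)

static enum wr_state
choose_record(uint64_t len)
{
    return (len <= IMMEDIATE_WRITE_SZ ? WR_COPIED : WR_INDIRECT);
}

int
main(void)
{
    printf("4K  -> %s\n", choose_record(4096) == WR_COPIED ?
        "copied into the log record" : "synced, pointer logged");
    printf("1M  -> %s\n", choose_record(1 << 20) == WR_COPIED ?
        "copied into the log record" : "synced, pointer logged");
    return (0);
}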
+ */ + if (buf != NULL) { /* immediate write */ + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, + offset, size, RL_READER); + /* test for truncation needs to be done while range locked */ + if (offset >= zp->z_size) { + error = SET_ERROR(ENOENT); + } else { + error = dmu_read(os, object, offset, size, buf, + DMU_READ_NO_PREFETCH); + } + ASSERT(error == 0 || error == ENOENT); + } else { /* indirect write */ + /* + * Have to lock the whole block to ensure when it's + * written out and its checksum is being calculated + * that no one can change the data. We need to re-check + * blocksize after we get the lock in case it's changed! + */ + for (;;) { + uint64_t blkoff; + size = zp->z_blksz; + blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; + offset -= blkoff; + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, + offset, size, RL_READER); + if (zp->z_blksz == size) + break; + offset += blkoff; + zfs_rangelock_exit(zgd->zgd_lr); + } + /* test for truncation needs to be done while range locked */ + if (lr->lr_offset >= zp->z_size) + error = SET_ERROR(ENOENT); +#ifdef DEBUG + if (zil_fault_io) { + error = SET_ERROR(EIO); + zil_fault_io = 0; + } +#endif + if (error == 0) + error = dmu_buf_hold(os, object, offset, zgd, &db, + DMU_READ_NO_PREFETCH); + + if (error == 0) { + blkptr_t *bp = &lr->lr_blkptr; + + zgd->zgd_db = db; + zgd->zgd_bp = bp; + + ASSERT(db->db_offset == offset); + ASSERT(db->db_size == size); + + error = dmu_sync(zio, lr->lr_common.lrc_txg, + zfs_get_done, zgd); + ASSERT(error || lr->lr_length <= size); + + /* + * On success, we need to wait for the write I/O + * initiated by dmu_sync() to complete before we can + * release this dbuf. We will finish everything up + * in the zfs_get_done() callback. + */ + if (error == 0) + return (0); + + if (error == EALREADY) { + lr->lr_common.lrc_txtype = TX_WRITE2; + /* + * TX_WRITE2 relies on the data previously + * written by the TX_WRITE that caused + * EALREADY. We zero out the BP because + * it is the old, currently-on-disk BP. + */ + zgd->zgd_bp = NULL; + BP_ZERO(bp); + error = 0; + } + } + } + + zfs_get_done(zgd, error); + + return (error); +} + +/*ARGSUSED*/ +int +zfs_access(struct vnode *vp, int mode, int flag, cred_t *cr) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ITOZSB(vp); + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + if (flag & V_ACE_MASK) + error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); + else + error = zfs_zaccess_rwx(zp, mode, flag, cr); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Lookup an entry in a directory, or an extended attribute directory. + * If it exists, return a held inode reference for it. + * + * IN: zdp - znode of directory to search. + * nm - name of entry to lookup. + * flags - LOOKUP_XATTR set if looking for an attribute. + * cr - credentials of caller. + * direntflags - directory lookup flags + * realpnp - returned pathname. + * + * OUT: zpp - znode of located entry, NULL if not found. + * + * RETURN: 0 on success, error code on failure. + * + * Timestamps: + * NA + */ +/* ARGSUSED */ +int +zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, + cred_t *cr, int *direntflags, struct componentname *realpnp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zdp); + int error = 0; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zdp); + + *zpp = NULL; + + /* + * OsX has separate vnops for XATTR activity + */ + + + if (!S_ISDIR(zdp->z_mode)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOTDIR)); + } + + /* + * Check accessibility of directory. 
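For indirect writes, zfs_get_data() above locks the whole block containing the offset and re-checks the block size after the lock is held, retrying if it changed. The alignment step on its own, as a runnable sketch:

#include <stdint.h>
#include <stdio.h>

#define P2PHASE(x, align)   ((x) & ((align) - 1))
#define ISP2(x)             (((x) & ((x) - 1)) == 0)

/* Round a file offset down to the start of its block. */
static uint64_t
block_start(uint64_t offset, uint64_t blksz)
{
    uint64_t blkoff = ISP2(blksz) ? P2PHASE(offset, blksz) : offset;
    return (offset - blkoff);
}

int
main(void)
{
    /*
     * The caller locks [block_start, block_start + blksz), then re-reads
     * the block size; if it changed while waiting for the lock, it drops
     * the lock and recomputes with the new size.
     */
    printf("4K   block: lock at %llu\n",
        (unsigned long long)block_start(200000, 4096));
    printf("128K block: lock at %llu\n",
        (unsigned long long)block_start(200000, 131072));
    return (0);
}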
+ */ + + if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) { + ZFS_EXIT(zfsvfs); + return (error); + } + + if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), + NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EILSEQ)); + } + + error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Attempt to create a new entry in a directory. If the entry + * already exists, truncate the file if permissible, else return + * an error. Return the ip of the created or trunc'd file. + * + * IN: dzp - znode of directory to put new file entry in. + * name - name of new file entry. + * vap - attributes of new file. + * excl - flag indicating exclusive or non-exclusive mode. + * mode - mode to open file with. + * cr - credentials of caller. + * flag - file flag. + * vsecp - ACL to be set + * + * OUT: zpp - znode of created or trunc'd entry. + * + * RETURN: 0 on success, error code on failure. + * + * Timestamps: + * dzp - ctime|mtime updated if new entry created + * zp - ctime|mtime always, atime if new + */ + +/* ARGSUSED */ +int +zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, + int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp) +{ + znode_t *zp = NULL; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zilog_t *zilog; + objset_t *os; + zfs_dirlock_t *dl; + dmu_tx_t *tx; + int error; + uid_t uid; + gid_t gid; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + boolean_t have_acl = B_FALSE; + boolean_t waited = B_FALSE; + + /* + * If we have an ephemeral id, ACL, or XVATTR then + * make sure file system is at proper version + */ + + gid = crgetgid(cr); + uid = crgetuid(cr); + + if (zfsvfs->z_use_fuids == B_FALSE && + (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) + return (SET_ERROR(EINVAL)); + + if (name == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + os = zfsvfs->z_os; + zilog = zfsvfs->z_log; + + if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), + NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EILSEQ)); + } + + if (vap->va_mask & ATTR_XVATTR) { + if ((error = secpolicy_xvattr(vap, + crgetuid(cr), cr, vap->va_mode)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + } + +top: + *zpp = NULL; + if (*name == '\0') { + /* + * Null component name refers to the directory itself. + */ + zhold(dzp); + zp = dzp; + dl = NULL; + error = 0; + } else { + /* possible igrab(zp) */ + int zflg = 0; + + if (flag & FIGNORECASE) + zflg |= ZCILOOK; + + error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, + NULL, NULL); + if (error) { + if (have_acl) + zfs_acl_ids_free(&acl_ids); + if (strcmp(name, "..") == 0) + error = SET_ERROR(EISDIR); + ZFS_EXIT(zfsvfs); + return (error); + } + } + + if (zp == NULL) { + uint64_t txtype; + uint64_t projid = ZFS_DEFAULT_PROJID; + + /* + * Create a new file object and update the directory + * to reference it. + */ + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { + if (have_acl) + zfs_acl_ids_free(&acl_ids); + goto out; + } + + /* + * We only support the creation of regular files in + * extended attribute directories. 
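zfs_create() here, and zfs_remove()/zfs_mkdir()/zfs_rmdir() further below, all follow the top:/ERESTART retry shape described in the Big Rules comment: assign with TXG_NOWAIT while ZPL locks are held, and on ERESTART drop the locks, wait, and retry with TXG_NOTHROTTLE added. A runnable toy version with stubbed DMU calls (the *_stub names and errno value are illustrative only):

#include <stdio.h>

#define ERESTART        85  /* illustrative value */
#define TXG_NOWAIT      0x1
#define TXG_NOTHROTTLE  0x2

static int attempts;

/* Stubs standing in for the DMU: fail once with ERESTART, then succeed. */
static int dmu_tx_assign_stub(int flags) { (void)flags; return (attempts++ == 0 ? ERESTART : 0); }
static void dmu_tx_wait_stub(void) { }
static void dmu_tx_abort_stub(void) { }
static void dmu_tx_commit_stub(void) { }

int
main(void)
{
    int waited = 0;
    int error;

top:
    /* ...create the tx and declare the holds here... */
    error = dmu_tx_assign_stub((waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
    if (error) {
        /* ...drop ZPL locks here... */
        if (error == ERESTART) {
            waited = 1;
            dmu_tx_wait_stub();
            dmu_tx_abort_stub();
            goto top;
        }
        dmu_tx_abort_stub();
        return (error);
    }
    /* ...do the work, log it, then commit... */
    dmu_tx_commit_stub();
    printf("committed after %d assign attempt(s)\n", attempts);
    return (0);
}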
+ */ + + if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) { + if (have_acl) + zfs_acl_ids_free(&acl_ids); + error = SET_ERROR(EINVAL); + goto out; + } + + if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, + cr, vsecp, &acl_ids)) != 0) + goto out; + have_acl = B_TRUE; + + if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) + projid = zfs_inherit_projid(dzp); + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) { + zfs_acl_ids_free(&acl_ids); + error = SET_ERROR(EDQUOT); + goto out; + } + + tx = dmu_tx_create(os); + + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); + dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); + if (!zfsvfs->z_use_sa && + acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } + + error = dmu_tx_assign(tx, + (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + ZFS_EXIT(zfsvfs); + return (error); + } + + zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + + error = zfs_link_create(dl, zp, tx, ZNEW); + if (error != 0) { + /* + * Since, we failed to add the directory entry for it, + * delete the newly created dnode. + */ + zfs_znode_delete(zp, tx); + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); + + /* + * Failed, have zp but on OsX we don't have a vp, as it + * would have been attached below, and we've cleared out + * zp, signal then not to call zrele() on it. + */ + if (ZTOV(zp) == NULL) { + zfs_znode_free(zp); + zp = NULL; + } + + goto out; + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); + if (flag & FIGNORECASE) + txtype |= TX_CI; + zfs_log_create(zilog, tx, txtype, dzp, zp, name, + vsecp, acl_ids.z_fuidp, vap); + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); + + /* + * OS X - attach the vnode _after_ committing the transaction + */ + zfs_znode_getvnode(zp, zfsvfs); + + } else { + int aflags = (flag & O_APPEND) ? V_APPEND : 0; + + if (have_acl) + zfs_acl_ids_free(&acl_ids); + have_acl = B_FALSE; + + /* + * A directory entry already exists for this name. + */ + /* + * Can't truncate an existing file if in exclusive mode. + */ + if (excl) { + error = SET_ERROR(EEXIST); + goto out; + } + /* + * Can't open a directory for writing. + */ + if (S_ISDIR(zp->z_mode)) { + error = SET_ERROR(EISDIR); + goto out; + } + /* + * Verify requested access to file. + */ + if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { + goto out; + } + + mutex_enter(&dzp->z_lock); + dzp->z_seq++; + mutex_exit(&dzp->z_lock); + + /* + * Truncate regular files if requested. + */ + if (S_ISREG(zp->z_mode) && + (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) { + /* we can't hold any locks when calling zfs_freesp() */ + if (dl) { + zfs_dirent_unlock(dl); + dl = NULL; + } + error = zfs_freesp(zp, 0, 0, mode, TRUE); + } + } +out: + + if (dl) + zfs_dirent_unlock(dl); + + if (error) { + if (zp) + zrele(zp); + } else { + *zpp = zp; + } + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Remove an entry from a directory. + * + * IN: dzp - znode of directory to remove entry from. + * name - name of entry to remove. 
+ * cr - credentials of caller. + * flags - case flags. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * dzp - ctime|mtime + * ip - ctime (if nlink > 0) + */ + +uint64_t null_xattr = 0; + +/*ARGSUSED*/ +int +zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags) +{ + znode_t *zp; + znode_t *xzp; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zilog_t *zilog; + uint64_t acl_obj, xattr_obj; + uint64_t xattr_obj_unlinked = 0; + uint64_t obj = 0; + uint64_t links; + zfs_dirlock_t *dl; + dmu_tx_t *tx; + boolean_t may_delete_now, delete_now = FALSE; + boolean_t unlinked, toobig = FALSE; + uint64_t txtype; + struct componentname *realnmp = NULL; + struct componentname realnm = { 0 }; + int error; + int zflg = ZEXISTS; + boolean_t waited = B_FALSE; + + if (name == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + zilog = zfsvfs->z_log; + + if (flags & FIGNORECASE) { + zflg |= ZCILOOK; + + realnm.cn_nameptr = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + realnm.cn_namelen = MAXPATHLEN; + realnmp = &realnm; + } + +top: + xattr_obj = 0; + xzp = NULL; + /* + * Attempt to lock directory; fail if entry doesn't exist. + */ + if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, + NULL, realnmp))) { + if (realnmp) + kmem_free(realnm.cn_nameptr, realnm.cn_namelen); + ZFS_EXIT(zfsvfs); + return (error); + } + + if ((error = zfs_zaccess_delete(dzp, zp, cr))) { + goto out; + } + + /* + * Need to use rmdir for removing directories. + */ + if (S_ISDIR(zp->z_mode)) { + error = SET_ERROR(EPERM); + goto out; + } + + mutex_enter(&zp->z_lock); + may_delete_now = vnode_iocount(ZTOV(zp)) == 1 && + !(zp->z_is_mapped); + mutex_exit(&zp->z_lock); + + /* + * We may delete the znode now, or we may put it in the unlinked set; + * it depends on whether we're the last link, and on whether there are + * other holds on the inode. So we dmu_tx_hold() the right things to + * allow for either case. + */ + obj = zp->z_id; + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + zfs_sa_upgrade_txholds(tx, dzp); + if (may_delete_now) { + toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks; + /* if the file is too big, only hold_free a token amount */ + dmu_tx_hold_free(tx, zp->z_id, 0, + (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); + } + + /* are there any extended attributes? */ + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr_obj, sizeof (xattr_obj)); + if (error == 0 && xattr_obj) { + error = zfs_zget(zfsvfs, xattr_obj, &xzp); + ASSERT0(error); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); + } + + mutex_enter(&zp->z_lock); + if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) + dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); + mutex_exit(&zp->z_lock); + + /* charge as an update -- would be nice not to charge at all */ + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + + /* + * Mark this transaction as typically resulting in a net free of space + */ + dmu_tx_mark_netfree(tx); + + error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + zrele(zp); + if (xzp) + zrele(xzp); + goto top; + } + if (realnmp) + kmem_free(realnm.cn_nameptr, realnm.cn_namelen); + dmu_tx_abort(tx); + zrele(zp); + if (xzp) + zrele(xzp); + ZFS_EXIT(zfsvfs); + return (error); + } + + /* + * Remove the directory entry. + */ + error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); + + if (error) { + dmu_tx_commit(tx); + goto out; + } + + if (unlinked) { + /* + * Hold z_lock so that we can make sure that the ACL obj + * hasn't changed. Could have been deleted due to + * zfs_sa_upgrade(). + */ + mutex_enter(&zp->z_lock); + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); + delete_now = may_delete_now && !toobig && + vnode_iocount(ZTOV(zp)) == 1 && + !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked && + zfs_external_acl(zp) == acl_obj; + } + + if (delete_now) { + if (xattr_obj_unlinked) { + ASSERT3U(xzp->z_nlink, ==, 2); + mutex_enter(&xzp->z_lock); + xzp->z_unlinked = B_TRUE; + links = 0; + error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + &links, sizeof (links), tx); + ASSERT3U(error, ==, 0); + mutex_exit(&xzp->z_lock); + zfs_unlinked_add(xzp, tx); + + if (zp->z_is_sa) + error = sa_remove(zp->z_sa_hdl, + SA_ZPL_XATTR(zfsvfs), tx); + else + error = sa_update(zp->z_sa_hdl, + SA_ZPL_XATTR(zfsvfs), &null_xattr, + sizeof (uint64_t), tx); + ASSERT0(error); + } + /* + * Add to the unlinked set because a new reference could be + * taken concurrently resulting in a deferred destruction. + */ + zfs_unlinked_add(zp, tx); + mutex_exit(&zp->z_lock); + } else if (unlinked) { + mutex_exit(&zp->z_lock); + zfs_unlinked_add(zp, tx); + } + + txtype = TX_REMOVE; + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); + + dmu_tx_commit(tx); +out: + if (realnmp) + kmem_free(realnm.cn_nameptr, realnm.cn_namelen); + + zfs_dirent_unlock(dl); + + if (delete_now) + zrele(zp); + else + zfs_zrele_async(zp); + + if (xzp) + zfs_zrele_async(xzp); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Create a new directory and insert it into dzp using the name + * provided. Return a pointer to the inserted directory. + * + * IN: dzp - znode of directory to add subdir to. + * dirname - name of new directory. + * vap - attributes of new directory. + * cr - credentials of caller. + * flags - case flags. + * vsecp - ACL to be set + * + * OUT: zpp - znode of created directory. 
+ * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * dzp - ctime|mtime updated + * zpp - ctime|mtime|atime updated + */ +/*ARGSUSED*/ +int +zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp, + cred_t *cr, int flags, vsecattr_t *vsecp) +{ + znode_t *zp; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zilog_t *zilog; + zfs_dirlock_t *dl; + uint64_t txtype; + dmu_tx_t *tx; + int error; + int zf = ZNEW; + uid_t uid; + gid_t gid = crgetgid(cr); + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + boolean_t waited = B_FALSE; + + ASSERT(S_ISDIR(vap->va_mode)); + + /* + * If we have an ephemeral id, ACL, or XVATTR then + * make sure file system is at proper version + */ + + uid = crgetuid(cr); + if (zfsvfs->z_use_fuids == B_FALSE && + (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) + return (SET_ERROR(EINVAL)); + + if (dirname == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + zilog = zfsvfs->z_log; + + if (dzp->z_pflags & ZFS_XATTR) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + if (zfsvfs->z_utf8 && u8_validate(dirname, + strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EILSEQ)); + } + if (flags & FIGNORECASE) + zf |= ZCILOOK; + + if (vap->va_mask & ATTR_XVATTR) { + if ((error = secpolicy_xvattr(vap, + crgetuid(cr), cr, vap->va_mode)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + } + + if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, + vsecp, &acl_ids)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + /* + * First make sure the new directory doesn't exist. + * + * Existence is checked first to make sure we don't return + * EACCES instead of EEXIST which can cause some applications + * to fail. + */ +top: + *zpp = NULL; + + if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, + NULL, NULL))) { + zfs_acl_ids_free(&acl_ids); + ZFS_EXIT(zfsvfs); + return (error); + } + + if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EDQUOT)); + } + + /* + * Add a new entry to the directory. + */ + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, + acl_ids.z_aclp->z_acl_bytes); + } + + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + + error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + ZFS_EXIT(zfsvfs); + return (error); + } + + /* + * Create new node. + */ + zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + + /* + * Now put new name in parent dir. 
+ */ + error = zfs_link_create(dl, zp, tx, ZNEW); + if (error != 0) { + zfs_znode_delete(zp, tx); + goto out; + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + *zpp = zp; + + txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, + acl_ids.z_fuidp, vap); + +out: + zfs_acl_ids_free(&acl_ids); + + dmu_tx_commit(tx); + /* + * OS X - attach the vnode _after_ committing the transaction + */ + zfs_znode_getvnode(zp, zfsvfs); + + zfs_dirent_unlock(dl); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + if (error != 0) { + zrele(zp); + } else { + } + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Remove a directory subdir entry. If the current working + * directory is the same as the subdir to be removed, the + * remove will fail. + * + * IN: dzp - znode of directory to remove from. + * name - name of directory to be removed. + * cwd - inode of current working directory. + * cr - credentials of caller. + * flags - case flags + * + * RETURN: 0 on success, error code on failure. + * + * Timestamps: + * dzp - ctime|mtime updated + */ +/*ARGSUSED*/ +int +zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, + int flags) +{ + znode_t *zp; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zilog_t *zilog; + zfs_dirlock_t *dl; + dmu_tx_t *tx; + int error; + int zflg = ZEXISTS; + boolean_t waited = B_FALSE; + + if (name == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + zilog = zfsvfs->z_log; + + if (flags & FIGNORECASE) + zflg |= ZCILOOK; +top: + zp = NULL; + + /* + * Attempt to lock directory; fail if entry doesn't exist. + */ + if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, + NULL, NULL))) { + ZFS_EXIT(zfsvfs); + return (error); + } + + if ((error = zfs_zaccess_delete(dzp, zp, cr))) { + goto out; + } + + if (ZTOTYPE(zp) != VDIR) { + error = SET_ERROR(ENOTDIR); + goto out; + } + + if (zp == cwd) { + error = SET_ERROR(EINVAL); + goto out; + } + + /* + * Grab a lock on the directory to make sure that no one is + * trying to add (or lookup) entries while we are removing it. + */ + rw_enter(&zp->z_name_lock, RW_WRITER); + + /* + * Grab a lock on the parent pointer to make sure we play well + * with the treewalk and directory rename code. + */ + rw_enter(&zp->z_parent_lock, RW_WRITER); + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + zfs_sa_upgrade_txholds(tx, zp); + zfs_sa_upgrade_txholds(tx, dzp); + dmu_tx_mark_netfree(tx); + error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + rw_exit(&zp->z_parent_lock); + rw_exit(&zp->z_name_lock); + zfs_dirent_unlock(dl); + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + zrele(zp); + goto top; + } + dmu_tx_abort(tx); + zrele(zp); + ZFS_EXIT(zfsvfs); + return (error); + } + + error = zfs_link_destroy(dl, zp, tx, zflg, NULL); + + if (error == 0) { + uint64_t txtype = TX_RMDIR; + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT, + B_FALSE); + } + + dmu_tx_commit(tx); + + rw_exit(&zp->z_parent_lock); + rw_exit(&zp->z_name_lock); +out: + zfs_dirent_unlock(dl); + + zrele(zp); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Read directory entries from the given directory cursor position and emit + * name and position for each entry. + * + * IN: ip - inode of directory to read. + * ctx - directory entry context. + * cr - credentials of caller. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * ip - atime updated + * + * Note that the low 4 bits of the cookie returned by zap is always zero. + * This allows us to use the low range for "special" directory entries: + * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, + * we use the offset 2 for the '.zfs' directory. + */ +/* ARGSUSED */ +int +zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, + int flags, int *a_numdirent) +{ + + znode_t *zp = VTOZ(vp); + boolean_t extended = (flags & VNODE_READDIR_EXTENDED); + struct direntry *eodp; /* Extended */ + struct dirent *odp; /* Standard */ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + objset_t *os; + caddr_t outbuf; + size_t bufsize; + zap_cursor_t zc; + zap_attribute_t zap; + uint_t bytes_wanted; + uint64_t offset; /* must be unsigned; checks for < 1 */ + uint64_t parent; + int local_eof; + int outcount; + int error = 0; + uint8_t prefetch; + uint8_t type; + int numdirent = 0; + char *bufptr; + boolean_t isdotdir = B_TRUE; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parent, sizeof (parent))) != 0) + goto out; + + /* + * If we are not given an eof variable, + * use a local one. + */ + if (eofp == NULL) + eofp = &local_eof; + + /* + * Check for valid iov_len. + */ + if (uio_curriovlen(uio) <= 0) { + error = EINVAL; + goto out; + } + + /* + * Quit if directory has been removed (posix) + */ + if ((*eofp = zp->z_unlinked) != 0) { + goto out; + } + + error = 0; + os = zfsvfs->z_os; + offset = uio_offset(uio); + prefetch = zp->z_zn_prefetch; + + /* + * Initialize the iterator cursor. + */ + if (offset <= 3) { + /* + * Start iteration from the beginning of the directory. + */ + zap_cursor_init(&zc, os, zp->z_id); + } else { + /* + * The offset is a serialized cursor. + */ + zap_cursor_init_serialized(&zc, os, zp->z_id, offset); + } + + /* + * Get space to change directory entries into fs independent format. + */ + bytes_wanted = uio_curriovlen(uio); + bufsize = (size_t)bytes_wanted; + outbuf = kmem_alloc(bufsize, KM_SLEEP); + bufptr = (char *)outbuf; + + /* + * Transform to file-system independent format + */ + + outcount = 0; + while (outcount < bytes_wanted) { + ino64_t objnum; + ushort_t reclen; + uint64_t *next = NULL; + size_t namelen; + int force_formd_normalized_output; + size_t nfdlen; + + /* + * Special case `.', `..', and `.zfs'. 
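Because ZAP cursors always return cookies with the low four bits clear, offsets 0, 1, and 2 are free to stand for ".", "..", and (at the root) ".zfs". A small sketch of a readdir loop that hands out the synthetic entries first and then switches to cursor cookies (the low-bits-clear step below is only a stand-in for zap_cursor_serialize()):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t offset = 0;    /* resume cookie handed back to the caller */
    int show_ctldir = 1;    /* the filesystem root also exposes .zfs */

    for (int i = 0; i < 6; i++) {
        if (offset == 0)
            printf("cookie %llu -> \".\"\n", (unsigned long long)offset);
        else if (offset == 1)
            printf("cookie %llu -> \"..\"\n", (unsigned long long)offset);
        else if (offset == 2 && show_ctldir)
            printf("cookie %llu -> \".zfs\"\n", (unsigned long long)offset);
        else
            /* Real entries come from the ZAP cursor. */
            printf("cookie %llu -> next ZAP entry\n",
                (unsigned long long)offset);

        if (offset > 2 || (offset == 2 && !show_ctldir))
            offset = ((offset >> 4) + 1) << 4;  /* keep low 4 bits clear */
        else
            offset += 1;
    }
    return (0);
}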
+ */ + if (offset == 0) { + (void) strlcpy(zap.za_name, ".", MAXNAMELEN); + zap.za_normalization_conflict = 0; + objnum = (zp->z_id == zfsvfs->z_root) ? 2 : zp->z_id; + type = DT_DIR; + } else if (offset == 1) { + (void) strlcpy(zap.za_name, "..", MAXNAMELEN); + zap.za_normalization_conflict = 0; + objnum = (parent == zfsvfs->z_root) ? 2 : parent; + objnum = (zp->z_id == zfsvfs->z_root) ? 1 : objnum; + type = DT_DIR; + } else if (offset == 2 && zfs_show_ctldir(zp)) { + (void) strlcpy(zap.za_name, ZFS_CTLDIR_NAME, + MAXNAMELEN); + zap.za_normalization_conflict = 0; + objnum = ZFSCTL_INO_ROOT; + type = DT_DIR; + } else { + + /* This is not a special case directory */ + isdotdir = B_FALSE; + + /* + * Grab next entry. + */ + if ((error = zap_cursor_retrieve(&zc, &zap))) { + if ((*eofp = (error == ENOENT)) != 0) + break; + else + goto update; + } + + /* + * Allow multiple entries provided the first entry is + * the object id. Non-zpl consumers may safely make + * use of the additional space. + * + * XXX: This should be a feature flag for compatibility + */ + if (zap.za_integer_length != 8 || + zap.za_num_integers != 1) { + cmn_err(CE_WARN, "zap_readdir: bad directory " + "entry, obj = %lld, offset = %lld\n", + (u_longlong_t)zp->z_id, + (u_longlong_t)offset); + error = SET_ERROR(ENXIO); + goto update; + } + + objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); + /* + * MacOS X can extract the object type here such as: + * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); + */ + type = ZFS_DIRENT_TYPE(zap.za_first_integer); + + } + + /* emit start */ + +#define DIRENT_RECLEN(namelen, ext) \ + ((ext) ? \ + ((sizeof (struct direntry) + (namelen) - (MAXPATHLEN-1) + 7) & ~7) \ + : \ + ((sizeof (struct dirent) - (NAME_MAX+1)) + (((namelen)+1 + 7) &~ 7))) + + /* + * Check if name will fit. + * + * Note: non-ascii names may expand (3x) when converted to NFD + */ + namelen = strlen(zap.za_name); + + /* sysctl to force formD normalization of vnop output */ + if (zfs_vnop_force_formd_normalized_output && + !is_ascii_str(zap.za_name)) + force_formd_normalized_output = 1; + else + force_formd_normalized_output = 0; + + if (force_formd_normalized_output) + namelen = MIN(extended ? MAXPATHLEN-1 : MAXNAMLEN, + namelen * 3); + + reclen = DIRENT_RECLEN(namelen, extended); + + /* + * Will this entry fit in the buffer? + */ + if (outcount + reclen > bufsize) { + /* + * Did we manage to fit anything in the buffer? + */ + if (!outcount) { + error = (EINVAL); + goto update; + } + break; + } + + if (extended) { + /* + * Add extended flag entry: + */ + eodp = (struct direntry *)bufptr; + /* NOTE: d_seekoff is the offset for the *next* entry */ + next = &(eodp->d_seekoff); + eodp->d_ino = INO_ZFSTOXNU(objnum, zfsvfs->z_root); + eodp->d_type = type; + + /* + * Mac OS X: non-ascii names are UTF-8 NFC on disk + * so convert to NFD before exporting them. 
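+ * For example, NFC "é" (U+00E9, two bytes of UTF-8) decomposes to
+ * NFD "e" + U+0301 (three bytes); this expansion is why namelen was
+ * reserved at up to three times the stored length before reclen was
+ * computed above.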
+ */ + namelen = strlen(zap.za_name); + if (!force_formd_normalized_output || + utf8_normalizestr((const u_int8_t *)zap.za_name, + namelen, (u_int8_t *)eodp->d_name, &nfdlen, + MAXPATHLEN-1, UTF_DECOMPOSED) != 0) { + /* ASCII or normalization failed, copy zap */ + if ((namelen > 0)) + (void) bcopy(zap.za_name, eodp->d_name, + namelen + 1); + } else { + /* Normalization succeeded (in buffer) */ + namelen = nfdlen; + } + eodp->d_namlen = namelen; + eodp->d_reclen = reclen = + DIRENT_RECLEN(namelen, extended); + + } else { + /* + * Add normal entry: + */ + + odp = (struct dirent *)bufptr; + odp->d_ino = INO_ZFSTOXNU(objnum, zfsvfs->z_root); + odp->d_type = type; + + /* + * Mac OS X: non-ascii names are UTF-8 NFC on disk + * so convert to NFD before exporting them. + */ + namelen = strlen(zap.za_name); + if (!force_formd_normalized_output || + utf8_normalizestr((const u_int8_t *)zap.za_name, + namelen, (u_int8_t *)odp->d_name, &nfdlen, + MAXNAMLEN, UTF_DECOMPOSED) != 0) { + /* ASCII or normalization failed, copy zap */ + if ((namelen > 0)) + (void) bcopy(zap.za_name, odp->d_name, + namelen + 1); + } else { + /* Normalization succeeded (in buffer). */ + namelen = nfdlen; + } + odp->d_namlen = namelen; + odp->d_reclen = reclen = + DIRENT_RECLEN(namelen, extended); + } + + outcount += reclen; + bufptr += reclen; + numdirent++; + + ASSERT(outcount <= bufsize); + + /* emit done */ + + /* Prefetch znode */ + if (prefetch) + dmu_prefetch(os, objnum, 0, 0, 0, + ZIO_PRIORITY_SYNC_READ); + + /* + * Move to the next entry, fill in the previous offset. + */ + if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { + zap_cursor_advance(&zc); + offset = zap_cursor_serialize(&zc); + } else { + offset += 1; + } + + if (extended) + *next = offset; + } + zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ + + /* All done, copy temporary buffer to userland */ + if ((error = uiomove(outbuf, (long)outcount, UIO_READ, uio))) { + /* + * Reset the pointer. + */ + offset = uio_offset(uio); + } + + +update: + zap_cursor_fini(&zc); + if (outbuf) { + kmem_free(outbuf, bufsize); + } + + if (error == ENOENT) + error = 0; + + uio_setoffset(uio, offset); + if (a_numdirent) + *a_numdirent = numdirent; + +out: + ZFS_EXIT(zfsvfs); + + dprintf("-zfs_readdir: num %d\n", numdirent); + + return (error); +} + +ulong_t zfs_fsync_sync_cnt = 4; + +int +zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + vnode_t *vp = ZTOV(zp); + + if (zp->z_is_mapped /* && !(syncflag & FNODSYNC) */ && + vnode_isreg(vp) && !vnode_isswap(vp)) { + cluster_push(vp, /* waitdata ? IO_SYNC : */ 0); + } + + (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); + + if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED && + !vnode_isrecycled(ZTOV(zp))) { + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + zil_commit(zfsvfs->z_log, zp->z_id); + ZFS_EXIT(zfsvfs); + } + tsd_set(zfs_fsyncer_key, NULL); + + return (0); +} + +/* + * Get the requested file attributes and place them in the provided + * vattr structure. + * + * IN: vp - vnode of file. + * vap - va_mask identifies requested attributes. + * If ATTR_XVATTR set, then optional attrs are requested + * flags - ATTR_NOACLCHECK (CIFS server context) + * cr - credentials of caller. + * ct - caller context + * + * OUT: vap - attribute values. 
+ * + * RETURN: 0 (always succeeds) + */ +int +zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, + caller_context_t *ct) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error = 0; + uint64_t links; + uint64_t mtime[2], ctime[2], crtime[2], rdev; + xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ + xoptattr_t *xoap = NULL; + boolean_t skipaclchk = /* (flags&ATTR_NOACLCHECK)?B_TRUE: */ B_FALSE; + sa_bulk_attr_t bulk[4]; + int count = 0; + + VERIFY3P(zp->z_zfsvfs, ==, vfs_fsprivate(vnode_mount(vp))); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); + if (vnode_isblk(vp) || vnode_ischr(vp)) + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, + &rdev, 8); + + if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + + /* + * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. + * Also, if we are the owner don't bother, since owner should + * always be allowed to read basic attributes of file. + */ + if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && + (vap->va_uid != crgetuid(cr))) { + if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, + skipaclchk, cr))) { + ZFS_EXIT(zfsvfs); + return (error); + } + } + + /* + * Return all attributes. It's cheaper to provide the answer + * than to determine whether we were asked the question. + */ + + mutex_enter(&zp->z_lock); + vap->va_type = IFTOVT(zp->z_mode); + vap->va_mode = zp->z_mode & ~S_IFMT; + vap->va_nodeid = zp->z_id; + if (vnode_isvroot((vp)) && zfs_show_ctldir(zp)) + links = zp->z_links + 1; + else + links = zp->z_links; + vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! */ + vap->va_size = zp->z_size; + if (vnode_isblk(vp) || vnode_ischr(vp)) + vap->va_rdev = zfs_cmpldev(rdev); + + vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ + + /* + * Add in any requested optional attributes and the create time. + * Also set the corresponding bits in the returned attribute bitmap. 
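+ *
+ * Each optional attribute below follows the same pattern: if the
+ * caller requested it (XVA_ISSET_REQ), derive the value from
+ * z_pflags and flag it as valid in the return mask (XVA_SET_RTN).
+ * For example, XAT_HIDDEN is answered from the ZFS_HIDDEN flag.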
+ */ + if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { + if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { + xoap->xoa_archive = + ((zp->z_pflags & ZFS_ARCHIVE) != 0); + XVA_SET_RTN(xvap, XAT_ARCHIVE); + } + + if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { + xoap->xoa_readonly = + ((zp->z_pflags & ZFS_READONLY) != 0); + XVA_SET_RTN(xvap, XAT_READONLY); + } + + if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { + xoap->xoa_system = + ((zp->z_pflags & ZFS_SYSTEM) != 0); + XVA_SET_RTN(xvap, XAT_SYSTEM); + } + + if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { + xoap->xoa_hidden = + ((zp->z_pflags & ZFS_HIDDEN) != 0); + XVA_SET_RTN(xvap, XAT_HIDDEN); + } + + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { + xoap->xoa_nounlink = + ((zp->z_pflags & ZFS_NOUNLINK) != 0); + XVA_SET_RTN(xvap, XAT_NOUNLINK); + } + + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { + xoap->xoa_immutable = + ((zp->z_pflags & ZFS_IMMUTABLE) != 0); + XVA_SET_RTN(xvap, XAT_IMMUTABLE); + } + + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { + xoap->xoa_appendonly = + ((zp->z_pflags & ZFS_APPENDONLY) != 0); + XVA_SET_RTN(xvap, XAT_APPENDONLY); + } + + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { + xoap->xoa_nodump = + ((zp->z_pflags & ZFS_NODUMP) != 0); + XVA_SET_RTN(xvap, XAT_NODUMP); + } + + if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { + xoap->xoa_opaque = + ((zp->z_pflags & ZFS_OPAQUE) != 0); + XVA_SET_RTN(xvap, XAT_OPAQUE); + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { + xoap->xoa_av_quarantined = + ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); + XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { + xoap->xoa_av_modified = + ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); + XVA_SET_RTN(xvap, XAT_AV_MODIFIED); + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && + vnode_isreg(vp)) { + zfs_sa_get_scanstamp(zp, xvap); + } + if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { + uint64_t times[2]; + + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), + times, sizeof (times)); + ZFS_TIME_DECODE(&xoap->xoa_createtime, times); + XVA_SET_RTN(xvap, XAT_CREATETIME); + } + + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { + xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); + XVA_SET_RTN(xvap, XAT_REPARSE); + } + if (XVA_ISSET_REQ(xvap, XAT_GEN)) { + xoap->xoa_generation = zp->z_gen; + XVA_SET_RTN(xvap, XAT_GEN); + } + + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { + xoap->xoa_offline = + ((zp->z_pflags & ZFS_OFFLINE) != 0); + XVA_SET_RTN(xvap, XAT_OFFLINE); + } + + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { + xoap->xoa_sparse = + ((zp->z_pflags & ZFS_SPARSE) != 0); + XVA_SET_RTN(xvap, XAT_SPARSE); + } + } + + ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); + ZFS_TIME_DECODE(&vap->va_mtime, mtime); + ZFS_TIME_DECODE(&vap->va_ctime, ctime); + ZFS_TIME_DECODE(&vap->va_crtime, crtime); + + mutex_exit(&zp->z_lock); + + /* + * If we are told to ignore owners, we scribble over the + * uid and gid here unless root. + */ + if (((unsigned int)vfs_flags(zfsvfs->z_vfs)) & MNT_IGNORE_OWNERSHIP) { + if (kauth_cred_getuid(cr) != 0) { + vap->va_uid = UNKNOWNUID; + vap->va_gid = UNKNOWNGID; + } + } + + ZFS_EXIT(zfsvfs); + return (0); +} + +#ifdef NOTSUREYET +/* + * For the operation of changing file's user/group/project, we need to + * handle not only the main object that is assigned to the file directly, + * but also the ones that are used by the file via hidden xattr directory. 
+ * + * Because the xattr directory may contains many EA entries, as to it may + * be impossible to change all of them via the transaction of changing the + * main object's user/group/project attributes. Then we have to change them + * via other multiple independent transactions one by one. It may be not good + * solution, but we have no better idea yet. + */ +static int +zfs_setattr_dir(znode_t *dzp) +{ + struct vnode *dxip = ZTOI(dzp); + struct vnode *xip = NULL; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + objset_t *os = zfsvfs->z_os; + zap_cursor_t zc; + zap_attribute_t zap; + zfs_dirlock_t *dl; + znode_t *zp; + dmu_tx_t *tx = NULL; + uint64_t uid, gid; + sa_bulk_attr_t bulk[4]; + int count; + int err; + + zap_cursor_init(&zc, os, dzp->z_id); + while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) { + count = 0; + if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { + err = ENXIO; + break; + } + + err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp, + ZEXISTS, NULL, NULL); + if (err == ENOENT) + goto next; + if (err) + break; + + xip = ZTOI(zp); + if (zp->z_uid == dzp->z_uid && + zp->z_gid == dzp->z_gid && + zp->z_projid == dzp->z_projid) + goto next; + + tx = dmu_tx_create(os); + if (!(zp->z_pflags & ZFS_PROJID)) + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + else + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) + break; + + mutex_enter(&dzp->z_lock); + + if (zp->z_uid != dxzp->z_uid) { + zp->z_uid = dzp->z_uid; + uid = zfs_uid_read(dzp); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &uid, sizeof (uid)); + } + + if (KGID_TO_SGID(zp->z_gid) != KGID_TO_SGID(dxzp->z_gid)) { + zp->z_gid = dzp->z_gid; + gid = zfs_gid_read(dzp); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, + &gid, sizeof (gid)); + } + + if (zp->z_projid != dzp->z_projid) { + if (!(zp->z_pflags & ZFS_PROJID)) { + zp->z_pflags |= ZFS_PROJID; + SA_ADD_BULK_ATTR(bulk, count, + SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, + sizeof (zp->z_pflags)); + } + + zp->z_projid = dzp->z_projid; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs), + NULL, &zp->z_projid, sizeof (zp->z_projid)); + } + + mutex_exit(&dzp->z_lock); + + if (likely(count > 0)) { + err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + dmu_tx_commit(tx); + } else { + dmu_tx_abort(tx); + } + tx = NULL; + if (err != 0 && err != ENOENT) + break; + +next: + if (zp) { + zrele(zp); + zp = NULL; + zfs_dirent_unlock(dl); + } + zap_cursor_advance(&zc); + } + + if (tx) + dmu_tx_abort(tx); + if (zp) { + zrele(zp); + zfs_dirent_unlock(dl); + } + zap_cursor_fini(&zc); + + return (err == ENOENT ? 0 : err); +} +#endif + +/* + * Set the file attributes to the values contained in the + * vattr structure. + * + * IN: zp - znode of file to be modified. + * vap - new attribute values. + * If ATTR_XVATTR set, then optional attrs are being set + * flags - ATTR_UTIME set if non-default time values provided. + * - ATTR_NOACLCHECK (CIFS context only). + * cr - credentials of caller. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * ip - ctime updated, mtime updated if size changed. 
+ */ +/* ARGSUSED */ +int +zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) +{ + struct vnode *vp; + zfsvfs_t *zfsvfs = ZTOZSB(zp); + objset_t *os = zfsvfs->z_os; + zilog_t *zilog; + dmu_tx_t *tx; + vattr_t oldva; + xvattr_t *tmpxvattr; + uint_t mask = vap->va_mask; + uint_t saved_mask = 0; + int trim_mask = 0; + uint64_t new_mode; + uint64_t new_kuid = 0, new_kgid = 0, new_uid, new_gid; + uint64_t xattr_obj; + uint64_t mtime[2], ctime[2], atime[2], crtime[2]; + uint64_t projid = ZFS_INVALID_PROJID; + znode_t *attrzp; + int need_policy = FALSE; + int err, err2 = 0; + zfs_fuid_info_t *fuidp = NULL; + xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ + xoptattr_t *xoap; + zfs_acl_t *aclp; + boolean_t fuid_dirtied = B_FALSE; + boolean_t handle_eadir = B_FALSE; + sa_bulk_attr_t *bulk, *xattr_bulk; + int count = 0, xattr_count = 0, bulks = 9; + + if (mask == 0) + return (0); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + vp = ZTOV(zp); + + /* + * If this is a xvattr_t, then get a pointer to the structure of + * optional attributes. If this is NULL, then we have a vattr_t. + */ + xoap = xva_getxoptattr(xvap); + if (xoap != NULL && (mask & ATTR_XVATTR)) { + if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { + if (!dmu_objset_projectquota_enabled(os) || + (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOTSUP)); + } + + projid = xoap->xoa_projid; + if (unlikely(projid == ZFS_INVALID_PROJID)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) + projid = ZFS_INVALID_PROJID; + else + need_policy = TRUE; + } + + if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && + (xoap->xoa_projinherit != + ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && + (!dmu_objset_projectquota_enabled(os) || + (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOTSUP)); + } + } + + zilog = zfsvfs->z_log; + + /* + * Make sure that if we have ephemeral uid/gid or xvattr specified + * that file system is at proper version level + */ + + if (zfsvfs->z_use_fuids == B_FALSE && + (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) || + ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) || + (mask & ATTR_XVATTR))) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + if (mask & ATTR_SIZE && S_ISDIR(zp->z_mode)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EISDIR)); + } + + if (mask & ATTR_SIZE && !S_ISREG(zp->z_mode) && !S_ISFIFO(zp->z_mode)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP); + xva_init(tmpxvattr); + + bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP); + xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP); + + /* + * Immutable files can only alter immutable bit and atime + */ + if ((zp->z_pflags & ZFS_IMMUTABLE) && + ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) || + ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { + err = SET_ERROR(EPERM); + goto out3; + } + + if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { + err = SET_ERROR(EPERM); + goto out3; + } + + /* + * Verify timestamps doesn't overflow 32 bits. + * ZFS can handle large timestamps, but 32bit syscalls can't + * handle times greater than 2039. This check should be removed + * once large timestamps are fully supported. 
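+ *
+ * For example, a setattr carrying an atime or mtime that does not
+ * fit in a 32-bit time representation is rejected below with
+ * EOVERFLOW instead of being silently truncated.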
+ */ + if (mask & (ATTR_ATIME | ATTR_MTIME)) { + if (((mask & ATTR_ATIME) && + TIMESPEC_OVERFLOW(&vap->va_atime)) || + ((mask & ATTR_MTIME) && + TIMESPEC_OVERFLOW(&vap->va_mtime))) { + err = SET_ERROR(EOVERFLOW); + goto out3; + } + } + +top: + attrzp = NULL; + aclp = NULL; + + /* Can this be moved to before the top label? */ + if (zfs_is_readonly(zfsvfs)) { + err = SET_ERROR(EROFS); + goto out3; + } + + /* + * First validate permissions + */ + + if (mask & ATTR_SIZE) { + err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr); + if (err) + goto out3; + + /* + * XXX - Note, we are not providing any open + * mode flags here (like FNDELAY), so we may + * block if there are locks present... this + * should be addressed in openat(). + */ + /* XXX - would it be OK to generate a log record here? */ + err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); + if (err) + goto out3; + } + + if (mask & (ATTR_ATIME|ATTR_MTIME) || + ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || + XVA_ISSET_REQ(xvap, XAT_READONLY) || + XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || + XVA_ISSET_REQ(xvap, XAT_OFFLINE) || + XVA_ISSET_REQ(xvap, XAT_SPARSE) || + XVA_ISSET_REQ(xvap, XAT_CREATETIME) || + XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { + need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, + B_FALSE, cr); + } + + if (mask & (ATTR_UID|ATTR_GID)) { + int idmask = (mask & (ATTR_UID|ATTR_GID)); + int take_owner; + int take_group; + + /* + * NOTE: even if a new mode is being set, + * we may clear S_ISUID/S_ISGID bits. + */ + + if (!(mask & ATTR_MODE)) + vap->va_mode = zp->z_mode; + + /* + * Take ownership or chgrp to group we are a member of + */ + + take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr)); + take_group = (mask & ATTR_GID) && + zfs_groupmember(zfsvfs, vap->va_gid, cr); + + /* + * If both ATTR_UID and ATTR_GID are set then take_owner and + * take_group must both be set in order to allow taking + * ownership. + * + * Otherwise, send the check through secpolicy_vnode_setattr() + * + */ + + if (((idmask == (ATTR_UID|ATTR_GID)) && + take_owner && take_group) || + ((idmask == ATTR_UID) && take_owner) || + ((idmask == ATTR_GID) && take_group)) { + if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, + B_FALSE, cr) == 0) { + /* + * Remove setuid/setgid for non-privileged users + */ + (void) secpolicy_setid_clear(vap, cr); + trim_mask = (mask & (ATTR_UID|ATTR_GID)); + } else { + need_policy = TRUE; + } + } else { + need_policy = TRUE; + } + } + + mutex_enter(&zp->z_lock); + oldva.va_mode = zp->z_mode; + zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); + if (mask & ATTR_XVATTR) { + /* + * Update xvattr mask to include only those attributes + * that are actually changing. + * + * the bits will be restored prior to actually setting + * the attributes so the caller thinks they were set. 
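+ *
+ * For example, a request to set XAT_NODUMP to the value the file
+ * already has is moved from xvap into tmpxvattr below, so no policy
+ * check is triggered for the no-op, and the bit is put back into
+ * xvap once the attributes have been applied.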
+ */ + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { + if (xoap->xoa_appendonly != + ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_APPENDONLY); + XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { + if (xoap->xoa_projinherit != + ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_PROJINHERIT); + XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { + if (xoap->xoa_nounlink != + ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_NOUNLINK); + XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { + if (xoap->xoa_immutable != + ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_IMMUTABLE); + XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { + if (xoap->xoa_nodump != + ((zp->z_pflags & ZFS_NODUMP) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_NODUMP); + XVA_SET_REQ(tmpxvattr, XAT_NODUMP); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { + if (xoap->xoa_av_modified != + ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); + XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { + if ((!S_ISREG(zp->z_mode) && + xoap->xoa_av_quarantined) || + xoap->xoa_av_quarantined != + ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { + need_policy = TRUE; + } else { + XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); + XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED); + } + } + + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { + mutex_exit(&zp->z_lock); + err = SET_ERROR(EPERM); + goto out3; + } + + if (need_policy == FALSE && + (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || + XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { + need_policy = TRUE; + } + } + + mutex_exit(&zp->z_lock); + + if (mask & ATTR_MODE) { + if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, B_FALSE, cr) == 0) { + err = secpolicy_setid_setsticky_clear(vp, vap, + &oldva, cr); + if (err) + goto out3; + + trim_mask |= ATTR_MODE; + } else { + need_policy = TRUE; + } + } + + if (need_policy) { + /* + * If trim_mask is set then take ownership + * has been granted or write_acl is present and user + * has the ability to modify mode. In that case remove + * UID|GID and or MODE from mask so that + * secpolicy_vnode_setattr() doesn't revoke it. 
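+ *
+ * Concretely: the bits covered by trim_mask are cleared from
+ * va_mask below, secpolicy_vnode_setattr() runs on what remains,
+ * and the saved bits are OR'ed back in afterwards so the caller
+ * still sees them as set.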
+ */ + + if (trim_mask) { + saved_mask = vap->va_mask; + vap->va_mask &= ~trim_mask; + } + err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, + (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); + if (err) + goto out3; + + if (trim_mask) + vap->va_mask |= saved_mask; + } + + /* + * secpolicy_vnode_setattr, or take ownership may have + * changed va_mask + */ + mask = vap->va_mask; + + if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) { + handle_eadir = B_TRUE; + err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr_obj, sizeof (xattr_obj)); + + if (err == 0 && xattr_obj) { + err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp); + if (err) + goto out2; + } + if (mask & ATTR_UID) { + new_kuid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); + if (new_kuid != zp->z_uid && + zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, + new_kuid)) { + if (attrzp) + zrele(attrzp); + err = SET_ERROR(EDQUOT); + goto out2; + } + } + + if (mask & ATTR_GID) { + new_kgid = zfs_fuid_create(zfsvfs, + (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp); + if (new_kgid != zp->z_gid && + zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, + new_kgid)) { + if (attrzp) + zrele(attrzp); + err = SET_ERROR(EDQUOT); + goto out2; + } + } + + if (projid != ZFS_INVALID_PROJID && + zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) { + if (attrzp) + zrele(attrzp); + err = EDQUOT; + goto out2; + } + } + tx = dmu_tx_create(os); + + if (mask & ATTR_MODE) { + uint64_t pmode = zp->z_mode; + uint64_t acl_obj; + new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); + + if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))) + goto out; + + mutex_enter(&zp->z_lock); + if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { + /* + * Are we upgrading ACL from old V0 format + * to V1 format? + */ + if (zfsvfs->z_version >= ZPL_VERSION_FUID && + zfs_znode_acl_version(zp) == + ZFS_ACL_VERSION_INITIAL) { + dmu_tx_hold_free(tx, acl_obj, 0, + DMU_OBJECT_END); + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, aclp->z_acl_bytes); + } else { + dmu_tx_hold_write(tx, acl_obj, 0, + aclp->z_acl_bytes); + } + } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, aclp->z_acl_bytes); + } + mutex_exit(&zp->z_lock); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + } else { + if (((mask & ATTR_XVATTR) && + XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || + (projid != ZFS_INVALID_PROJID && + !(zp->z_pflags & ZFS_PROJID))) + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + else + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + } + + if (attrzp) { + dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); + } + + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + + zfs_sa_upgrade_txholds(tx, zp); + + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) + goto out; + + count = 0; + /* + * Set each attribute requested. + * We group settings according to the locks they need to acquire. + * + * Note: you cannot set ctime directly, although it will be + * updated as a side-effect of calling this function. + */ + + if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { + /* + * For the existed object that is upgraded from old system, + * its on-disk layout has no slot for the project ID attribute. + * But quota accounting logic needs to access related slots by + * offset directly. So we need to adjust old objects' layout + * to make the project ID to some unified and fixed offset. 
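+ *
+ * sa_add_projid() below rewrites the object's SA layout so that a
+ * project ID slot exists at the expected offset; if it returns
+ * EEXIST the slot is already present and the error is ignored.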
+ */ + if (attrzp) + err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); + if (err == 0) + err = sa_add_projid(zp->z_sa_hdl, tx, projid); + + if (unlikely(err == EEXIST)) + err = 0; + else if (err != 0) + goto out; + else + projid = ZFS_INVALID_PROJID; + } + + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, sizeof (zp->z_pflags)); + + if (attrzp) { + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) + mutex_enter(&attrzp->z_acl_lock); + mutex_enter(&attrzp->z_lock); + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, + sizeof (attrzp->z_pflags)); + if (projid != ZFS_INVALID_PROJID) { + attrzp->z_projid = projid; + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, + sizeof (attrzp->z_projid)); + } + } + + if (mask & (ATTR_UID|ATTR_GID)) { + + if (mask & ATTR_UID) { + new_uid = new_kuid; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &new_uid, sizeof (new_uid)); + zp->z_uid = new_uid; + if (attrzp) { + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_UID(zfsvfs), NULL, &new_uid, + sizeof (new_uid)); + attrzp->z_uid = new_uid; + } + } + + if (mask & ATTR_GID) { + new_gid = new_kgid; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), + NULL, &new_gid, sizeof (new_gid)); + zp->z_gid = new_gid; + if (attrzp) { + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_GID(zfsvfs), NULL, &new_gid, + sizeof (new_gid)); + attrzp->z_gid = new_gid; + } + } + if (!(mask & ATTR_MODE)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), + NULL, &new_mode, sizeof (new_mode)); + new_mode = zp->z_mode; + } + err = zfs_acl_chown_setattr(zp); + ASSERT(err == 0); + if (attrzp) { + err = zfs_acl_chown_setattr(attrzp); + ASSERT(err == 0); + } + } + + if (mask & ATTR_MODE) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + &new_mode, sizeof (new_mode)); + zp->z_mode = new_mode; + ASSERT3P(aclp, !=, NULL); + err = zfs_aclset_common(zp, aclp, cr, tx); + ASSERT0(err); + if (zp->z_acl_cached) + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = aclp; + aclp = NULL; + } + + if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { + zp->z_atime_dirty = B_FALSE; + ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + &atime, sizeof (atime)); + } + + if (mask & (ATTR_MTIME | ATTR_SIZE)) { + ZFS_TIME_ENCODE(&vap->va_mtime, mtime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + mtime, sizeof (mtime)); + } + + if (mask & (ATTR_CTIME | ATTR_SIZE)) { + ZFS_TIME_ENCODE(&vap->va_ctime, ctime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + ctime, sizeof (ctime)); + } + + if (mask & ATTR_CRTIME) { + ZFS_TIME_ENCODE(&vap->va_crtime, crtime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, + crtime, sizeof (crtime)); + } + + if (projid != ZFS_INVALID_PROJID) { + zp->z_projid = projid; + SA_ADD_BULK_ATTR(bulk, count, + SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, + sizeof (zp->z_projid)); + } + + if (attrzp && mask) { + SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, + SA_ZPL_CTIME(zfsvfs), NULL, &ctime, + sizeof (ctime)); + } + + /* + * Do this after setting timestamps to prevent timestamp + * update from toggling bit + */ + + if (xoap && (mask & ATTR_XVATTR)) { + + /* + * restore trimmed off masks + * so that return masks can be set for caller. 
+ */ + + if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) { + XVA_SET_REQ(xvap, XAT_APPENDONLY); + } + if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) { + XVA_SET_REQ(xvap, XAT_NOUNLINK); + } + if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) { + XVA_SET_REQ(xvap, XAT_IMMUTABLE); + } + if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) { + XVA_SET_REQ(xvap, XAT_NODUMP); + } + if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) { + XVA_SET_REQ(xvap, XAT_AV_MODIFIED); + } + if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) { + XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); + } + if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) { + XVA_SET_REQ(xvap, XAT_PROJINHERIT); + } + + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) + ASSERT(S_ISREG(zp->z_mode)); + + zfs_xvattr_set(zp, xvap, tx); + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + if (mask != 0) + zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); + + mutex_exit(&zp->z_lock); + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) + mutex_exit(&zp->z_acl_lock); + + if (attrzp) { + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) + mutex_exit(&attrzp->z_acl_lock); + mutex_exit(&attrzp->z_lock); + } +out: + if (err == 0 && xattr_count > 0) { + err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, + xattr_count, tx); + ASSERT(err2 == 0); + } + + if (aclp) + zfs_acl_free(aclp); + + if (fuidp) { + zfs_fuid_info_free(fuidp); + fuidp = NULL; + } + + if (err) { + dmu_tx_abort(tx); + if (attrzp) + zrele(attrzp); + if (err == ERESTART) + goto top; + } else { + if (count > 0) + err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + dmu_tx_commit(tx); + if (attrzp) { + zrele(attrzp); + } + } + +out2: + if (os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + +out3: + kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks); + kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks); + kmem_free(tmpxvattr, sizeof (xvattr_t)); + ZFS_EXIT(zfsvfs); + return (err); +} + +typedef struct zfs_zlock { + krwlock_t *zl_rwlock; /* lock we acquired */ + znode_t *zl_znode; /* znode we held */ + struct zfs_zlock *zl_next; /* next in list */ +} zfs_zlock_t; + +/* + * Drop locks and release vnodes that were held by zfs_rename_lock(). + */ +static void +zfs_rename_unlock(zfs_zlock_t **zlpp) +{ + zfs_zlock_t *zl; + + while ((zl = *zlpp) != NULL) { + if (zl->zl_znode != NULL) + zfs_zrele_async(zl->zl_znode); + rw_exit(zl->zl_rwlock); + *zlpp = zl->zl_next; + kmem_free(zl, sizeof (*zl)); + } +} + +/* + * Search back through the directory tree, using the ".." entries. + * Lock each directory in the chain to prevent concurrent renames. + * Fail any attempt to move a directory into one of its own descendants. + * XXX - z_parent_lock can overlap with map or grow locks + */ +static int +zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) +{ + zfs_zlock_t *zl; + znode_t *zp = tdzp; + uint64_t rootid = ZTOZSB(zp)->z_root; + uint64_t oidp = zp->z_id; + krwlock_t *rwlp = &szp->z_parent_lock; + krw_t rw = RW_WRITER; + + /* + * First pass write-locks szp and compares to zp->z_id. + * Later passes read-lock zp and compare to zp->z_parent. + */ + do { + if (!rw_tryenter(rwlp, rw)) { + /* + * Another thread is renaming in this path. + * Note that if we are a WRITER, we don't have any + * parent_locks held yet. 
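+ *
+ * The branch below resolves the conflict: if we are a reader and
+ * the directory being examined has a higher object id than szp, we
+ * release everything collected so far via zfs_rename_unlock() and
+ * restart the walk from tdzp as a writer; otherwise we simply block
+ * in rw_enter() until the other rename drops the lock.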
+ */ + if (rw == RW_READER && zp->z_id > szp->z_id) { + /* + * Drop our locks and restart + */ + zfs_rename_unlock(&zl); + *zlpp = NULL; + zp = tdzp; + oidp = zp->z_id; + rwlp = &szp->z_parent_lock; + rw = RW_WRITER; + continue; + } else { + /* + * Wait for other thread to drop its locks + */ + rw_enter(rwlp, rw); + } + } + + zl = kmem_alloc(sizeof (*zl), KM_SLEEP); + zl->zl_rwlock = rwlp; + zl->zl_znode = NULL; + zl->zl_next = *zlpp; + *zlpp = zl; + + if (oidp == szp->z_id) /* We're a descendant of szp */ + return (SET_ERROR(EINVAL)); + + if (oidp == rootid) /* We've hit the top */ + return (0); + + if (rw == RW_READER) { /* i.e. not the first pass */ + int error = zfs_zget(ZTOZSB(zp), oidp, &zp); + if (error) + return (error); + zl->zl_znode = zp; + } + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)), + &oidp, sizeof (oidp)); + rwlp = &zp->z_parent_lock; + rw = RW_READER; + + } while (zp->z_id != sdzp->z_id); + + return (0); +} + +/* + * Move an entry from the provided source directory to the target + * directory. Change the entry name as indicated. + * + * IN: sdzp - Source directory containing the "old entry". + * snm - Old entry name. + * tdzp - Target directory to contain the "new entry". + * tnm - New entry name. + * cr - credentials of caller. + * flags - case flags + * + * RETURN: 0 on success, error code on failure. + * + * Timestamps: + * sdzp,tdzp - ctime|mtime updated + */ +/*ARGSUSED*/ +int +zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, + cred_t *cr, int flags) +{ + znode_t *szp, *tzp; + zfsvfs_t *zfsvfs = ZTOZSB(sdzp); + zilog_t *zilog; + uint64_t addtime[2]; + zfs_dirlock_t *sdl, *tdl; + dmu_tx_t *tx; + zfs_zlock_t *zl; + int cmp, serr, terr; + int error = 0; + int zflg = 0; + boolean_t waited = B_FALSE; + + if (snm == NULL || tnm == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(sdzp); + zilog = zfsvfs->z_log; + + ZFS_VERIFY_ZP(tdzp); + + /* + * We check i_sb because snapshots and the ctldir must have different + * super blocks. + */ + // Can't we use zp->z_zfsvfs in place of zp->vp->v_vfs ? + if (VTOM(ZTOV(tdzp)) != VTOM(ZTOV(sdzp)) || + zfsctl_is_node(ZTOV(tdzp))) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EXDEV)); + } + + if (zfsvfs->z_utf8 && u8_validate(tnm, + strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EILSEQ)); + } + + if (flags & FIGNORECASE) + zflg |= ZCILOOK; + +top: + szp = NULL; + tzp = NULL; + zl = NULL; + + /* + * This is to prevent the creation of links into attribute space + * by renaming a linked file into/outof an attribute directory. + * See the comment in zfs_link() for why this is considered bad. + */ + if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + /* + * Lock source and target directory entries. To prevent deadlock, + * a lock ordering must be defined. We lock the directory with + * the smallest object id first, or if it's a tie, the one with + * the lexically first name. + */ + if (sdzp->z_id < tdzp->z_id) { + cmp = -1; + } else if (sdzp->z_id > tdzp->z_id) { + cmp = 1; + } else { + /* + * First compare the two name arguments without + * considering any case folding. 
+ */ + int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); + + cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); + ASSERT(error == 0 || !zfsvfs->z_utf8); + if (cmp == 0) { + /* + * POSIX: "If the old argument and the new argument + * both refer to links to the same existing file, + * the rename() function shall return successfully + * and perform no other action." + */ + ZFS_EXIT(zfsvfs); + return (0); + } + /* + * If the file system is case-folding, then we may + * have some more checking to do. A case-folding file + * system is either supporting mixed case sensitivity + * access or is completely case-insensitive. Note + * that the file system is always case preserving. + * + * In mixed sensitivity mode case sensitive behavior + * is the default. FIGNORECASE must be used to + * explicitly request case insensitive behavior. + * + * If the source and target names provided differ only + * by case (e.g., a request to rename 'tim' to 'Tim'), + * we will treat this as a special case in the + * case-insensitive mode: as long as the source name + * is an exact match, we will allow this to proceed as + * a name-change request. + */ + if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || + (zfsvfs->z_case == ZFS_CASE_MIXED && + flags & FIGNORECASE)) && + u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, + &error) == 0) { + /* + * case preserving rename request, require exact + * name matches + */ + zflg |= ZCIEXACT; + zflg &= ~ZCILOOK; + } + } + + /* + * If the source and destination directories are the same, we should + * grab the z_name_lock of that directory only once. + */ + if (sdzp == tdzp) { + zflg |= ZHAVELOCK; + rw_enter(&sdzp->z_name_lock, RW_READER); + } + + if (cmp < 0) { + serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, + ZEXISTS | zflg, NULL, NULL); + terr = zfs_dirent_lock(&tdl, + tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); + } else { + terr = zfs_dirent_lock(&tdl, + tdzp, tnm, &tzp, zflg, NULL, NULL); + serr = zfs_dirent_lock(&sdl, + sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, + NULL, NULL); + } + + if (serr) { + /* + * Source entry invalid or not there. + */ + if (!terr) { + zfs_dirent_unlock(tdl); + if (tzp) + zrele(tzp); + } + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) + serr = EINVAL; + ZFS_EXIT(zfsvfs); + return (serr); + } + if (terr) { + zfs_dirent_unlock(sdl); + zrele(szp); + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + if (strcmp(tnm, "..") == 0) + terr = EINVAL; + ZFS_EXIT(zfsvfs); + return (terr); + } + + /* + * If we are using project inheritance, means if the directory has + * ZFS_PROJINHERIT set, then its descendant directories will inherit + * not only the project ID, but also the ZFS_PROJINHERIT flag. Under + * such case, we only allow renames into our tree when the project + * IDs are the same. + */ + if (tdzp->z_pflags & ZFS_PROJINHERIT && + tdzp->z_projid != szp->z_projid) { + error = SET_ERROR(EXDEV); + goto out; + } + + /* + * Must have write access at the source to remove the old entry + * and write access at the target to create the new entry. + * Note that if target and source are the same, this can be + * done in a single check. + */ + + if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) + goto out; + + if (S_ISDIR(szp->z_mode)) { + /* + * Check to make sure rename is valid. + * Can't do a move like this: /usr/a/b to /usr/a/b/c/d + */ + if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl))) + goto out; + } + + /* + * Does target exist? 
+ */ + if (tzp) { + /* + * Source and target must be the same type. + */ + if (S_ISDIR(szp->z_mode)) { + if (!S_ISDIR(tzp->z_mode)) { + error = SET_ERROR(ENOTDIR); + goto out; + } + } else { + if (S_ISDIR(tzp->z_mode)) { + error = SET_ERROR(EISDIR); + goto out; + } + } + /* + * POSIX dictates that when the source and target + * entries refer to the same file object, rename + * must do nothing and exit without error. + */ + if (szp->z_id == tzp->z_id) { + error = 0; + goto out; + } + +#if defined(MAC_OS_X_VERSION_10_12) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12) + /* If renamex(VFS_RENAME_EXCL) is used, error out */ + if (flags & VFS_RENAME_EXCL) { + error = EEXIST; + goto out; + } +#endif + + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); + dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); + dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); + if (sdzp != tdzp) { + dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, tdzp); + } + if (tzp) { + dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, tzp); + } + + zfs_sa_upgrade_txholds(tx, szp); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + if (zl != NULL) + zfs_rename_unlock(&zl); + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + zrele(szp); + if (tzp) + zrele(tzp); + goto top; + } + dmu_tx_abort(tx); + zrele(szp); + if (tzp) + zrele(tzp); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (tzp) /* Attempt to remove the existing target */ + error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + + if (error == 0) { + error = zfs_link_create(tdl, szp, tx, ZRENAMING); + if (error == 0) { + szp->z_pflags |= ZFS_AV_MODIFIED; + if (tdzp->z_pflags & ZFS_PROJINHERIT) + szp->z_pflags |= ZFS_PROJINHERIT; + + error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void *)&szp->z_pflags, sizeof (uint64_t), tx); + ASSERT0(error); + + /* + * If we moved an entry into a different directory + * (sdzp != tdzp) then we also need to update ADDEDTIME + * (ADDTIME) property for FinderInfo. We are already + * inside error == 0 conditional + */ + if ((sdzp != tdzp) && + zfsvfs->z_use_sa == B_TRUE) { + timestruc_t now; + gethrestime(&now); + ZFS_TIME_ENCODE(&now, addtime); + error = sa_update(szp->z_sa_hdl, + SA_ZPL_ADDTIME(zfsvfs), (void *)&addtime, + sizeof (addtime), tx); + } + + error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); + if (error == 0) { + zfs_log_rename(zilog, tx, TX_RENAME | + (flags & FIGNORECASE ? TX_CI : 0), sdzp, + sdl->dl_name, tdzp, tdl->dl_name, szp); + + /* + * Update cached name - for vget, and access + * without calling vnop_lookup first - it is + * easier to clear it out and let getattr + * look it up if needed. + */ + if (tzp) { + mutex_enter(&tzp->z_lock); + tzp->z_name_cache[0] = 0; + mutex_exit(&tzp->z_lock); + } + if (szp) { + mutex_enter(&szp->z_lock); + szp->z_name_cache[0] = 0; + mutex_exit(&szp->z_lock); + } + + } else { + /* + * At this point, we have successfully created + * the target name, but have failed to remove + * the source name. Since the create was done + * with the ZRENAMING flag, there are + * complications; for one, the link count is + * wrong. 
The easiest way to deal with this + * is to remove the newly created target, and + * return the original error. This must + * succeed; fortunately, it is very unlikely to + * fail, since we just created it. + */ + VERIFY3U(zfs_link_destroy(tdl, szp, tx, + ZRENAMING, NULL), ==, 0); + } + } else { + /* + * If we had removed the existing target, subsequent + * call to zfs_link_create() to add back the same entry + * but, the new dnode (szp) should not fail. + */ + ASSERT(tzp == NULL); + } + } + + dmu_tx_commit(tx); +out: + if (zl != NULL) + zfs_rename_unlock(&zl); + + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + + if (sdzp == tdzp) + rw_exit(&sdzp->z_name_lock); + + zrele(szp); + if (tzp) { + zrele(tzp); + } + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Insert the indicated symbolic reference entry into the directory. + * + * IN: dzp - Directory to contain new symbolic link. + * name - Name of directory entry in dip. + * vap - Attributes of new entry. + * link - Name for new symlink entry. + * cr - credentials of caller. + * flags - case flags + * + * OUT: zpp - Znode for new symbolic link. + * + * RETURN: 0 on success, error code on failure. + * + * Timestamps: + * dip - ctime|mtime updated + */ +/*ARGSUSED*/ +int +zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link, + znode_t **zpp, cred_t *cr, int flags) +{ + znode_t *zp; + zfs_dirlock_t *dl; + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + zilog_t *zilog; + uint64_t len = strlen(link); + int error; + int zflg = ZNEW; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + uint64_t txtype = TX_SYMLINK; + boolean_t waited = B_FALSE; + + ASSERT(S_ISLNK(vap->va_mode)); + + if (name == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + zilog = zfsvfs->z_log; + + if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), + NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EILSEQ)); + } + if (flags & FIGNORECASE) + zflg |= ZCILOOK; + + if (len > MAXPATHLEN) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENAMETOOLONG)); + } + + if ((error = zfs_acl_ids_create(dzp, 0, + vap, cr, NULL, &acl_ids)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } +top: + *zpp = NULL; + + /* + * Attempt to lock directory; fail if entry already exists. + */ + error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); + if (error) { + zfs_acl_ids_free(&acl_ids); + ZFS_EXIT(zfsvfs); + return (error); + } + + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + ZFS_EXIT(zfsvfs); + return (error); + } + + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) { + zfs_acl_ids_free(&acl_ids); + zfs_dirent_unlock(dl); + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EDQUOT)); + } + tx = dmu_tx_create(zfsvfs->z_os); + fuid_dirtied = zfsvfs->z_fuid_dirty; + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); + dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE + len); + dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); + if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, + acl_ids.z_aclp->z_acl_bytes); + } + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); + error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + ZFS_EXIT(zfsvfs); + return (error); + } + + /* + * Create a new object for the symlink. + * for version 4 ZPL datsets the symlink will be an SA attribute + */ + zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + mutex_enter(&zp->z_lock); + if (zp->z_is_sa) + error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), + link, len, tx); + else + zfs_sa_symlink(zp, link, len, tx); + mutex_exit(&zp->z_lock); + + zp->z_size = len; + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + &zp->z_size, sizeof (zp->z_size), tx); + /* + * Insert the new object into the directory. + */ + error = zfs_link_create(dl, zp, tx, ZNEW); + if (error != 0) { + zfs_znode_delete(zp, tx); + } else { + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + } + + zfs_acl_ids_free(&acl_ids); + + dmu_tx_commit(tx); + + zfs_dirent_unlock(dl); + + /* + * OS X - attach the vnode _after_ committing the transaction + */ + zfs_znode_getvnode(zp, zfsvfs); + + if (error == 0) { + *zpp = zp; + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + } else { + zrele(zp); + } + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Return, in the buffer contained in the provided uio structure, + * the symbolic path referred to by ip. + * + * IN: ip - inode of symbolic link + * uio - structure to contain the link path. + * cr - credentials of caller. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * ip - atime updated + */ +/* ARGSUSED */ +int +zfs_readlink(struct vnode *vp, uio_t *uio, cred_t *cr) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ITOZSB(vp); + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + mutex_enter(&zp->z_lock); + if (zp->z_is_sa) + error = sa_lookup_uio(zp->z_sa_hdl, + SA_ZPL_SYMLINK(zfsvfs), uio); + else + error = zfs_sa_readlink(zp, uio); + mutex_exit(&zp->z_lock); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * Insert a new entry into directory tdzp referencing szp. + * + * IN: tdzp - Directory to contain new entry. + * szp - znode of new entry. + * name - name of new entry. + * cr - credentials of caller. + * flags - case flags. + * + * RETURN: 0 if success + * error code if failure + * + * Timestamps: + * tdzp - ctime|mtime updated + * szp - ctime updated + */ +/* ARGSUSED */ +int +zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, + int flags) +{ + struct vnode *svp = ZTOV(szp); + znode_t *tzp; + zfsvfs_t *zfsvfs = ZTOZSB(tdzp); + zilog_t *zilog; + zfs_dirlock_t *dl; + dmu_tx_t *tx; + int error; + int zf = ZNEW; + uint64_t parent; + uid_t owner; + boolean_t waited = B_FALSE; + boolean_t is_tmpfile = 0; + uint64_t txg; + + ASSERT(S_ISDIR(tdzp->z_mode)); + + if (name == NULL) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(tdzp); + zilog = zfsvfs->z_log; + +#ifdef __APPLE__ + if (VTOM(svp) != VTOM(ZTOV(tdzp))) { + ZFS_EXIT(zfsvfs); + return (EXDEV); + } +#endif + + /* + * POSIX dictates that we return EPERM here. + * Better choices include ENOTSUP or EISDIR. 
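+ *
+ * For instance, attempting to hard link a directory
+ * ("ln dir newname" on a ZFS mount) fails here with EPERM, the
+ * error POSIX mandates for link(2) on a directory.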
+ */ + if (vnode_isdir(svp)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EPERM)); + } + + ZFS_VERIFY_ZP(szp); + + /* + * If we are using project inheritance, means if the directory has + * ZFS_PROJINHERIT set, then its descendant directories will inherit + * not only the project ID, but also the ZFS_PROJINHERIT flag. Under + * such case, we only allow hard link creation in our tree when the + * project IDs are the same. + */ + if (tdzp->z_pflags & ZFS_PROJINHERIT && + tdzp->z_projid != szp->z_projid) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EXDEV)); + } + + /* Prevent links to .zfs/shares files */ + + if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parent, sizeof (uint64_t))) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + if (parent == zfsvfs->z_shares_dir) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EPERM)); + } + + if (zfsvfs->z_utf8 && u8_validate(name, + strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EILSEQ)); + } + if (flags & FIGNORECASE) + zf |= ZCILOOK; + + /* + * We do not support links between attributes and non-attributes + * because of the potential security risk of creating links + * into "normal" file space in order to circumvent restrictions + * imposed in attribute space. + */ + if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(szp->z_uid), + cr, ZFS_OWNER); + if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EPERM)); + } + + if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { + ZFS_EXIT(zfsvfs); + return (error); + } + +top: + /* + * Attempt to lock directory; fail if entry already exists. + */ + error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL); + if (error) { + ZFS_EXIT(zfsvfs); + return (error); + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name); + if (is_tmpfile) + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + + zfs_sa_upgrade_txholds(tx, szp); + zfs_sa_upgrade_txholds(tx, tdzp); + error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + ZFS_EXIT(zfsvfs); + return (error); + } + + error = zfs_link_create(dl, szp, tx, 0); + + if (error == 0) { + uint64_t txtype = TX_LINK; + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_link(zilog, tx, txtype, tdzp, szp, name); + } else if (is_tmpfile) { + /* restore z_unlinked since when linking failed */ + szp->z_unlinked = B_TRUE; + } + txg = dmu_tx_get_txg(tx); + dmu_tx_commit(tx); + + zfs_dirent_unlock(dl); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/*ARGSUSED*/ +void +zfs_inactive(struct vnode *vp) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ITOZSB(vp); + int error; + + rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); + if (zp->z_sa_hdl == NULL) { + /* + * The fs has been unmounted, or we did a + * suspend/resume and this file no longer exists. + */ + rw_exit(&zfsvfs->z_teardown_inactive_lock); + vnode_recycle(vp); + return; + } + + if (zp->z_unlinked) { + /* + * Fast path to recycle a vnode of a removed file. 
+ */ + rw_exit(&zfsvfs->z_teardown_inactive_lock); + vnode_recycle(vp); + return; + } + + if (zp->z_atime_dirty && zp->z_unlinked == 0) { + dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), + (void *)&zp->z_atime, sizeof (zp->z_atime), tx); + zp->z_atime_dirty = 0; + dmu_tx_commit(tx); + } + } + rw_exit(&zfsvfs->z_teardown_inactive_lock); +} + +static int +zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, + caller_context_t *ct) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error; + // boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; + boolean_t skipaclchk = B_FALSE; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + error = zfs_getacl(zp, vsecp, skipaclchk, cr); + ZFS_EXIT(zfsvfs); + + return (error); +} + +int +zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error; + // boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; + boolean_t skipaclchk = B_FALSE; + zilog_t *zilog = zfsvfs->z_log; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + error = zfs_setacl(zp, vsecp, skipaclchk, cr); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + + ZFS_EXIT(zfsvfs); + return (error); +} + + +/* + * Free or allocate space in a file. Currently, this function only + * supports the `F_FREESP' command. However, this command is somewhat + * misnamed, as its functionality includes the ability to allocate as + * well as free space. + * + * IN: zp - znode of file to free data in. + * cmd - action to take (only F_FREESP supported). + * bfp - section of file to free/alloc. + * flag - current file open mode flags. + * offset - current file offset. + * cr - credentials of caller. + * + * RETURN: 0 on success, error code on failure. + * + * Timestamps: + * zp - ctime|mtime updated + */ +/* ARGSUSED */ +int +zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag, + offset_t offset, cred_t *cr) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + uint64_t off, len; + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + if (cmd != F_FREESP) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + /* + * Callers might not be able to detect properly that we are read-only, + * so check it explicitly here. + */ + if (zfs_is_readonly(zfsvfs)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EROFS)); + } + + if (bfp->l_len < 0) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } + + /* + * Permissions aren't checked on Solaris because on this OS + * zfs_space() can only be called with an opened file handle. + * On Linux we can get here through truncate_range() which + * operates directly on inodes, so we need to check access rights. 
+ */ + if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { + ZFS_EXIT(zfsvfs); + return (error); + } + + off = bfp->l_start; + len = bfp->l_len; /* 0 means from off to end of file */ + + error = zfs_freesp(zp, off, len, flag, TRUE); + + ZFS_EXIT(zfsvfs); + return (error); +} diff --git a/module/os/macos/zfs/zfs_vnops_osx.c b/module/os/macos/zfs/zfs_vnops_osx.c new file mode 100644 index 0000000000..341b7e9463 --- /dev/null +++ b/module/os/macos/zfs/zfs_vnops_osx.c @@ -0,0 +1,5281 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013 Will Andrews + * Copyright (c) 2013, 2020 Jorgen Lundman + */ + +/* + * XXX GENERAL COMPATIBILITY ISSUES + * + * 'name' is a common argument, but in OS X (and FreeBSD), we need to pass + * the componentname pointer, so other things can use them. We should + * change the 'name' argument to be an opaque name pointer, and define + * OS-dependent macros that yield the desired results when needed. + * + * On OS X, VFS performs access checks before calling anything, so + * zfs_zaccess_* calls are not used. Not true on FreeBSD, though. Perhaps + * those calls should be conditionally #if 0'd? + * + * On OS X, VFS & I/O objects are often opaque, e.g. uio_t and struct vnode + * require using functions to access elements of an object. Should convert + * the Solaris code to use macros on other platforms. + * + * OS X and FreeBSD appear to use similar zfs-vfs interfaces; see Apple's + * comment in zfs_remove() about the fact that VFS holds the last ref while + * in Solaris it's the ZFS code that does. On FreeBSD, the code Apple + * refers to here results in a panic if the branch is actually taken. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + + + +#ifdef _KERNEL +#include +#include + +unsigned int debug_vnop_osx_printf = 0; +unsigned int zfs_vnop_ignore_negatives = 0; +unsigned int zfs_vnop_ignore_positives = 0; +unsigned int zfs_vnop_create_negatives = 1; +#endif + +#define DECLARE_CRED(ap) \ + cred_t *cr = (cred_t *)vfs_context_ucred((ap)->a_context) +#define DECLARE_CONTEXT(ap) \ + caller_context_t *ct = (caller_context_t *)(ap)->a_context +#define DECLARE_CRED_AND_CONTEXT(ap) \ + DECLARE_CRED(ap); \ + DECLARE_CONTEXT(ap) + +#undef dprintf +#define dprintf printf + +/* Empty FinderInfo struct */ +static u_int32_t emptyfinfo[8] = {0}; + +/* + * zfs vfs operations. 
+ */ +static struct vfsops zfs_vfsops_template = { + zfs_vfs_mount, + zfs_vfs_start, + zfs_vfs_unmount, + zfs_vfs_root, + zfs_vfs_quotactl, + zfs_vfs_getattr, + zfs_vfs_sync, + zfs_vfs_vget, + zfs_vfs_fhtovp, + zfs_vfs_vptofh, + zfs_vfs_init, + zfs_vfs_sysctl, + zfs_vfs_setattr, +#if defined(MAC_OS_X_VERSION_10_12) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12) + NULL, /* vfs_ioctl */ + NULL, /* vfs_vget_snapdir */ + NULL +#else + {NULL} +#endif +}; + +#define ZFS_VNOP_TBL_CNT 6 + +static struct vnodeopv_desc *zfs_vnodeop_opv_desc_list[ZFS_VNOP_TBL_CNT] = +{ + &zfs_dvnodeop_opv_desc, + &zfs_fvnodeop_opv_desc, + &zfs_symvnodeop_opv_desc, + &zfs_xdvnodeop_opv_desc, + &zfs_fifonodeop_opv_desc, + &zfs_ctldir_opv_desc, +}; + +static vfstable_t zfs_vfsconf; + +int +zfs_vnop_removexattr_int(zfsvfs_t *zfsvfs, znode_t *zp, const char *name, + cred_t *cr); + +int +zfs_vfs_init(__unused struct vfsconf *vfsp) +{ + return (0); +} + +int +zfs_vfs_start(__unused struct mount *mp, __unused int flags, + __unused vfs_context_t context) +{ + return (0); +} + +int +zfs_vfs_quotactl(__unused struct mount *mp, __unused int cmds, + __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context) +{ + dprintf("%s ENOTSUP\n", __func__); + return (ENOTSUP); +} + +static kmutex_t zfs_findernotify_lock; +static kcondvar_t zfs_findernotify_thread_cv; +static boolean_t zfs_findernotify_thread_exit; + +#define VNODE_EVENT_ATTRIB 0x00000008 + +static int +zfs_findernotify_callback(mount_t mp, __unused void *arg) +{ + /* Do some quick checks to see if it is ZFS */ + struct vfsstatfs *vsf = vfs_statfs(mp); + + // Filesystem ZFS? + if (vsf->f_fssubtype == MNTTYPE_ZFS_SUBTYPE) { + vfs_context_t kernelctx = spl_vfs_context_kernel(); + struct vnode *rootvp, *vp; + + /* + * Since potentially other filesystems could be using "our" + * fssubtype, and we don't always announce as "zfs" due to + * hfs-mimic requirements, we have to make extra care here to + * make sure this "mp" really is ZFS. + */ + zfsvfs_t *zfsvfs; + + zfsvfs = vfs_fsprivate(mp); + + /* + * The first entry in struct zfsvfs is the vfs ptr, so they + * should be equal if it is ZFS + */ + if (!zfsvfs || + (mp != zfsvfs->z_vfs)) + return (VFS_RETURNED); + + /* Guard against unmount */ + ZFS_ENTER_ERROR(zfsvfs, VFS_RETURNED); + + /* Check if space usage has changed enough to bother updating */ + uint64_t refdbytes, availbytes, usedobjs, availobjs; + uint64_t delta; + dmu_objset_space(zfsvfs->z_os, + &refdbytes, &availbytes, &usedobjs, &availobjs); + if (availbytes >= zfsvfs->z_findernotify_space) { + delta = availbytes - zfsvfs->z_findernotify_space; + } else { + delta = zfsvfs->z_findernotify_space - availbytes; + } + +#define ZFS_FINDERNOTIFY_THRESHOLD (1ULL<<20) + + /* Under the limit ? 
*/ + if (delta <= ZFS_FINDERNOTIFY_THRESHOLD) goto out; + + /* Over the threshold, so notify Finder; remember the new value */ + zfsvfs->z_findernotify_space = availbytes; + + /* If old value is zero (first run), don't bother */ + if (availbytes == delta) + goto out; + + dprintf("ZFS: findernotify %p space delta %llu\n", mp, delta); + + // Grab the root vnode + if (!VFS_ROOT(mp, 0, &rootvp)) { + + struct componentname cn; + char *tmpname = ".fseventsd"; + + bzero(&cn, sizeof (cn)); + cn.cn_nameiop = LOOKUP; + cn.cn_flags = ISLASTCN; + // cn.cn_context = kernelctx; + cn.cn_pnbuf = tmpname; + cn.cn_pnlen = sizeof (tmpname); + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_namelen = strlen(tmpname); + + // Attempt to look up .fseventsd + if (!VOP_LOOKUP(rootvp, &vp, &cn, kernelctx)) { + + // Send the event to wake up Finder + struct vnode_attr vattr; + // Also calls VATTR_INIT + spl_vfs_get_notify_attributes(&vattr); + // Fill in vap + vnode_getattr(vp, &vattr, kernelctx); + // Send event + spl_vnode_notify(vp, VNODE_EVENT_ATTRIB, + &vattr); + + // Cleanup vp + vnode_put(vp); + + } // VNOP_LOOKUP + + // Cleanup rootvp + vnode_put(rootvp); + + } // VFS_ROOT + +out: + ZFS_EXIT(zfsvfs); + + } // SUBTYPE_ZFS + + return (VFS_RETURNED); +} + + +static void +zfs_findernotify_thread(void *notused) +{ + callb_cpr_t cpr; + + dprintf("ZFS: findernotify thread start\n"); + CALLB_CPR_INIT(&cpr, &zfs_findernotify_lock, callb_generic_cpr, FTAG); + + mutex_enter(&zfs_findernotify_lock); + while (!zfs_findernotify_thread_exit) { + + /* Sleep 32 seconds */ + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait(&zfs_findernotify_thread_cv, + &zfs_findernotify_lock, ddi_get_lbolt() + (hz<<5)); + CALLB_CPR_SAFE_END(&cpr, &zfs_findernotify_lock); + + if (!zfs_findernotify_thread_exit) + vfs_iterate(LK_NOWAIT, zfs_findernotify_callback, NULL); + + } + + zfs_findernotify_thread_exit = FALSE; + cv_broadcast(&zfs_findernotify_thread_cv); + CALLB_CPR_EXIT(&cpr); /* drops zfs_findernotify_lock */ + dprintf("ZFS: findernotify thread exit\n"); + thread_exit(); +} + +void +zfs_start_notify_thread(void) +{ + mutex_init(&zfs_findernotify_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zfs_findernotify_thread_cv, NULL, CV_DEFAULT, NULL); + zfs_findernotify_thread_exit = FALSE; + (void) thread_create(NULL, 0, zfs_findernotify_thread, NULL, 0, &p0, + TS_RUN, minclsyspri); +} + + +void +zfs_stop_notify_thread(void) +{ + mutex_enter(&zfs_findernotify_lock); + zfs_findernotify_thread_exit = TRUE; + /* + * The findernotify thread will set zfs_findernotify_thread_exit back to + * FALSE when it is finished exiting; we're waiting for that.
+ */ + while (zfs_findernotify_thread_exit) { + cv_signal(&zfs_findernotify_thread_cv); + cv_wait(&zfs_findernotify_thread_cv, &zfs_findernotify_lock); + } + mutex_exit(&zfs_findernotify_lock); + mutex_destroy(&zfs_findernotify_lock); + cv_destroy(&zfs_findernotify_thread_cv); +} + +int +zfs_vfs_sysctl(int *name, __unused uint_t namelen, user_addr_t oldp, + size_t *oldlenp, user_addr_t newp, size_t newlen, + __unused vfs_context_t context) +{ +#if 0 + int error; + switch (name[0]) { + case ZFS_SYSCTL_FOOTPRINT: { + zfs_footprint_stats_t *footprint; + size_t copyinsize; + size_t copyoutsize; + int max_caches; + int act_caches; + + if (newp) { + return (EINVAL); + } + if (!oldp) { + *oldlenp = sizeof (zfs_footprint_stats_t); + return (0); + } + copyinsize = *oldlenp; + if (copyinsize < sizeof (zfs_footprint_stats_t)) { + *oldlenp = sizeof (zfs_footprint_stats_t); + return (ENOMEM); + } + footprint = kmem_alloc(copyinsize, KM_SLEEP); + + max_caches = copyinsize - sizeof (zfs_footprint_stats_t); + max_caches += sizeof (kmem_cache_stats_t); + max_caches /= sizeof (kmem_cache_stats_t); + + footprint->version = ZFS_FOOTPRINT_VERSION; + + footprint->memory_stats.current = zfs_footprint.current; + footprint->memory_stats.target = zfs_footprint.target; + footprint->memory_stats.highest = zfs_footprint.highest; + footprint->memory_stats.maximum = zfs_footprint.maximum; + + arc_get_stats(&footprint->arc_stats); + + kmem_cache_stats(&footprint->cache_stats[0], max_caches, + &act_caches); + footprint->caches_count = act_caches; + footprint->thread_count = zfs_threads; + + copyoutsize = sizeof (zfs_footprint_stats_t) + + ((act_caches - 1) * sizeof (kmem_cache_stats_t)); + + error = ddi_copyout(footprint, oldp, copyoutsize, 0); + + kmem_free(footprint, copyinsize); + + return (error); + } + + case ZFS_SYSCTL_CONFIG_DEBUGMSG: + error = sysctl_int(oldp, oldlenp, newp, newlen, + &zfs_msg_buf_enabled); + return (error); + + case ZFS_SYSCTL_CONFIG_zdprintf: +#ifdef ZFS_DEBUG + error = sysctl_int(oldp, oldlenp, newp, newlen, + &zfs_zdprintf_enabled); +#else + error = ENOTSUP; +#endif + return (error); + } +#endif + return (ENOTSUP); +} + +/* + * All these functions could be declared as 'static' but to assist with + * dtrace debugging, we do not. + */ + +int +zfs_vnop_open(struct vnop_open_args *ap) +#if 0 + struct vnop_open_args { + struct vnode *a_vp; + int a_mode; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + int err = 0; + + err = zfs_open(ap->a_vp, ap->a_mode, 0, cr); + + if (err) dprintf("zfs_open() failed %d\n", err); + return (err); +} + +int +zfs_vnop_close(struct vnop_close_args *ap) +#if 0 + struct vnop_close_args { + struct vnode *a_vp; + int a_fflag; + vfs_context_t a_context; + }; +#endif +{ +// int count = 1; +// int offset = 0; +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + + return (zfs_close(ap->a_vp, ap->a_fflag, cr)); +} + +int +zfs_vnop_ioctl(struct vnop_ioctl_args *ap) +#if 0 + struct vnop_ioctl_args { + struct vnode *a_vp; + ulong_t a_command; + caddr_t a_data; + int a_fflag; + kauth_cred_t a_cred; + struct proc *a_p; + }; +#endif +{ + /* OS X has no use for zfs_ioctl(). 
*/ + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error = 0; + DECLARE_CRED_AND_CONTEXT(ap); + + dprintf("vnop_ioctl %08lx: VTYPE %d\n", ap->a_command, + vnode_vtype(ZTOV(zp))); + + ZFS_ENTER(zfsvfs); + if (IFTOVT((mode_t)zp->z_mode) == VFIFO) { + dprintf("ZFS: FIFO ioctl %02lx ('%lu' + %lu)\n", + ap->a_command, (ap->a_command&0xff00)>>8, + ap->a_command&0xff); + error = fifo_ioctl(ap); + error = 0; + ZFS_EXIT(zfsvfs); + goto out; + } + + if ((IFTOVT((mode_t)zp->z_mode) == VBLK) || + (IFTOVT((mode_t)zp->z_mode) == VCHR)) { + dprintf("ZFS: spec ioctl %02lx ('%lu' + %lu)\n", + ap->a_command, (ap->a_command&0xff00)>>8, + ap->a_command&0xff); + error = spec_ioctl(ap); + ZFS_EXIT(zfsvfs); + goto out; + } + ZFS_EXIT(zfsvfs); + + switch (ap->a_command) { + + /* ioctl supported by ZFS and POSIX */ + + case F_FULLFSYNC: + dprintf("%s F_FULLFSYNC\n", __func__); +#ifdef F_BARRIERFSYNC + case F_BARRIERFSYNC: + dprintf("%s F_BARRIERFSYNC\n", __func__); +#endif + error = zfs_fsync(VTOZ(ap->a_vp), /* flag */0, cr); + break; + + case F_CHKCLEAN: + dprintf("%s F_CHKCLEAN\n", __func__); + /* + * normally calls http://fxr.watson.org/fxr/source/bsd/ + * vfs/vfs_cluster.c?v=xnu-2050.18.24#L5839 + */ + /* XXX Why don't we? */ + off_t fsize = zp->z_size; + error = is_file_clean(ap->a_vp, fsize); + break; + + case F_RDADVISE: + dprintf("%s F_RDADVISE\n", __func__); + uint64_t file_size; + struct radvisory *ra; + int len; + + ra = (struct radvisory *)(ap->a_data); + + file_size = zp->z_size; + len = ra->ra_count; + + /* XXX Check request size */ + if (ra->ra_offset > file_size) { + dprintf("invalid request offset\n"); + error = EFBIG; + break; + } + + if ((ra->ra_offset + len) > file_size) { + len = file_size - ra->ra_offset; + dprintf("%s truncating F_RDADVISE from" + " %08x -> %08x\n", __func__, + ra->ra_count, len); + } + + /* + * Rather than advisory_read (which calls + * cluster_io->VNOP_BLOCKMAP), prefetch + * the level 0 metadata and level 1 data + * at the requested offset + length. + */ + // error = advisory_read(ap->a_vp, file_size, + // ra->ra_offset, len); + dmu_prefetch(zfsvfs->z_os, zp->z_id, + 0, 0, 0, ZIO_PRIORITY_SYNC_READ); + dmu_prefetch(zfsvfs->z_os, zp->z_id, + 1, ra->ra_offset, len, + ZIO_PRIORITY_SYNC_READ); +#if 0 + { + const char *name = vnode_getname(ap->a_vp); + printf("%s F_RDADVISE: prefetch issued for " + "[%s](0x%016llx) (0x%016llx 0x%08x)\n", __func__, + (name ? 
name : ""), zp->z_id, + ra->ra_offset, len); + if (name) vnode_putname(name); + } +#endif + + break; + + case SPOTLIGHT_GET_MOUNT_TIME: + case SPOTLIGHT_IOC_GET_MOUNT_TIME: + case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: + dprintf("%s SPOTLIGHT_GET_MOUNT_TIME\n", __func__); + *(uint32_t *)ap->a_data = zfsvfs->z_mount_time; + break; + case SPOTLIGHT_GET_UNMOUNT_TIME: + dprintf("%s SPOTLIGHT_GET_UNMOUNT_TIME\n", __func__); + *(uint32_t *)ap->a_data = zfsvfs->z_last_unmount_time; + break; + case SPOTLIGHT_FSCTL_GET_LAST_MTIME: + case SPOTLIGHT_IOC_GET_LAST_MTIME: + dprintf("%s SPOTLIGHT_FSCTL_GET_LAST_MTIME\n", + __func__); + *(uint32_t *)ap->a_data = zfsvfs->z_last_unmount_time; + break; + + case HFS_SET_ALWAYS_ZEROFILL: + dprintf("%s HFS_SET_ALWAYS_ZEROFILL\n", __func__); + /* Required by Spotlight search */ + break; + case HFS_EXT_BULKACCESS_FSCTL: + dprintf("%s HFS_EXT_BULKACCESS_FSCTL\n", __func__); + /* Required by Spotlight search */ + break; + + /* ioctl required to simulate HFS mimic behavior */ + case 0x80005802: + dprintf("%s 0x80005802 unknown\n", __func__); + /* unknown - from subsystem read, 'X', 2 */ + break; + + case HFS_GETPATH: + case HFSIOC_GETPATH: + dprintf("%s HFS_GETPATH\n", __func__); + { + struct vfsstatfs *vfsp; + struct vnode *file_vp; + ino64_t cnid; + int outlen; + char *bufptr; + int flags = 0; + + /* Caller must be owner of file system. */ + vfsp = vfs_statfs(zfsvfs->z_vfs); + if (proc_suser(current_proc()) && + kauth_cred_getuid((kauth_cred_t)cr) != + vfsp->f_owner) { + error = EACCES; + goto out; + } + /* Target vnode must be file system's root. */ + if (!vnode_isvroot(ap->a_vp)) { + error = EINVAL; + goto out; + } + + /* We are passed a string containing inode # */ + bufptr = (char *)ap->a_data; + cnid = strtoul(bufptr, NULL, 10); + if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) { + flags |= BUILDPATH_VOLUME_RELATIVE; + } + + if ((error = zfs_vfs_vget(zfsvfs->z_vfs, cnid, + &file_vp, (vfs_context_t)ct))) { + goto out; + } + error = build_path(file_vp, bufptr, MAXPATHLEN, + &outlen, flags, (vfs_context_t)ct); + vnode_put(file_vp); + + dprintf("ZFS: HFS_GETPATH done %d : '%s'\n", + error, error ? 
"" : bufptr); + } + break; + + case HFS_TRANSFER_DOCUMENT_ID: + case HFSIOC_TRANSFER_DOCUMENT_ID: + dprintf("%s HFS_TRANSFER_DOCUMENT_ID\n", __func__); + { + u_int32_t to_fd = *(u_int32_t *)ap->a_data; + file_t *to_fp; + struct vnode *to_vp; + znode_t *to_zp; + + to_fp = getf(to_fd); + if (to_fp == NULL) { + error = EBADF; + goto out; + } + + to_vp = getf_vnode(to_fp); + + if ((error = vnode_getwithref(to_vp))) { + releasef(to_fd); + goto out; + } + + /* Confirm it is inside our mount */ + if (((zfsvfs_t *)vfs_fsprivate( + vnode_mount(to_vp))) != zfsvfs) { + error = EXDEV; + goto transfer_out; + } + + to_zp = VTOZ(to_vp); + + /* Source should have UF_TRACKED */ + if (!(zp->z_pflags & ZFS_TRACKED)) { + dprintf("ZFS: source is not TRACKED\n"); + error = EINVAL; + /* dest should NOT have UF_TRACKED */ + } else if (to_zp->z_pflags & ZFS_TRACKED) { + dprintf("ZFS: dest already TRACKED\n"); + error = EEXIST; + /* should be valid types */ + } else if ( + (IFTOVT((mode_t)zp->z_mode) == VDIR) || + (IFTOVT((mode_t)zp->z_mode) == VREG) || + (IFTOVT((mode_t)zp->z_mode) == VLNK)) { + /* + * Make sure source has a document id + * - although it can't + */ + if (!zp->z_document_id) + zfs_setattr_generate_id(zp, 0, + NULL); + + /* transfer over */ + to_zp->z_document_id = + zp->z_document_id; + zp->z_document_id = 0; + to_zp->z_pflags |= ZFS_TRACKED; + zp->z_pflags &= ~ZFS_TRACKED; + + /* Commit to disk */ + zfs_setattr_set_documentid(to_zp, + B_TRUE); + zfs_setattr_set_documentid(zp, + B_TRUE); /* also update flags */ + dprintf("ZFS: Moved docid %u from " + "id %llu to id %llu\n", + to_zp->z_document_id, zp->z_id, + to_zp->z_id); + } +transfer_out: + vnode_put(to_vp); + releasef(to_fd); + } + break; + + + case F_MAKECOMPRESSED: + dprintf("%s F_MAKECOMPRESSED\n", __func__); + /* + * Not entirely sure what this does, but HFS comments + * include: "Make the file compressed; truncate & + * toggle BSD bits" + * makes compressed copy of allocated blocks + * shortens file to new length + * sets BSD bits to indicate per-file compression + * + * On HFS, locks cnode and compresses its data. ZFS + * inband compression makes this obsolete. + */ + if (vfs_isrdonly(zfsvfs->z_vfs) || + !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { + error = EROFS; + goto out; + } + + /* Are there any other usecounts/FDs? */ + if (vnode_isinuse(ap->a_vp, 1)) { + error = EBUSY; + goto out; + } + + if (zp->z_pflags & ZFS_IMMUTABLE) { + error = EINVAL; + goto out; + } + + /* Return failure */ + error = EINVAL; + break; + + case HFS_PREV_LINK: + case HFS_NEXT_LINK: + case HFSIOC_PREV_LINK: + case HFSIOC_NEXT_LINK: + dprintf("%s HFS_PREV/NEXT_LINK\n", __func__); + { + /* + * Find sibling linkids with hardlinks. a_data points + * to the "current" linkid, and look up either prev + * or next (a_command) linkid. Return in a_data. + */ + uint32_t linkfileid; + struct vfsstatfs *vfsp; + /* Caller must be owner of file system. */ + vfsp = vfs_statfs(zfsvfs->z_vfs); + if ((kauth_cred_getuid(cr) == 0) && + kauth_cred_getuid(cr) != vfsp->f_owner) { + error = EACCES; + goto out; + } + /* Target vnode must be file system's root. */ + if (!vnode_isvroot(ap->a_vp)) { + error = EINVAL; + goto out; + } + linkfileid = *(uint32_t *)ap->a_data; + if (linkfileid < 16) { /* kHFSFirstUserCatalogNodeID */ + error = EINVAL; + goto out; + } + + /* + * Attempt to find the linkid in the hardlink_link + * AVL tree. If found, call to get prev or next. 
+ */ + hardlinks_t *searchnode, *findnode, *sibling; + avl_index_t loc; + + searchnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + searchnode->hl_linkid = linkfileid; + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_READER); + findnode = avl_find(&zfsvfs->z_hardlinks_linkid, + searchnode, &loc); + kmem_free(searchnode, sizeof (hardlinks_t)); + + if (!findnode) { + rw_exit(&zfsvfs->z_hardlinks_lock); + *(uint32_t *)ap->a_data = 0; + dprintf("ZFS: HFS_NEXT_LINK/HFS_PREV_LINK %u " + "not found\n", linkfileid); + goto out; + } + + if (ap->a_command != HFS_NEXT_LINK) { + + while ((sibling = + AVL_NEXT(&zfsvfs->z_hardlinks_linkid, + findnode)) != NULL) { + if (findnode->hl_fileid == + sibling->hl_fileid) + break; + } + + } else { + + while ((sibling = + AVL_PREV(&zfsvfs->z_hardlinks_linkid, + findnode)) != NULL) { + if (findnode->hl_fileid == + sibling->hl_fileid) + break; + } + + } + rw_exit(&zfsvfs->z_hardlinks_lock); + + dprintf("ZFS: HFS_%s_LINK %u sibling %u\n", + (ap->a_command != HFS_NEXT_LINK) ? "NEXT" : "PREV", + linkfileid, + sibling ? sibling->hl_linkid : 0); + + // Did we get a new node? + if (sibling == NULL) { + *(uint32_t *)ap->a_data = 0; + goto out; + } + + *(uint32_t *)ap->a_data = sibling->hl_linkid; + error = 0; + } + break; + + case HFS_RESIZE_PROGRESS: + case HFSIOC_RESIZE_PROGRESS: + dprintf("%s HFS_RESIZE_PROGRESS\n", __func__); + /* fail as if requested of non-root fs */ + error = EINVAL; + break; + + case HFS_RESIZE_VOLUME: + case HFSIOC_RESIZE_VOLUME: + dprintf("%s HFS_RESIZE_VOLUME\n", __func__); + /* fail as if requested of non-root fs */ + error = EINVAL; + break; + + case HFS_CHANGE_NEXT_ALLOCATION: + case HFSIOC_CHANGE_NEXT_ALLOCATION: + dprintf("%s HFS_CHANGE_NEXT_ALLOCATION\n", __func__); + /* fail as if requested of non-root fs */ + error = EINVAL; + break; + + case HFS_CHANGE_NEXTCNID: + case HFSIOC_CHANGE_NEXTCNID: + dprintf("%s HFS_CHANGE_NEXTCNID\n", __func__); + /* FIXME : fail as though read only */ + error = EROFS; + break; + + case F_FREEZE_FS: + dprintf("%s F_FREEZE_FS\n", __func__); + /* Dont support freeze */ + error = ENOTSUP; + break; + + case F_THAW_FS: + dprintf("%s F_THAW_FS\n", __func__); + /* dont support fail as though insufficient privilege */ + error = EACCES; + break; + + case HFS_BULKACCESS_FSCTL: + case HFSIOC_BULKACCESS: + dprintf("%s HFS_BULKACCESS_FSCTL\n", __func__); + /* Respond as if HFS_STANDARD flag is set */ + error = EINVAL; + break; + + case HFS_FSCTL_GET_VERY_LOW_DISK: + case HFSIOC_GET_VERY_LOW_DISK: + dprintf("%s HFS_FSCTL_GET_VERY_LOW_DISK\n", __func__); + *(uint32_t *)ap->a_data = + zfsvfs->z_freespace_notify_dangerlimit; + break; + + case HFS_FSCTL_SET_VERY_LOW_DISK: + case HFSIOC_SET_VERY_LOW_DISK: + dprintf("%s HFS_FSCTL_SET_VERY_LOW_DISK\n", __func__); + if (*(uint32_t *)ap->a_data >= + zfsvfs->z_freespace_notify_warninglimit) { + error = EINVAL; + } else { + zfsvfs->z_freespace_notify_dangerlimit = + *(uint32_t *)ap->a_data; + } + break; + + case HFS_FSCTL_GET_LOW_DISK: + case HFSIOC_GET_LOW_DISK: + dprintf("%s HFS_FSCTL_GET_LOW_DISK\n", __func__); + *(uint32_t *)ap->a_data = + zfsvfs->z_freespace_notify_warninglimit; + break; + + case HFS_FSCTL_SET_LOW_DISK: + case HFSIOC_SET_LOW_DISK: + dprintf("%s HFS_FSCTL_SET_LOW_DISK\n", __func__); + if (*(uint32_t *)ap->a_data >= + zfsvfs->z_freespace_notify_desiredlevel || + *(uint32_t *)ap->a_data <= + zfsvfs->z_freespace_notify_dangerlimit) { + error = EINVAL; + } else { + zfsvfs->z_freespace_notify_warninglimit = + *(uint32_t *)ap->a_data; + } + break; + + case 
HFS_FSCTL_GET_DESIRED_DISK: + case HFSIOC_GET_DESIRED_DISK: + dprintf("%s HFS_FSCTL_GET_DESIRED_DISK\n", __func__); + *(uint32_t *)ap->a_data = + zfsvfs->z_freespace_notify_desiredlevel; + break; + + case HFS_FSCTL_SET_DESIRED_DISK: + case HFSIOC_SET_DESIRED_DISK: + dprintf("%s HFS_FSCTL_SET_DESIRED_DISK\n", __func__); + if (*(uint32_t *)ap->a_data <= + zfsvfs->z_freespace_notify_warninglimit) { + error = EINVAL; + } else { + zfsvfs->z_freespace_notify_desiredlevel = + *(uint32_t *)ap->a_data; + } + break; + + case HFS_VOLUME_STATUS: + case HFSIOC_VOLUME_STATUS: + dprintf("%s HFS_VOLUME_STATUS\n", __func__); + /* For now we always reply "all ok" */ + *(uint32_t *)ap->a_data = + zfsvfs->z_notification_conditions; + break; + + case HFS_SET_BOOT_INFO: + dprintf("%s HFS_SET_BOOT_INFO\n", __func__); + /* + * ZFS booting is not supported, mimic selection + * of a non-root HFS volume + */ + *(uint32_t *)ap->a_data = 0; + error = EINVAL; + break; + case HFS_GET_BOOT_INFO: + { + u_int32_t vcbFndrInfo[8]; + printf("%s HFS_GET_BOOT_INFO\n", __func__); + /* + * ZFS booting is not supported, mimic selection + * of a non-root HFS volume + */ + memset(vcbFndrInfo, 0, sizeof (vcbFndrInfo)); + struct vfsstatfs *vfsstatfs; + vfsstatfs = vfs_statfs(zfsvfs->z_vfs); + vcbFndrInfo[6] = vfsstatfs->f_fsid.val[0]; + vcbFndrInfo[7] = vfsstatfs->f_fsid.val[1]; + bcopy(vcbFndrInfo, ap->a_data, + sizeof (vcbFndrInfo)); + } + break; + case HFS_MARK_BOOT_CORRUPT: + dprintf("%s HFS_MARK_BOOT_CORRUPT\n", __func__); + /* + * ZFS booting is not supported, mimic selection + * of a non-root HFS volume + */ + *(uint32_t *)ap->a_data = 0; + error = EINVAL; + break; + + case HFS_FSCTL_GET_JOURNAL_INFO: + case HFSIOC_GET_JOURNAL_INFO: + dprintf("%s HFS_FSCTL_GET_JOURNAL_INFO\n", __func__); + /* + * XXX We're setting the mount as 'Journaled' + * so this might conflict + * Respond as though journal is empty/disabled + */ + { + struct hfs_journal_info *jip; + jip = (struct hfs_journal_info *)ap->a_data; + jip->jstart = 0; + jip->jsize = 0; + } + break; + + case HFS_DISABLE_METAZONE: + dprintf("%s HFS_DISABLE_METAZONE\n", __func__); + /* fail as though insufficient privs */ + error = EACCES; + break; + +#ifdef HFS_GET_FSINFO + case HFS_GET_FSINFO: + case HFSIOC_GET_FSINFO: + dprintf("%s HFS_GET_FSINFO\n", __func__); + break; +#endif + +#ifdef HFS_REPIN_HOTFILE_STATE + case HFS_REPIN_HOTFILE_STATE: + case HFSIOC_REPIN_HOTFILE_STATE: + dprintf("%s HFS_REPIN_HOTFILE_STATE\n", __func__); + break; +#endif + +#ifdef HFS_SET_HOTFILE_STATE + case HFS_SET_HOTFILE_STATE: + case HFSIOC_SET_HOTFILE_STATE: + dprintf("%s HFS_SET_HOTFILE_STATE\n", __func__); + break; +#endif + +#ifdef APFSIOC_GET_NEAR_LOW_DISK + case APFSIOC_GET_NEAR_LOW_DISK: + dprintf("%s APFSIOC_GET_NEAR_LOW_DISK\n", __func__); + *(uint32_t *)ap->a_data = + zfsvfs->z_freespace_notify_warninglimit; + break; +#endif + +#ifdef APFSIOC_SET_NEAR_LOW_DISK + case APFSIOC_SET_NEAR_LOW_DISK: + dprintf("%s APFSIOC_SET_NEAR_LOW_DISK\n", __func__); + if (*(uint32_t *)ap->a_data >= + zfsvfs->z_freespace_notify_desiredlevel || + *(uint32_t *)ap->a_data <= + zfsvfs->z_freespace_notify_dangerlimit) { + error = EINVAL; + } else { + zfsvfs->z_freespace_notify_warninglimit = + *(uint32_t *)ap->a_data; + } + break; +#endif + + /* End HFS mimic ioctl */ + + default: + dprintf("%s: Unknown ioctl %02lx ('%lu' + %lu)\n", + __func__, ap->a_command, (ap->a_command&0xff00)>>8, + ap->a_command&0xff); + error = ENOTTY; + } + +out: + if (error) { + dprintf("%s: failing ioctl: %02lx ('%lu' + %lu) returned 
%d\n", + __func__, ap->a_command, (ap->a_command&0xff00)>>8, + ap->a_command&0xff, error); + } + + return (error); +} + + +int +zfs_vnop_read(struct vnop_read_args *ap) +#if 0 + struct vnop_read_args { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; + }; +#endif +{ + int ioflag = zfs_ioflags(ap->a_ioflag); + int error; + /* uint64_t resid; */ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + + /* resid = uio_resid(ap->a_uio); */ + error = zfs_read(ap->a_vp, ap->a_uio, ioflag, cr); + + if (error) dprintf("vnop_read %d\n", error); + return (error); +} + +int +zfs_vnop_write(struct vnop_write_args *ap) +#if 0 + struct vnop_write_args { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; + }; +#endif +{ + int ioflag = zfs_ioflags(ap->a_ioflag); + int error; + DECLARE_CRED(ap); + + // dprintf("zfs_vnop_write(vp %p, offset 0x%llx size 0x%llx\n", + // ap->a_vp, uio_offset(ap->a_uio), uio_resid(ap->a_uio)); + + error = zfs_write(ap->a_vp, ap->a_uio, ioflag, cr); + + /* + * Mac OS X: pageout requires that the UBC file size be current. + * Possibly, we could update it only if size has changed. + */ + + /* if (tx_bytes != 0) { */ + if (!error) { + ubc_setsize(ap->a_vp, VTOZ(ap->a_vp)->z_size); + } else { + dprintf("%s error %d\n", __func__, error); + } + + return (error); +} + +int +zfs_vnop_access(struct vnop_access_args *ap) +#if 0 + struct vnop_access_args { + struct vnodeop_desc *a_desc; + struct vnode a_vp; + int a_action; + vfs_context_t a_context; + }; +#endif +{ + int error = ENOTSUP; + int action = ap->a_action; + int mode = 0; + DECLARE_CRED(ap); + + /* + * KAUTH_VNODE_READ_EXTATTRIBUTES, as well? + * KAUTH_VNODE_WRITE_EXTATTRIBUTES + */ + if (action & KAUTH_VNODE_READ_DATA) + mode |= VREAD; + if (action & KAUTH_VNODE_WRITE_DATA) + mode |= VWRITE; + if (action & KAUTH_VNODE_EXECUTE) + mode |= VEXEC; + + dprintf("vnop_access: action %04x -> mode %04x\n", action, mode); + error = zfs_access(ap->a_vp, mode, 0, cr); + + if (error) dprintf("%s: error %d\n", __func__, error); + return (error); +} + + +/* + * hard link references? + * Read the comment in zfs_getattr_znode_unlocked for the reason + * for this hackery. Since getattr(VA_NAME) is extremely common + * call in OSX, we opt to always save the name. We need to be careful + * as zfs_dirlook can return ctldir node as well (".zfs"). + * Hardlinks also need to be able to return the correct parentid. + */ +static void zfs_cache_name(struct vnode *vp, struct vnode *dvp, char *filename) +{ + znode_t *zp; + if (!vp || + !filename || + !filename[0] || + zfsctl_is_node(vp) || + !VTOZ(vp)) + return; + + // Only cache files, or we might end up caching "." + if (!vnode_isreg(vp)) + return; + + zp = VTOZ(vp); + + mutex_enter(&zp->z_lock); + + strlcpy(zp->z_name_cache, filename, + MAXPATHLEN); + + // If hardlink, remember the parentid. 
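/*
 * Both fields cached here are consumed by zfs_getattr_znode_unlocked():
 * getattr(VA_NAME) is a very common call on macOS, and hardlinked files
 * additionally need to report a correct parent id, which only the lookup
 * path knows reliably. A minimal consumer-side sketch, for illustration
 * only (vap handling assumed, not part of this change):
 *
 *	if (VATTR_IS_ACTIVE(vap, va_name) && zp->z_name_cache[0] != 0) {
 *		strlcpy(vap->va_name, zp->z_name_cache, MAXPATHLEN);
 *		VATTR_SET_SUPPORTED(vap, va_name);
 *	}
 */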
+ if (((zp->z_links > 1) || (zp->z_finder_hardlink)) && + (IFTOVT((mode_t)zp->z_mode) == VREG) && dvp) { + zp->z_finder_parentid = VTOZ(dvp)->z_id; + } + + mutex_exit(&zp->z_lock); +} + + +int +zfs_vnop_lookup(struct vnop_lookup_args *ap) +#if 0 + struct vnop_lookup_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + vfs_context_t a_context; + }; +#endif +{ + struct componentname *cnp = ap->a_cnp; + DECLARE_CRED(ap); + int error; + int negative_cache = 0; + znode_t *zp = NULL; + int direntflags = 0; + char filename[MAXNAMELEN]; + + *ap->a_vpp = NULL; /* In case we return an error */ + + /* + * Darwin uses namelen as an optimisation, for example it can be + * set to 5 for the string "alpha/beta" to look up "alpha". In this + * case we need to copy it out to null-terminate. + */ + bcopy(cnp->cn_nameptr, filename, cnp->cn_namelen); + filename[cnp->cn_namelen] = '\0'; + +#if 1 + /* + * cache_lookup() returns 0 for no-entry + * -1 for cache found (a_vpp set) + * ENOENT for negative cache + */ + error = cache_lookup(ap->a_dvp, ap->a_vpp, cnp); + if (error) { + /* We found a cache entry, positive or negative. */ + if (error == -1) { /* Positive entry? */ + if (!zfs_vnop_ignore_positives) { + error = 0; + goto exit; /* Positive cache, return it */ + } + /* Release iocount held by cache_lookup */ + vnode_put(*ap->a_vpp); + } + /* Negatives are only followed if not CREATE, from HFS+. */ + if (cnp->cn_nameiop != CREATE) { + if (!zfs_vnop_ignore_negatives) { + goto exit; /* Negative cache hit */ + } + negative_cache = 1; + } + } +#endif + + dprintf("+vnop_lookup '%s' %s\n", filename, + negative_cache ? "negative_cache":""); + + /* + * 'cnp' passed to us is 'readonly' as XNU does not expect a return + * name, but most likely expects it correct in getattr. + */ + struct componentname cn2; + cn2.cn_nameptr = filename; + cn2.cn_namelen = MAXNAMELEN; + cn2.cn_nameiop = cnp->cn_nameiop; + cn2.cn_flags = cnp->cn_flags; + + error = zfs_lookup(VTOZ(ap->a_dvp), filename, &zp, /* flags */ 0, cr, + &direntflags, &cn2); + /* flags can be LOOKUP_XATTR | FIGNORECASE */ + +#if 1 + /* + * It appears that VFS layer adds negative cache entries for us, so + * we do not need to add them here, or they are duplicated. + */ + if ((error == ENOENT) && zfs_vnop_create_negatives) { + if ((ap->a_cnp->cn_nameiop == CREATE || + ap->a_cnp->cn_nameiop == RENAME) && + (cnp->cn_flags & ISLASTCN)) { + error = EJUSTRETURN; + goto exit; + } + /* Insert name into cache (as non-existent) if appropriate. 
*/ + if ((cnp->cn_flags & MAKEENTRY) && + ap->a_cnp->cn_nameiop != CREATE) { + cache_enter(ap->a_dvp, NULL, ap->a_cnp); + dprintf("Negative-cache made for '%s'\n", + filename); + } + } /* ENOENT */ +#endif + +exit: + + if (error == 0 && (zp != NULL)) { + printf("back with zp %p: name '%s'\n", zp, filename); + + *ap->a_vpp = ZTOV(zp); + + zfs_cache_name(*ap->a_vpp, ap->a_dvp, filename); + + } + + dprintf("-vnop_lookup %d : dvp %llu '%s'\n", error, + VTOZ(ap->a_dvp)->z_id, filename); + + return (error); +} + +int +zfs_vnop_create(struct vnop_create_args *ap) +#if 0 + struct vnop_create_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vnode_vattr *a_vap; + vfs_context_t a_context; + }; +#endif +{ + struct componentname *cnp = ap->a_cnp; + vattr_t *vap = ap->a_vap; + DECLARE_CRED(ap); + vcexcl_t excl; + int mode = 0; /* FIXME */ + int error; + znode_t *zp = NULL; + + dprintf("vnop_create: '%s'\n", cnp->cn_nameptr); + + /* + * extern int zfs_create(struct vnode *dvp, char *name, vattr_t *vap, + * int excl, int mode, struct vnode **vpp, cred_t *cr); + */ + excl = (vap->va_vaflags & VA_EXCLUSIVE) ? EXCL : NONEXCL; + + error = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, excl, mode, + &zp, cr, 0, NULL); + if (!error) { + cache_purge_negatives(ap->a_dvp); + *ap->a_vpp = ZTOV(zp); + } else { + dprintf("%s error %d\n", __func__, error); + } + + return (error); +} + + +static int zfs_remove_hardlink(struct vnode *vp, struct vnode *dvp, char *name) +{ + /* + * Because we store hash of hardlinks in an AVLtree, we need to remove + * any entries in it upon deletion. Since it is complicated to know + * if an entry was a hardlink, we simply check if the avltree has the + * name. + */ + hardlinks_t *searchnode, *findnode; + avl_index_t loc; + + if (!vp || !VTOZ(vp)) + return (1); + if (!dvp || !VTOZ(dvp)) + return (1); + znode_t *zp = VTOZ(vp); + znode_t *dzp = VTOZ(dvp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int ishardlink = 0; + + ishardlink = ((zp->z_links > 1) && + (IFTOVT((mode_t)zp->z_mode) == VREG)) ? 1 : 0; + if (zp->z_finder_hardlink) + ishardlink = 1; + + if (!ishardlink) + return (0); + + dprintf("ZFS: removing hash (%llu,%llu,'%s')\n", + dzp->z_id, zp->z_id, name); + + // Attempt to remove from hardlink avl, if its there + searchnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + searchnode->hl_parent = dzp->z_id == zfsvfs->z_root ? 2 : dzp->z_id; + searchnode->hl_fileid = zp->z_id; + strlcpy(searchnode->hl_name, name, PATH_MAX); + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_READER); + findnode = avl_find(&zfsvfs->z_hardlinks, searchnode, &loc); + rw_exit(&zfsvfs->z_hardlinks_lock); + kmem_free(searchnode, sizeof (hardlinks_t)); + + // Found it? remove it + if (findnode) { + rw_enter(&zfsvfs->z_hardlinks_lock, RW_WRITER); + avl_remove(&zfsvfs->z_hardlinks, findnode); + avl_remove(&zfsvfs->z_hardlinks_linkid, findnode); + rw_exit(&zfsvfs->z_hardlinks_lock); + kmem_free(findnode, sizeof (*findnode)); + dprintf("ZFS: removed hash '%s'\n", name); + mutex_enter(&zp->z_lock); + zp->z_name_cache[0] = 0; + zp->z_finder_parentid = 0; + mutex_exit(&zp->z_lock); + return (1); + } + return (0); +} + + +static int zfs_rename_hardlink(struct vnode *vp, struct vnode *tvp, + struct vnode *fdvp, struct vnode *tdvp, + char *from, char *to) +{ + /* + * Because we store hash of hardlinks in an AVLtree, we need to update + * any entries in it upon rename. Since it is complicated to know + * if an entry was a hardlink, we simply check if the avltree has the + * name. 
+ */ + hardlinks_t *searchnode, *findnode, *delnode; + avl_index_t loc; + uint64_t parent_fid, parent_tid; + int ishardlink = 0; + + if (!vp || !VTOZ(vp)) + return (0); + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + ishardlink = ((zp->z_links > 1) && + (IFTOVT((mode_t)zp->z_mode) == VREG)) ? 1 : 0; + if (zp->z_finder_hardlink) + ishardlink = 1; + + if (!ishardlink) + return (0); + + if (!fdvp || !VTOZ(fdvp)) + return (0); + parent_fid = VTOZ(fdvp)->z_id; + parent_fid = parent_fid == zfsvfs->z_root ? 2 : parent_fid; + + if (!tdvp || !VTOZ(tdvp)) { + parent_tid = parent_fid; + } else { + parent_tid = VTOZ(tdvp)->z_id; + parent_tid = parent_tid == zfsvfs->z_root ? 2 : parent_tid; + } + + dprintf("ZFS: looking to rename hardlinks (%llu,%llu,%s)\n", + parent_fid, zp->z_id, from); + + + // Attempt to remove from hardlink avl, if its there + searchnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + searchnode->hl_parent = parent_fid; + searchnode->hl_fileid = zp->z_id; + strlcpy(searchnode->hl_name, from, PATH_MAX); + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_READER); + findnode = avl_find(&zfsvfs->z_hardlinks, searchnode, &loc); + rw_exit(&zfsvfs->z_hardlinks_lock); + + // Found it? update it + if (findnode) { + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_WRITER); + + // Technically, we do not need to re-do the _linkid AVL here. + avl_remove(&zfsvfs->z_hardlinks, findnode); + avl_remove(&zfsvfs->z_hardlinks_linkid, findnode); + + // If we already have a hashid for "to" and the rename + // presumably unlinked it, we need to remove it first. + searchnode->hl_parent = parent_tid; + strlcpy(searchnode->hl_name, to, PATH_MAX); + delnode = avl_find(&zfsvfs->z_hardlinks, searchnode, &loc); + if (delnode) { + dprintf("ZFS: apparently %llu:'%s' exists, deleting\n", + parent_tid, to); + avl_remove(&zfsvfs->z_hardlinks, delnode); + avl_remove(&zfsvfs->z_hardlinks_linkid, delnode); + kmem_free(delnode, sizeof (*delnode)); + } + + dprintf("ZFS: renamed hash %llu (%llu:'%s' to %llu:'%s'): %s\n", + zp->z_id, + parent_fid, from, + parent_tid, to, + delnode ? "deleted":""); + + // Update source node to new hash, and name. 
+ findnode->hl_parent = parent_tid; + strlcpy(findnode->hl_name, to, PATH_MAX); + // zp->z_finder_parentid = parent_tid; + + avl_add(&zfsvfs->z_hardlinks, findnode); + avl_add(&zfsvfs->z_hardlinks_linkid, findnode); + + rw_exit(&zfsvfs->z_hardlinks_lock); + kmem_free(searchnode, sizeof (hardlinks_t)); + + return (1); + } + + kmem_free(searchnode, sizeof (hardlinks_t)); + return (0); +} + + +int +zfs_vnop_remove(struct vnop_remove_args *ap) +#if 0 + struct vnop_remove_args { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; + }; +#endif +{ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + int error; + + dprintf("vnop_remove: %p (%s)\n", ap->a_vp, ap->a_cnp->cn_nameptr); + + /* + * extern int zfs_remove ( struct vnode *dvp, char *name, cred_t *cr, + * caller_context_t *ct, int flags); + */ + error = zfs_remove(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, cr, + /* flags */0); + if (!error) { + cache_purge(ap->a_vp); + + zfs_remove_hardlink(ap->a_vp, + ap->a_dvp, + ap->a_cnp->cn_nameptr); + } else { + dprintf("%s error %d\n", __func__, error); + } + + return (error); +} + +int +zfs_vnop_mkdir(struct vnop_mkdir_args *ap) +#if 0 + struct vnop_mkdir_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vnode_vattr *a_vap; + vfs_context_t a_context; + }; +#endif +{ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + int error; + + dprintf("vnop_mkdir '%s'\n", ap->a_cnp->cn_nameptr); + +#if 0 + /* Let's deny OS X fseventd for now */ + if (ap->a_cnp->cn_nameptr && + strcmp(ap->a_cnp->cn_nameptr, ".fseventsd") == 0) + return (EINVAL); +#endif + +#if 0 + /* spotlight for now */ + if (ap->a_cnp->cn_nameptr && + strcmp(ap->a_cnp->cn_nameptr, ".Spotlight-V100") == 0) + return (EINVAL); +#endif + /* + * extern int zfs_mkdir(struct vnode *dvp, char *dirname, vattr_t *vap, + * struct vnode **vpp, cred_t *cr, caller_context_t *ct, int flags, + * vsecattr_t *vsecp); + */ + znode_t *zp = NULL; + error = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, ap->a_vap, + &zp, cr, /* flags */0, /* vsecp */NULL); + if (!error) { + *ap->a_vpp = ZTOV(zp); + cache_purge_negatives(ap->a_dvp); + vnode_update_identity(*ap->a_vpp, ap->a_dvp, + (const char *)ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen, + 0, VNODE_UPDATE_NAME); + + VERIFY3P(zp->z_zfsvfs, ==, + vfs_fsprivate(vnode_mount(*ap->a_vpp))); + + + } else { + dprintf("%s error %d\n", __func__, error); + } + + return (error); +} + +int +zfs_vnop_rmdir(struct vnop_rmdir_args *ap) +#if 0 + struct vnop_rmdir_args { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + vfs_context_t a_context; + }; +#endif +{ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + int error; + + dprintf("vnop_rmdir\n"); + + /* + * extern int zfs_rmdir(struct vnode *dvp, char *name, + * struct vnode *cwd, cred_t *cr, caller_context_t *ct, int flags); + */ + error = zfs_rmdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, + /* cwd */NULL, cr, /* flags */0); + if (!error) { + cache_purge(ap->a_vp); + } else { + dprintf("%s error %d\n", __func__, error); + } + + return (error); +} + +int +zfs_vnop_readdir(struct vnop_readdir_args *ap) +#if 0 + struct vnop_readdir_args { + struct vnode a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; + }; +#endif +{ + int error; + DECLARE_CRED(ap); + + dprintf("+readdir: %p\n", ap->a_vp); + + /* + * XXX This interface needs vfs_has_feature. 
+ * XXX zfs_readdir() also needs to grow support for passing back the + * number of entries (OS X/FreeBSD) and cookies (FreeBSD). However, + * it should be the responsibility of the OS caller to malloc/free + * space for that. + */ + + /* + * extern int zfs_readdir(struct vnode *vp, uio_t *uio, cred_t *cr, + * int *eofp, int flags, int *a_numdirent); + */ + *ap->a_numdirent = 0; + + error = zfs_readdir(ap->a_vp, ap->a_uio, cr, ap->a_eofflag, ap->a_flags, + ap->a_numdirent); + + /* .zfs dirs can be completely empty */ + if (*ap->a_numdirent == 0) + *ap->a_numdirent = 2; /* . and .. */ + + if (error) { + dprintf("-readdir %d (nument %d)\n", error, *ap->a_numdirent); + } + return (error); +} + +int +zfs_vnop_fsync(struct vnop_fsync_args *ap) +#if 0 + struct vnop_fsync_args { + struct vnode *a_vp; + int a_waitfor; + vfs_context_t a_context; + }; +#endif +{ + znode_t *zp = VTOZ(ap->a_vp); + zfsvfs_t *zfsvfs; +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + int err; + + /* + * Check if this znode has already been synced, freed, and recycled + * by znode_pageout_func. + * + * XXX What is this? Substitute for Illumos vn_has_cached_data()? + */ + if (zp == NULL) + return (0); + + zfsvfs = zp->z_zfsvfs; + + if (!zfsvfs) + return (0); + + /* + * If we come here via vnode_create()->vclean() we can not end up in + * zil_commit() or we will deadlock. But we know that vnop_reclaim will + * be called next, so we just return success. + */ + if (vnode_isrecycled(ap->a_vp)) + return (0); + + err = zfs_fsync(VTOZ(ap->a_vp), /* flag */0, cr); + + if (err) dprintf("%s err %d\n", __func__, err); + + return (err); +} + +int +zfs_vnop_getattr(struct vnop_getattr_args *ap) +#if 0 + struct vnop_getattr_args { + struct vnode *a_vp; + struct vnode_vattr *a_vap; + vfs_context_t a_context; + }; +#endif +{ + int error; + DECLARE_CRED_AND_CONTEXT(ap); + + /* dprintf("+vnop_getattr zp %p vp %p\n", VTOZ(ap->a_vp), ap->a_vp); */ + + error = zfs_getattr(ap->a_vp, ap->a_vap, /* flags */0, cr, ct); + + if (error == 0) { + error = zfs_getattr_znode_unlocked(ap->a_vp, ap->a_vap); + } + if (error) + dprintf("-vnop_getattr '%p' %d\n", (ap->a_vp), error); + + return (error); +} + +int +zfs_vnop_setattr(struct vnop_setattr_args *ap) +#if 0 + struct vnop_setattr_args { + struct vnode *a_vp; + struct vnode_vattr *a_vap; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + vattr_t *vap = ap->a_vap; + uint_t mask = vap->va_mask; + int error = 0; + int hfscompression = 0; + znode_t *zp = VTOZ(ap->a_vp); + + /* Translate OS X requested mask to ZFS */ + mask = vap->va_mask; + + /* + * Both 'flags' and 'acl' can come to setattr, but without 'mode' set. + * However, ZFS assumes 'mode' is also set. We need to look up 'mode' in + * this case. + */ + if ((VATTR_IS_ACTIVE(vap, va_flags) || VATTR_IS_ACTIVE(vap, va_acl)) && + !VATTR_IS_ACTIVE(vap, va_mode)) { + uint64_t mode; + + mask |= ATTR_MODE; + + dprintf("fetching MODE for FLAGS or ACL\n"); + ZFS_ENTER(zp->z_zfsvfs); + ZFS_VERIFY_ZP(zp); + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zp->z_zfsvfs), &mode, + sizeof (mode)); + vap->va_mode = mode; + ZFS_EXIT(zp->z_zfsvfs); + } + if (VATTR_IS_ACTIVE(vap, va_flags)) { + + /* + * If TRACKED is wanted, and not previously set, + * go set DocumentID + */ + if ((vap->va_flags & UF_TRACKED) && + !(zp->z_pflags & ZFS_TRACKED)) { + zfs_setattr_generate_id(zp, 0, NULL); + /* flags updated in vnops */ + zfs_setattr_set_documentid(zp, B_FALSE); + } + + /* If they are trying to turn on compression.. 
*/ + if (vap->va_flags & UF_COMPRESSED) { + zp->z_skip_truncate_undo_decmpfs = B_TRUE; + printf("setattr trying to set COMPRESSED!\n"); + } + /* Map OS X file flags to zfs file flags */ + zfs_setbsdflags(zp, vap->va_flags); + dprintf("OS X flags %08x changed to ZFS %04llx\n", + vap->va_flags, zp->z_pflags); + vap->va_flags = zp->z_pflags; + + } + + vap->va_mask = mask; + + /* + * If z_skip_truncate_undo_decmpfs is set, and they are trying to + * va_size == 0 (truncate), we undo the decmpfs work here. This is + * because we can not stop (no error, or !feature works) macOS from + * using decmpfs. + */ +#ifndef DECMPFS_XATTR_NAME +#define DECMPFS_XATTR_NAME "com.apple.decmpfs" +#endif + if ((VATTR_IS_ACTIVE(vap, va_total_size) || + VATTR_IS_ACTIVE(vap, va_data_size)) && + zp->z_skip_truncate_undo_decmpfs) { + zp->z_skip_truncate_undo_decmpfs = B_FALSE; + + printf("setattr setsize with compress attempted\n"); + + if (zfs_vnop_removexattr_int(zp->z_zfsvfs, zp, + DECMPFS_XATTR_NAME, NULL) == 0) { + /* Successfully deleted the XATTR - skip truncate */ + VATTR_CLEAR_ACTIVE(vap, va_total_size); + VATTR_CLEAR_ACTIVE(vap, va_data_size); + printf("setattr skipping truncate!\n"); + } + } + + error = zfs_setattr(VTOZ(ap->a_vp), ap->a_vap, /* flag */0, cr); + + dprintf("vnop_setattr: called on vp %p with mask %04x, err=%d\n", + ap->a_vp, mask, error); + + if (!error) { + /* If successful, tell OS X which fields ZFS set. */ + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + dprintf("ZFS: setattr new size %llx %llx\n", + vap->va_size, ubc_getsize(ap->a_vp)); + ubc_setsize(ap->a_vp, vap->va_size); + VATTR_SET_SUPPORTED(vap, va_data_size); + } + if (VATTR_IS_ACTIVE(vap, va_mode)) + VATTR_SET_SUPPORTED(vap, va_mode); + if (VATTR_IS_ACTIVE(vap, va_acl)) + VATTR_SET_SUPPORTED(vap, va_acl); + if (VATTR_IS_ACTIVE(vap, va_uid)) + VATTR_SET_SUPPORTED(vap, va_uid); + if (VATTR_IS_ACTIVE(vap, va_gid)) + VATTR_SET_SUPPORTED(vap, va_gid); + if (VATTR_IS_ACTIVE(vap, va_access_time)) + VATTR_SET_SUPPORTED(vap, va_access_time); + if (VATTR_IS_ACTIVE(vap, va_modify_time)) + VATTR_SET_SUPPORTED(vap, va_modify_time); + if (VATTR_IS_ACTIVE(vap, va_change_time)) + VATTR_SET_SUPPORTED(vap, va_change_time); + if (VATTR_IS_ACTIVE(vap, va_create_time)) + VATTR_SET_SUPPORTED(vap, va_create_time); + if (VATTR_IS_ACTIVE(vap, va_backup_time)) + VATTR_SET_SUPPORTED(vap, va_backup_time); + if (VATTR_IS_ACTIVE(vap, va_flags)) { + VATTR_SET_SUPPORTED(vap, va_flags); + } + + } + +#if 1 + uint64_t missing = 0; + missing = (vap->va_active ^ (vap->va_active & vap->va_supported)); + if (missing != 0) { + printf("vnop_setattr:: asked %08llx replied %08llx " + "missing %08llx\n", vap->va_active, + vap->va_supported, missing); + } +#endif + + if (error) + dprintf("ZFS: vnop_setattr return failure %d\n", error); + return (error); +} + +int +zfs_vnop_rename(struct vnop_rename_args *ap) +#if 0 + struct vnop_rename_args { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + vfs_context_t a_context; + }; +#endif +{ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + int error; + + dprintf("vnop_rename\n"); + + /* + * extern int zfs_rename(struct vnode *sdvp, char *snm, + * struct vnode *tdvp, char *tnm, cred_t *cr, caller_context_t *ct, + * int flags); + */ + error = zfs_rename(VTOZ(ap->a_fdvp), ap->a_fcnp->cn_nameptr, + VTOZ(ap->a_tdvp), ap->a_tcnp->cn_nameptr, cr, /* flags */0); + + if (!error) { + cache_purge_negatives(ap->a_fdvp); + 
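/*
 * The same purge is applied to the target directory below: the rename may
 * have made previously negative names (notably the target name) resolvable
 * again, so stale ENOENT entries must go. cache_purge(a_fvp) then drops the
 * source vnode's old name/identity, and zfs_rename_hardlink() keeps the
 * hardlink AVL bookkeeping in step with the new parent and name.
 */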
cache_purge_negatives(ap->a_tdvp); + cache_purge(ap->a_fvp); + + zfs_rename_hardlink(ap->a_fvp, ap->a_tvp, + ap->a_fdvp, ap->a_tdvp, + ap->a_fcnp->cn_nameptr, + ap->a_tcnp->cn_nameptr); + if (ap->a_tvp) { + cache_purge(ap->a_tvp); + } + +#ifdef __APPLE__ + /* + * After a rename, the VGET path /.vol/$fsid/$ino fails for + * a short period on hardlinks (until someone calls lookup). + * So until we can figure out exactly why this is, we drive + * a lookup here to ensure that vget will work + * (Finder/Spotlight). + */ + if (ap->a_fvp && VTOZ(ap->a_fvp) && + VTOZ(ap->a_fvp)->z_finder_hardlink) { + struct vnode *vp; + if (VOP_LOOKUP(ap->a_tdvp, &vp, ap->a_tcnp, + spl_vfs_context_kernel()) == 0) + vnode_put(vp); + } +#endif + + } + + if (error) dprintf("%s: error %d\n", __func__, error); + return (error); +} + +#if defined(MAC_OS_X_VERSION_10_12) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12) +int +zfs_vnop_renamex(struct vnop_renamex_args *ap) +#if 0 + struct vnop_renamex_args { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + struct vnode_attr *a_vap; // Reserved for future use + vfs_rename_flags_t a_flags; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + int error; + + dprintf("vnop_renamex\n"); + + /* + * extern int zfs_rename(struct vnode *sdvp, char *snm, + * struct vnode *tdvp, char *tnm, cred_t *cr, caller_context_t *ct, + * int flags); + * + * Currently, hfs only supports one flag, VFS_RENAME_EXCL, so + * we will do the same. Since zfs_rename() only has logic for + * FIGNORECASE, passing VFS_RENAME_EXCL should be ok, if a bit + * hacky. + */ + error = zfs_rename(VTOZ(ap->a_fdvp), ap->a_fcnp->cn_nameptr, + VTOZ(ap->a_tdvp), ap->a_tcnp->cn_nameptr, cr, + (ap->a_flags&VFS_RENAME_EXCL)); + + if (!error) { + cache_purge_negatives(ap->a_fdvp); + cache_purge_negatives(ap->a_tdvp); + cache_purge(ap->a_fvp); + + zfs_rename_hardlink(ap->a_fvp, ap->a_tvp, + ap->a_fdvp, ap->a_tdvp, + ap->a_fcnp->cn_nameptr, + ap->a_tcnp->cn_nameptr); + if (ap->a_tvp) { + cache_purge(ap->a_tvp); + } + +#ifdef __APPLE__ + /* + * After a rename, the VGET path /.vol/$fsid/$ino fails for + * a short period on hardlinks (until someone calls lookup). + * So until we can figure out exactly why this is, we drive + * a lookup here to ensure that vget will work + * (Finder/Spotlight). + */ + if (ap->a_fvp && VTOZ(ap->a_fvp) && + VTOZ(ap->a_fvp)->z_finder_hardlink) { + struct vnode *vp; + if (VOP_LOOKUP(ap->a_tdvp, &vp, ap->a_tcnp, + spl_vfs_context_kernel()) == 0) + vnode_put(vp); + } +#endif + + } + + if (error) dprintf("%s: error %d\n", __func__, error); + return (error); +} +#endif // vnop_renamex_args + +int +zfs_vnop_symlink(struct vnop_symlink_args *ap) +#if 0 + struct vnop_symlink_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vnode_vattr *a_vap; + char *a_target; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + int error; + + dprintf("vnop_symlink\n"); + + /* + * extern int zfs_symlink(struct vnode *dvp, struct vnode **vpp, + * char *name, vattr_t *vap, char *link, cred_t *cr); + */ + + /* OS X doesn't need to set vap->va_mode? 
*/ + znode_t *zp = NULL; + error = zfs_symlink(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, + ap->a_vap, ap->a_target, &zp, cr, 0); + if (!error) { + *ap->a_vpp = ZTOV(zp); + cache_purge_negatives(ap->a_dvp); + } else { + dprintf("%s: error %d\n", __func__, error); + } + /* XXX zfs_attach_vnode()? */ + return (error); +} + + +int +zfs_vnop_readlink(struct vnop_readlink_args *ap) +#if 0 + struct vnop_readlink_args { + struct vnode *vp; + struct uio *uio; + vfs_context_t a_context; + }; +#endif +{ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + + dprintf("vnop_readlink\n"); + + /* + * extern int zfs_readlink(struct vnode *vp, uio_t *uio, cred_t *cr, + * caller_context_t *ct); + */ + return (zfs_readlink(ap->a_vp, ap->a_uio, cr)); +} + +int +zfs_vnop_link(struct vnop_link_args *ap) +#if 0 + struct vnop_link_args { + struct vnode *a_vp; + struct vnode *a_tdvp; + struct componentname *a_cnp; + vfs_context_t a_context; + }; +#endif +{ +// DECLARE_CRED_AND_CONTEXT(ap); + DECLARE_CRED(ap); + int error; + + dprintf("vnop_link\n"); + + /* XXX Translate this inside zfs_link() instead. */ + if (vnode_mount(ap->a_vp) != vnode_mount(ap->a_tdvp)) { + dprintf("%s: vp and tdvp on different mounts\n", __func__); + return (EXDEV); + } + + /* + * XXX Understand why Apple made this comparison in so many places where + * others do not. + */ + if (ap->a_cnp->cn_namelen >= ZAP_MAXNAMELEN) { + dprintf("%s: name too long %d\n", __func__, + ap->a_cnp->cn_namelen); + return (ENAMETOOLONG); + } + + /* + * extern int zfs_link(struct vnode *tdvp, struct vnode *svp, + * char *name, cred_t *cr, caller_context_t *ct, int flags); + */ + + error = zfs_link(VTOZ(ap->a_tdvp), VTOZ(ap->a_vp), + ap->a_cnp->cn_nameptr, cr, 0); + if (!error) { + // Set source vnode to multipath too, zfs_get_vnode() + // handles the target + vnode_setmultipath(ap->a_vp); + cache_purge(ap->a_vp); + cache_purge_negatives(ap->a_tdvp); + } else { + dprintf("%s error %d\n", __func__, error); + } + + return (error); +} + +int +zfs_vnop_pagein(struct vnop_pagein_args *ap) +#if 0 + struct vnop_pagein_args { + struct vnode *a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_foffset; + size_t a_size; + int a_flags; + vfs_context_t a_context; + }; +#endif +{ + /* XXX Crib this from the Apple zfs_vnops.c. 
*/ + struct vnode *vp = ap->a_vp; + offset_t off = ap->a_f_offset; + size_t len = ap->a_size; + upl_t upl = ap->a_pl; + vm_offset_t upl_offset = ap->a_pl_offset; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + caddr_t vaddr = NULL; + /* vm_offset_t vaddr = NULL; */ + int flags = ap->a_flags; + int need_unlock = 0; + int error = 0; + uint64_t file_sz; + + dprintf("+vnop_pagein: %p/%p off 0x%llx size 0x%lx filesz 0x%llx\n", + zp, vp, off, len, zp->z_size); + + if (upl == (upl_t)NULL) + panic("zfs_vnop_pagein: no upl!"); + + if (len <= 0) { + dprintf("zfs_vnop_pagein: invalid size %ld", len); + if (!(flags & UPL_NOCOMMIT)) + (void) ubc_upl_abort(upl, 0); + return (EINVAL); + } + + ZFS_ENTER(zfsvfs); + + file_sz = zp->z_size; + + ASSERT(vn_has_cached_data(vp)); + /* ASSERT(zp->z_dbuf_held && zp->z_phys); */ + /* can't fault passed EOF */ + if ((off < 0) || (off >= file_sz) || + (len & PAGE_MASK) || (upl_offset & PAGE_MASK)) { + dprintf("passed EOF or size error\n"); + ZFS_EXIT(zfsvfs); + if (!(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, upl_offset, len, + (UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY)); + return (EFAULT); + } + + /* + * If we already own the lock, then we must be page faulting in the + * middle of a write to this file (i.e., we are writing to this file + * using data from a mapped region of the file). + */ + if (!rw_write_held(&zp->z_map_lock)) { + rw_enter(&zp->z_map_lock, RW_WRITER); + need_unlock = TRUE; + } + + + if (ubc_upl_map(upl, (vm_offset_t *)&vaddr) != KERN_SUCCESS) { + dprintf("zfs_vnop_pagein: failed to ubc_upl_map"); + if (!(flags & UPL_NOCOMMIT)) + (void) ubc_upl_abort(upl, 0); + if (need_unlock) + rw_exit(&zp->z_map_lock); + ZFS_EXIT(zfsvfs); + return (ENOMEM); + } + + dprintf("vaddr %p with upl_off 0x%lx\n", vaddr, upl_offset); + vaddr += upl_offset; + + /* Can't read beyond EOF - but we need to zero those extra bytes. */ + if (off + len > file_sz) { + uint64_t newend = file_sz - off; + + dprintf("ZFS: pagein zeroing offset 0x%llx for 0x%llx bytes.\n", + newend, len - newend); + memset(&vaddr[newend], 0, len - newend); + len = newend; + } + /* + * Fill pages with data from the file. + */ + while (len > 0) { + uint64_t readlen; + + readlen = MIN(PAGESIZE, len); + + dprintf("pagein from off 0x%llx len 0x%llx into " + "address %p (len 0x%lx)\n", + off, readlen, vaddr, len); + + error = dmu_read(zp->z_zfsvfs->z_os, zp->z_id, off, readlen, + (void *)vaddr, DMU_READ_PREFETCH); + if (error) { + printf("zfs_vnop_pagein: dmu_read err %d\n", error); + break; + } + off += readlen; + vaddr += readlen; + len -= readlen; + } + ubc_upl_unmap(upl); + + if (!(flags & UPL_NOCOMMIT)) { + if (error) + ubc_upl_abort_range(upl, upl_offset, ap->a_size, + (UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY)); + else + ubc_upl_commit_range(upl, upl_offset, ap->a_size, + (UPL_COMMIT_CLEAR_DIRTY | + UPL_COMMIT_FREE_ON_EMPTY)); + } + ZFS_ACCESSTIME_STAMP(zfsvfs, zp); + + /* + * We can't grab the range lock for the page as reader which would stop + * truncation as this leads to deadlock. So we need to recheck the file + * size. 
+ */ + if (ap->a_f_offset >= file_sz) + error = EFAULT; + if (need_unlock) + rw_exit(&zp->z_map_lock); + + ZFS_EXIT(zfsvfs); + if (error) dprintf("%s error %d\n", __func__, error); + return (error); +} + + + + +static int +zfs_pageout(zfsvfs_t *zfsvfs, znode_t *zp, upl_t upl, vm_offset_t upl_offset, + offset_t off, size_t size, int flags) +{ + dmu_tx_t *tx; + zfs_locked_range_t *lr; + uint64_t filesz; + int err = 0; + size_t len = size; + + dprintf("+vnop_pageout: %p/%p off 0x%llx len 0x%lx upl_off 0x%lx: " + "blksz 0x%x, z_size 0x%llx upl %p flags 0x%x\n", zp, ZTOV(zp), + off, len, upl_offset, zp->z_blksz, + zp->z_size, upl, flags); + + if (upl == (upl_t)NULL) { + dprintf("ZFS: vnop_pageout: failed on NULL upl\n"); + return (EINVAL); + } + /* + * We can't leave this function without either calling upl_commit or + * upl_abort. So use the non-error version. + */ + ZFS_ENTER_IFERROR(zfsvfs) { + if (!(flags & UPL_NOCOMMIT)) + (void) ubc_upl_abort(upl, + UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY); + dprintf("ZFS: vnop_pageout: abort on z_unmounted\n"); + ZFS_EXIT(zfsvfs); + return (EIO); + } + + + ASSERT(vn_has_cached_data(ZTOV(zp))); + /* ASSERT(zp->z_dbuf_held); */ /* field no longer present in znode. */ + + if (len <= 0) { + if (!(flags & UPL_NOCOMMIT)) + (void) ubc_upl_abort(upl, + UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY); + err = EINVAL; + goto exit; + } + if (vnode_vfsisrdonly(ZTOV(zp))) { + if (!(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, upl_offset, len, + UPL_ABORT_FREE_ON_EMPTY); + err = EROFS; + goto exit; + } + + filesz = zp->z_size; /* get consistent copy of zp_size */ + + if (off < 0 || off >= filesz || (off & PAGE_MASK_64) || + (len & PAGE_MASK)) { + if (!(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, upl_offset, len, + UPL_ABORT_FREE_ON_EMPTY); + err = EINVAL; + goto exit; + } + + uint64_t pgsize = roundup(filesz, PAGESIZE); + + /* Any whole pages beyond the end of the file while we abort */ + if ((size + off) > pgsize) { + printf("ZFS: pageout abort outside pages (rounded 0x%llx > " + "UPLlen 0x%llx\n", pgsize, size + off); + ubc_upl_abort_range(upl, pgsize, + pgsize - (size + off), + UPL_ABORT_FREE_ON_EMPTY); + } + + dprintf("ZFS: starting with size %lx\n", len); + +top: + lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); + /* + * can't push pages passed end-of-file + */ + filesz = zp->z_size; + if (off >= filesz) { + /* ignore all pages */ + err = 0; + goto out; + } else if (off + len > filesz) { +#if 0 + int npages = btopr(filesz - off); + page_t *trunc; + + page_list_break(&pp, &trunc, npages); + /* ignore pages past end of file */ + if (trunc) + pvn_write_done(trunc, flags); +#endif + len = filesz - off; + } + + tx = dmu_tx_create(zfsvfs->z_os); + if (!tx) { + printf("ZFS: zfs_vnops_osx: NULL TX encountered!\n"); + if (!(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, upl_offset, len, + UPL_ABORT_FREE_ON_EMPTY); + err = EINVAL; + goto exit; + } + dmu_tx_hold_write(tx, zp->z_id, off, len); + + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err != 0) { + if (err == ERESTART) { + zfs_rangelock_exit(lr); + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + goto out; + } + + caddr_t va; + + if (ubc_upl_map(upl, (vm_offset_t *)&va) != KERN_SUCCESS) { + err = EINVAL; + goto out; + } + + va += upl_offset; + while (len >= PAGESIZE) { + ssize_t sz = PAGESIZE; + + dprintf("pageout: dmu_write off 0x%llx size 0x%lx\n", off, sz); + + 
dmu_write(zfsvfs->z_os, zp->z_id, off, sz, va, tx); + va += sz; + off += sz; + len -= sz; + } + + /* + * The last, possibly partial block needs to have the data zeroed that + * would extend past the size of the file. + */ + if (len > 0) { + ssize_t sz = len; + + dprintf("pageout: dmu_writeX off 0x%llx size 0x%lx\n", off, sz); + dmu_write(zfsvfs->z_os, zp->z_id, off, sz, va, tx); + + va += sz; + off += sz; + len -= sz; + + /* + * Zero out the remainder of the PAGE that didn't fit within + * the file size. + */ + // bzero(va, PAGESIZE-sz); + // dprintf("zero last 0x%lx bytes.\n", PAGESIZE-sz); + + } + ubc_upl_unmap(upl); + + if (err == 0) { + uint64_t mtime[2], ctime[2]; + sa_bulk_attr_t bulk[3]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); + err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + ASSERT0(err); + zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0, + NULL, NULL); + } + dmu_tx_commit(tx); + +out: + zfs_rangelock_exit(lr); + if (flags & UPL_IOSYNC) + zil_commit(zfsvfs->z_log, zp->z_id); + + if (!(flags & UPL_NOCOMMIT)) { + if (err) + ubc_upl_abort_range(upl, upl_offset, size, + (UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY)); + else + ubc_upl_commit_range(upl, upl_offset, size, + (UPL_COMMIT_CLEAR_DIRTY | + UPL_COMMIT_FREE_ON_EMPTY)); + } +exit: + ZFS_EXIT(zfsvfs); + if (err) dprintf("%s err %d\n", __func__, err); + return (err); +} + + + +int +zfs_vnop_pageout(struct vnop_pageout_args *ap) +#if 0 + struct vnop_pageout_args { + struct vnode *a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_foffset; + size_t a_size; + int a_flags; + vfs_context_t a_context; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + int flags = ap->a_flags; + upl_t upl = ap->a_pl; + vm_offset_t upl_offset = ap->a_pl_offset; + size_t len = ap->a_size; + offset_t off = ap->a_f_offset; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = NULL; + int error; + + if (!zp || !zp->z_zfsvfs) { + if (!(flags & UPL_NOCOMMIT)) + ubc_upl_abort(upl, + (UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY)); + printf("ZFS: vnop_pageout: null zp or zfsvfs\n"); + return (ENXIO); + } + + zfsvfs = zp->z_zfsvfs; + + dprintf("+vnop_pageout: off 0x%llx len 0x%lx upl_off 0x%lx: " + "blksz 0x%x, z_size 0x%llx\n", off, len, upl_offset, zp->z_blksz, + zp->z_size); + + /* + * XXX Crib this too, although Apple uses parts of zfs_putapage(). + * Break up that function into smaller bits so it can be reused. + */ + error = zfs_pageout(zfsvfs, zp, upl, upl_offset, ap->a_f_offset, + len, flags); + + return (error); +} + + +static int bluster_pageout(zfsvfs_t *zfsvfs, znode_t *zp, upl_t upl, + upl_offset_t upl_offset, off_t f_offset, int size, + uint64_t filesize, int flags, caddr_t vaddr, + dmu_tx_t *tx) +{ + int io_size; + int rounded_size; + off_t max_size; + int is_clcommit = 0; + + if ((flags & UPL_NOCOMMIT) == 0) + is_clcommit = 1; + + /* + * If they didn't specify any I/O, then we are done... 
+ * we can't issue an abort because we don't know how + * big the upl really is + */ + if (size <= 0) { + dprintf("%s invalid size %d\n", __func__, size); + return (EINVAL); + } + + if (vnode_vfsisrdonly(ZTOV(zp))) { + if (is_clcommit) + ubc_upl_abort_range(upl, upl_offset, size, + UPL_ABORT_FREE_ON_EMPTY); + dprintf("%s: readonly fs\n", __func__); + return (EROFS); + } + + /* + * can't page-in from a negative offset + * or if we're starting beyond the EOF + * or if the file offset isn't page aligned + * or the size requested isn't a multiple of PAGE_SIZE + */ + if (f_offset < 0 || f_offset >= filesize || + (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) { + if (is_clcommit) + ubc_upl_abort_range(upl, upl_offset, size, + UPL_ABORT_FREE_ON_EMPTY); + dprintf("%s: invalid offset or size\n", __func__); + return (EINVAL); + } + max_size = filesize - f_offset; + + if (size < max_size) + io_size = size; + else + io_size = max_size; + + rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK; + + if (size > rounded_size) { + if (is_clcommit) + ubc_upl_abort_range(upl, upl_offset + rounded_size, + size - rounded_size, UPL_ABORT_FREE_ON_EMPTY); + } + +#if 1 + if (f_offset + size > filesize) { + dprintf("ZFS: lowering size %u to %llu\n", + size, f_offset > filesize ? 0 : filesize - f_offset); + if (f_offset > filesize) + size = 0; + else + size = filesize - f_offset; + } +#endif + + dmu_write(zfsvfs->z_os, zp->z_id, f_offset, size, + &vaddr[upl_offset], tx); + + return (0); +} + + + + +/* + * In V2 of vnop_pageout, we are given a NULL upl, so that we can + * grab the file locks first, then request the upl to lock down pages. + */ +int +zfs_vnop_pageoutv2(struct vnop_pageout_args *ap) +#if 0 + struct vnop_pageout_args { + struct vnode *a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_foffset; + size_t a_size; + int a_flags; + vfs_context_t a_context; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + int a_flags = ap->a_flags; + vm_offset_t a_pl_offset = ap->a_pl_offset; + size_t a_size = ap->a_size; + upl_t upl = ap->a_pl; + upl_page_info_t *pl; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = NULL; + int error = 0; + uint64_t filesize; + zfs_locked_range_t *lr; + dmu_tx_t *tx; + caddr_t vaddr = NULL; + int merror = 0; + + /* + * We can still get into this function as non-v2 style, by the default + * pager (ie, swap - when we eventually support it) + */ + if (upl) { + dprintf("ZFS: Relaying vnop_pageoutv2 to vnop_pageout\n"); + return (zfs_vnop_pageout(ap)); + } + + if (!zp || !zp->z_zfsvfs) { + printf("ZFS: vnop_pageout: null zp or zfsvfs\n"); + return (ENXIO); + } + + if (ZTOV(zp) == NULL) { + printf("ZFS: vnop_pageout: null vp\n"); + return (ENXIO); + } + + // XNU can call us with iocount == 0 && usecount == 0. Grab + // a ref now so the vp doesn't reclaim while we are in here. 
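+ // The iocount taken here is dropped with vnode_put() on every exit path below.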
+ if (vnode_get(ZTOV(zp)) != 0) { + printf("ZFS: vnop_pageout: vnode_ref failed.\n"); + return (ENXIO); + } + + mutex_enter(&zp->z_lock); + + sa_handle_t *z_sa_hdl; + z_sa_hdl = zp->z_sa_hdl; + if (!z_sa_hdl) { + mutex_exit(&zp->z_lock); + vnode_put(ZTOV(zp)); + printf("ZFS: vnop_pageout: null sa_hdl\n"); + return (ENXIO); + } + + zfsvfs = zp->z_zfsvfs; + + mutex_exit(&zp->z_lock); + + if (error) { + printf("ZFS: %s: can't hold_sa: %d\n", __func__, error); + vnode_put(ZTOV(zp)); + return (ENXIO); + } + + dprintf("+vnop_pageout2: off 0x%llx len 0x%lx upl_off 0x%lx: " + "blksz 0x%x, z_size 0x%llx\n", ap->a_f_offset, a_size, + a_pl_offset, zp->z_blksz, + zp->z_size); + + + /* Start the pageout request */ + /* + * We can't leave this function without either calling upl_commit or + * upl_abort. So use the non-error version. + */ + ZFS_ENTER_IFERROR(zfsvfs) { + dprintf("ZFS: vnop_pageoutv2: abort on z_unmounted\n"); + error = EIO; + goto exit_abort; + } + if (vfs_flags(zfsvfs->z_vfs) & MNT_RDONLY) { + dprintf("ZFS: vnop_pageoutv2: readonly\n"); + error = EROFS; + goto exit_abort; + } + ASSERT(vn_has_cached_data(ZTOV(zp))); + + lr = zfs_rangelock_enter(&zp->z_rangelock, ap->a_f_offset, a_size, + RL_WRITER); + + /* Grab UPL now */ + int request_flags; + + /* + * we're in control of any UPL we commit + * make sure someone hasn't accidentally passed in UPL_NOCOMMIT + */ + a_flags &= ~UPL_NOCOMMIT; + a_pl_offset = 0; + + if (a_flags & UPL_MSYNC) { + request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; + } else { + request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY; + } + + error = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, + request_flags); + if (error || (upl == NULL)) { + dprintf("ZFS: Failed to create UPL! %d\n", error); + goto pageout_done; + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_write(tx, zp->z_id, ap->a_f_offset, ap->a_size); + + // NULL z_sa_hdl + if (z_sa_hdl != NULL) + dmu_tx_hold_sa(tx, z_sa_hdl, B_FALSE); + + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error != 0) { + dmu_tx_abort(tx); + ubc_upl_abort(upl, (UPL_ABORT_ERROR|UPL_ABORT_FREE_ON_EMPTY)); + goto pageout_done; + } + + off_t f_offset; + int64_t offset; + int64_t isize; + int64_t pg_index; + + filesize = zp->z_size; /* get consistent copy of zp_size */ + + isize = ap->a_size; + f_offset = ap->a_f_offset; + + /* + * Scan from the back to find the last page in the UPL, so that we + * aren't looking at a UPL that may have already been freed by the + * preceding aborts/completions. + */ + for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0; ) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == 0) { + dprintf("ZFS: failed on pg_index\n"); + dmu_tx_commit(tx); + ubc_upl_abort_range(upl, 0, isize, + UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; + } + } + + dprintf("ZFS: isize %llu pg_index %llu\n", isize, pg_index); + /* + * initialize the offset variables before we touch the UPL. + * a_f_offset is the position into the file, in bytes + * offset is the position into the UPL, in bytes + * pg_index is the pg# of the UPL we're operating on. + * isize is the offset into the UPL of the last non-clean page. + */ + isize = ((pg_index + 1) * PAGE_SIZE); + + offset = 0; + pg_index = 0; + while (isize > 0) { + int64_t xsize; + int64_t num_of_pages; + + // printf("isize %d for page %d\n", isize, pg_index); + + if (!upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_DIRTY, so it's possible + * to get back empty slots in the UPL. 
+ * just skip over them + */ + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + + continue; + } + if (!upl_dirty_page(pl, pg_index)) { + /* + * hfs has a call to panic here, but we trigger this + * *a lot* so unsure what is going on + */ + dprintf("zfs_vnop_pageoutv2: unforeseen clean page " + "@ index %lld for UPL %p\n", pg_index, upl); + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + continue; + } + + /* + * We know that we have at least one dirty page. + * Now checking to see how many in a row we have + */ + num_of_pages = 1; + xsize = isize - PAGE_SIZE; + + while (xsize > 0) { + if (!upl_dirty_page(pl, pg_index + num_of_pages)) + break; + num_of_pages++; + xsize -= PAGE_SIZE; + } + xsize = num_of_pages * PAGE_SIZE; + + if (!vnode_isswap(vp)) { + off_t end_of_range; + + end_of_range = f_offset + xsize - 1; + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + } + + // Map it if needed + if (!vaddr) { + if ((ubc_upl_map(upl, (vm_offset_t *)&vaddr) != + KERN_SUCCESS) || vaddr == NULL) { + error = EINVAL; + vaddr = NULL; + dprintf("ZFS: unable to map\n"); + goto out; + } + dprintf("ZFS: Mapped %p\n", vaddr); + } + + + dprintf("ZFS: bluster offset %lld fileoff %lld size %lld " + "filesize %lld\n", offset, f_offset, xsize, filesize); + merror = bluster_pageout(zfsvfs, zp, upl, offset, f_offset, + xsize, filesize, a_flags, vaddr, tx); + /* remember the first error */ + if ((error == 0) && (merror)) + error = merror; + + f_offset += xsize; + offset += xsize; + isize -= xsize; + pg_index += num_of_pages; + } // while isize + + /* finish off transaction */ + if (error == 0) { + uint64_t mtime[2], ctime[2]; + sa_bulk_attr_t bulk[3]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); + zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, ap->a_f_offset, + a_size, 0, NULL, NULL); + } + dmu_tx_commit(tx); + + // unmap + if (vaddr) { + ubc_upl_unmap(upl); + vaddr = NULL; + } +out: + zfs_rangelock_exit(lr); + if (a_flags & UPL_IOSYNC) + zil_commit(zfsvfs->z_log, zp->z_id); + + if (error) + ubc_upl_abort(upl, (UPL_ABORT_ERROR|UPL_ABORT_FREE_ON_EMPTY)); + else + ubc_upl_commit_range(upl, 0, a_size, UPL_COMMIT_FREE_ON_EMPTY); + + upl = NULL; + + vnode_put(ZTOV(zp)); + + ZFS_EXIT(zfsvfs); + if (error) + dprintf("ZFS: pageoutv2 failed %d\n", error); + return (error); + +pageout_done: + zfs_rangelock_exit(lr); + +exit_abort: + dprintf("ZFS: pageoutv2 aborted %d\n", error); + // VERIFY(ubc_create_upl(vp, off, len, &upl, &pl, flags) == 0); + // ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY); + + vnode_put(ZTOV(zp)); + + if (zfsvfs) + ZFS_EXIT(zfsvfs); + return (error); +} + + + + + + +int +zfs_vnop_mmap(struct vnop_mmap_args *ap) +#if 0 + struct vnop_mmap_args { + struct vnode *a_vp; + int a_fflags; + kauth_cred_t a_cred; + struct proc *a_p; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs; + + if (!zp) + return (ENODEV); + + zfsvfs = zp->z_zfsvfs; + + dprintf("+vnop_mmap: %p\n", ap->a_vp); + + ZFS_ENTER(zfsvfs); + + if (!vnode_isreg(vp)) { + ZFS_EXIT(zfsvfs); + return (ENODEV); + } + mutex_enter(&zp->z_lock); + zp->z_is_mapped = 1; + mutex_exit(&zp->z_lock); + + ZFS_EXIT(zfsvfs); + dprintf("-vnop_mmap\n"); + return (0); 
+} + +int +zfs_vnop_mnomap(struct vnop_mnomap_args *ap) +#if 0 + struct vnop_mnomap_args { + struct vnode *a_vp; + int a_fflags; + kauth_cred_t a_cred; + struct proc *a_p; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + dprintf("+vnop_mnomap: %p\n", ap->a_vp); + + ZFS_ENTER(zfsvfs); + + if (!vnode_isreg(vp)) { + ZFS_EXIT(zfsvfs); + return (ENODEV); + } + mutex_enter(&zp->z_lock); + /* + * If a file has been mmapped even once, it needs to keep "z_is_mapped" + * set because it will potentially keep pages in the UPL cache we need + * to update on writes. We can either drop the UPL pages here, or simply + * keep updating both places on zfs_write(). + */ + /* zp->z_is_mapped = 0; */ + mutex_exit(&zp->z_lock); + + ZFS_EXIT(zfsvfs); + dprintf("-vnop_mnomap\n"); + return (0); +} + + + + +int +zfs_vnop_inactive(struct vnop_inactive_args *ap) +#if 0 + struct vnop_inactive_args { + struct vnode *a_vp; + vfs_context_t a_context; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + zfs_inactive(vp); + return (0); +} + + + +#ifdef _KERNEL +uint64_t vnop_num_reclaims = 0; +uint64_t vnop_num_vnodes = 0; +#endif + + +int +zfs_vnop_reclaim(struct vnop_reclaim_args *ap) +#if 0 + struct vnop_reclaim_args { + struct vnode *a_vp; + vfs_context_t a_context; + }; +#endif +{ + /* + * Care needs to be taken here: we may already have called reclaim + * from vnop_inactive, and if so, very little needs to be done. + */ + + struct vnode *vp = ap->a_vp; + znode_t *zp = NULL; + zfsvfs_t *zfsvfs = NULL; + + /* Destroy the vm object and flush associated pages. */ +#ifndef __APPLE__ + vnode_destroy_vobject(vp); +#endif + + /* Already been released? */ + zp = VTOZ(vp); + ASSERT(zp != NULL); + dprintf("+vnop_reclaim zp %p/%p type %d\n", zp, vp, vnode_vtype(vp)); + if (!zp) goto out; + + zfsvfs = zp->z_zfsvfs; + + if (!zfsvfs) { + printf("ZFS: vnop_reclaim with zfsvfs == NULL\n"); + return (0); + } + + if (zfsctl_is_node(vp)) { + printf("ZFS: vnop_reclaim with ctldir node\n"); + return (0); + } + + ZTOV(zp) = NULL; + + /* + * Purge old data structures associated with the znode.
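+ * Detach the vnode first (clear v_data, drop the fs ref), then free the + * znode itself via zfs_zinactive()/zfs_znode_free() below.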
+ */ + vnode_clearfsnode(vp); /* vp->v_data = NULL */ + vnode_removefsref(vp); /* ADDREF from vnode_create */ + atomic_dec_64(&vnop_num_vnodes); + + dprintf("+vnop_reclaim zp %p/%p unlinked %d unmount " + "%d sa_hdl %p\n", zp, vp, zp->z_unlinked, + zfsvfs->z_unmounted, zp->z_sa_hdl); + + rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); + if (zp->z_sa_hdl == NULL) { + zfs_znode_free(zp); + } else { + zfs_zinactive(zp); + zfs_znode_free(zp); + } + rw_exit(&zfsvfs->z_teardown_inactive_lock); + +#ifdef _KERNEL + atomic_inc_64(&vnop_num_reclaims); +#endif + +out: + return (0); +} + + + + + +int +zfs_vnop_mknod(struct vnop_mknod_args *ap) +#if 0 + struct vnop_mknod_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vnode_vattr *vap; + vfs_context_t a_context; + }; +#endif +{ + struct vnop_create_args create_ap; + int error; + + dprintf("%s\n", __func__); + + bzero(&create_ap, sizeof (struct vnop_create_args)); + + create_ap.a_dvp = ap->a_dvp; + create_ap.a_vpp = ap->a_vpp; + create_ap.a_cnp = ap->a_cnp; + create_ap.a_vap = ap->a_vap; + create_ap.a_context = ap->a_context; + + error = zfs_vnop_create(&create_ap); + if (error) dprintf("%s error %d\n", __func__, error); + return (error); +} + +int +zfs_vnop_allocate(struct vnop_allocate_args *ap) +#if 0 + struct vnop_allocate_args { + struct vnode *a_vp; + off_t a_length; + u_int32_t a_flags; + off_t *a_bytesallocated; + off_t a_offset; + vfs_context_t a_context; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs; + uint64_t wantedsize = 0, filesize = 0; + int err = 0; + + dprintf("%s %llu %d %llu %llu: '%s'\n", __func__, ap->a_length, + ap->a_flags, (ap->a_bytesallocated ? *ap->a_bytesallocated : 0), + ap->a_offset, zp->z_name_cache); + + /* + * This code has been reverted: + * https://github.com/openzfsonosx/zfs/issues/631 + * Most likely not correctly aligned, and too-large offsets. + */ + return (0); + + if (!zp || !zp->z_sa_hdl) + return (ENODEV); + +// *ap->a_bytesallocated = 0; + + if (!vnode_isreg(vp)) { + ZFS_EXIT(zfsvfs); + return (ENODEV); + } + + filesize = zp->z_size; + wantedsize = ap->a_length; + + if (ap->a_flags & ALLOCATEFROMPEOF) + wantedsize += filesize; + else if (ap->a_flags & ALLOCATEFROMVOL) + /* blockhint = ap->a_offset / blocksize */ // yeah, no idea + printf("%s: help, allocatefromvolume set?\n", __func__); + + dprintf("%s: filesize %llu wantedsize %llu\n", __func__, + filesize, wantedsize); + + // If we are extending + if (wantedsize > filesize) { + + err = zfs_freesp(zp, wantedsize, 0, FWRITE, B_TRUE); + + // If we are truncating, Apple claims this code is never called. 
+ } else if (wantedsize < filesize) { + + printf("%s: file shrinking branch taken?\n", __func__); + + } + + if (!err) { + *(ap->a_bytesallocated) = wantedsize - filesize; + } + + ZFS_EXIT(zfsvfs); + dprintf("-%s: %d\n", __func__, err); + return (err); +} + +int +zfs_vnop_whiteout(struct vnop_whiteout_args *ap) +#if 0 + struct vnop_whiteout_args { + struct vnode *a_dvp; + struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; + }; +#endif +{ + dprintf("vnop_whiteout: ENOTSUP\n"); + + return (ENOTSUP); +} + +int +zfs_vnop_pathconf(struct vnop_pathconf_args *ap) +#if 0 + struct vnop_pathconf_args { + struct vnode *a_vp; + int a_name; + register_t *a_retval; + vfs_context_t a_context; + }; +#endif +{ + int32_t *valp = ap->a_retval; + int error = 0; + + dprintf("+vnop_pathconf a_name %d\n", ap->a_name); + + switch (ap->a_name) { + case _PC_LINK_MAX: + *valp = INT_MAX; + break; + case _PC_PIPE_BUF: + *valp = PIPE_BUF; + break; + case _PC_CHOWN_RESTRICTED: + *valp = 200112; /* POSIX */ + break; + case _PC_NO_TRUNC: + *valp = 200112; /* POSIX */ + break; + case _PC_NAME_MAX: + case _PC_NAME_CHARS_MAX: + *valp = ZAP_MAXNAMELEN - 1; /* 255 */ + break; + case _PC_PATH_MAX: + case _PC_SYMLINK_MAX: + *valp = PATH_MAX; /* 1024 */ + break; + case _PC_CASE_SENSITIVE: + { + znode_t *zp = VTOZ(ap->a_vp); + *valp = 1; + if (zp && zp->z_zfsvfs) { + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + *valp = (zfsvfs->z_case == ZFS_CASE_SENSITIVE) ? 1 : 0; + } + } + break; + case _PC_CASE_PRESERVING: + *valp = 1; + break; +/* + * OS X 10.6 does not define this. + */ +#ifndef _PC_XATTR_SIZE_BITS +#define _PC_XATTR_SIZE_BITS 26 +#endif +/* + * Even though ZFS has 64 bit limit on XATTR size, there would appear to be a + * limit in SMB2 that the bit size returned has to be 18, or we will get an + * error from most XATTR calls (STATUS_ALLOTTED_SPACE_EXCEEDED). + */ +#ifndef AD_XATTR_SIZE_BITS +#define AD_XATTR_SIZE_BITS 18 +#endif + case _PC_XATTR_SIZE_BITS: + *valp = AD_XATTR_SIZE_BITS; + break; + case _PC_FILESIZEBITS: + *valp = 64; + break; + default: + printf("ZFS: unknown pathconf %d called.\n", ap->a_name); + error = EINVAL; + } + + if (error) dprintf("%s vp %p : %d\n", __func__, ap->a_vp, error); + return (error); +} + +int +zfs_vnop_getxattr(struct vnop_getxattr_args *ap) +#if 0 + struct vnop_getxattr_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + char *a_name; + struct uio *a_uio; + size_t *a_size; + int a_options; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + struct uio *uio = ap->a_uio; + struct componentname cn = { 0 }; + int error = 0; + int size = 0; + uint64_t resid = uio ? uio_resid(uio) : 0; + znode_t *xdzp = NULL, *xzp = NULL; + + dprintf("+getxattr vp %p: '%s'\n", ap->a_vp, ap->a_name); + + /* xattrs disabled? 
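+ * z_xattr is B_FALSE when xattr support is disabled for this + * filesystem, in which case we report ENOTSUP.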
*/ + if (zfsvfs->z_xattr == B_FALSE) { + return (ENOTSUP); + } + + ZFS_ENTER(zfsvfs); + + if (zfsvfs->z_use_sa && zfsvfs->z_xattr_sa && zp->z_is_sa) { + char *value = NULL; + + rw_enter(&zp->z_xattr_lock, RW_READER); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + rw_exit(&zp->z_xattr_lock); + + if (!resid) { /* Lookup size */ + + rw_enter(&zp->z_xattr_lock, RW_READER); + size = zpl_xattr_get_sa(vp, ap->a_name, NULL, 0); + rw_exit(&zp->z_xattr_lock); + if (size > 0) { + *ap->a_size = size; + goto out; + } + } + + if (resid) { + value = kmem_alloc(resid, KM_SLEEP); + rw_enter(&zp->z_xattr_lock, RW_READER); + size = zpl_xattr_get_sa(vp, ap->a_name, value, resid); + rw_exit(&zp->z_xattr_lock); + + /* Finderinfo checks */ + if (!error && resid && + bcmp(ap->a_name, XATTR_FINDERINFO_NAME, + sizeof (XATTR_FINDERINFO_NAME)) == 0) { + + /* Must be 32 bytes */ + if (resid != sizeof (emptyfinfo) || + size != sizeof (emptyfinfo)) { + error = ERANGE; + kmem_free(value, resid); + goto out; + } + + /* If FinderInfo is empty > it doesn't exist */ + if (bcmp(value, emptyfinfo, + sizeof (emptyfinfo)) == 0) { + error = ENOATTR; + kmem_free(value, resid); + goto out; + } + + /* According to HFS zero out some fields */ + finderinfo_update((uint8_t *)value, zp); + } + + if (size > 0) + error = uiomove((const char *)value, size, 0, + uio); + + kmem_free(value, resid); + + goto out; + } + } + + /* Legacy xattr */ + + /* Grab the hidden attribute directory vnode. */ + if ((error = zfs_get_xattrdir(zp, &xdzp, cr, 0))) { + goto out; + } + + cn.cn_namelen = strlen(ap->a_name) + 1; + cn.cn_nameptr = (char *)kmem_zalloc(cn.cn_namelen, KM_SLEEP); + + /* Lookup the attribute name. */ + if ((error = zfs_dirlook(xdzp, (char *)ap->a_name, &xzp, 0, NULL, + &cn))) { + goto out; + } + + /* + * If we are dealing with FinderInfo, we duplicate the UIO first + * so that we can uiomove to/from it to modify contents. + */ + if (!error && uio && + bcmp(ap->a_name, XATTR_FINDERINFO_NAME, + sizeof (XATTR_FINDERINFO_NAME)) == 0) { + ssize_t local_resid; + zfs_file_t zf; + u_int8_t finderinfo[32]; + static u_int32_t emptyfinfo[8] = {0}; + + /* Read the attribute data. */ + /* FinderInfo is 32 bytes */ + if ((user_size_t)uio_resid(uio) < 32) { + error = ERANGE; + goto out; + } + + /* Use the convenience wrappers to read to SYSSPACE */ + zf.f_vnode = ZTOV(xzp); + zf.f_fd = -1; + + error = zfs_file_pread(&zf, &finderinfo, + sizeof (finderinfo), 0ULL, &local_resid); + + if (local_resid != 0) { + error = ERANGE; + } else { + + /* Update size if requested */ + if (ap->a_size) + *ap->a_size = (size_t)sizeof (finderinfo); + + /* According to HFS we are to zero out some fields */ + finderinfo_update((uint8_t *)&finderinfo, zp); + + /* If Finder Info is empty then it doesn't exist. */ + if (bcmp(finderinfo, emptyfinfo, + sizeof (emptyfinfo)) == 0) { + error = ENOATTR; + } else { + + /* Copy out the data we just modified */ + error = uiomove((const char *)&finderinfo, + sizeof (finderinfo), 0, uio); + + } /* Not empty */ + } /* Correct size */ + + /* We are done */ + goto out; + } /* Is finder info */ + + /* If NOT finderinfo */ + + if (uio == NULL) { + + /* Query xattr size. 
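+ * A NULL uio means the caller only wants the attribute size, which is + * returned through ap->a_size.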
*/ + if (ap->a_size) { + mutex_enter(&xzp->z_lock); + *ap->a_size = (size_t)xzp->z_size; + mutex_exit(&xzp->z_lock); + } + + } else { + + /* Read xattr */ + error = zfs_read(ZTOV(xzp), uio, 0, cr); + + if (ap->a_size && uio) { + *ap->a_size = (size_t)resid - uio_resid(ap->a_uio); + } + + } + +out: + + if (error == ENOENT) + error = ENOATTR; + + if (cn.cn_nameptr) + kmem_free(cn.cn_nameptr, cn.cn_namelen); + if (xzp) { + zrele(xzp); + } + if (xdzp) { + zrele(xdzp); + } + + ZFS_EXIT(zfsvfs); + dprintf("-getxattr vp %p : %d size %lu: %s\n", ap->a_vp, error, + !error && ap->a_size ? *ap->a_size : 0, + ap->a_uio == NULL ? "sizelookup" : "xattrread"); + return (error); +} + +int +zfs_vnop_setxattr(struct vnop_setxattr_args *ap) +#if 0 + struct vnop_setxattr_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + char *a_name; + struct uio *a_uio; + int a_options; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + struct vnode *vp = ap->a_vp; + struct vnode *xvp = NULLVP; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + struct uio *uio = ap->a_uio; + int flag; + int error = 0; + znode_t *xdzp = NULL; + + dprintf("+setxattr vp %p '%s' (enabled: %d) resid %llu\n", ap->a_vp, + ap->a_name, zfsvfs->z_xattr, uio_resid(ap->a_uio)); + + /* xattrs disabled? */ + if (zfsvfs->z_xattr == B_FALSE) { + return (ENOTSUP); + } + + if (ap->a_name == NULL || ap->a_name[0] == '\0') { + return (EINVAL); /* invalid name */ + } + + ZFS_ENTER(zfsvfs); + + if (strlen(ap->a_name) >= ZAP_MAXNAMELEN) { + error = ENAMETOOLONG; + goto out; + } + + if (ap->a_options & XATTR_CREATE) + flag = ZNEW; /* expect no pre-existing entry */ + else if (ap->a_options & XATTR_REPLACE) + flag = ZEXISTS; /* expect an existing entry */ + else + flag = 0; + + + /* Preferentially store the xattr as a SA for better performance */ + if (zfsvfs->z_use_sa && zfsvfs->z_xattr_sa && zp->z_is_sa) { + char *value; + uint64_t size; + + rw_enter(&zp->z_xattr_lock, RW_READER); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + rw_exit(&zp->z_xattr_lock); + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + + /* New, expect it to not exist .. */ + if ((flag & ZNEW) && + (zpl_xattr_get_sa(vp, ap->a_name, NULL, 0) > 0)) { + error = EEXIST; + rw_exit(&zp->z_xattr_lock); + goto out; + } + + /* Replace, XATTR must exist .. */ + if ((flag & ZEXISTS) && + ((error = + zpl_xattr_get_sa(vp, ap->a_name, NULL, 0)) <= 0) && + error == -ENOENT) { + error = ENOATTR; + rw_exit(&zp->z_xattr_lock); + goto out; + } + + size = uio_resid(uio); + value = kmem_alloc(size, KM_SLEEP); + + size_t bytes; + + /* Copy in the xattr value */ + uiocopy((const char *)value, size, UIO_WRITE, + uio, &bytes); + + + /* Finderinfo checks */ + if (!error && bytes && + bcmp(ap->a_name, XATTR_FINDERINFO_NAME, + sizeof (XATTR_FINDERINFO_NAME)) == 0) { + + /* Must be 32 bytes */ + if (bytes != sizeof (emptyfinfo)) { + error = ERANGE; + rw_exit(&zp->z_xattr_lock); + kmem_free(value, size); + goto out; + } + + /* According to HFS we are to zero out some fields */ + finderinfo_update((uint8_t *)value, zp); + } + + error = zpl_xattr_set_sa(vp, ap->a_name, + value, bytes, + flag, cr); + rw_exit(&zp->z_xattr_lock); + kmem_free(value, size); + + goto out; + } + + /* Legacy xattr */ + + if ((error = zfs_get_xattrdir(zp, &xdzp, cr, CREATE_XATTR_DIR))) { + goto out; + } + + /* Lookup or create the named attribute. */ + error = zpl_obtain_xattr(xdzp, ap->a_name, VTOZ(vp)->z_mode, cr, + &xvp, flag); + if (error) + goto out; + + /* Write the attribute data. 
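+ * The existing value is truncated away first (setxattr replaces), then + * the new contents are written with zfs_write().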
*/ + ASSERT(uio != NULL); + + /* OsX setxattr() replaces xattrs */ + error = zfs_freesp(VTOZ(xvp), 0, 0, VTOZ(vp)->z_mode, TRUE); + + /* Special case for Finderinfo */ + if (!error && uio && + bcmp(ap->a_name, XATTR_FINDERINFO_NAME, + sizeof (XATTR_FINDERINFO_NAME)) == 0) { + + u_int8_t finderinfo[32]; + + /* Read the attribute data. */ + /* FinderInfo is 32 bytes */ + if ((user_size_t)uio_resid(uio) < 32) { + error = ERANGE; + goto out; + } + + /* Copy in the finderinfo to our space */ + error = uiomove((const char *)&finderinfo, + sizeof (finderinfo), 0, uio); + if (error) + goto out; + + /* Zero out some fields, according to HFS */ + finderinfo_update((uint8_t *)&finderinfo, zp); + + /* + * TODO: + * When writing FINDERINFO, we need to replace the + * ADDEDTIME date with actual crtime and not let + * userland overwrite it. + */ + + /* Empty Finderinfo is non-existent. */ + if (bcmp(finderinfo, emptyfinfo, sizeof (emptyfinfo)) == 0) { + /* Attempt to delete it? */ + error = zfs_remove(xdzp, (char *)ap->a_name, cr, 0); + goto out; + } + + /* Build a new uio to call zfs_write() to make it go in txg */ + uio_t *luio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE); + if (luio == NULL) { + error = ENOMEM; + goto out; + } + uio_addiov(luio, (user_addr_t)&finderinfo, sizeof (finderinfo)); + + error = zfs_write(xvp, luio, 0, cr); + + if (uio_resid(luio) != 0) + error = ERANGE; + + uio_free(luio); + + goto out; + } /* Finderinfo */ + + /* Write XATTR to disk */ + error = zfs_write(xvp, uio, 0, cr); + +out: + + if (error == ENOENT) + error = ENOATTR; + + if (xdzp) { + zrele(xdzp); + } + if (xvp) { + VN_RELE(xvp); + } + + ZFS_EXIT(zfsvfs); + dprintf("-setxattr vp %p: err %d: resid %llx\n", ap->a_vp, error, + uio_resid(ap->a_uio)); + return (error); +} + +int +zfs_vnop_removexattr_int(zfsvfs_t *zfsvfs, znode_t *zp, const char *name, + cred_t *cr) +{ + struct vnode *vp = ZTOV(zp); + struct componentname cn = { 0 }; + int error; + uint64_t xattr; + znode_t *xdzp = NULL, *xzp = NULL; + + dprintf("+removexattr_int vp %p '%s'\n", vp, name); + + ZFS_ENTER(zfsvfs); + + /* + * Recursive attributes are not allowed. + */ + if (zp->z_pflags & ZFS_XATTR) { + error = EINVAL; + goto out; + } + + if (zfsvfs->z_use_sa && zfsvfs->z_xattr_sa && zp->z_is_sa) { + nvlist_t *nvl; + + rw_enter(&zp->z_xattr_lock, RW_READER); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + rw_exit(&zp->z_xattr_lock); + + nvl = zp->z_xattr_cached; + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + + dprintf("ZFS: removexattr nvlist_remove said %d\n", error); + if (!error) { + /* Update the SA for adds, modss, and removals. */ + error = -zfs_sa_set_xattr(zp); + rw_exit(&zp->z_xattr_lock); + goto out; + } + rw_exit(&zp->z_xattr_lock); + } + + sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr, sizeof (xattr)); + if (xattr == 0) { + error = ENOATTR; + goto out; + } + + /* Grab the hidden attribute directory vnode. */ + if ((error = zfs_get_xattrdir(zp, &xdzp, cr, 0))) { + goto out; + } + + cn.cn_namelen = strlen(name)+1; + cn.cn_nameptr = (char *)kmem_zalloc(cn.cn_namelen, KM_SLEEP); + + /* Lookup the attribute name. 
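+ * If the entry exists in the hidden xattr directory it is removed with + * zfs_remove() below; ENOENT is reported to the caller as ENOATTR.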
*/ + if ((error = zfs_dirlook(xdzp, (char *)name, &xzp, 0, NULL, + &cn))) { + if (error == ENOENT) + error = ENOATTR; + goto out; + } + + error = zfs_remove(xdzp, (char *)name, cr, /* flags */0); + +out: + if (cn.cn_nameptr) + kmem_free(cn.cn_nameptr, cn.cn_namelen); + + if (xzp) { + zrele(xzp); + } + if (xdzp) { + zrele(xdzp); + } + + ZFS_EXIT(zfsvfs); + if (error) dprintf("%s vp %p: error %d\n", __func__, vp, error); + return (error); +} + +int +zfs_vnop_removexattr(struct vnop_removexattr_args *ap) +#if 0 + struct vnop_removexattr_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + char *a_name; + int a_options; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + dprintf("+removexattr vp %p '%s'\n", ap->a_vp, ap->a_name); + + /* xattrs disabled? */ + if (zfsvfs->z_xattr == B_FALSE) { + return (ENOTSUP); + } + + return (zfs_vnop_removexattr_int(zfsvfs, zp, ap->a_name, cr)); +} + + +int +zfs_vnop_listxattr(struct vnop_listxattr_args *ap) +#if 0 + struct vnop_listxattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + uio_t a_uio; + size_t *a_size; + int a_options; + vfs_context_t a_context; + }; +#endif +{ + DECLARE_CRED(ap); + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + struct uio *uio = ap->a_uio; + zap_cursor_t zc; + zap_attribute_t za; + objset_t *os; + size_t size = 0; + char *nameptr; + char nfd_name[ZAP_MAXNAMELEN]; + size_t namelen; + int error = 0; + uint64_t xattr; + int force_formd_normalized_output; + znode_t *xdzp = NULL; + + dprintf("+listxattr vp %p: \n", ap->a_vp); + + /* xattrs disabled? */ + if (zfsvfs->z_xattr == B_FALSE) { + return (EINVAL); + } + + ZFS_ENTER(zfsvfs); + + /* + * Recursive attributes are not allowed. + */ + if (zp->z_pflags & ZFS_XATTR) { + error = EINVAL; + goto out; + } + + if (zfsvfs->z_use_sa && zp->z_is_sa && zp->z_xattr_cached) { + nvpair_t *nvp = NULL; + + rw_enter(&zp->z_xattr_lock, RW_READER); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + rw_exit(&zp->z_xattr_lock); + + while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != + NULL) { + ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); + + namelen = strlen(nvpair_name(nvp)) + 1; /* Null byte */ + + /* Just checking for space requirements? */ + if (uio == NULL) { + size += namelen; + } else { + if (namelen > uio_resid(uio)) { + error = ERANGE; + break; + } + dprintf("ZFS: listxattr '%s'\n", + nvpair_name(nvp)); + error = uiomove((caddr_t)nvpair_name(nvp), + namelen, UIO_READ, uio); + if (error) + break; + } + } /* while nvlist */ + } /* SA xattr */ + if (error) goto out; + + /* Do we even have any attributes? */ + if (sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr, + sizeof (xattr)) || (xattr == 0)) { + goto out; /* all done */ + } + + /* Grab the hidden attribute directory vnode. */ + if (zfs_get_xattrdir(zp, &xdzp, cr, 0) != 0) { + goto out; + } + os = zfsvfs->z_os; + + for (zap_cursor_init(&zc, os, xdzp->z_id); + zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { + if (xattr_protected(za.za_name)) + continue; /* skip */ + /* + * Mac OS X: non-ascii names are UTF-8 NFC on disk + * so convert to NFD before exporting them. 
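+ * (utf8_normalizestr() below does the NFC to NFD conversion into + * nfd_name for names that are not plain ASCII.)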
+ */ + namelen = strlen(za.za_name); + + if (!is_ascii_str(za.za_name)) + force_formd_normalized_output = 1; + else + force_formd_normalized_output = 0; + + if (force_formd_normalized_output && + utf8_normalizestr((const u_int8_t *)za.za_name, namelen, + (u_int8_t *)nfd_name, &namelen, sizeof (nfd_name), + UTF_DECOMPOSED) == 0) { + nameptr = nfd_name; + } else { + nameptr = &za.za_name[0]; + } + ++namelen; /* account for NULL termination byte */ + if (uio == NULL) { + size += namelen; + } else { + if (namelen > uio_resid(uio)) { + error = ERANGE; + break; + } + error = uiomove((caddr_t)nameptr, namelen, UIO_READ, + uio); + if (error) + break; + } + } + zap_cursor_fini(&zc); +out: + if (uio == NULL) { + *ap->a_size = size; + } + if (xdzp) { + zrele(xdzp); + } + + ZFS_EXIT(zfsvfs); + if (error) { + dprintf("%s vp %p: error %d size %ld\n", __func__, + ap->a_vp, error, size); + } + return (error); +} + +#ifdef HAVE_NAMED_STREAMS +int +zfs_vnop_getnamedstream(struct vnop_getnamedstream_args *ap) +#if 0 + struct vnop_getnamedstream_args { + struct vnode *a_vp; + struct vnode **a_svpp; + char *a_name; + }; +#endif +{ + DECLARE_CRED(ap); + struct vnode *vp = ap->a_vp; + struct vnode **svpp = ap->a_svpp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + struct componentname cn = { 0 }; + int error = ENOATTR; + znode_t *xdzp = NULL; + znode_t *xzp = NULL; + + dprintf("+getnamedstream vp %p '%s': op %u\n", ap->a_vp, ap->a_name, + ap->a_operation); + + *svpp = NULLVP; + + ZFS_ENTER(zfsvfs); + + /* + * Mac OS X only supports the "com.apple.ResourceFork" stream. + */ + if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, + sizeof (XATTR_RESOURCEFORK_NAME)) != 0) + goto out; + + /* Only regular files */ + if (!vnode_isreg(vp)) { + error = EPERM; + goto out; + } + + /* Grab the hidden attribute directory vnode. */ + if (zfs_get_xattrdir(zp, &xdzp, cr, 0) != 0) + goto out; + + cn.cn_namelen = strlen(ap->a_name) + 1; + cn.cn_nameptr = (char *)kmem_zalloc(cn.cn_namelen, KM_SLEEP); + + /* Lookup the attribute name. */ + if ((error = zfs_dirlook(xdzp, (char *)ap->a_name, &xzp, 0, NULL, + &cn))) { + if (error == ENOENT) + error = ENOATTR; + } else { + *svpp = ZTOV(xzp); + } + + kmem_free(cn.cn_nameptr, cn.cn_namelen); + +out: + if (xdzp) + zrele(xdzp); + +#if 0 // Disabled, not sure it's required and empty vnodes are odd. + /* + * If the lookup is NS_OPEN, they are accessing "..namedfork/rsrc" + * to which we should return 0 with empty vp to empty file. + * See hfs_vnop_getnamedstream() + */ + if ((error == ENOATTR) && + ap->a_operation == NS_OPEN) { + + if ((error = zfs_get_xattrdir(zp, &xdvp, cr, + CREATE_XATTR_DIR)) == 0) { + /* Lookup or create the named attribute.
*/ + error = zpl_obtain_xattr(VTOZ(xdvp), ap->a_name, + VTOZ(vp)->z_mode, cr, ap->a_svpp, + ZNEW); + vnode_put(xdvp); + } + } +#endif + + ZFS_EXIT(zfsvfs); + if (error) dprintf("%s vp %p: error %d\n", __func__, ap->a_vp, error); + return (error); +} + +int +zfs_vnop_makenamedstream(struct vnop_makenamedstream_args *ap) +#if 0 + struct vnop_makenamedstream_args { + struct vnode *a_vp; + struct vnode **a_svpp; + char *a_name; + }; +#endif +{ + DECLARE_CRED(ap); + struct vnode *vp = ap->a_vp; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + struct componentname cn; + struct vnode_attr vattr; + int error = 0; + znode_t *xdzp = NULL; + znode_t *xzp = NULL; + + dprintf("+makenamedstream vp %p: '%s'\n", ap->a_vp, ap->a_name); + + *ap->a_svpp = NULLVP; + + ZFS_ENTER(zfsvfs); + + /* Only regular files can have a resource fork stream. */ + if (!vnode_isreg(vp)) { + error = EPERM; + goto out; + } + + /* + * Mac OS X only supports the "com.apple.ResourceFork" stream. + */ + if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, + sizeof (XATTR_RESOURCEFORK_NAME)) != 0) { + error = ENOATTR; + goto out; + } + + /* Grab the hidden attribute directory vnode. */ + if ((error = zfs_get_xattrdir(zp, &xdzp, cr, CREATE_XATTR_DIR))) + goto out; + + bzero(&cn, sizeof (cn)); + cn.cn_nameiop = CREATE; + cn.cn_flags = ISLASTCN; + cn.cn_nameptr = (char *)ap->a_name; + cn.cn_namelen = strlen(cn.cn_nameptr); + + VATTR_INIT(&vattr); + VATTR_SET(&vattr, va_type, VREG); + VATTR_SET(&vattr, va_mode, VTOZ(vp)->z_mode & ~S_IFMT); + + error = zfs_create(xdzp, (char *)ap->a_name, &vattr, NONEXCL, + VTOZ(vp)->z_mode, &xzp, cr, 0, NULL); + + if (error == 0) + *ap->a_svpp = ZTOV(xzp); + +out: + if (xdzp) + zrele(xdzp); + + ZFS_EXIT(zfsvfs); + if (error) dprintf("%s vp %p: error %d\n", __func__, ap->a_vp, error); + return (error); +} + +int +zfs_vnop_removenamedstream(struct vnop_removenamedstream_args *ap) +#if 0 + struct vnop_removenamedstream_args { + struct vnode *a_vp; + struct vnode **a_svpp; + char *a_name; + }; +#endif +{ + struct vnode *svp = ap->a_svp; + znode_t *zp = VTOZ(svp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error = 0; + + dprintf("zfs_vnop_removenamedstream: %p '%s'\n", + svp, ap->a_name); + ZFS_ENTER(zfsvfs); + + /* + * Mac OS X only supports the "com.apple.ResourceFork" stream. + */ + if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, + sizeof (XATTR_RESOURCEFORK_NAME)) != 0) { + error = ENOATTR; + goto out; + } + + /* ### MISING CODE ### */ + /* + * It turns out that even though APPLE uses makenamedstream() to + * create a stream, for example compression, they use vnop_removexattr + * to delete it, so this appears not in use. + */ + dprintf("zfs_vnop_removenamedstream\n"); + error = EPERM; +out: + ZFS_EXIT(zfsvfs); + return (ENOTSUP); +} +#endif /* HAVE_NAMED_STREAMS */ + +/* + * The Darwin kernel's HFS+ appears to implement this by two methods, + * + * if (ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) is set + * ** Copy the data of the files over (including rsrc) + * + * if not set + * ** exchange FileID between the two nodes, copy over vnode information + * like that of *time records, uid/gid, flags, mode, linkcount, + * finderinfo, c_desc, c_attr, c_flag, and cache_purge(). 
+ * + * This call is deprecated in 10.8 + */ +int +zfs_vnop_exchange(struct vnop_exchange_args *ap) +#if 0 + struct vnop_exchange_args { + struct vnode *a_fvp; + struct vnode *a_tvp; + int a_options; + vfs_context_t a_context; + }; +#endif +{ + vnode_t *fvp = ap->a_fvp; + vnode_t *tvp = ap->a_tvp; + znode_t *fzp; + zfsvfs_t *zfsvfs; + + /* The files must be on the same volume. */ + if (vnode_mount(fvp) != vnode_mount(tvp)) { + dprintf("%s fvp and tvp not in same mountpoint\n", + __func__); + return (EXDEV); + } + + if (fvp == tvp) { + dprintf("%s fvp == tvp\n", __func__); + return (EINVAL); + } + + /* Only normal files can be exchanged. */ + if (!vnode_isreg(fvp) || !vnode_isreg(tvp)) { + dprintf("%s fvp or tvp is not a regular file\n", + __func__); + return (EINVAL); + } + + fzp = VTOZ(fvp); + zfsvfs = fzp->z_zfsvfs; + + ZFS_ENTER(zfsvfs); + + /* ADD MISSING CODE HERE */ + + ZFS_EXIT(zfsvfs); + printf("vnop_exchange: ENOTSUP\n"); + return (ENOTSUP); +} + +int +zfs_vnop_revoke(struct vnop_revoke_args *ap) +#if 0 + struct vnop_revoke_args { + struct vnode *a_vp; + int a_flags; + vfs_context_t a_context; + }; +#endif +{ + return (vn_revoke(ap->a_vp, ap->a_flags, ap->a_context)); +} + +int +zfs_vnop_blktooff(struct vnop_blktooff_args *ap) +#if 0 + struct vnop_blktooff_args { + struct vnode *a_vp; + daddr64_t a_lblkno; + off_t *a_offset; + }; +#endif +{ + dprintf("vnop_blktooff: 0\n"); + return (ENOTSUP); +} + +int +zfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) +#if 0 + struct vnop_offtoblk_args { + struct vnode *a_vp; + off_t a_offset; + daddr64_t *a_lblkno; + }; +#endif +{ + dprintf("+vnop_offtoblk\n"); + return (ENOTSUP); +} + +int +zfs_vnop_blockmap(struct vnop_blockmap_args *ap) +#if 0 + struct vnop_blockmap_args { + struct vnode *a_vp; + off_t a_foffset; + size_t a_size; + daddr64_t *a_bpn; + size_t *a_run; + void *a_poff; + int a_flags; +}; +#endif +{ + dprintf("+vnop_blockmap\n"); + return (ENOTSUP); + +#if 0 + znode_t *zp; + zfsvfs_t *zfsvfs; + + ASSERT(ap); + ASSERT(ap->a_vp); + ASSERT(ap->a_size); + + if (!ap->a_bpn) { + return (0); + } + + if (vnode_isdir(ap->a_vp)) { + return (ENOTSUP); + } + + zp = VTOZ(ap->a_vp); + if (!zp) + return (ENODEV); + + zfsvfs = zp->z_zfsvfs; + if (!zfsvfs) + return (ENODEV); + + /* Return full request size as contiguous */ + if (ap->a_run) { + // *ap->a_run = ap->a_size; + *ap->a_run = 0; + } + if (ap->a_poff) { + *((int *)(ap->a_poff)) = 0; + /* + * returning offset of -1 asks the + * caller to zero the ranges + */ + // *((int *)(ap->a_poff)) = -1; + } + *ap->a_bpn = 0; +// *ap->a_bpn = (daddr64_t)(ap->a_foffset / zfsvfs->z_max_blksz); + + dprintf("%s ret %lu %d %llu\n", __func__, + ap->a_size, *((int *)(ap->a_poff)), *((uint64_t *)(ap->a_bpn))); + + return (0); +#endif +} + +int +zfs_vnop_strategy(struct vnop_strategy_args *ap) +#if 0 + struct vnop_strategy_args { + struct buf *a_bp; + }; +#endif +{ + dprintf("vnop_strategy: 0\n"); + return (ENOTSUP); +} + +int +zfs_vnop_select(struct vnop_select_args *ap) +#if 0 + struct vnop_select_args { + struct vnode *a_vp; + int a_which; + int a_fflags; + kauth_cred_t a_cred; + void *a_wql; + struct proc *a_p; + }; +#endif +{ + dprintf("vnop_select: 1\n"); + return (1); +} + +#ifdef WITH_READDIRATTR +int +zfs_vnop_readdirattr(struct vnop_readdirattr_args *ap) +#if 0 + struct vnop_readdirattr_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct attrlist *a_alist; + struct uio *a_uio; + ulong_t a_maxcount; + ulong_t a_options; + ulong_t *a_newstate; + int *a_eofflag; + ulong_t *a_actualcount; 
+ vfs_context_t a_context; + }; +#endif +{ + struct vnode *vp = ap->a_vp; + struct attrlist *alp = ap->a_alist; + struct uio *uio = ap->a_uio; + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zap_cursor_t zc; + zap_attribute_t zap; + attrinfo_t attrinfo; + int maxcount = ap->a_maxcount; + uint64_t offset = (uint64_t)uio_offset(uio); + u_int32_t fixedsize; + u_int32_t maxsize; + u_int32_t attrbufsize; + void *attrbufptr = NULL; + void *attrptr; + void *varptr; /* variable-length storage area */ + boolean_t user64 = vfs_context_is64bit(ap->a_context); + int prefetch = 0; + int error = 0; + +#if 0 + dprintf("+vnop_readdirattr\n"); +#endif + + *(ap->a_actualcount) = 0; + *(ap->a_eofflag) = 0; + + /* + * Check for invalid options or invalid uio. + */ + if (((ap->a_options & ~(FSOPT_NOINMEMUPDATE | FSOPT_NOFOLLOW)) != 0) || + (uio_resid(uio) <= 0) || (maxcount <= 0)) { + dprintf("%s invalid argument\n"); + return (EINVAL); + } + /* + * Reject requests for unsupported attributes. + */ + if ((alp->bitmapcount != ZFS_ATTR_BIT_MAP_COUNT) || + (alp->commonattr & ~ZFS_ATTR_CMN_VALID) || + (alp->dirattr & ~ZFS_ATTR_DIR_VALID) || + (alp->fileattr & ~ZFS_ATTR_FILE_VALID) || + (alp->volattr != 0 || alp->forkattr != 0)) { + dprintf("%s unsupported attr\n"); + return (EINVAL); + } + /* + * Check if we should prefetch znodes + */ + if ((alp->commonattr & ~ZFS_DIR_ENT_ATTRS) || + (alp->dirattr != 0) || (alp->fileattr != 0)) { + prefetch = TRUE; + } + + /* + * Setup a buffer to hold the packed attributes. + */ + fixedsize = sizeof (u_int32_t) + getpackedsize(alp, user64); + maxsize = fixedsize; + if (alp->commonattr & ATTR_CMN_NAME) + maxsize += ZAP_MAXNAMELEN + 1; + attrbufptr = (void*)kmem_alloc(maxsize, KM_SLEEP); + if (attrbufptr == NULL) { + dprintf("%s kmem_alloc failed\n"); + return (ENOMEM); + } + attrptr = attrbufptr; + varptr = (char *)attrbufptr + fixedsize; + + attrinfo.ai_attrlist = alp; + attrinfo.ai_varbufend = (char *)attrbufptr + maxsize; + attrinfo.ai_context = ap->a_context; + + ZFS_ENTER(zfsvfs); + + /* + * Initialize the zap iterator cursor. + */ + + if (offset <= 3) { + /* + * Start iteration from the beginning of the directory. + */ + zap_cursor_init(&zc, zfsvfs->z_os, zp->z_id); + } else { + /* + * The offset is a serialized cursor. + */ + zap_cursor_init_serialized(&zc, zfsvfs->z_os, zp->z_id, offset); + } + + while (1) { + ino64_t objnum; + enum vtype vtype = VNON; + znode_t *tmp_zp = NULL; + + /* + * Note that the low 4 bits of the cookie returned by zap is + * always zero. This allows us to use the low nibble for + * "special" entries: + * We use 0 for '.', and 1 for '..' (ignored here). + * If this is the root of the filesystem, we use the offset 2 + * for the *'.zfs' directory. + */ + if (offset <= 1) { + offset = 2; + continue; + } else if (offset == 2 && zfs_show_ctldir(zp)) { + (void) strlcpy(zap.za_name, ZFS_CTLDIR_NAME, + MAXNAMELEN); + objnum = ZFSCTL_INO_ROOT; + vtype = VDIR; + } else { + /* + * Grab next entry. 
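+ * zap_cursor_retrieve() returns ENOENT at end of directory, which is + * reported through a_eofflag.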
+ */ + if ((error = zap_cursor_retrieve(&zc, &zap))) { + *(ap->a_eofflag) = (error == ENOENT); + goto update; + } + + if (zap.za_integer_length != 8 || + zap.za_num_integers != 1) { + error = ENXIO; + goto update; + } + + objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); + vtype = DTTOVT(ZFS_DIRENT_TYPE(zap.za_first_integer)); + /* Check if vtype is MIA */ + if ((vtype == 0) && !prefetch && (alp->dirattr || + alp->fileattr || + (alp->commonattr & ATTR_CMN_OBJTYPE))) { + prefetch = 1; + } + } + + /* Grab znode if required */ + if (prefetch) { + dmu_prefetch(zfsvfs->z_os, objnum, 0, 0); + if ((error = zfs_zget(zfsvfs, objnum, &tmp_zp)) == 0) { + if (vtype == VNON) { + /* SA_LOOKUP? */ + vtype = IFTOVT(tmp_zp->z_mode); + } + } else { + tmp_zp = NULL; + error = ENXIO; + goto skip_entry; + /* + * Currently ".zfs" entry is skipped, as we have + * no methods to pack that into the attrs (all + * helper functions take znode_t *, and .zfs is + * not a znode_t *). Add dummy .zfs code here if + * it is desirable to show .zfs in Finder. + */ + } + } + + /* + * Setup for the next item's attribute list + */ + *((u_int32_t *)attrptr) = 0; /* byte count slot */ + attrptr = ((u_int32_t *)attrptr) + 1; /* fixed attr start */ + attrinfo.ai_attrbufpp = &attrptr; + attrinfo.ai_varbufpp = &varptr; + + /* + * Pack entries into attribute buffer. + */ + if (alp->commonattr) { + commonattrpack(&attrinfo, zfsvfs, tmp_zp, zap.za_name, + objnum, vtype, user64); + } + if (alp->dirattr && vtype == VDIR) { + dirattrpack(&attrinfo, tmp_zp); + } + if (alp->fileattr && vtype != VDIR) { + fileattrpack(&attrinfo, zfsvfs, tmp_zp); + } + /* All done with tmp znode. */ + if (prefetch && tmp_zp) { + vnode_put(ZTOV(tmp_zp)); + tmp_zp = NULL; + } + attrbufsize = ((char *)varptr - (char *)attrbufptr); + + /* + * Make sure there's enough buffer space remaining. + */ + if (uio_resid(uio) < 0 || + attrbufsize > (u_int32_t)uio_resid(uio)) { + break; + } else { + *((u_int32_t *)attrbufptr) = attrbufsize; + error = uiomove((caddr_t)attrbufptr, attrbufsize, + UIO_READ, uio); + if (error != 0) + break; + attrptr = attrbufptr; + /* Point to variable-length storage */ + varptr = (char *)attrbufptr + fixedsize; + *(ap->a_actualcount) += 1; + + /* + * Move to the next entry, fill in the previous offset. 
+ */ + skip_entry: + if ((offset > 2) || ((offset == 2) && + !zfs_show_ctldir(zp))) { + zap_cursor_advance(&zc); + offset = zap_cursor_serialize(&zc); + } else { + offset += 1; + } + + /* Termination checks */ + if (--maxcount <= 0 || uio_resid(uio) < 0 || + (u_int32_t)uio_resid(uio) < (fixedsize + + ZAP_AVENAMELEN)) { + break; + } + } + } +update: + zap_cursor_fini(&zc); + + if (attrbufptr) { + kmem_free(attrbufptr, maxsize); + } + if (error == ENOENT) { + error = 0; + } + ZFS_ACCESSTIME_STAMP(zfsvfs, zp); + + /* XXX newstate TBD */ + *ap->a_newstate = zp->z_atime[0] + zp->z_atime[1]; + uio_setoffset(uio, offset); + + ZFS_EXIT(zfsvfs); + dprintf("-readdirattr: error %d\n", error); + return (error); +} +#endif + + +#ifdef WITH_SEARCHFS +int +zfs_vnop_searchfs(struct vnop_searchfs_args *ap) +#if 0 + struct vnop_searchfs_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + void *a_searchparams1; + void *a_searchparams2; + struct attrlist *a_searchattrs; + ulong_t a_maxmatches; + struct timeval *a_timelimit; + struct attrlist *a_returnattrs; + ulong_t *a_nummatches; + ulong_t a_scriptcode; + ulong_t a_options; + struct uio *a_uio; + struct searchstate *a_searchstate; + vfs_context_t a_context; + }; +#endif +{ + printf("vnop_searchfs called, type %d\n", vnode_vtype(ap->a_vp)); + + *(ap->a_nummatches) = 0; + + return (ENOTSUP); +} +#endif + + + +/* + * Predeclare these here so that the compiler assumes that this is an "old + * style" function declaration that does not include arguments so that we won't + * get type mismatch errors in the initializations that follow. + */ +static int zfs_inval(void); +static int zfs_isdir(void); + +static int +zfs_inval() +{ + dprintf("ZFS: Bad vnop: returning EINVAL\n"); + return (EINVAL); +} + +static int +zfs_isdir() +{ + dprintf("ZFS: Bad vnop: returning EISDIR\n"); + return (EISDIR); +} + + +#define VOPFUNC int (*)(void *) + +/* Directory vnode operations template */ +int (**zfs_dvnodeops) (void *); +struct vnodeopv_entry_desc zfs_dvnodeops_template[] = { + {&vnop_default_desc, (VOPFUNC)vn_default_error }, + {&vnop_lookup_desc, (VOPFUNC)zfs_vnop_lookup}, + {&vnop_create_desc, (VOPFUNC)zfs_vnop_create}, + {&vnop_whiteout_desc, (VOPFUNC)zfs_vnop_whiteout}, + {&vnop_mknod_desc, (VOPFUNC)zfs_vnop_mknod}, + {&vnop_open_desc, (VOPFUNC)zfs_vnop_open}, + {&vnop_close_desc, (VOPFUNC)zfs_vnop_close}, + {&vnop_access_desc, (VOPFUNC)zfs_vnop_access}, + {&vnop_getattr_desc, (VOPFUNC)zfs_vnop_getattr}, + {&vnop_setattr_desc, (VOPFUNC)zfs_vnop_setattr}, + {&vnop_read_desc, (VOPFUNC)zfs_isdir}, + {&vnop_write_desc, (VOPFUNC)zfs_isdir}, + {&vnop_ioctl_desc, (VOPFUNC)zfs_vnop_ioctl}, + {&vnop_select_desc, (VOPFUNC)zfs_isdir}, + {&vnop_bwrite_desc, (VOPFUNC)zfs_isdir}, + {&vnop_fsync_desc, (VOPFUNC)zfs_vnop_fsync}, + {&vnop_remove_desc, (VOPFUNC)zfs_vnop_remove}, + {&vnop_link_desc, (VOPFUNC)zfs_vnop_link}, + {&vnop_rename_desc, (VOPFUNC)zfs_vnop_rename}, +#if defined(MAC_OS_X_VERSION_10_12) && \ + (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12) + {&vnop_renamex_desc, (VOPFUNC)zfs_vnop_renamex}, +#endif + {&vnop_mkdir_desc, (VOPFUNC)zfs_vnop_mkdir}, + {&vnop_rmdir_desc, (VOPFUNC)zfs_vnop_rmdir}, + {&vnop_symlink_desc, (VOPFUNC)zfs_vnop_symlink}, + {&vnop_readdir_desc, (VOPFUNC)zfs_vnop_readdir}, + {&vnop_inactive_desc, (VOPFUNC)zfs_vnop_inactive}, + {&vnop_reclaim_desc, (VOPFUNC)zfs_vnop_reclaim}, + {&vnop_pathconf_desc, (VOPFUNC)zfs_vnop_pathconf}, + {&vnop_revoke_desc, (VOPFUNC)zfs_vnop_revoke}, + {&vnop_getxattr_desc, (VOPFUNC)zfs_vnop_getxattr}, + 
{&vnop_setxattr_desc, (VOPFUNC)zfs_vnop_setxattr}, + {&vnop_removexattr_desc, (VOPFUNC)zfs_vnop_removexattr}, + {&vnop_listxattr_desc, (VOPFUNC)zfs_vnop_listxattr}, +#ifdef WITH_READDIRATTR + {&vnop_readdirattr_desc, (VOPFUNC)zfs_vnop_readdirattr}, +#endif +#ifdef WITH_SEARCHFS + {&vnop_searchfs_desc, (VOPFUNC)zfs_vnop_searchfs}, +#endif + {NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_dvnodeop_opv_desc = +{ &zfs_dvnodeops, zfs_dvnodeops_template }; + +/* Regular file vnode operations template */ +int (**zfs_fvnodeops) (void *); +struct vnodeopv_entry_desc zfs_fvnodeops_template[] = { + {&vnop_default_desc, (VOPFUNC)vn_default_error }, + {&vnop_whiteout_desc, (VOPFUNC)zfs_vnop_whiteout}, + {&vnop_open_desc, (VOPFUNC)zfs_vnop_open}, + {&vnop_close_desc, (VOPFUNC)zfs_vnop_close}, + {&vnop_access_desc, (VOPFUNC)zfs_vnop_access}, + {&vnop_getattr_desc, (VOPFUNC)zfs_vnop_getattr}, + {&vnop_setattr_desc, (VOPFUNC)zfs_vnop_setattr}, + {&vnop_read_desc, (VOPFUNC)zfs_vnop_read}, + {&vnop_write_desc, (VOPFUNC)zfs_vnop_write}, + {&vnop_ioctl_desc, (VOPFUNC)zfs_vnop_ioctl}, + {&vnop_select_desc, (VOPFUNC)zfs_vnop_select}, + {&vnop_fsync_desc, (VOPFUNC)zfs_vnop_fsync}, + {&vnop_inactive_desc, (VOPFUNC)zfs_vnop_inactive}, + {&vnop_reclaim_desc, (VOPFUNC)zfs_vnop_reclaim}, + {&vnop_pathconf_desc, (VOPFUNC)zfs_vnop_pathconf}, + {&vnop_bwrite_desc, (VOPFUNC)zfs_inval}, + {&vnop_pagein_desc, (VOPFUNC)zfs_vnop_pagein}, +#if HAVE_PAGEOUT_V2 + {&vnop_pageout_desc, (VOPFUNC)zfs_vnop_pageoutv2}, +#else + {&vnop_pageout_desc, (VOPFUNC)zfs_vnop_pageout}, +#endif + {&vnop_mmap_desc, (VOPFUNC)zfs_vnop_mmap}, + {&vnop_mnomap_desc, (VOPFUNC)zfs_vnop_mnomap}, + {&vnop_blktooff_desc, (VOPFUNC)zfs_vnop_blktooff}, + {&vnop_offtoblk_desc, (VOPFUNC)zfs_vnop_offtoblk}, + {&vnop_blockmap_desc, (VOPFUNC)zfs_vnop_blockmap}, + {&vnop_strategy_desc, (VOPFUNC)zfs_vnop_strategy}, + {&vnop_allocate_desc, (VOPFUNC)zfs_vnop_allocate}, + {&vnop_revoke_desc, (VOPFUNC)zfs_vnop_revoke}, + {&vnop_exchange_desc, (VOPFUNC)zfs_vnop_exchange}, + {&vnop_getxattr_desc, (VOPFUNC)zfs_vnop_getxattr}, + {&vnop_setxattr_desc, (VOPFUNC)zfs_vnop_setxattr}, + {&vnop_removexattr_desc, (VOPFUNC)zfs_vnop_removexattr}, + {&vnop_listxattr_desc, (VOPFUNC)zfs_vnop_listxattr}, +#ifdef HAVE_NAMED_STREAMS + {&vnop_getnamedstream_desc, (VOPFUNC)zfs_vnop_getnamedstream}, + {&vnop_makenamedstream_desc, (VOPFUNC)zfs_vnop_makenamedstream}, + {&vnop_removenamedstream_desc, (VOPFUNC)zfs_vnop_removenamedstream}, +#endif +#ifdef WITH_SEARCHFS + {&vnop_searchfs_desc, (VOPFUNC)zfs_vnop_searchfs}, +#endif + {NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_fvnodeop_opv_desc = +{ &zfs_fvnodeops, zfs_fvnodeops_template }; + +/* Symbolic link vnode operations template */ +int (**zfs_symvnodeops) (void *); +struct vnodeopv_entry_desc zfs_symvnodeops_template[] = { + {&vnop_default_desc, (VOPFUNC)vn_default_error }, + {&vnop_open_desc, (VOPFUNC)zfs_vnop_open}, + {&vnop_close_desc, (VOPFUNC)zfs_vnop_close}, + {&vnop_access_desc, (VOPFUNC)zfs_vnop_access}, + {&vnop_getattr_desc, (VOPFUNC)zfs_vnop_getattr}, + {&vnop_setattr_desc, (VOPFUNC)zfs_vnop_setattr}, + {&vnop_ioctl_desc, (VOPFUNC)zfs_vnop_ioctl}, + {&vnop_readlink_desc, (VOPFUNC)zfs_vnop_readlink}, + {&vnop_inactive_desc, (VOPFUNC)zfs_vnop_inactive}, + {&vnop_reclaim_desc, (VOPFUNC)zfs_vnop_reclaim}, + {&vnop_pathconf_desc, (VOPFUNC)zfs_vnop_pathconf}, + {&vnop_revoke_desc, (VOPFUNC)zfs_vnop_revoke}, + {&vnop_getxattr_desc, (VOPFUNC)zfs_vnop_getxattr}, + {&vnop_setxattr_desc, (VOPFUNC)zfs_vnop_setxattr}, + 
{&vnop_removexattr_desc, (VOPFUNC)zfs_vnop_removexattr}, + {&vnop_listxattr_desc, (VOPFUNC)zfs_vnop_listxattr}, + {NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_symvnodeop_opv_desc = +{ &zfs_symvnodeops, zfs_symvnodeops_template }; + +/* Extended attribtue directory vnode operations template */ +int (**zfs_xdvnodeops) (void *); +struct vnodeopv_entry_desc zfs_xdvnodeops_template[] = { + {&vnop_default_desc, (VOPFUNC)vn_default_error }, + {&vnop_lookup_desc, (VOPFUNC)zfs_vnop_lookup}, + {&vnop_create_desc, (VOPFUNC)zfs_vnop_create}, + {&vnop_whiteout_desc, (VOPFUNC)zfs_vnop_whiteout}, + {&vnop_mknod_desc, (VOPFUNC)zfs_inval}, + {&vnop_open_desc, (VOPFUNC)zfs_vnop_open}, + {&vnop_close_desc, (VOPFUNC)zfs_vnop_close}, + {&vnop_access_desc, (VOPFUNC)zfs_vnop_access}, + {&vnop_getattr_desc, (VOPFUNC)zfs_vnop_getattr}, + {&vnop_setattr_desc, (VOPFUNC)zfs_vnop_setattr}, + {&vnop_read_desc, (VOPFUNC)zfs_vnop_read}, + {&vnop_write_desc, (VOPFUNC)zfs_vnop_write}, + {&vnop_ioctl_desc, (VOPFUNC)zfs_vnop_ioctl}, + {&vnop_select_desc, (VOPFUNC)zfs_vnop_select}, + {&vnop_fsync_desc, (VOPFUNC)zfs_vnop_fsync}, + {&vnop_remove_desc, (VOPFUNC)zfs_vnop_remove}, + {&vnop_link_desc, (VOPFUNC)zfs_vnop_link}, + {&vnop_rename_desc, (VOPFUNC)zfs_vnop_rename}, + {&vnop_mkdir_desc, (VOPFUNC)zfs_inval}, + {&vnop_rmdir_desc, (VOPFUNC)zfs_vnop_rmdir}, + {&vnop_symlink_desc, (VOPFUNC)zfs_inval}, + {&vnop_readdir_desc, (VOPFUNC)zfs_vnop_readdir}, + {&vnop_inactive_desc, (VOPFUNC)zfs_vnop_inactive}, + {&vnop_reclaim_desc, (VOPFUNC)zfs_vnop_reclaim}, + {&vnop_pathconf_desc, (VOPFUNC)zfs_vnop_pathconf}, + {NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_xdvnodeop_opv_desc = +{ &zfs_xdvnodeops, zfs_xdvnodeops_template }; + +/* Error vnode operations template */ +int (**zfs_evnodeops) (void *); +struct vnodeopv_entry_desc zfs_evnodeops_template[] = { + {&vnop_default_desc, (VOPFUNC)vn_default_error }, + {&vnop_inactive_desc, (VOPFUNC)zfs_vnop_inactive}, + {&vnop_reclaim_desc, (VOPFUNC)zfs_vnop_reclaim}, + {&vnop_pathconf_desc, (VOPFUNC)zfs_vnop_pathconf}, + {NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_evnodeop_opv_desc = +{ &zfs_evnodeops, zfs_evnodeops_template }; + +int (**zfs_fifonodeops)(void *); +struct vnodeopv_entry_desc zfs_fifonodeops_template[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, + { &vnop_create_desc, (VOPFUNC)fifo_create }, + { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, + { &vnop_open_desc, (VOPFUNC)fifo_open }, + { &vnop_close_desc, (VOPFUNC)fifo_close }, + { &vnop_getattr_desc, (VOPFUNC)zfs_vnop_getattr }, + { &vnop_setattr_desc, (VOPFUNC)zfs_vnop_setattr }, + { &vnop_read_desc, (VOPFUNC)fifo_read }, + { &vnop_write_desc, (VOPFUNC)fifo_write }, + { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, + { &vnop_select_desc, (VOPFUNC)fifo_select }, + { &vnop_revoke_desc, (VOPFUNC)fifo_revoke }, + { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, + { &vnop_fsync_desc, (VOPFUNC)zfs_vnop_fsync }, + { &vnop_remove_desc, (VOPFUNC)fifo_remove }, + { &vnop_link_desc, (VOPFUNC)fifo_link }, + { &vnop_rename_desc, (VOPFUNC)fifo_rename }, + { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, + { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir }, + { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, + { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, + { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, + { &vnop_inactive_desc, (VOPFUNC)zfs_vnop_inactive }, + { &vnop_reclaim_desc, (VOPFUNC)zfs_vnop_reclaim }, + { &vnop_strategy_desc, (VOPFUNC)fifo_strategy }, + { &vnop_pathconf_desc, 
(VOPFUNC)fifo_pathconf }, + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, + { &vnop_bwrite_desc, (VOPFUNC)zfs_inval }, + { &vnop_pagein_desc, (VOPFUNC)zfs_vnop_pagein }, +#if HAVE_PAGEOUT_V2 + { &vnop_pageout_desc, (VOPFUNC)zfs_vnop_pageoutv2 }, +#else + { &vnop_pageout_desc, (VOPFUNC)zfs_vnop_pageout }, +#endif + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, + { &vnop_blktooff_desc, (VOPFUNC)zfs_vnop_blktooff }, + { &vnop_offtoblk_desc, (VOPFUNC)zfs_vnop_offtoblk }, + { &vnop_blockmap_desc, (VOPFUNC)zfs_vnop_blockmap }, + { &vnop_getxattr_desc, (VOPFUNC)zfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)zfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)zfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)zfs_vnop_listxattr}, + { (struct vnodeop_desc *)NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_fifonodeop_opv_desc = + { &zfs_fifonodeops, zfs_fifonodeops_template }; + + +/* + * .zfs/snapdir vnops + */ +int (**zfs_ctldirops) (void *); +struct vnodeopv_entry_desc zfs_ctldir_template[] = { + {&vnop_default_desc, (VOPFUNC)vn_default_error }, + {&vnop_lookup_desc, (VOPFUNC)zfsctl_vnop_lookup}, + {&vnop_getattr_desc, (VOPFUNC)zfsctl_vnop_getattr}, + {&vnop_readdir_desc, (VOPFUNC)zfsctl_vnop_readdir}, + {&vnop_mkdir_desc, (VOPFUNC)zfsctl_vnop_mkdir}, + {&vnop_rmdir_desc, (VOPFUNC)zfsctl_vnop_rmdir}, + /* We also need to define these for the top ones to work */ + {&vnop_open_desc, (VOPFUNC)zfsctl_vnop_open}, + {&vnop_close_desc, (VOPFUNC)zfsctl_vnop_close}, + {&vnop_access_desc, (VOPFUNC)zfsctl_vnop_access}, + {&vnop_inactive_desc, (VOPFUNC)zfsctl_vnop_inactive}, + {&vnop_reclaim_desc, (VOPFUNC)zfsctl_vnop_reclaim}, + {&vnop_revoke_desc, (VOPFUNC)err_revoke}, + {&vnop_fsync_desc, (VOPFUNC)nop_fsync}, + {NULL, (VOPFUNC)NULL } +}; +struct vnodeopv_desc zfs_ctldir_opv_desc = +{ &zfs_ctldirops, zfs_ctldir_template }; + +/* + * Get new vnode for znode. + * + * This function uses zp->z_zfsvfs, zp->z_mode, zp->z_flags, zp->z_id and sets + * zp->z_vnode and zp->z_vid. 
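
The seven per-type operation tables defined above (directories, regular files, symlinks, extended-attribute directories, error vnodes, fifos, and the .zfs control directory) are handed to XNU as an array of vnodeopv_desc pointers. The sketch below shows one plausible way the zfs_vnodeop_opv_desc_list / ZFS_VNOP_TBL_CNT names referenced by zfs_vfsops_init() further down could be assembled; the actual list is not in this hunk, so treat the contents as an assumption.

/*
 * Illustrative sketch only (not taken from this patch): gather the
 * vnodeopv_desc tables for vfs_fsadd().  Registration fills in the
 * zfs_dvnodeops/zfs_fvnodeops/... vectors that zfs_znode_getvnode()
 * later installs through vnfs_vops.
 */
#define ZFS_VNOP_TBL_CNT	7

struct vnodeopv_desc *zfs_vnodeop_opv_desc_list[ZFS_VNOP_TBL_CNT] = {
	&zfs_dvnodeop_opv_desc,		/* directories */
	&zfs_fvnodeop_opv_desc,		/* regular files, VSOCK/VBLK/VCHR */
	&zfs_symvnodeop_opv_desc,	/* symlinks */
	&zfs_xdvnodeop_opv_desc,	/* extended attribute directories */
	&zfs_evnodeop_opv_desc,		/* error vnodes */
	&zfs_fifonodeop_opv_desc,	/* fifos */
	&zfs_ctldir_opv_desc,		/* .zfs snapdir control directory */
};
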
+ */ +int +zfs_znode_getvnode(znode_t *zp, zfsvfs_t *zfsvfs) +{ + struct vnode_fsparam vfsp; + struct vnode *vp = NULL; + + dprintf("getvnode zp %p with vp %p zfsvfs %p vfs %p\n", zp, vp, + zfsvfs, zfsvfs->z_vfs); + + if (zp->z_vnode) + panic("zp %p vnode already set\n", zp->z_vnode); + + bzero(&vfsp, sizeof (vfsp)); + vfsp.vnfs_str = "zfs"; + vfsp.vnfs_mp = zfsvfs->z_vfs; + vfsp.vnfs_vtype = IFTOVT((mode_t)zp->z_mode); + vfsp.vnfs_fsnode = zp; + vfsp.vnfs_flags = VNFS_ADDFSREF; + + /* Tag root directory */ + if (zp->z_id == zfsvfs->z_root) { + vfsp.vnfs_markroot = 1; + } + + switch (vfsp.vnfs_vtype) { + case VDIR: + if (zp->z_pflags & ZFS_XATTR) { + vfsp.vnfs_vops = zfs_xdvnodeops; + } else { + vfsp.vnfs_vops = zfs_dvnodeops; + } + zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ + break; + case VBLK: + case VCHR: + { + uint64_t rdev; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), + &rdev, sizeof (rdev)) == 0); + + vfsp.vnfs_rdev = zfs_cmpldev(rdev); + } + /* FALLTHROUGH */ + case VSOCK: + vfsp.vnfs_vops = zfs_fvnodeops; + break; + case VFIFO: + vfsp.vnfs_vops = zfs_fifonodeops; + break; + case VREG: + vfsp.vnfs_vops = zfs_fvnodeops; + vfsp.vnfs_filesize = zp->z_size; + break; + case VLNK: + vfsp.vnfs_vops = zfs_symvnodeops; +#if 0 + vfsp.vnfs_filesize = ???; +#endif + break; + default: + vfsp.vnfs_vops = zfs_fvnodeops; + printf("ZFS: Warning, error-vnops selected: vtype %d\n", + vfsp.vnfs_vtype); + break; + } + + /* + * vnode_create() has a habit of calling both vnop_reclaim() and + * vnop_fsync(), which can create havok as we are already holding locks. + */ + + while (vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp) != 0) { + kpreempt(KPREEMPT_SYNC); + } + atomic_inc_64(&vnop_num_vnodes); + + printf("Assigned zp %p with vp %p zfsvfs %p\n", zp, vp, zp->z_zfsvfs); + + /* + * Unfortunately, when it comes to IOCTL_GET_BOOT_INFO and getting + * the volume finderinfo, XNU checks the tags, and only acts on + * HFS. So we have to set it to HFS on the root. It is pretty gross + * but until XNU adds supporting code.. + * The only place we use tags in ZFS is ctldir checking for VT_OTHER + */ + if (zp->z_id == zfsvfs->z_root) + vnode_settag(vp, VT_HFS); + else + vnode_settag(vp, VT_ZFS); + + zp->z_vid = vnode_vid(vp); + zp->z_vnode = vp; + + /* + * OS X Finder is hardlink agnostic, so we need to mark vp's that + * are hardlinks, so that it forces a lookup each time, ignoring + * the name cache. + */ + if ((zp->z_links > 1) && (IFTOVT((mode_t)zp->z_mode) == VREG)) + vnode_setmultipath(vp); + + return (0); +} + + +/* + * Called by taskq, to call zfs_znode_getvnode( vnode_create( - and + * attach vnode to znode. + */ +void +zfs_znode_asyncgetvnode_impl(void *arg) +{ + znode_t *zp = (znode_t *)arg; + VERIFY3P(zp, !=, NULL); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + VERIFY3P(zfsvfs, !=, NULL); + + // Attach vnode, done as different thread + zfs_znode_getvnode(zp, zfsvfs); + + // Wake up anyone blocked on us + mutex_enter(&zp->z_attach_lock); + taskq_init_ent(&zp->z_attach_taskq); + cv_broadcast(&zp->z_attach_cv); + mutex_exit(&zp->z_attach_lock); + +} + + +/* + * If the znode's vnode is not yet attached (zp->z_vnode == NULL) + * we call taskq_wait to wait for it to complete. + * We guarantee znode has a vnode at the return of function only + * when return is "0". On failure to wait, it returns -1, and caller + * may consider waiting by other means. 
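
A minimal caller-side sketch of the asynchronous attach protocol described below, assuming the zget_ext()/ZGET_FLAG_ASYNC entry point named in the comments (the exact zfs_zget_ext() signature is an assumption): holds obtained with ZGET_FLAG_ASYNC must be released with zfs_znode_asyncput(), which waits for the taskq attach to finish before dropping the iocount.

/*
 * Illustration only -- not part of this patch.  zfs_zget_ext() and its
 * argument order are assumed from the comments in this file.
 */
static int
example_async_hold(zfsvfs_t *zfsvfs, uint64_t obj)
{
	znode_t *zp = NULL;
	int error;

	error = zfs_zget_ext(zfsvfs, obj, &zp, ZGET_FLAG_ASYNC);
	if (error != 0)
		return (error);

	/* ... work that does not need zp->z_vnode yet ... */

	zfs_znode_asyncput(zp);		/* not VN_RELE(ZTOV(zp)) */
	return (0);
}
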
+ */
+int
+zfs_znode_asyncwait(znode_t *zp)
+{
+	int ret = -1;
+	zfsvfs_t *zfsvfs;
+
+	if (zp == NULL)
+		return (ret);
+
+	zfsvfs = zp->z_zfsvfs;
+	if (zfsvfs == NULL)
+		return (ret);
+
+	ZFS_ENTER_IFERROR(zfsvfs)
+		goto out;
+
+	if (zfsvfs->z_os == NULL)
+		goto out;
+
+	// Work out if we need to block, that is, we have
+	// no vnode AND a taskq was launched. Unsure if we should
+	// look inside taskqent node like this.
+	mutex_enter(&zp->z_attach_lock);
+	if (zp->z_vnode == NULL &&
+	    zp->z_attach_taskq.tqent_func != NULL) {
+		// We need to block and wait for taskq to finish.
+		cv_wait(&zp->z_attach_cv, &zp->z_attach_lock);
+		ret = 0;
+	}
+	mutex_exit(&zp->z_attach_lock);
+
+out:
+	ZFS_EXIT(zfsvfs);
+	return (ret);
+}
+
+/*
+ * Called in place of VN_RELE() for the places that use ZGET_FLAG_ASYNC.
+ */
+void
+zfs_znode_asyncput_impl(znode_t *zp)
+{
+	// Make sure the other thread finished zfs_znode_getvnode();
+	// This may block, if waiting is required.
+	zfs_znode_asyncwait(zp);
+
+	// Safe to release now that it is attached.
+	VN_RELE(ZTOV(zp));
+}
+
+/*
+ * Called in place of VN_RELE() for the places that use ZGET_FLAG_ASYNC,
+ * where we also taskq it - as we come from reclaim.
+ */
+void
+zfs_znode_asyncput(znode_t *zp)
+{
+	dsl_pool_t *dp = dmu_objset_pool(zp->z_zfsvfs->z_os);
+	taskq_t *tq = dsl_pool_zrele_taskq(dp);
+
+	VERIFY3P(tq, !=, NULL);
+
+	VERIFY(taskq_dispatch(
+	    (taskq_t *)tq,
+	    (task_func_t *)zfs_znode_asyncput_impl, zp, TQ_SLEEP) != 0);
+}
+
+/*
+ * Attach a new vnode to the znode asynchronously. We do this using
+ * a taskq to call it, and then wait to release the iocount.
+ * Callers of zget_ext(..., ZGET_FLAG_ASYNC) will use
+ * zfs_znode_asyncput(zp) instead of VN_RELE(vp).
+ */
+int
+zfs_znode_asyncgetvnode(znode_t *zp, zfsvfs_t *zfsvfs)
+{
+	VERIFY(zp != NULL);
+	VERIFY(zfsvfs != NULL);
+
+	// We should not have a vnode here.
+	VERIFY3P(ZTOV(zp), ==, NULL);
+
+	dsl_pool_t *dp = dmu_objset_pool(zfsvfs->z_os);
+	taskq_t *tq = dsl_pool_zrele_taskq(dp);
+	VERIFY3P(tq, !=, NULL);
+
+	mutex_enter(&zp->z_attach_lock);
+	taskq_dispatch_ent(tq,
+	    (task_func_t *)zfs_znode_asyncgetvnode_impl,
+	    zp,
+	    TQ_SLEEP,
+	    &zp->z_attach_taskq);
+	mutex_exit(&zp->z_attach_lock);
+	return (0);
+}
+
+
+
+/*
+ * Maybe these should live in vfsops
+ */
+int
+zfs_vfsops_init(void)
+{
+	struct vfs_fsentry vfe;
+
+	/* Start thread to notify Finder of changes */
+	zfs_start_notify_thread();
+
+	vfe.vfe_vfsops = &zfs_vfsops_template;
+	vfe.vfe_vopcnt = ZFS_VNOP_TBL_CNT;
+	vfe.vfe_opvdescs = zfs_vnodeop_opv_desc_list;
+
+	strlcpy(vfe.vfe_fsname, "zfs", MFSNAMELEN);
+
+	/*
+	 * Note: must set VFS_TBLGENERICMNTARGS with VFS_TBLLOCALVOL
+	 * to suppress local mount argument handling.
+ */ + vfe.vfe_flags = VFS_TBLTHREADSAFE | VFS_TBLNOTYPENUM | VFS_TBLLOCALVOL | + VFS_TBL64BITREADY | VFS_TBLNATIVEXATTR | VFS_TBLGENERICMNTARGS | + VFS_TBLREADDIR_EXTENDED; + +#if HAVE_PAGEOUT_V2 + vfe.vfe_flags |= VFS_TBLVNOP_PAGEOUTV2; +#endif + +#ifdef VFS_TBLCANMOUNTROOT // From 10.12 + vfe.vfe_flags |= VFS_TBLCANMOUNTROOT; +#endif + + vfe.vfe_reserv[0] = 0; + vfe.vfe_reserv[1] = 0; + + if (vfs_fsadd(&vfe, &zfs_vfsconf) != 0) + return (KERN_FAILURE); + else + return (KERN_SUCCESS); +} + +int +zfs_vfsops_fini(void) +{ + + zfs_stop_notify_thread(); + + return (vfs_fsremove(zfs_vfsconf)); +} diff --git a/module/os/macos/zfs/zfs_vnops_osx_lib.c b/module/os/macos/zfs/zfs_vnops_osx_lib.c new file mode 100644 index 0000000000..7504d71013 --- /dev/null +++ b/module/os/macos/zfs/zfs_vnops_osx_lib.c @@ -0,0 +1,2232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013 Will Andrews + * Copyright (c) 2013, 2020 Jorgen Lundman + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +extern int zfs_vnop_force_formd_normalized_output; /* disabled by default */ + +/* + * Unfortunately Apple defines "KAUTH_VNODE_ACCESS (1<<31)" which + * generates: "warning: signed shift result (0x80000000) sets the + * sign bit of the shift expression's type ('int') and becomes negative." + * So until they fix their define, we override it here. 
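
The sign-extension problem described in the comment above can be shown in isolation with a few lines of user-space C; this is purely an illustration, not part of the patch, and the fix that follows simply redefines the constant as an unsigned 64-bit value.

/*
 * Standalone illustration (not part of this patch): (1<<31) is evaluated
 * as a signed int, so storing it in a 64-bit rights mask sign-extends
 * into the upper 32 bits; 1ULL<<31 yields the intended single bit.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t as_int = (1 << 31);	/* 0xffffffff80000000 after sign-extension */
	uint64_t as_ull = (1ULL << 31);	/* 0x0000000080000000 */

	printf("%#llx vs %#llx\n",
	    (unsigned long long)as_int, (unsigned long long)as_ull);
	return (0);
}
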
+ */ + +#if KAUTH_VNODE_ACCESS == 0x80000000 +#undef KAUTH_VNODE_ACCESS +#define KAUTH_VNODE_ACCESS (1ULL<<31) +#endif + + + +int zfs_hardlink_addmap(znode_t *zp, uint64_t parentid, uint32_t linkid); + +/* Originally from illumos:uts/common/sys/vfs.h */ +typedef uint64_t vfs_feature_t; +#define VFSFT_XVATTR 0x100000001 /* Supports xvattr for attrs */ +#define VFSFT_CASEINSENSITIVE 0x100000002 /* Supports case-insensitive */ +#define VFSFT_NOCASESENSITIVE 0x100000004 /* NOT case-sensitive */ +#define VFSFT_DIRENTFLAGS 0x100000008 /* Supports dirent flags */ +#define VFSFT_ACLONCREATE 0x100000010 /* Supports ACL on create */ +#define VFSFT_ACEMASKONACCESS 0x100000020 /* Can use ACEMASK for access */ +#define VFSFT_SYSATTR_VIEWS 0x100000040 /* Supports sysattr view i/f */ +#define VFSFT_ACCESS_FILTER 0x100000080 /* dirents filtered by access */ +#define VFSFT_REPARSE 0x100000100 /* Supports reparse point */ +#define VFSFT_ZEROCOPY_SUPPORTED 0x100000200 /* Supports loaning buffers */ + +#define ZFS_SUPPORTED_VATTRS \ + (VNODE_ATTR_va_mode | \ + VNODE_ATTR_va_uid | \ + VNODE_ATTR_va_gid | \ + VNODE_ATTR_va_fsid | \ + VNODE_ATTR_va_fileid | \ + VNODE_ATTR_va_nlink | \ + VNODE_ATTR_va_data_size | \ + VNODE_ATTR_va_total_size | \ + VNODE_ATTR_va_rdev | \ + VNODE_ATTR_va_gen | \ + VNODE_ATTR_va_create_time | \ + VNODE_ATTR_va_access_time | \ + VNODE_ATTR_va_modify_time | \ + VNODE_ATTR_va_change_time | \ + VNODE_ATTR_va_backup_time | \ + VNODE_ATTR_va_flags | \ + VNODE_ATTR_va_parentid | \ + VNODE_ATTR_va_iosize | \ + VNODE_ATTR_va_filerev | \ + VNODE_ATTR_va_type | \ + VNODE_ATTR_va_encoding | \ + 0) + +// VNODE_ATTR_va_uuuid | +// VNODE_ATTR_va_guuid | + + + + + + + + +/* + * fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string + * + * input: + * str - string to hash + * hval - previous hash value or 0 if first call + * + * returns: + * 32 bit hash as a static hash type + * + * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the + * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str(). + */ +uint32_t +fnv_32a_str(const char *str, uint32_t hval) +{ + unsigned char *s = (unsigned char *)str; /* unsigned string */ + + /* + * FNV-1a hash each octet in the buffer + */ + while (*s) { + + /* xor the bottom with the current octet */ + hval ^= (uint32_t)*s++; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ +#if defined(NO_FNV_GCC_OPTIMIZATION) + hval *= FNV_32_PRIME; +#else + hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + + (hval<<24); +#endif + } + + /* return our new hash value */ + return (hval); +} + +/* + * fnv_32a_buf - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a buffer + * + * input: + * buf- start of buffer to hash + * len- length of buffer in octets + * hval- previous hash value or 0 if first call + * + * returns: + * 32 bit hash as a static hash type + * + * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the + * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str(). 
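
The #else branch of the hash loops here replaces the multiply by FNV_32_PRIME with shifts and adds: the prime 0x01000193 is 2^24 + 2^8 + 2^7 + 2^4 + 2^1 + 2^0, so the two forms are identical modulo 2^32. A small self-check, assuming the standard FNV-1a constants (offset basis 0x811c9dc5, prime 16777619); illustration only, not part of the patch.

/* Verify the shift/add form equals the multiply form for each octet. */
#include <stdint.h>
#include <assert.h>

int
main(void)
{
	uint32_t h = 0x811c9dc5;		/* FNV1_32A_INIT */
	const char *s = "example";

	while (*s) {
		uint32_t a = h, b = h;

		a ^= (uint32_t)*s;
		b ^= (uint32_t)*s++;
		a *= 16777619U;			/* FNV_32_PRIME */
		b += (b<<1) + (b<<4) + (b<<7) + (b<<8) + (b<<24);
		assert(a == b);
		h = a;
	}
	return (0);
}
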
+ */ +uint32_t +fnv_32a_buf(void *buf, size_t len, uint32_t hval) +{ + unsigned char *bp = (unsigned char *)buf; /* start of buffer */ + unsigned char *be = bp + len; /* beyond end of buffer */ + + /* + * FNV-1a hash each octet in the buffer + */ + while (bp < be) { + + /* xor the bottom with the current octet */ + hval ^= (uint32_t)*bp++; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ +#if defined(NO_FNV_GCC_OPTIMIZATION) + hval *= FNV_32_PRIME; +#else + hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + + (hval<<24); +#endif + } + + /* return our new hash value */ + return (hval); +} + +int +zfs_getattr_znode_unlocked(struct vnode *vp, vattr_t *vap) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error = 0; + uint64_t parent; + sa_bulk_attr_t bulk[4]; + int count = 0; +#ifdef VNODE_ATTR_va_addedtime + uint64_t addtime[2] = { 0 }; +#endif + int ishardlink = 0; + + // printf("getattr_osx\n"); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + if (VATTR_IS_ACTIVE(vap, va_acl)) { + // printf("want acl\n"); + VATTR_RETURN(vap, va_uuuid, kauth_null_guid); + VATTR_RETURN(vap, va_guuid, kauth_null_guid); + + // dprintf("Calling getacl\n"); + if ((error = zfs_getacl(zp, &vap->va_acl, B_FALSE, NULL))) { + // dprintf("zfs_getacl returned error %d\n", error); + error = 0; + } else { + + VATTR_SET_SUPPORTED(vap, va_acl); + /* va_acl implies va_uuuid & va_guuid are supported. */ + VATTR_RETURN(vap, va_uuuid, kauth_null_guid); + VATTR_RETURN(vap, va_guuid, kauth_null_guid); + } + + } + + mutex_enter(&zp->z_lock); + + ishardlink = ((zp->z_links > 1) && + (IFTOVT((mode_t)zp->z_mode) == VREG)) ? 1 : 0; + if (zp->z_finder_hardlink == TRUE) + ishardlink = 1; + else if (ishardlink) + zp->z_finder_hardlink = TRUE; + + /* Work out which SA we need to fetch */ + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + + /* + * Unfortunately, sa_bulk_lookup does not let you handle optional + * SA entries - so have to look up the optionals individually. 
+ */ + error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count); + if (error) { + dprintf("ZFS: Warning: getattr failed sa_bulk_lookup: %d, " + "parent %llu, flags %llu\n", error, parent, zp->z_pflags); + mutex_exit(&zp->z_lock); + ZFS_EXIT(zfsvfs); + return (0); + } + +#ifdef VNODE_ATTR_va_addedtime + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + sa_lookup(zp->z_sa_hdl, SA_ZPL_ADDTIME(zfsvfs), + &addtime, sizeof (addtime)); + } +#endif + + /* + * On Mac OS X we always export the root directory id as 2 + */ + vap->va_fileid = INO_ZFSTOXNU(zp->z_id, zfsvfs->z_root); + + vap->va_data_size = zp->z_size; + vap->va_total_size = zp->z_size; + // vap->va_gen = zp->z_gen; + vap->va_gen = 0; +#if defined(DEBUG) || defined(ZFS_DEBUG) + if (zp->z_gen != 0) + dprintf("%s: va_gen %lld -> 0\n", __func__, zp->z_gen); +#endif + + if (vnode_isdir(vp)) { + vap->va_nlink = zp->z_size; + } else { + vap->va_nlink = zp->z_links; + } + + + /* + * Carbon compatibility, pretend to support this legacy attribute + */ + if (VATTR_IS_ACTIVE(vap, va_backup_time)) { + vap->va_backup_time.tv_sec = 0; + vap->va_backup_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_backup_time); + } + vap->va_flags = zfs_getbsdflags(zp); + /* + * On Mac OS X we always export the root directory id as 2 + * and its parent as 1 + */ + if (zp->z_id == zfsvfs->z_root) + vap->va_parentid = 1; + else if (parent == zfsvfs->z_root) + vap->va_parentid = 2; + else + vap->va_parentid = parent; + + // Hardlinks: Return cached parentid, make it 2 if root. + if (ishardlink && zp->z_finder_parentid) + vap->va_parentid = (zp->z_finder_parentid == zfsvfs->z_root) ? + 2 : zp->z_finder_parentid; + + vap->va_iosize = zp->z_blksz ? zp->z_blksz : zfsvfs->z_max_blksz; + // vap->va_iosize = 512; + VATTR_SET_SUPPORTED(vap, va_iosize); + + /* Don't include '.' and '..' in the number of entries */ + if (VATTR_IS_ACTIVE(vap, va_nchildren) && vnode_isdir(vp)) { + VATTR_RETURN(vap, va_nchildren, vap->va_nlink - 2); + } + + /* + * va_dirlinkcount is the count of directory hard links. When a file + * system does not support ATTR_DIR_LINKCOUNT, xnu will default to 1. + * Since we claim to support ATTR_DIR_LINKCOUNT both as valid and as + * native, we'll just return 1. We set 1 for this value in dirattrpack + * as well. If in the future ZFS actually supports directory hard links, + * we can return a real value. + */ + if (VATTR_IS_ACTIVE(vap, va_dirlinkcount) && vnode_isdir(vp)) { + VATTR_RETURN(vap, va_dirlinkcount, 1); + } + + + if (VATTR_IS_ACTIVE(vap, va_data_alloc) || + VATTR_IS_ACTIVE(vap, va_total_alloc)) { + uint32_t blksize; + u_longlong_t nblks; + sa_object_size(zp->z_sa_hdl, &blksize, &nblks); + vap->va_data_alloc = (uint64_t)512LL * (uint64_t)nblks; + vap->va_total_alloc = vap->va_data_alloc; + vap->va_supported |= VNODE_ATTR_va_data_alloc | + VNODE_ATTR_va_total_alloc; + } + + if (VATTR_IS_ACTIVE(vap, va_name)) { + vap->va_name[0] = 0; + + if (!vnode_isvroot(vp)) { + + /* + * Finder (Carbon) relies on getattr returning the + * correct name for hardlinks to work, so we store the + * lookup name in vnop_lookup if file references are + * high, then set the return name here. + * If we also want ATTR_CMN_* lookups to work, we need + * to set a unique va_linkid for each entry, and based + * on the linkid in the lookup, return the correct name. + * It is set in zfs_vnop_lookup(). + * Since zap_value_search is a slow call, we only use + * it if we have not cached the name in vnop_lookup. 
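
The root/parent id remapping applied above (the dataset root is always exported to XNU as fileid 2 and its parent as 1, matching what macOS expects from HFS-style volumes) can be summarised in a tiny helper. This is only an illustration of the rule; the hypothetical function below does not exist in the patch.

/* Illustration only: map a ZFS parent object id to the id macOS sees. */
static uint64_t
example_parentid_to_xnu(zfsvfs_t *zfsvfs, znode_t *zp, uint64_t parent)
{
	if (zp->z_id == zfsvfs->z_root)
		return (1);		/* parent of the root directory */
	if (parent == zfsvfs->z_root)
		return (2);		/* the root directory itself */
	return (parent);
}
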
+ */ + + // Cached name, from vnop_lookup + if (ishardlink && + zp->z_name_cache[0]) { + + strlcpy(vap->va_name, zp->z_name_cache, + MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + + } else if (zp->z_name_cache[0]) { + + strlcpy(vap->va_name, zp->z_name_cache, + MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + + } else { + + // Go find the name. + if (zap_value_search(zfsvfs->z_os, parent, + zp->z_id, ZFS_DIRENT_OBJ(-1ULL), + vap->va_name) == 0) { + VATTR_SET_SUPPORTED(vap, va_name); + // Might as well keep this name too. + strlcpy(zp->z_name_cache, vap->va_name, + MAXPATHLEN); + } // zap_value_search + + } + + dprintf("getattr: %p return name '%s':%04llx\n", vp, + vap->va_name, vap->va_linkid); + + + } else { + /* + * The vroot objects must return a unique name for + * Finder to be able to distringuish between mounts. + * For this reason we simply return the fullname, + * from the statfs mountedfrom + * + * dataset mountpoint + * foo /bar + * As we used to return "foo" to ATTR_CMN_NAME of + * "/bar" we change this to return "bar" as expected. + */ + char *r, *osname; + osname = vfs_statfs(zfsvfs->z_vfs)->f_mntonname; + r = strrchr(osname, '/'); + strlcpy(vap->va_name, + r ? &r[1] : osname, + MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + dprintf("getattr root returning '%s'\n", vap->va_name); + } + } + + if (VATTR_IS_ACTIVE(vap, va_linkid)) { + + /* + * Apple needs a little extra care with HARDLINKs. All hardlink + * targets return the same va_fileid (POSIX) but also return + * a unique va_linkid. This we generate by hashing the (unique) + * name and store as va_linkid. However, Finder will call + * vfs_vget() with linkid and expect to receive the correct link + * target, so we need to add it to the AVL z_hardlinks. + */ + if (ishardlink) { + hardlinks_t *searchnode, *findnode; + avl_index_t loc; + + // If we don't have a linkid, make one. + searchnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + searchnode->hl_parent = vap->va_parentid; + searchnode->hl_fileid = zp->z_id; + strlcpy(searchnode->hl_name, zp->z_name_cache, + PATH_MAX); + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_READER); + findnode = avl_find(&zfsvfs->z_hardlinks, searchnode, + &loc); + rw_exit(&zfsvfs->z_hardlinks_lock); + kmem_free(searchnode, sizeof (hardlinks_t)); + + if (!findnode) { + static uint32_t zfs_hardlink_sequence = + 1ULL<<31; + uint32_t id; + + id = atomic_inc_32_nv(&zfs_hardlink_sequence); + + zfs_hardlink_addmap(zp, vap->va_parentid, id); + VATTR_RETURN(vap, va_linkid, id); + + } else { + VATTR_RETURN(vap, va_linkid, + findnode->hl_linkid); + } + + } else { // !ishardlink - use same as fileid + + VATTR_RETURN(vap, va_linkid, vap->va_fileid); + + } + + } // active linkid + + if (VATTR_IS_ACTIVE(vap, va_filerev)) { + VATTR_RETURN(vap, va_filerev, 0); + } + if (VATTR_IS_ACTIVE(vap, va_fsid)) { + VATTR_RETURN(vap, va_fsid, zfsvfs->z_rdev); + } + if (VATTR_IS_ACTIVE(vap, va_type)) { + VATTR_RETURN(vap, va_type, vnode_vtype(ZTOV(zp))); + } + if (VATTR_IS_ACTIVE(vap, va_encoding)) { + VATTR_RETURN(vap, va_encoding, kTextEncodingMacUnicode); + } +#ifdef VNODE_ATTR_va_addedtime + /* + * ADDEDTIME should come from finderinfo according to hfs_attrlist.c + * in ZFS we can use crtime, and add logic to getxattr finderinfo to + * copy the ADDEDTIME into the structure. 
See vnop_getxattr + */ + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + /* Lookup the ADDTIME if it exists, if not, use CRTIME */ + if ((addtime[0] == 0) && (addtime[1])) { + dprintf("ZFS: ADDEDTIME using crtime %llu (error %d)\n", + vap->va_crtime.tv_sec, error); + vap->va_addedtime.tv_sec = vap->va_crtime.tv_sec; + vap->va_addedtime.tv_nsec = vap->va_crtime.tv_nsec; + } else { + dprintf("ZFS: ADDEDTIME using addtime %llu\n", + addtime[0]); + ZFS_TIME_DECODE(&vap->va_addedtime, addtime); + } + VATTR_SET_SUPPORTED(vap, va_addedtime); + } +#endif +#ifdef VNODE_ATTR_va_fsid64 + if (VATTR_IS_ACTIVE(vap, va_fsid64)) { + vap->va_fsid64.val[0] = + vfs_statfs(zfsvfs->z_vfs)->f_fsid.val[0]; + vap->va_fsid64.val[1] = vfs_typenum(zfsvfs->z_vfs); + VATTR_SET_SUPPORTED(vap, va_fsid64); + } +#endif +#ifdef VNODE_ATTR_va_write_gencount + if (VATTR_IS_ACTIVE(vap, va_write_gencount)) { + if (!zp->z_write_gencount) + atomic_inc_64(&zp->z_write_gencount); + VATTR_RETURN(vap, va_write_gencount, + (uint32_t)zp->z_write_gencount); + } +#endif + +#ifdef VNODE_ATTR_va_document_id + if (VATTR_IS_ACTIVE(vap, va_document_id)) { + + if (!zp->z_document_id) { + zfs_setattr_generate_id(zp, parent, vap->va_name); + } + + VATTR_RETURN(vap, va_document_id, zp->z_document_id); + } +#endif /* VNODE_ATTR_va_document_id */ + + +#if 0 // Issue #192 + if (VATTR_IS_ACTIVE(vap, va_uuuid)) { + kauth_cred_uid2guid(zp->z_uid, &vap->va_uuuid); + } + if (VATTR_IS_ACTIVE(vap, va_guuid)) { + kauth_cred_gid2guid(zp->z_gid, &vap->va_guuid); + } +#endif + + if (ishardlink) { + dprintf("ZFS:getattr(%s,%llu,%llu) parent %llu: cache_parent " + "%llu: va_nlink %u\n", VATTR_IS_ACTIVE(vap, va_name) ? + vap->va_name : zp->z_name_cache, + vap->va_fileid, + VATTR_IS_ACTIVE(vap, va_linkid) ? vap->va_linkid : 0, + vap->va_parentid, + zp->z_finder_parentid, + vap->va_nlink); + } + + vap->va_supported |= ZFS_SUPPORTED_VATTRS; + uint64_t missing = 0; + missing = (vap->va_active ^ (vap->va_active & vap->va_supported)); + if (missing != 0) { + dprintf("vnop_getattr:: asked %08llx replied %08llx " + " missing %08llx\n", + vap->va_active, vap->va_supported, + missing); + } + + mutex_exit(&zp->z_lock); + + ZFS_EXIT(zfsvfs); + return (error); +} + +boolean_t +vfs_has_feature(vfs_t *vfsp, vfs_feature_t vfsft) +{ + + switch (vfsft) { + case VFSFT_CASEINSENSITIVE: + case VFSFT_NOCASESENSITIVE: + return (B_TRUE); + default: + return (B_FALSE); + } +} + +int +zfs_access_native_mode(struct vnode *vp, int *mode, cred_t *cr, + caller_context_t *ct) +{ + int accmode = *mode & (VREAD|VWRITE|VEXEC /* |VAPPEND */); + int error = 0; + int flag = 0; // FIXME + + if (accmode != 0) + error = zfs_access(vp, accmode, flag, cr); + + *mode &= ~(accmode); + + return (error); +} + +int +zfs_ioflags(int ap_ioflag) +{ + int flags = 0; + + if (ap_ioflag & IO_APPEND) + flags |= FAPPEND; + if (ap_ioflag & IO_NDELAY) + flags |= FNONBLOCK; + if (ap_ioflag & IO_SYNC) + flags |= (FSYNC | FDSYNC | FRSYNC); + + return (flags); +} + +int +zfs_vnop_ioctl_fullfsync(struct vnode *vp, vfs_context_t ct, zfsvfs_t *zfsvfs) +{ + int error; + + error = zfs_fsync(VTOZ(vp), /* syncflag */ 0, NULL); + if (error) + return (error); + + if (zfsvfs->z_log != NULL) + zil_commit(zfsvfs->z_log, 0); + else + txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); + return (0); +} + +uint32_t +zfs_getbsdflags(znode_t *zp) +{ + uint32_t bsdflags = 0; + uint64_t zflags = zp->z_pflags; + + if (zflags & ZFS_NODUMP) + bsdflags |= UF_NODUMP; + if (zflags & ZFS_UIMMUTABLE) + bsdflags |= UF_IMMUTABLE; + if (zflags & 
ZFS_UAPPENDONLY) + bsdflags |= UF_APPEND; + if (zflags & ZFS_OPAQUE) + bsdflags |= UF_OPAQUE; + if (zflags & ZFS_HIDDEN) + bsdflags |= UF_HIDDEN; + if (zflags & ZFS_TRACKED) + bsdflags |= UF_TRACKED; + if (zflags & ZFS_COMPRESSED) + bsdflags |= UF_COMPRESSED; + + if (zflags & ZFS_SIMMUTABLE) + bsdflags |= SF_IMMUTABLE; + if (zflags & ZFS_SAPPENDONLY) + bsdflags |= SF_APPEND; + /* + * Due to every file getting archive set automatically, and OSX + * don't let you move/copy it as a user, we disable archive connection + * for now + * if (zflags & ZFS_ARCHIVE) + * bsdflags |= SF_ARCHIVED; + */ + dprintf("getbsd changing zfs %08lx to osx %08lx\n", + zflags, bsdflags); + return (bsdflags); +} + +void +zfs_setbsdflags(znode_t *zp, uint32_t bsdflags) +{ + uint64_t zflags; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_FLAGS(zp->z_zfsvfs), + &zflags, sizeof (zflags)) == 0); + + if (bsdflags & UF_NODUMP) + zflags |= ZFS_NODUMP; + else + zflags &= ~ZFS_NODUMP; + + if (bsdflags & UF_IMMUTABLE) + zflags |= ZFS_UIMMUTABLE; + else + zflags &= ~ZFS_UIMMUTABLE; + + if (bsdflags & UF_APPEND) + zflags |= ZFS_UAPPENDONLY; + else + zflags &= ~ZFS_UAPPENDONLY; + + if (bsdflags & UF_OPAQUE) + zflags |= ZFS_OPAQUE; + else + zflags &= ~ZFS_OPAQUE; + + if (bsdflags & UF_HIDDEN) + zflags |= ZFS_HIDDEN; + else + zflags &= ~ZFS_HIDDEN; + + if (bsdflags & UF_TRACKED) + zflags |= ZFS_TRACKED; + else + zflags &= ~ZFS_TRACKED; + + if (bsdflags & UF_COMPRESSED) + zflags |= ZFS_COMPRESSED; + else + zflags &= ~ZFS_COMPRESSED; + + /* + * if (bsdflags & SF_ARCHIVED) + * zflags |= ZFS_ARCHIVE; + * else + * zflags &= ~ZFS_ARCHIVE; + */ + if (bsdflags & SF_IMMUTABLE) + zflags |= ZFS_SIMMUTABLE; + else + zflags &= ~ZFS_SIMMUTABLE; + + if (bsdflags & SF_APPEND) + zflags |= ZFS_SAPPENDONLY; + else + zflags &= ~ZFS_SAPPENDONLY; + + zp->z_pflags = zflags; + dprintf("setbsd changing osx %08lx to zfs %08lx\n", + bsdflags, zflags); + + /* + * (void )sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zp->z_zfsvfs), + * (void *)&zp->z_pflags, sizeof (uint64_t), tx); + */ +} + +/* + * Lookup/Create an extended attribute entry. + * + * Input arguments: + * dzp - znode for hidden attribute directory + * name - name of attribute + * flag - ZNEW: if the entry already exists, fail with EEXIST. + * ZEXISTS: if the entry does not exist, fail with ENOENT. + * + * Output arguments: + * vpp - pointer to the vnode for the entry (NULL if there isn't one) + * + * Return value: 0 on success or errno value on failure. + */ +int +zpl_obtain_xattr(znode_t *dzp, const char *name, mode_t mode, cred_t *cr, + vnode_t **vpp, int flag) +{ + znode_t *xzp = NULL; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zilog_t *zilog; + zfs_dirlock_t *dl; + dmu_tx_t *tx; + struct vnode_attr vattr; + int error; + struct componentname cn = { 0 }; + zfs_acl_ids_t acl_ids; + + /* zfs_dirent_lock() expects a component name */ + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); + zilog = zfsvfs->z_log; + + VATTR_INIT(&vattr); + VATTR_SET(&vattr, va_type, VREG); + VATTR_SET(&vattr, va_mode, mode & ~S_IFMT); + + if ((error = zfs_acl_ids_create(dzp, 0, + &vattr, cr, NULL, &acl_ids)) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + + cn.cn_namelen = strlen(name)+1; + cn.cn_nameptr = (char *)kmem_zalloc(cn.cn_namelen, KM_SLEEP); + +top: + /* Lock the attribute entry name. */ + if ((error = zfs_dirent_lock(&dl, dzp, (char *)name, &xzp, flag, + NULL, &cn))) { + goto out; + } + /* If the name already exists, we're done. 
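
zfs_getbsdflags() and zfs_setbsdflags() above translate between chflags(2)-style UF_ and SF_ bits and the ZFS z_pflags word. A hedged usage sketch follows; the helper name is hypothetical and not part of the patch.

/*
 * Illustration only: the two helpers above are intended to be inverses
 * for the bits they map, e.g. UF_HIDDEN <-> ZFS_HIDDEN (SF_ARCHIVED is
 * deliberately left unmapped, as noted in the comment above).
 */
static void
example_hide_in_finder(znode_t *zp)
{
	uint32_t bsd = zfs_getbsdflags(zp);

	zfs_setbsdflags(zp, bsd | UF_HIDDEN);
	ASSERT(zp->z_pflags & ZFS_HIDDEN);
}
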
*/ + if (xzp != NULL) { + zfs_dirent_unlock(dl); + goto out; + } + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); + dmu_tx_hold_zap(tx, dzp->z_id, TRUE, (char *)name); + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); + +#if 1 // FIXME + if (dzp->z_pflags & ZFS_INHERIT_ACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); + } +#endif + zfs_sa_upgrade_txholds(tx, dzp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + goto out; + } + + zfs_mknode(dzp, &vattr, tx, cr, 0, &xzp, &acl_ids); + + /* + * ASSERT(xzp->z_id == zoid); + */ + (void) zfs_link_create(dl, xzp, tx, ZNEW); + zfs_log_create(zilog, tx, TX_CREATE, dzp, xzp, (char *)name, + NULL /* vsecp */, 0 /* acl_ids.z_fuidp */, &vattr); + dmu_tx_commit(tx); + + /* + * OS X - attach the vnode _after_ committing the transaction + */ + zfs_znode_getvnode(xzp, zfsvfs); + + zfs_dirent_unlock(dl); +out: + zfs_acl_ids_free(&acl_ids); + if (cn.cn_nameptr) + kmem_free(cn.cn_nameptr, cn.cn_namelen); + + /* The REPLACE error if doesn't exist is ENOATTR */ + if ((flag & ZEXISTS) && (error == ENOENT)) + error = ENOATTR; + + if (xzp) + *vpp = ZTOV(xzp); + + ZFS_EXIT(zfsvfs); + return (error); +} + +/* + * ace_trivial: + * determine whether an ace_t acl is trivial + * + * Trivialness implies that the acl is composed of only + * owner, group, everyone entries. ACL can't + * have read_acl denied, and write_owner/write_acl/write_attributes + * can only be owner@ entry. + */ +int +ace_trivial_common(void *acep, int aclcnt, + uint64_t (*walk)(void *, uint64_t, int aclcnt, + uint16_t *, uint16_t *, uint32_t *)) +{ + uint16_t flags; + uint32_t mask; + uint16_t type; + uint64_t cookie = 0; + + while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) { + switch (flags & ACE_TYPE_FLAGS) { + case ACE_OWNER: + case ACE_GROUP|ACE_IDENTIFIER_GROUP: + case ACE_EVERYONE: + break; + default: + return (1); + + } + + if (flags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| + ACE_INHERIT_ONLY_ACE)) + return (1); + + /* + * Special check for some special bits + * + * Don't allow anybody to deny reading basic + * attributes or a files ACL. 
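
Per the header comment of zpl_obtain_xattr() above, the flag argument selects the create semantics: ZNEW fails with EEXIST if the attribute entry already exists, while ZEXISTS fails (remapped to ENOATTR) if it does not. Two hypothetical callers, for illustration only; the real vnop_getxattr/vnop_setxattr callers are elsewhere in the port.

/* Illustration only -- these wrappers are not part of the patch. */
static int
example_open_xattr_for_read(znode_t *dzp, const char *name, cred_t *cr,
    vnode_t **vpp)
{
	/* must already exist, otherwise ENOATTR */
	return (zpl_obtain_xattr(dzp, name, 0600, cr, vpp, ZEXISTS));
}

static int
example_create_xattr(znode_t *dzp, const char *name, cred_t *cr,
    vnode_t **vpp)
{
	/* create; fail with EEXIST if it is already there */
	return (zpl_obtain_xattr(dzp, name, 0600, cr, vpp, ZNEW));
}
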
+ */ + if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + (type == ACE_ACCESS_DENIED_ACE_TYPE)) + return (1); + + /* + * Delete permission is never set by default + */ + if (mask & ACE_DELETE) + return (1); + + /* + * Child delete permission should be accompanied by write + */ + if ((mask & ACE_DELETE_CHILD) && !(mask & ACE_WRITE_DATA)) + return (1); + /* + * only allow owner@ to have + * write_acl/write_owner/write_attributes/write_xattr/ + */ + + if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && + (!(flags & ACE_OWNER) && (mask & + (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| + ACE_WRITE_NAMED_ATTRS)))) + return (1); + + } + + return (0); +} + + +void +acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks) +{ + uint32_t read_mask = ACE_READ_DATA; + uint32_t write_mask = ACE_WRITE_DATA|ACE_APPEND_DATA; + uint32_t execute_mask = ACE_EXECUTE; + + if (isdir) + write_mask |= ACE_DELETE_CHILD; + + masks->deny1 = 0; + if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) + masks->deny1 |= read_mask; + if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) + masks->deny1 |= write_mask; + if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) + masks->deny1 |= execute_mask; + + masks->deny2 = 0; + if (!(mode & S_IRGRP) && (mode & S_IROTH)) + masks->deny2 |= read_mask; + if (!(mode & S_IWGRP) && (mode & S_IWOTH)) + masks->deny2 |= write_mask; + if (!(mode & S_IXGRP) && (mode & S_IXOTH)) + masks->deny2 |= execute_mask; + + masks->allow0 = 0; + if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) + masks->allow0 |= read_mask; + if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) + masks->allow0 |= write_mask; + if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) + masks->allow0 |= execute_mask; + + masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; + if (mode & S_IRUSR) + masks->owner |= read_mask; + if (mode & S_IWUSR) + masks->owner |= write_mask; + if (mode & S_IXUSR) + masks->owner |= execute_mask; + + masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IRGRP) + masks->group |= read_mask; + if (mode & S_IWGRP) + masks->group |= write_mask; + if (mode & S_IXGRP) + masks->group |= execute_mask; + + masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IROTH) + masks->everyone |= read_mask; + if (mode & S_IWOTH) + masks->everyone |= write_mask; + if (mode & S_IXOTH) + masks->everyone |= execute_mask; +} + +void commonattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp, + const char *name, ino64_t objnum, enum vtype vtype, + boolean_t user64) +{ + attrgroup_t commonattr = aip->ai_attrlist->commonattr; + void *attrbufptr = *aip->ai_attrbufpp; + void *varbufptr = *aip->ai_varbufpp; + struct mount *mp = zfsvfs->z_vfs; + cred_t *cr = (cred_t *)vfs_context_ucred(aip->ai_context); + finderinfo_t finderinfo; + + /* + * We should probably combine all the sa_lookup into a bulk + * lookup operand. 
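
As a worked example of acl_trivial_access_masks() just above (illustration only; the function below is hypothetical): a non-directory with mode 0644 produces no deny entries and no owner-only allow0 entry, just the three owner/group/everyone allow masks.

/*
 * For mode 0644, isdir == B_FALSE:
 *   deny1 = deny2 = allow0 = 0
 *   owner    = base owner bits | ACE_READ_DATA | ACE_WRITE_DATA | ACE_APPEND_DATA
 *   group    = base bits       | ACE_READ_DATA
 *   everyone = base bits       | ACE_READ_DATA
 * where "base bits" are the READ_ACL/READ_ATTRIBUTES/READ_NAMED_ATTRS/
 * SYNCHRONIZE constants (plus the WRITE_ACL/OWNER/ATTRIBUTES set for owner).
 */
static void
example_trivial_masks(void)
{
	trivial_acl_t masks;

	acl_trivial_access_masks(0644, B_FALSE, &masks);
	ASSERT0(masks.deny1);
	ASSERT0(masks.deny2);
	ASSERT0(masks.allow0);
	ASSERT(masks.owner & ACE_WRITE_DATA);
	ASSERT(masks.everyone & ACE_READ_DATA);
	ASSERT(!(masks.everyone & ACE_WRITE_DATA));
}
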
+ */ + + finderinfo.fi_flags = 0; + + if (ATTR_CMN_NAME & commonattr) { + nameattrpack(aip, name, strlen(name)); + attrbufptr = *aip->ai_attrbufpp; + varbufptr = *aip->ai_varbufpp; + } + if (ATTR_CMN_DEVID & commonattr) { + *((dev_t *)attrbufptr) = vfs_statfs(mp)->f_fsid.val[0]; + attrbufptr = ((dev_t *)attrbufptr) + 1; + } + if (ATTR_CMN_FSID & commonattr) { + *((fsid_t *)attrbufptr) = vfs_statfs(mp)->f_fsid; + attrbufptr = ((fsid_t *)attrbufptr) + 1; + } + if (ATTR_CMN_OBJTYPE & commonattr) { + *((fsobj_type_t *)attrbufptr) = vtype; + attrbufptr = ((fsobj_type_t *)attrbufptr) + 1; + } + if (ATTR_CMN_OBJTAG & commonattr) { + *((fsobj_tag_t *)attrbufptr) = VT_ZFS; + attrbufptr = ((fsobj_tag_t *)attrbufptr) + 1; + } + /* + * Note: ATTR_CMN_OBJID is lossy (only 32 bits). + */ + if ((ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID) & commonattr) { + u_int32_t fileid; + /* + * On Mac OS X we always export the root directory id as 2 + */ + fileid = (objnum == zfsvfs->z_root) ? 2 : objnum; + + if (ATTR_CMN_OBJID & commonattr) { + ((fsobj_id_t *)attrbufptr)->fid_objno = fileid; + ((fsobj_id_t *)attrbufptr)->fid_generation = 0; + attrbufptr = ((fsobj_id_t *)attrbufptr) + 1; + } + if (ATTR_CMN_OBJPERMANENTID & commonattr) { + ((fsobj_id_t *)attrbufptr)->fid_objno = fileid; + ((fsobj_id_t *)attrbufptr)->fid_generation = 0; + attrbufptr = ((fsobj_id_t *)attrbufptr) + 1; + } + } + /* + * Note: ATTR_CMN_PAROBJID is lossy (only 32 bits). + */ + if (ATTR_CMN_PAROBJID & commonattr) { + uint64_t parentid; + + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parentid, sizeof (parentid)) == 0); + + /* + * On Mac OS X we always export the root + * directory id as 2 and its parent as 1 + */ + if (zp && zp->z_id == zfsvfs->z_root) + parentid = 1; + else if (parentid == zfsvfs->z_root) + parentid = 2; + + ASSERT(parentid != 0); + + ((fsobj_id_t *)attrbufptr)->fid_objno = (uint32_t)parentid; + ((fsobj_id_t *)attrbufptr)->fid_generation = 0; + attrbufptr = ((fsobj_id_t *)attrbufptr) + 1; + } + if (ATTR_CMN_SCRIPT & commonattr) { + *((text_encoding_t *)attrbufptr) = kTextEncodingMacUnicode; + attrbufptr = ((text_encoding_t *)attrbufptr) + 1; + } + if (ATTR_CMN_CRTIME & commonattr) { + uint64_t times[2]; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), + times, sizeof (times)) == 0); + if (user64) { + ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr, + times); + attrbufptr = ((timespec_user64_t *)attrbufptr) + 1; + } else { + ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr, + times); + attrbufptr = ((timespec_user32_t *)attrbufptr) + 1; + } + } + if (ATTR_CMN_MODTIME & commonattr) { + uint64_t times[2]; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zfsvfs), + times, sizeof (times)) == 0); + if (user64) { + ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr, + times); + attrbufptr = ((timespec_user64_t *)attrbufptr) + 1; + } else { + ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr, + times); + attrbufptr = ((timespec_user32_t *)attrbufptr) + 1; + } + } + if (ATTR_CMN_CHGTIME & commonattr) { + uint64_t times[2]; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zfsvfs), + times, sizeof (times)) == 0); + if (user64) { + ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr, + times); + attrbufptr = ((timespec_user64_t *)attrbufptr) + 1; + } else { + ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr, + times); + attrbufptr = ((timespec_user32_t *)attrbufptr) + 1; + } + } + if (ATTR_CMN_ACCTIME & commonattr) { + uint64_t times[2]; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), + times, sizeof (times)) == 0); + if 
(user64) { + ZFS_TIME_DECODE((timespec_user64_t *)attrbufptr, + times); + attrbufptr = ((timespec_user64_t *)attrbufptr) + 1; + } else { + ZFS_TIME_DECODE((timespec_user32_t *)attrbufptr, + times); + attrbufptr = ((timespec_user32_t *)attrbufptr) + 1; + } + } + if (ATTR_CMN_BKUPTIME & commonattr) { + /* legacy attribute -- just pass zero */ + if (user64) { + ((timespec_user64_t *)attrbufptr)->tv_sec = 0; + ((timespec_user64_t *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((timespec_user64_t *)attrbufptr) + 1; + } else { + ((timespec_user32_t *)attrbufptr)->tv_sec = 0; + ((timespec_user32_t *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((timespec_user32_t *)attrbufptr) + 1; + } + } + if (ATTR_CMN_FNDRINFO & commonattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + &val, sizeof (val)) == 0); + getfinderinfo(zp, cr, &finderinfo); + /* Shadow ZFS_HIDDEN to Finder Info's invisible bit */ + if (val & ZFS_HIDDEN) { + finderinfo.fi_flags |= + OSSwapHostToBigConstInt16(kIsInvisible); + } + bcopy(&finderinfo, attrbufptr, sizeof (finderinfo)); + attrbufptr = (char *)attrbufptr + 32; + } + if (ATTR_CMN_OWNERID & commonattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_UID(zfsvfs), + &val, sizeof (val)) == 0); + *((uid_t *)attrbufptr) = val; + attrbufptr = ((uid_t *)attrbufptr) + 1; + } + if (ATTR_CMN_GRPID & commonattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_GID(zfsvfs), + &val, sizeof (val)) == 0); + *((gid_t *)attrbufptr) = val; + attrbufptr = ((gid_t *)attrbufptr) + 1; + } + if (ATTR_CMN_ACCESSMASK & commonattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), + &val, sizeof (val)) == 0); + *((u_int32_t *)attrbufptr) = val; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_CMN_FLAGS & commonattr) { + // TODO, sa_lookup of ZPL_FLAGS + u_int32_t flags = zfs_getbsdflags(zp); + + /* Shadow Finder Info's invisible bit to UF_HIDDEN */ + if ((ATTR_CMN_FNDRINFO & commonattr) && + (OSSwapBigToHostInt16(finderinfo.fi_flags) & kIsInvisible)) + flags |= UF_HIDDEN; + + *((u_int32_t *)attrbufptr) = flags; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_CMN_USERACCESS & commonattr) { + u_int32_t user_access = 0; + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + &val, sizeof (val)) == 0); + + user_access = getuseraccess(zp, aip->ai_context); + + /* Also consider READ-ONLY file system. 
*/ + if (vfs_flags(mp) & MNT_RDONLY) { + user_access &= ~W_OK; + } + + /* Locked objects are not writable either */ + if ((val & ZFS_IMMUTABLE) && + (vfs_context_suser(aip->ai_context) != 0)) { + user_access &= ~W_OK; + } + + *((u_int32_t *)attrbufptr) = user_access; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_CMN_FILEID & commonattr) { + /* + * On Mac OS X we always export the root directory id as 2 + */ + if (objnum == zfsvfs->z_root) + objnum = 2; + + *((u_int64_t *)attrbufptr) = objnum; + attrbufptr = ((u_int64_t *)attrbufptr) + 1; + } + if (ATTR_CMN_PARENTID & commonattr) { + uint64_t parentid; + + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + &parentid, sizeof (parentid)) == 0); + + /* + * On Mac OS X we always export the root + * directory id as 2 and its parent as 1 + */ + if (zp && zp->z_id == zfsvfs->z_root) + parentid = 1; + else if (parentid == zfsvfs->z_root) + parentid = 2; + + ASSERT(parentid != 0); + + *((u_int64_t *)attrbufptr) = parentid; + attrbufptr = ((u_int64_t *)attrbufptr) + 1; + } + + *aip->ai_attrbufpp = attrbufptr; + *aip->ai_varbufpp = varbufptr; +} + +void +dirattrpack(attrinfo_t *aip, znode_t *zp) +{ + attrgroup_t dirattr = aip->ai_attrlist->dirattr; + void *attrbufptr = *aip->ai_attrbufpp; + + if (ATTR_DIR_LINKCOUNT & dirattr) { + *((u_int32_t *)attrbufptr) = 1; /* no dir hard links */ + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_DIR_ENTRYCOUNT & dirattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), + &val, sizeof (val)) == 0); + *((u_int32_t *)attrbufptr) = (uint32_t)val; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_DIR_MOUNTSTATUS & dirattr && zp) { + vnode_t *vp = ZTOV(zp); + + if (vp != NULL && vnode_mountedhere(vp) != NULL) + *((u_int32_t *)attrbufptr) = DIR_MNTSTATUS_MNTPOINT; + else + *((u_int32_t *)attrbufptr) = 0; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + *aip->ai_attrbufpp = attrbufptr; +} + +void +fileattrpack(attrinfo_t *aip, zfsvfs_t *zfsvfs, znode_t *zp) +{ + attrgroup_t fileattr = aip->ai_attrlist->fileattr; + void *attrbufptr = *aip->ai_attrbufpp; + void *varbufptr = *aip->ai_varbufpp; + uint64_t allocsize = 0; + cred_t *cr = (cred_t *)vfs_context_ucred(aip->ai_context); + + if ((ATTR_FILE_ALLOCSIZE | ATTR_FILE_DATAALLOCSIZE) & fileattr && zp) { + uint32_t blksize; + u_longlong_t nblks; + + sa_object_size(zp->z_sa_hdl, &blksize, &nblks); + allocsize = (uint64_t)512LL * (uint64_t)nblks; + } + if (ATTR_FILE_LINKCOUNT & fileattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + &val, sizeof (val)) == 0); + *((u_int32_t *)attrbufptr) = val; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_FILE_TOTALSIZE & fileattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + &val, sizeof (val)) == 0); + *((off_t *)attrbufptr) = val; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_ALLOCSIZE & fileattr) { + *((off_t *)attrbufptr) = allocsize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_IOBLOCKSIZE & fileattr && zp) { + *((u_int32_t *)attrbufptr) = + zp->z_blksz ? 
zp->z_blksz : zfsvfs->z_max_blksz; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_FILE_DEVTYPE & fileattr) { + uint64_t mode, val = 0; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), + &mode, sizeof (mode)) == 0); + sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), + &val, sizeof (val)); + if (S_ISBLK(mode) || S_ISCHR(mode)) + *((u_int32_t *)attrbufptr) = (u_int32_t)val; + else + *((u_int32_t *)attrbufptr) = 0; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_FILE_DATALENGTH & fileattr) { + uint64_t val; + VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + &val, sizeof (val)) == 0); + *((off_t *)attrbufptr) = val; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_DATAALLOCSIZE & fileattr) { + *((off_t *)attrbufptr) = allocsize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if ((ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE) & fileattr) { + uint64_t rsrcsize = 0; + uint64_t xattr; + + if (!sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + &xattr, sizeof (xattr)) && + xattr) { + znode_t *xdzp = NULL, *xzp = NULL; + struct componentname cn = { 0 }; + char *name = NULL; + + name = spa_strdup(XATTR_RESOURCEFORK_NAME); + cn.cn_namelen = strlen(name)+1; + cn.cn_nameptr = kmem_zalloc(cn.cn_namelen, KM_SLEEP); + + /* Grab the hidden attribute directory vnode. */ + if (zfs_get_xattrdir(zp, &xdzp, cr, 0) == 0 && + zfs_dirlook(xdzp, name, &xzp, 0, NULL, + &cn) == 0) { + rsrcsize = xzp->z_size; + } + spa_strfree(name); + kmem_free(cn.cn_nameptr, cn.cn_namelen); + + if (xzp) + zrele(xzp); + if (xdzp) + zrele(xdzp); + } + if (ATTR_FILE_RSRCLENGTH & fileattr) { + *((off_t *)attrbufptr) = rsrcsize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_RSRCALLOCSIZE & fileattr) { + *((off_t *)attrbufptr) = roundup(rsrcsize, 512); + attrbufptr = ((off_t *)attrbufptr) + 1; + } + } + *aip->ai_attrbufpp = attrbufptr; + *aip->ai_varbufpp = varbufptr; +} + +void +nameattrpack(attrinfo_t *aip, const char *name, int namelen) +{ + void *varbufptr; + struct attrreference *attr_refptr; + u_int32_t attrlen; + size_t nfdlen, freespace; + int force_formd_normalized_output; + + varbufptr = *aip->ai_varbufpp; + attr_refptr = (struct attrreference *)(*aip->ai_attrbufpp); + + freespace = (char *)aip->ai_varbufend - (char *)varbufptr; + /* + * Mac OS X: non-ascii names are UTF-8 NFC on disk + * so convert to NFD before exporting them. + */ + + if (zfs_vnop_force_formd_normalized_output && + !is_ascii_str(name)) + force_formd_normalized_output = 1; + else + force_formd_normalized_output = 0; + + namelen = strlen(name); + if (!force_formd_normalized_output || + utf8_normalizestr((const u_int8_t *)name, namelen, + (u_int8_t *)varbufptr, &nfdlen, + freespace, UTF_DECOMPOSED) != 0) { + /* ASCII or normalization failed, just copy zap name. */ + strncpy((char *)varbufptr, name, MIN(freespace, namelen+1)); + } else { + /* Normalization succeeded (already in buffer). */ + namelen = nfdlen; + } + attrlen = namelen + 1; + attr_refptr->attr_dataoffset = (char *)varbufptr - (char *)attr_refptr; + attr_refptr->attr_length = attrlen; + /* + * Advance beyond the space just allocated and + * round up to the next 4-byte boundary: + */ + varbufptr = ((char *)varbufptr) + attrlen + ((4 - (attrlen & 3)) & 3); + ++attr_refptr; + + *aip->ai_attrbufpp = attr_refptr; + *aip->ai_varbufpp = varbufptr; +} + +int +getpackedsize(struct attrlist *alp, boolean_t user64) +{ + attrgroup_t attrs; + int timespecsize; + int size = 0; + + timespecsize = user64 ? 
sizeof (timespec_user64_t) : + sizeof (timespec_user32_t); + + if ((attrs = alp->commonattr) != 0) { + if (attrs & ATTR_CMN_NAME) + size += sizeof (struct attrreference); + if (attrs & ATTR_CMN_DEVID) + size += sizeof (dev_t); + if (attrs & ATTR_CMN_FSID) + size += sizeof (fsid_t); + if (attrs & ATTR_CMN_OBJTYPE) + size += sizeof (fsobj_type_t); + if (attrs & ATTR_CMN_OBJTAG) + size += sizeof (fsobj_tag_t); + if (attrs & ATTR_CMN_OBJID) + size += sizeof (fsobj_id_t); + if (attrs & ATTR_CMN_OBJPERMANENTID) + size += sizeof (fsobj_id_t); + if (attrs & ATTR_CMN_PAROBJID) + size += sizeof (fsobj_id_t); + if (attrs & ATTR_CMN_SCRIPT) + size += sizeof (text_encoding_t); + if (attrs & ATTR_CMN_CRTIME) + size += timespecsize; + if (attrs & ATTR_CMN_MODTIME) + size += timespecsize; + if (attrs & ATTR_CMN_CHGTIME) + size += timespecsize; + if (attrs & ATTR_CMN_ACCTIME) + size += timespecsize; + if (attrs & ATTR_CMN_BKUPTIME) + size += timespecsize; + if (attrs & ATTR_CMN_FNDRINFO) + size += 32 * sizeof (u_int8_t); + if (attrs & ATTR_CMN_OWNERID) + size += sizeof (uid_t); + if (attrs & ATTR_CMN_GRPID) + size += sizeof (gid_t); + if (attrs & ATTR_CMN_ACCESSMASK) + size += sizeof (u_int32_t); + if (attrs & ATTR_CMN_FLAGS) + size += sizeof (u_int32_t); + if (attrs & ATTR_CMN_USERACCESS) + size += sizeof (u_int32_t); + if (attrs & ATTR_CMN_FILEID) + size += sizeof (u_int64_t); + if (attrs & ATTR_CMN_PARENTID) + size += sizeof (u_int64_t); + /* + * Also add: + * ATTR_CMN_GEN_COUNT (|FSOPT_ATTR_CMN_EXTENDED) + * ATTR_CMN_DOCUMENT_ID (|FSOPT_ATTR_CMN_EXTENDED) + * ATTR_CMN_EXTENDED_SECURITY + * ATTR_CMN_UUID + * ATTR_CMN_GRPUUID + * ATTR_CMN_FULLPATH + * ATTR_CMN_ADDEDTIME + * ATTR_CMN_ERROR + * ATTR_CMN_DATA_PROTECT_FLAGS + */ + } + if ((attrs = alp->dirattr) != 0) { + if (attrs & ATTR_DIR_LINKCOUNT) + size += sizeof (u_int32_t); + if (attrs & ATTR_DIR_ENTRYCOUNT) + size += sizeof (u_int32_t); + if (attrs & ATTR_DIR_MOUNTSTATUS) + size += sizeof (u_int32_t); + } + if ((attrs = alp->fileattr) != 0) { + if (attrs & ATTR_FILE_LINKCOUNT) + size += sizeof (u_int32_t); + if (attrs & ATTR_FILE_TOTALSIZE) + size += sizeof (off_t); + if (attrs & ATTR_FILE_ALLOCSIZE) + size += sizeof (off_t); + if (attrs & ATTR_FILE_IOBLOCKSIZE) + size += sizeof (u_int32_t); + if (attrs & ATTR_FILE_DEVTYPE) + size += sizeof (u_int32_t); + if (attrs & ATTR_FILE_DATALENGTH) + size += sizeof (off_t); + if (attrs & ATTR_FILE_DATAALLOCSIZE) + size += sizeof (off_t); + if (attrs & ATTR_FILE_RSRCLENGTH) + size += sizeof (off_t); + if (attrs & ATTR_FILE_RSRCALLOCSIZE) + size += sizeof (off_t); + } + return (size); +} + + +void +getfinderinfo(znode_t *zp, cred_t *cr, finderinfo_t *fip) +{ + znode_t *xdzp = NULL; + znode_t *xzp = NULL; + struct uio *auio = NULL; + struct componentname cn = { 0 }; + int error; + uint64_t xattr = 0; + char *name = NULL; + + if (sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zp->z_zfsvfs), + &xattr, sizeof (xattr)) || + (xattr == 0)) { + goto nodata; + } + + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); + if (auio == NULL) { + goto nodata; + } + uio_addiov(auio, CAST_USER_ADDR_T(fip), sizeof (finderinfo_t)); + + /* + * Grab the hidden attribute directory vnode. 
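
A small worked example of getpackedsize() above, assuming the usual Darwin sizes for the fixed-width attribute types; this is an illustration, not output from the patch.

/*
 * For alp->commonattr = ATTR_CMN_NAME | ATTR_CMN_OBJTYPE | ATTR_CMN_FILEID
 * and no dir/file attrs, getpackedsize() returns:
 *
 *   sizeof (struct attrreference)   8   (ATTR_CMN_NAME)
 *   sizeof (fsobj_type_t)           4   (ATTR_CMN_OBJTYPE)
 *   sizeof (u_int64_t)              8   (ATTR_CMN_FILEID)
 *   --------------------------------------
 *                                  20   bytes of fixed attribute data
 *
 * The variable-length name bytes go into the separate "var" buffer and are
 * accounted for by nameattrpack() above, not by getpackedsize().
 */
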
+ * + * XXX - switch to embedded Finder Info when it becomes available + */ + if ((error = zfs_get_xattrdir(zp, &xdzp, cr, 0))) { + goto out; + } + + name = spa_strdup(XATTR_FINDERINFO_NAME); + cn.cn_namelen = strlen(name)+1; + cn.cn_nameptr = kmem_zalloc(cn.cn_namelen, KM_SLEEP); + + if ((error = zfs_dirlook(xdzp, name, &xzp, 0, NULL, &cn))) { + goto out; + } + error = dmu_read_uio(zp->z_zfsvfs->z_os, xzp->z_id, auio, + sizeof (finderinfo_t)); +out: + if (name) + spa_strfree(name); + if (cn.cn_nameptr) + kmem_free(cn.cn_nameptr, cn.cn_namelen); + if (auio) + uio_free(auio); + if (xzp) + zrele(xzp); + if (xdzp) + zrele(xdzp); + if (error == 0) + return; +nodata: + bzero(fip, sizeof (finderinfo_t)); +} + +#define KAUTH_DIR_WRITE (KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | \ + KAUTH_VNODE_ADD_SUBDIRECTORY | \ + KAUTH_VNODE_DELETE_CHILD) + +#define KAUTH_DIR_READ (KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY) + +#define KAUTH_DIR_EXECUTE (KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH) + +#define KAUTH_FILE_WRITE (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA) + +#define KAUTH_FILE_READ (KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA) + +#define KAUTH_FILE_EXECUTE (KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE) + +/* + * Compute the same user access value as getattrlist(2) + */ +u_int32_t +getuseraccess(znode_t *zp, vfs_context_t ctx) +{ + vnode_t *vp; + u_int32_t user_access = 0; + zfs_acl_phys_t acl_phys; + int error; + /* Only take the expensive vnode_authorize path when we have an ACL */ + + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + &acl_phys, sizeof (acl_phys)); + + if (error || acl_phys.z_acl_count == 0) { + kauth_cred_t cred = vfs_context_ucred(ctx); + uint64_t obj_uid; + uint64_t obj_mode; + + /* User id 0 (root) always gets access. */ + if (!vfs_context_suser(ctx)) { + return (R_OK | W_OK | X_OK); + } + + sa_lookup(zp->z_sa_hdl, SA_ZPL_UID(zp->z_zfsvfs), + &obj_uid, sizeof (obj_uid)); + sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zp->z_zfsvfs), + &obj_mode, sizeof (obj_mode)); + + // obj_uid = pzp->zp_uid; + obj_mode = obj_mode & MODEMASK; + if (obj_uid == UNKNOWNUID) { + obj_uid = kauth_cred_getuid(cred); + } + if ((obj_uid == kauth_cred_getuid(cred)) || + (obj_uid == UNKNOWNUID)) { + return (((u_int32_t)obj_mode & S_IRWXU) >> 6); + } + /* Otherwise, settle for 'others' access. */ + return ((u_int32_t)obj_mode & S_IRWXO); + } + vp = ZTOV(zp); + if (vnode_isdir(vp)) { + if (vnode_authorize(vp, NULLVP, KAUTH_DIR_WRITE, ctx) == 0) + user_access |= W_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_DIR_READ, ctx) == 0) + user_access |= R_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_DIR_EXECUTE, ctx) == 0) + user_access |= X_OK; + } else { + if (vnode_authorize(vp, NULLVP, KAUTH_FILE_WRITE, ctx) == 0) + user_access |= W_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_FILE_READ, ctx) == 0) + user_access |= R_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_FILE_EXECUTE, ctx) == 0) + user_access |= X_OK; + } + return (user_access); +} + + + +static unsigned char fingerprint[] = {0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, + 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef}; + +/* + * Convert "Well Known" GUID to enum type. 
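
kauth_wellknown_guid() below and nfsacl_set_wellknown() are inverses over Apple's "well known" GUIDs: the shared 12-byte fingerprint above plus a big-endian trailing word (0x0a owner, 0x10 group, 0x0c everybody, 0xfffffffe nobody). A round-trip sketch, assuming the usual XNU values of the KAUTH_WKG_* enum; illustration only, not part of the patch.

/* Build the "everybody" GUID and map it back to its enum value. */
static void
example_wellknown_guid(void)
{
	guid_t g;

	nfsacl_set_wellknown(KAUTH_WKG_EVERYBODY, &g);
	ASSERT3S(kauth_wellknown_guid(&g), ==, KAUTH_WKG_EVERYBODY);
}
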
+ */ +int +kauth_wellknown_guid(guid_t *guid) +{ + uint32_t last = 0; + + if (memcmp(fingerprint, guid->g_guid, sizeof (fingerprint))) + return (KAUTH_WKG_NOT); + + last = BE_32(*((u_int32_t *)&guid->g_guid[12])); + + switch (last) { + case 0x0c: + return (KAUTH_WKG_EVERYBODY); + case 0x0a: + return (KAUTH_WKG_OWNER); + case 0x10: + return (KAUTH_WKG_GROUP); + case 0xFFFFFFFE: + return (KAUTH_WKG_NOBODY); + } + + return (KAUTH_WKG_NOT); +} + + +/* + * Set GUID to "well known" guid, based on enum type + */ +void +nfsacl_set_wellknown(int wkg, guid_t *guid) +{ + /* + * All WKGs begin with the same 12 bytes. + */ + bcopy(fingerprint, (void *)guid, 12); + /* + * The final 4 bytes are our code (in network byte order). + */ + switch (wkg) { + case 4: + *((u_int32_t *)&guid->g_guid[12]) = BE_32(0x0000000c); + break; + case 3: + *((u_int32_t *)&guid->g_guid[12]) = BE_32(0xfffffffe); + break; + case 1: + *((u_int32_t *)&guid->g_guid[12]) = BE_32(0x0000000a); + break; + case 2: + *((u_int32_t *)&guid->g_guid[12]) = BE_32(0x00000010); + }; +} + + +/* + * Convert Darwin ACL list, into ZFS ACL "aces" list. + */ +void +aces_from_acl(ace_t *aces, int *nentries, struct kauth_acl *k_acl, + int *seen_type) +{ + int i; + ace_t *ace; + guid_t *guidp; + kauth_ace_rights_t ace_rights; + uid_t who; + uint32_t mask = 0; + uint16_t flags = 0; + uint16_t type = 0; + u_int32_t ace_flags; + int wkg; + int err = 0; + + *nentries = k_acl->acl_entrycount; + + // bzero(aces, sizeof (*aces) * *nentries); + + // *nentries = aclp->acl_cnt; + + for (i = 0; i < *nentries; i++) { + // entry = &(aclp->acl_entry[i]); + + flags = 0; + mask = 0; + + ace = &(aces[i]); + + /* Note Mac OS X GUID is a 128-bit identifier */ + guidp = &k_acl->acl_ace[i].ace_applicable; + + who = -1; + wkg = kauth_wellknown_guid(guidp); + + switch (wkg) { + case KAUTH_WKG_OWNER: + flags |= ACE_OWNER; + if (seen_type) *seen_type |= ACE_OWNER; + break; + case KAUTH_WKG_GROUP: + flags |= ACE_GROUP|ACE_IDENTIFIER_GROUP; + if (seen_type) *seen_type |= ACE_GROUP; + break; + case KAUTH_WKG_EVERYBODY: + flags |= ACE_EVERYONE; + if (seen_type) *seen_type |= ACE_EVERYONE; + break; + + case KAUTH_WKG_NOBODY: + default: + /* Try to get a uid from supplied guid */ + err = kauth_cred_guid2uid(guidp, &who); + if (err) { + err = kauth_cred_guid2gid(guidp, &who); + if (!err) { + flags |= ACE_IDENTIFIER_GROUP; + } + } + if (err) { + *nentries = 0; + return; + } + + } // switch + + ace->a_who = who; + + ace_rights = k_acl->acl_ace[i].ace_rights; + if (ace_rights & KAUTH_VNODE_READ_DATA) + mask |= ACE_READ_DATA; + if (ace_rights & KAUTH_VNODE_WRITE_DATA) + mask |= ACE_WRITE_DATA; + if (ace_rights & KAUTH_VNODE_APPEND_DATA) + mask |= ACE_APPEND_DATA; + if (ace_rights & KAUTH_VNODE_READ_EXTATTRIBUTES) + mask |= ACE_READ_NAMED_ATTRS; + if (ace_rights & KAUTH_VNODE_WRITE_EXTATTRIBUTES) + mask |= ACE_WRITE_NAMED_ATTRS; + if (ace_rights & KAUTH_VNODE_EXECUTE) + mask |= ACE_EXECUTE; + if (ace_rights & KAUTH_VNODE_DELETE_CHILD) + mask |= ACE_DELETE_CHILD; + if (ace_rights & KAUTH_VNODE_READ_ATTRIBUTES) + mask |= ACE_READ_ATTRIBUTES; + if (ace_rights & KAUTH_VNODE_WRITE_ATTRIBUTES) + mask |= ACE_WRITE_ATTRIBUTES; + if (ace_rights & KAUTH_VNODE_DELETE) + mask |= ACE_DELETE; + if (ace_rights & KAUTH_VNODE_READ_SECURITY) + mask |= ACE_READ_ACL; + if (ace_rights & KAUTH_VNODE_WRITE_SECURITY) + mask |= ACE_WRITE_ACL; + if (ace_rights & KAUTH_VNODE_TAKE_OWNERSHIP) + mask |= ACE_WRITE_OWNER; + if (ace_rights & KAUTH_VNODE_SYNCHRONIZE) + mask |= ACE_SYNCHRONIZE; + ace->a_access_mask = mask; 
+ + ace_flags = k_acl->acl_ace[i].ace_flags; + if (ace_flags & KAUTH_ACE_FILE_INHERIT) + flags |= ACE_FILE_INHERIT_ACE; + if (ace_flags & KAUTH_ACE_DIRECTORY_INHERIT) + flags |= ACE_DIRECTORY_INHERIT_ACE; + if (ace_flags & KAUTH_ACE_LIMIT_INHERIT) + flags |= ACE_NO_PROPAGATE_INHERIT_ACE; + if (ace_flags & KAUTH_ACE_ONLY_INHERIT) + flags |= ACE_INHERIT_ONLY_ACE; + ace->a_flags = flags; + + switch (ace_flags & KAUTH_ACE_KINDMASK) { + case KAUTH_ACE_PERMIT: + type = ACE_ACCESS_ALLOWED_ACE_TYPE; + break; + case KAUTH_ACE_DENY: + type = ACE_ACCESS_DENIED_ACE_TYPE; + break; + case KAUTH_ACE_AUDIT: + type = ACE_SYSTEM_AUDIT_ACE_TYPE; + break; + case KAUTH_ACE_ALARM: + type = ACE_SYSTEM_ALARM_ACE_TYPE; + break; + } + ace->a_type = type; + dprintf(" ACL: %d type %04x, mask %04x, flags %04x, who %d\n", + i, type, mask, flags, who); + } + +} + +void +finderinfo_update(uint8_t *finderinfo, znode_t *zp) +{ + u_int8_t *finfo = NULL; + struct timespec va_crtime; + + /* Advance finfo by 16 bytes to the 2nd half of the finderinfo */ + finfo = (u_int8_t *)finderinfo + 16; + + /* Don't expose a symlink's private type/creator. */ + if (IFTOVT((mode_t)zp->z_mode) == VLNK) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)finderinfo; + fip->fdType = 0; + fip->fdCreator = 0; + } + + /* hfs_xattr.c hfs_zero_hidden_fields() */ + if ((IFTOVT((mode_t)zp->z_mode) == VREG) || + (IFTOVT((mode_t)zp->z_mode) == VLNK)) { + struct FndrExtendedFileInfo *extinfo = + (struct FndrExtendedFileInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } + + if (IFTOVT((mode_t)zp->z_mode) == VDIR) { + struct FndrExtendedDirInfo *extinfo = + (struct FndrExtendedDirInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } + +} + + + +int +zpl_xattr_set_sa(struct vnode *vp, const char *name, const void *value, + size_t size, int flags, cred_t *cr) +{ + znode_t *zp = VTOZ(vp); + nvlist_t *nvl; + size_t sa_size; + int error; + + ASSERT(zp->z_xattr_cached); + nvl = zp->z_xattr_cached; + + if (value == NULL) { + error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + if (error == -ENOENT) + return (error); + // error = zpl_xattr_set_dir(vp, name, NULL, 0, flags, cr); + } else { + /* Limited to 32k to keep nvpair memory allocations small */ + if (size > DXATTR_MAX_ENTRY_SIZE) + return (-EFBIG); + + /* Prevent the DXATTR SA from consuming the entire SA region */ + error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); + if (error) + return (error); + + if (sa_size > DXATTR_MAX_SA_SIZE) + return (-EFBIG); + error = -nvlist_add_byte_array(nvl, name, + (uchar_t *)value, size); + if (error) + return (error); + } + + /* Update the SA for additions, modifications, and removals. 
*/ + if (!error) + error = -zfs_sa_set_xattr(zp); + + ASSERT3S(error, <=, 0); + + return (error); +} + +int +zpl_xattr_get_sa(struct vnode *vp, const char *name, void *value, size_t size) +{ + znode_t *zp = VTOZ(vp); + uchar_t *nv_value; + uint_t nv_size; + int error = 0; + +#ifdef __LINUX__ + ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); +#endif + + mutex_enter(&zp->z_lock); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + mutex_exit(&zp->z_lock); + + if (error) + return (error); + + ASSERT(zp->z_xattr_cached); + error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name, + &nv_value, &nv_size); + if (error) + return (error); + + if (!size) + return (nv_size); + if (size < nv_size) + return (-ERANGE); + + memcpy(value, nv_value, nv_size); + + return (nv_size); +} + + + +/* + * Document ID. Persistant IDs that can survive "safe saving". + * 'revisiond' appears to use fchflags(UF_TRACKED) on files/dirs + * that it wishes to use DocumentIDs with. Here, we will lookup + * if an entry already has a DocumentID stored in SA, but if not, + * hash the DocumentID for (PARENTID + filename) and return it. + * In vnop_setattr for UF_TRACKED, we will store the DocumentID to + * disk. + * Although it is not entirely clear which situations we should handle + * we do handle: + * + * Case 1: + * "file.txt" gets chflag(UF_TRACKED) and DocumentID set. + * "file.txt" is renamed to "file.tmp". DocumentID is kept. + * "file.txt" is re-created, DocumentID remains same, but not saved. + * + * Case 2: + * "file.txt" gets chflag(UF_TRACKED) and DocumentID set. + * "file.txt" is moved to another directory. DocumentID is kept. + * + * It is interesting to note that HFS+ has "tombstones" which is + * created when a UF_TRACKED entry is unlinked, or, renamed. + * Then if a new entry is created with same PARENT+name, and matching + * tombstone is found, will inherit the DocumentID, and UF_TRACKED flag. + * + * We may need to implement this as well. + * + * If "name" or "parent" is known, pass it along, or it needs to look it up. + * + */ +void +zfs_setattr_generate_id(znode_t *zp, uint64_t val, char *name) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + char *nameptr = NULL; + char *filename = NULL; + uint64_t parent = val; + int error = 0; + uint64_t docid = 0; + + if (!zp->z_document_id && zp->z_sa_hdl) { + + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DOCUMENTID(zfsvfs), + &docid, sizeof (docid)); + if (!error && docid) { + zp->z_document_id = docid; + return; + } + + /* Have name? */ + if (name && *name) { + nameptr = name; + } else { + /* Do we have parent? */ + if (!parent) { + VERIFY(sa_lookup(zp->z_sa_hdl, + SA_ZPL_PARENT(zfsvfs), &parent, + sizeof (parent)) == 0); + } + /* Lookup filename */ + filename = kmem_zalloc(MAXPATHLEN + 2, KM_SLEEP); + if (zap_value_search(zfsvfs->z_os, parent, zp->z_id, + ZFS_DIRENT_OBJ(-1ULL), filename) == 0) { + + nameptr = filename; + // Might as well keep this name too. + strlcpy(zp->z_name_cache, filename, + MAXPATHLEN); + } + } + + zp->z_document_id = fnv_32a_buf(&parent, sizeof (parent), + FNV1_32A_INIT); + if (nameptr) + zp->z_document_id = + fnv_32a_str(nameptr, zp->z_document_id); + + if (filename) + kmem_free(filename, MAXPATHLEN + 2); + } // !document_id +} + +/* + * setattr asked for UF_TRACKED to be set, which means we will make sure + * we have a hash made (includes getting filename) and stored in SA. 
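+ * + * Illustratively, the identifier is the hash computed in + * zfs_setattr_generate_id() above: + * + *	docid = fnv_32a_buf(&parent, sizeof (parent), FNV1_32A_INIT); + *	docid = fnv_32a_str(filename, docid);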
+ */ +int +zfs_setattr_set_documentid(znode_t *zp, boolean_t update_flags) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + int error = 0; + dmu_tx_t *tx; + int count = 0; + sa_bulk_attr_t bulk[2]; + + dprintf("ZFS: vnop_setattr(UF_TRACKED) obj %llu : documentid %08u\n", + zp->z_id, + zp->z_document_id); + + /* Write the new documentid to SA */ + if ((zfsvfs->z_use_sa == B_TRUE) && + !vfs_isrdonly(zfsvfs->z_vfs) && + spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { + + uint64_t docid = zp->z_document_id; // 32->64 + + if (update_flags == B_TRUE) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, 8); + } + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DOCUMENTID(zfsvfs), NULL, + &docid, sizeof (docid)); + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); + + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + dmu_tx_commit(tx); + } + + if (error) + dprintf("ZFS: sa_update(SA_ZPL_DOCUMENTID) failed %d\n", + error); + + } // if z_use_sa && !readonly + + return (error); +} + +int +zfs_hardlink_addmap(znode_t *zp, uint64_t parentid, uint32_t linkid) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + hardlinks_t *searchnode, *findnode; + avl_index_t loc; + + searchnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + searchnode->hl_parent = parentid; + searchnode->hl_fileid = zp->z_id; + strlcpy(searchnode->hl_name, zp->z_name_cache, PATH_MAX); + + rw_enter(&zfsvfs->z_hardlinks_lock, RW_WRITER); + findnode = avl_find(&zfsvfs->z_hardlinks, searchnode, &loc); + kmem_free(searchnode, sizeof (hardlinks_t)); + if (!findnode) { + // Add hash entry + zp->z_finder_hardlink = TRUE; + findnode = kmem_alloc(sizeof (hardlinks_t), KM_SLEEP); + + findnode->hl_parent = parentid; + findnode->hl_fileid = zp->z_id; + strlcpy(findnode->hl_name, zp->z_name_cache, PATH_MAX); + + findnode->hl_linkid = linkid; + + avl_add(&zfsvfs->z_hardlinks, findnode); + avl_add(&zfsvfs->z_hardlinks_linkid, findnode); + dprintf("ZFS: Inserted new hardlink node (%llu,%llu,'%s') " + "<-> (%x,%u)\n", + findnode->hl_parent, + findnode->hl_fileid, findnode->hl_name, + findnode->hl_linkid, findnode->hl_linkid); + } + rw_exit(&zfsvfs->z_hardlinks_lock); + + return (findnode ? 
1 : 0); +} + +/* dst buffer must be at least UUID_PRINTABLE_STRING_LENGTH bytes */ + +int +zfs_vfs_uuid_unparse(uuid_t uuid, char *dst) +{ + if (!uuid || !dst) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + + snprintf(dst, UUID_PRINTABLE_STRING_LENGTH, "%02X%02X%02X%02X-" + "%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15]); + + return (0); +} + +int +zfs_vfs_uuid_gen(const char *osname, uuid_t uuid) +{ + MD5_CTX md5c; + /* namespace (generated by uuidgen) */ + /* 50670853-FBD2-4EC3-9802-73D847BF7E62 */ + char namespace[16] = {0x50, 0x67, 0x08, 0x53, /* - */ + 0xfb, 0xd2, /* - */ 0x4e, 0xc3, /* - */ + 0x98, 0x02, /* - */ + 0x73, 0xd8, 0x47, 0xbf, 0x7e, 0x62}; + + /* Validate arguments */ + if (!osname || !uuid || strlen(osname) == 0) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + + /* + * UUID version 3 (MD5) namespace variant: + * hash namespace (uuid) together with name + */ + MD5Init(&md5c); + MD5Update(&md5c, &namespace, sizeof (namespace)); + MD5Update(&md5c, osname, strlen(osname)); + MD5Final(uuid, &md5c); + + /* + * To make UUID version 3, twiddle a few bits: + * xxxxxxxx-xxxx-Mxxx-Nxxx-xxxxxxxxxxxx + * [uint32]-[uin-t32]-[uin-t32][uint32] + * M should be 0x3 to indicate uuid v3 + * N should be 0x8, 0x9, 0xa, or 0xb + */ + uuid[6] = (uuid[6] & 0x0F) | 0x30; + uuid[8] = (uuid[8] & 0x3F) | 0x80; + + /* Print all caps */ + // dprintf("%s UUIDgen: [%s](%ld)->" + dprintf("%s UUIDgen: [%s](%ld) -> " + "[%02X%02X%02X%02X-%02X%02X-%02X%02X-" + "%02X%02X-%02X%02X%02X%02X%02X%02X]\n", + __func__, osname, strlen(osname), + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15]); + + return (0); +} + +int +uio_prefaultpages(ssize_t n, struct uio *uio) +{ + return (0); +} + +/* No #pragma weaks here! */ +void +dmu_buf_add_ref(dmu_buf_t *db, void *tag) +{ + dbuf_add_ref((dmu_buf_impl_t *)db, tag); +} + +boolean_t +dmu_buf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t object, + uint64_t blkid, void *tag) +{ + return (dbuf_try_add_ref(db, os, object, blkid, tag)); +} diff --git a/module/os/macos/zfs/zfs_znode.c b/module/os/macos/zfs/zfs_znode.c new file mode 100644 index 0000000000..f890035a89 --- /dev/null +++ b/module/os/macos/zfs/zfs_znode.c @@ -0,0 +1,2351 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright 2007-2009 Apple Inc. All rights reserved. + * Use is subject to license terms. 
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + */ + +/* Portions Copyright 2007 Jeremy Teo */ +/* Portions Copyright 2011 Martin Matuska */ +/* Portions Copyright 2013 Jorgen Lundman */ + +#ifdef _KERNEL +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* _KERNEL */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_prop.h" +#include "zfs_comutil.h" + +/* Used by fstat(1). */ +#ifndef __APPLE__ +SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 0, sizeof (znode_t), + "sizeof (znode_t)"); +#endif +void +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag); + + +// #define dprintf printf + + +/* + * Functions needed for userland (ie: libzpool) are not put under + * #ifdef_KERNEL; the rest of the functions have dependencies + * (such as VFS logic) that will not compile easily in userland. + */ +#ifdef _KERNEL +/* + * This is used by the test suite so that it can delay znodes from being + * freed in order to inspect the unlinked set. + */ +int zfs_unlink_suspend_progress = 0; + +/* + * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on + * z_rangelock. It will modify the offset and length of the lock to reflect + * znode-specific information, and convert RL_APPEND to RL_WRITER. This is + * called with the rangelock_t's rl_lock held, which avoids races. + */ + +kmem_cache_t *znode_cache = NULL; +static kmem_cache_t *znode_hold_cache = NULL; +unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ; + +/* + * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on + * z_rangelock. It will modify the offset and length of the lock to reflect + * znode-specific information, and convert RL_APPEND to RL_WRITER. This is + * called with the rangelock_t's rl_lock held, which avoids races. + */ +static void +zfs_rangelock_cb(zfs_locked_range_t *new, void *arg) +{ + znode_t *zp = arg; + + /* + * If in append mode, convert to writer and lock starting at the + * current end of file. + */ + if (new->lr_type == RL_APPEND) { + new->lr_offset = zp->z_size; + new->lr_type = RL_WRITER; + } + + /* + * If we need to grow the block size then lock the whole file range. + */ + uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length); + if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || + zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) { + new->lr_offset = 0; + new->lr_length = UINT64_MAX; + } +} + +/*ARGSUSED*/ +#if 0 // unused function +static void +znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) +{ + /* + * We should never drop all dbuf refs without first clearing + * the eviction callback. + */ + panic("evicting znode %p\n", user_ptr); +} +#endif + +extern struct vop_vector zfs_vnodeops; +extern struct vop_vector zfs_fifoops; +extern struct vop_vector zfs_shareops; + +/* + * XXX: We cannot use this function as a cache constructor, because + * there is one global cache for all file systems and we need + * to pass vfsp here, which is not possible, because argument + * 'cdrarg' is defined at kmem_cache_create() time. 
+ */ +/*ARGSUSED*/ +static int +zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) +{ + znode_t *zp = buf; + + list_link_init(&zp->z_link_node); + + mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); + mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); + zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); + + mutex_init(&zp->z_attach_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&zp->z_attach_cv, NULL, CV_DEFAULT, NULL); + + zp->z_dirlocks = NULL; + zp->z_acl_cached = NULL; + zp->z_xattr_cached = NULL; + zp->z_xattr_parent = 0; + zp->z_moved = 0; + zp->z_skip_truncate_undo_decmpfs = B_FALSE; + return (0); +} + +/*ARGSUSED*/ +static void +zfs_znode_cache_destructor(void *buf, void *arg) +{ + znode_t *zp = buf; + + ASSERT(ZTOV(zp) == NULL); + ASSERT(!list_link_active(&zp->z_link_node)); + mutex_destroy(&zp->z_lock); + rw_destroy(&zp->z_map_lock); + rw_destroy(&zp->z_parent_lock); + rw_destroy(&zp->z_name_lock); + mutex_destroy(&zp->z_acl_lock); + rw_destroy(&zp->z_xattr_lock); + zfs_rangelock_fini(&zp->z_rangelock); + mutex_destroy(&zp->z_attach_lock); + cv_destroy(&zp->z_attach_cv); + + ASSERT(zp->z_dirlocks == NULL); + ASSERT(zp->z_acl_cached == NULL); + ASSERT(zp->z_xattr_cached == NULL); +} + +static int +zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags) +{ + znode_hold_t *zh = buf; + + mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL); + zfs_refcount_create(&zh->zh_refcount); + zh->zh_obj = ZFS_NO_OBJECT; + + return (0); +} + +static void +zfs_znode_hold_cache_destructor(void *buf, void *arg) +{ + znode_hold_t *zh = buf; + + mutex_destroy(&zh->zh_lock); + zfs_refcount_destroy(&zh->zh_refcount); +} + +void +zfs_znode_init(void) +{ + /* + * Initialize zcache. The KMC_SLAB hint is used in order that it be + * backed by kmalloc() when on the Linux slab in order that any + * wait_on_bit() operations on the related inode operate properly. + */ + ASSERT(znode_cache == NULL); + znode_cache = kmem_cache_create("zfs_znode_cache", + sizeof (znode_t), 0, + zfs_znode_cache_constructor, + zfs_znode_cache_destructor, NULL, NULL, + NULL, 0); + + ASSERT(znode_hold_cache == NULL); + znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", + sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor, + zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0); +} + +void +zfs_znode_fini(void) +{ + /* + * Cleanup zcache + */ + if (znode_cache) + kmem_cache_destroy(znode_cache); + znode_cache = NULL; + + if (znode_hold_cache) + kmem_cache_destroy(znode_hold_cache); + znode_hold_cache = NULL; +} + +/* + * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to + * serialize access to a znode and its SA buffer while the object is being + * created or destroyed. This kind of locking would normally reside in the + * znode itself but in this case that's impossible because the znode and SA + * buffer may not yet exist. Therefore the locking is handled externally + * with an array of mutexs and AVLs trees which contain per-object locks. + * + * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted + * in to the correct AVL tree and finally the per-object lock is held. In + * zfs_znode_hold_exit() the process is reversed. The per-object lock is + * released, removed from the AVL tree and destroyed if there are no waiters. 
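+ * + * Illustrative usage, as seen in zfs_zget_ext() and zfs_mknode() below: + * + *	zh = zfs_znode_hold_enter(zfsvfs, obj); + *	... create, look up or destroy the znode and its SA buffer ... + *	zfs_znode_hold_exit(zfsvfs, zh);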
+ * + * This scheme has two important properties: + * + * 1) No memory allocations are performed while holding one of the z_hold_locks. + * This ensures evict(), which can be called from direct memory reclaim, will + * never block waiting on a z_hold_locks which just happens to have hashed + * to the same index. + * + * 2) All locks used to serialize access to an object are per-object and never + * shared. This minimizes lock contention without creating a large number + * of dedicated locks. + * + * On the downside it does require znode_lock_t structures to be frequently + * allocated and freed. However, because these are backed by a kmem cache + * and very short lived this cost is minimal. + */ +int +zfs_znode_hold_compare(const void *a, const void *b) +{ + const znode_hold_t *zh_a = (const znode_hold_t *)a; + const znode_hold_t *zh_b = (const znode_hold_t *)b; + + return (TREE_CMP(zh_a->zh_obj, zh_b->zh_obj)); +} + +boolean_t +zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj) +{ + znode_hold_t *zh, search; + int i = ZFS_OBJ_HASH(zfsvfs, obj); + boolean_t held; + + search.zh_obj = obj; + + mutex_enter(&zfsvfs->z_hold_locks[i]); + zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); + held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE; + mutex_exit(&zfsvfs->z_hold_locks[i]); + + return (held); +} + +static znode_hold_t * +zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj) +{ + znode_hold_t *zh, *zh_new, search; + int i = ZFS_OBJ_HASH(zfsvfs, obj); + boolean_t found = B_FALSE; + + zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP); + zh_new->zh_obj = obj; + search.zh_obj = obj; + + mutex_enter(&zfsvfs->z_hold_locks[i]); + zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); + if (likely(zh == NULL)) { + zh = zh_new; + avl_add(&zfsvfs->z_hold_trees[i], zh); + } else { + ASSERT3U(zh->zh_obj, ==, obj); + found = B_TRUE; + } + zfs_refcount_add(&zh->zh_refcount, NULL); + mutex_exit(&zfsvfs->z_hold_locks[i]); + + if (found == B_TRUE) + kmem_cache_free(znode_hold_cache, zh_new); + + ASSERT(MUTEX_NOT_HELD(&zh->zh_lock)); + ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0); + mutex_enter(&zh->zh_lock); + + return (zh); +} + +static void +zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh) +{ + int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj); + boolean_t remove = B_FALSE; + + ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj)); + ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0); + mutex_exit(&zh->zh_lock); + + mutex_enter(&zfsvfs->z_hold_locks[i]); + if (zfs_refcount_remove(&zh->zh_refcount, NULL) == 0) { + avl_remove(&zfsvfs->z_hold_trees[i], zh); + remove = B_TRUE; + } + mutex_exit(&zfsvfs->z_hold_locks[i]); + + if (remove == B_TRUE) + kmem_cache_free(znode_hold_cache, zh); +} + +int +zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +{ + int error = 0; +#if 0 // FIXME, uses vnode struct, not ptr + zfs_acl_ids_t acl_ids; + vattr_t vattr; + znode_t *sharezp; + struct vnode *vp, *vnode; + znode_t *zp; + + vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_TYPE; + vattr.va_type = VDIR; + vattr.va_mode = S_IFDIR|0555; + vattr.va_uid = crgetuid(kcred); + vattr.va_gid = crgetgid(kcred); + + sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); + sharezp->z_moved = 0; + sharezp->z_unlinked = 0; + sharezp->z_atime_dirty = 0; + sharezp->z_zfsvfs = zfsvfs; + sharezp->z_is_sa = zfsvfs->z_use_sa; + + sharezp->z_vnode = vnode; + vnode.v_data = sharezp; + + vp = ZTOV(sharezp); + vp->v_type = VDIR; + + VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, + kcred, NULL, &acl_ids)); + 
zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); + ASSERT3P(zp, ==, sharezp); + POINTER_INVALIDATE(&sharezp->z_zfsvfs); + error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); + zfsvfs->z_shares_dir = sharezp->z_id; + + zfs_acl_ids_free(&acl_ids); + ZTOV(sharezp)->v_data = NULL; + ZTOV(sharezp)->v_count = 0; + ZTOV(sharezp)->v_holdcnt = 0; + zp->z_vnode = NULL; + sa_handle_destroy(sharezp->z_sa_hdl); + sharezp->z_vnode = NULL; + kmem_cache_free(znode_cache, sharezp); +#endif + return (error); +} + +/* + * define a couple of values we need available + * for both 64 and 32 bit environments. + */ +#ifndef NBITSMINOR64 +#define NBITSMINOR64 32 +#endif +#ifndef MAXMAJ64 +#define MAXMAJ64 0xffffffffUL +#endif +#ifndef MAXMIN64 +#define MAXMIN64 0xffffffffUL +#endif + +/* + * Create special expldev for ZFS private use. + * Can't use standard expldev since it doesn't do + * what we want. The standard expldev() takes a + * dev32_t in LP64 and expands it to a long dev_t. + * We need an interface that takes a dev32_t in ILP32 + * and expands it to a long dev_t. + */ +static uint64_t +zfs_expldev(dev_t dev) +{ + return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev)); +} +/* + * Special cmpldev for ZFS private use. + * Can't use standard cmpldev since it takes + * a long dev_t and compresses it to dev32_t in + * LP64. We need to do a compaction of a long dev_t + * to a dev32_t in ILP32. + */ +dev_t +zfs_cmpldev(uint64_t dev) +{ + return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64))); +} + +static void +zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, + dmu_buf_t *db, dmu_object_type_t obj_type, + sa_handle_t *sa_hdl) +{ + ASSERT(zfs_znode_held(zfsvfs, zp->z_id)); + + mutex_enter(&zp->z_lock); + + ASSERT(zp->z_sa_hdl == NULL); + ASSERT(zp->z_acl_cached == NULL); + if (sa_hdl == NULL) { + VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, + SA_HDL_SHARED, &zp->z_sa_hdl)); + } else { + zp->z_sa_hdl = sa_hdl; + sa_set_userp(sa_hdl, zp); + } + + zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; + + mutex_exit(&zp->z_lock); +} + +void +zfs_znode_dmu_fini(znode_t *zp) +{ + ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked || + RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); + + sa_handle_destroy(zp->z_sa_hdl); + zp->z_sa_hdl = NULL; +} + +#if 0 // Until we need it ? +static void +zfs_vnode_destroy(struct vnode *vp) +{ + znode_t *zp = VTOZ(vp); + zfsvfs_t *zfsvfs = ZTOZSB(zp); + + if (vp != NULL) { + znode_t *zp = VTOZ(vp); + + if (zp != NULL) { + mutex_enter(&zfsvfs->z_znodes_lock); + if (list_link_active(&zp->z_link_node)) { + list_remove(&zfsvfs->z_all_znodes, zp); + } + mutex_exit(&zfsvfs->z_znodes_lock); + + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } + + kmem_cache_free(znode_cache, zp); + } + + vnode_clearfsnode(vp); + vnode_put(vp); + vnode_recycle(vp); + } +} +#endif + +/* + * Construct a new znode/vnode and intialize. 
+ * + * This does not do a call to dmu_set_user() that is + * up to the caller to do, in case you don't want to + * return the znode + */ +static znode_t * +zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, + dmu_object_type_t obj_type, sa_handle_t *hdl) +{ + znode_t *zp; + struct vnode *vp; + uint64_t mode; + uint64_t parent; + sa_bulk_attr_t bulk[11]; + int count = 0; + uint64_t projid = ZFS_DEFAULT_PROJID; + + zp = kmem_cache_alloc(znode_cache, KM_SLEEP); + + ASSERT(zp->z_dirlocks == NULL); + ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); + zp->z_moved = 0; + + /* + * Defer setting z_zfsvfs until the znode is ready to be a candidate for + * the zfs_znode_move() callback. + */ + zp->z_vnode = NULL; + zp->z_sa_hdl = NULL; + zp->z_unlinked = 0; + zp->z_atime_dirty = 0; + zp->z_mapcnt = 0; + zp->z_id = db->db_object; + zp->z_blksz = blksz; + zp->z_seq = 0x7A4653; + zp->z_sync_cnt = 0; + + zp->z_is_mapped = 0; + zp->z_is_ctldir = 0; + zp->z_vid = 0; + zp->z_uid = 0; + zp->z_gid = 0; + zp->z_size = 0; + zp->z_name_cache[0] = 0; + zp->z_finder_parentid = 0; + zp->z_finder_hardlink = FALSE; + + taskq_init_ent(&zp->z_attach_taskq); + + vp = ZTOV(zp); /* Does nothing in OSX */ + + zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &zp->z_size, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &zp->z_links, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + &zp->z_atime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &zp->z_uid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, + &zp->z_gid, 8); + + if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 || + (dmu_objset_projectquota_enabled(zfsvfs->z_os) && + (zp->z_pflags & ZFS_PROJID) && + sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) { + if (hdl == NULL) + sa_handle_destroy(zp->z_sa_hdl); + zp->z_sa_hdl = NULL; + printf("znode_alloc: sa_bulk_lookup failed - aborting\n"); + kmem_cache_free(znode_cache, zp); + return (NULL); + } + + zp->z_projid = projid; + zp->z_mode = mode; + + mutex_enter(&zfsvfs->z_znodes_lock); + list_insert_tail(&zfsvfs->z_all_znodes, zp); + membar_producer(); + /* + * Everything else must be valid before assigning z_zfsvfs makes the + * znode eligible for zfs_znode_move(). + */ + zp->z_zfsvfs = zfsvfs; + mutex_exit(&zfsvfs->z_znodes_lock); + + return (zp); +} + + +static uint64_t empty_xattr; +static uint64_t pad[4]; +static zfs_acl_phys_t acl_phys; +/* + * Create a new DMU object to hold a zfs znode. + * + * IN: dzp - parent directory for new znode + * vap - file attributes for new znode + * tx - dmu transaction id for zap operations + * cr - credentials of caller + * flag - flags: + * IS_ROOT_NODE - new object will be root + * IS_XATTR - new object is an attribute + * bonuslen - length of bonus buffer + * setaclp - File/Dir initial ACL + * fuidp - Tracks fuid allocation. + * + * OUT: zpp - allocated znode + * + * OS X implementation notes: + * + * The caller of zfs_mknode() is expected to call zfs_znode_getvnode() + * AFTER the dmu_tx_commit() is performed. This prevents deadlocks + * since vnode_create can indirectly attempt to clean a dirty vnode. 
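+ * + * Illustrative caller-side ordering (sketch only, error handling omitted): + * + *	zfs_mknode(dzp, vap, tx, cr, flag, &zp, &acl_ids); + *	dmu_tx_commit(tx); + *	zfs_znode_getvnode(zp, zfsvfs);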
+ * + * The current list of callers includes: + * zfs_vnop_create + * zfs_vnop_mkdir + * zfs_vnop_symlink + * zfs_obtain_xattr + * zfs_make_xattrdir + */ +void +zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, + uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) +{ + uint64_t crtime[2], atime[2], mtime[2], ctime[2]; + uint64_t mode, size, links, parent, pflags; + uint64_t projid = ZFS_DEFAULT_PROJID; + uint64_t dzp_pflags = 0; + uint64_t rdev = 0; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + dmu_buf_t *db; + timestruc_t now; + uint64_t gen, obj; + int bonuslen; + int dnodesize; + sa_handle_t *sa_hdl; + dmu_object_type_t obj_type; + sa_bulk_attr_t *sa_attrs; + int cnt = 0; + zfs_acl_locator_cb_t locate = { 0 }; + int err = 0; + znode_hold_t *zh; + + ASSERT(vap && (vap->va_mask & (ATTR_TYPE|ATTR_MODE)) == + (ATTR_TYPE|ATTR_MODE)); + + if (zfsvfs->z_replay) { + obj = vap->va_nodeid; + now = vap->va_ctime; /* see zfs_replay_create() */ + gen = vap->va_nblocks; /* ditto */ + dnodesize = vap->va_fsid; /* ditto */ + } else { + obj = 0; + gethrestime(&now); + gen = dmu_tx_get_txg(tx); + dnodesize = dmu_objset_dnodesize(zfsvfs->z_os); + } + + if (dnodesize == 0) + dnodesize = DNODE_MIN_SIZE; + + obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; + bonuslen = (obj_type == DMU_OT_SA) ? + DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE; + + /* + * Create a new DMU object. + */ + /* + * There's currently no mechanism for pre-reading the blocks that will + * be needed to allocate a new object, so we accept the small chance + * that there will be an i/o error and we will fail one of the + * assertions below. + */ + if (vap->va_type == VDIR) { + if (zfsvfs->z_replay) { + VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj, + zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, + obj_type, bonuslen, dnodesize, tx)); + } else { + obj = zap_create_norm_dnsize(zfsvfs->z_os, + zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, + obj_type, bonuslen, dnodesize, tx); + } + } else { + if (zfsvfs->z_replay) { + VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj, + DMU_OT_PLAIN_FILE_CONTENTS, 0, + obj_type, bonuslen, dnodesize, tx)); + } else { + obj = dmu_object_alloc_dnsize(zfsvfs->z_os, + DMU_OT_PLAIN_FILE_CONTENTS, 0, + obj_type, bonuslen, dnodesize, tx); + } + } + + zh = zfs_znode_hold_enter(zfsvfs, obj); + VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); + + /* + * If this is the root, fix up the half-initialized parent pointer + * to reference the just-allocated physical data area. + */ + if (flag & IS_ROOT_NODE) { + dzp->z_id = obj; + } else { + dzp_pflags = dzp->z_pflags; + } + + /* + * If parent is an xattr, so am I. + */ + if (dzp_pflags & ZFS_XATTR) { + flag |= IS_XATTR; + } + + if (zfsvfs->z_use_fuids) + pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; + else + pflags = 0; + + if (vap->va_type == VDIR) { + size = 2; /* contents ("." and "..") */ + links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; + } else { + size = links = 0; + } + + if (vap->va_type == VBLK || vap->va_type == VCHR) { + rdev = zfs_expldev(vap->va_rdev); + } + + parent = dzp->z_id; + mode = acl_ids->z_mode; + if (flag & IS_XATTR) + pflags |= ZFS_XATTR; + + if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) { + /* + * With ZFS_PROJID flag, we can easily know whether there is + * project ID stored on disk or not. See zfs_space_delta_cb(). + */ + if (obj_type != DMU_OT_ZNODE && + dmu_objset_projectquota_enabled(zfsvfs->z_os)) + pflags |= ZFS_PROJID; + + /* + * Inherit project ID from parent if required. 
+ */ + projid = zfs_inherit_projid(dzp); + if (dzp->z_pflags & ZFS_PROJINHERIT) + pflags |= ZFS_PROJINHERIT; + } + + /* + * No execs denied will be deterimed when zfs_mode_compute() is called. + */ + pflags |= acl_ids->z_aclp->z_hints & + (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| + ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); + + ZFS_TIME_ENCODE(&now, crtime); + ZFS_TIME_ENCODE(&now, ctime); + + if (vap->va_mask & ATTR_ATIME) { + ZFS_TIME_ENCODE(&vap->va_atime, atime); + } else { + ZFS_TIME_ENCODE(&now, atime); + } + + if (vap->va_mask & ATTR_MTIME) { + ZFS_TIME_ENCODE(&vap->va_mtime, mtime); + } else { + ZFS_TIME_ENCODE(&now, mtime); + } + + /* Now add in all of the "SA" attributes */ + VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, + &sa_hdl)); + + /* + * Setup the array of attributes to be replaced/set on the new file + * + * order for DMU_OT_ZNODE is critical since it needs to be constructed + * in the old znode_phys_t format. Don't change this ordering + */ + sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); + + if (obj_type == DMU_OT_ZNODE) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), + NULL, &atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), + NULL, &mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), + NULL, &crtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), + NULL, &gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), + NULL, &mode, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), + NULL, &size, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), + NULL, &parent, 8); + } else { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), + NULL, &mode, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), + NULL, &size, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), + NULL, &gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), + NULL, &acl_ids->z_fuid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), + NULL, &acl_ids->z_fgid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), + NULL, &parent, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), + NULL, &pflags, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), + NULL, &atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), + NULL, &mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), + NULL, &crtime, 16); + } + + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); + + if (obj_type == DMU_OT_ZNODE) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, + &empty_xattr, 8); + } else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && + pflags & ZFS_PROJID) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs), + NULL, &projid, 8); + } + if (obj_type == DMU_OT_ZNODE || + (vap->va_type == VBLK || vap->va_type == VCHR)) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), + NULL, &rdev, 8); + + } + if (obj_type == DMU_OT_ZNODE) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), + NULL, &pflags, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, + &acl_ids->z_fuid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, + &acl_ids->z_fgid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, + sizeof (uint64_t) * 4); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), 
NULL, + &acl_phys, sizeof (zfs_acl_phys_t)); + } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, + &acl_ids->z_aclp->z_acl_count, 8); + locate.cb_aclp = acl_ids->z_aclp; + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), + zfs_acl_data_locator, &locate, + acl_ids->z_aclp->z_acl_bytes); + mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, + acl_ids->z_fuid, acl_ids->z_fgid); + } + + VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); + + if (!(flag & IS_ROOT_NODE)) { + /* + * We must not hold any locks while calling vnode_create inside + * zfs_znode_alloc(), as it may call either of vnop_reclaim, or + * vnop_fsync. If it is not enough to just release ZFS_OBJ_HOLD + * we will have to attach the vnode after the dmu_commit like + * maczfs does, in each vnop caller. + */ + do { + *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); + } while (*zpp == NULL); + + VERIFY(*zpp != NULL); + VERIFY(dzp != NULL); + } else { + /* + * If we are creating the root node, the "parent" we + * passed in is the znode for the root. + */ + *zpp = dzp; + + (*zpp)->z_sa_hdl = sa_hdl; + } + + (*zpp)->z_pflags = pflags; + (*zpp)->z_mode = mode; + (*zpp)->z_dnodesize = dnodesize; + (*zpp)->z_projid = projid; + + if (vap->va_mask & ATTR_XVATTR) + zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); + + if (obj_type == DMU_OT_ZNODE || + acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { + err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); + ASSERT(err == 0); + } + + kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); + zfs_znode_hold_exit(zfsvfs, zh); +} + +/* + * Update in-core attributes. It is assumed the caller will be doing an + * sa_bulk_update to push the changes out. 
+ */ +void +zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) +{ + xoptattr_t *xoap; + + xoap = xva_getxoptattr(xvap); + ASSERT(xoap); + + if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { + uint64_t times[2]; + ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); + (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), + ×, sizeof (times), tx); + XVA_SET_RTN(xvap, XAT_CREATETIME); + } + if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { + ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_READONLY); + } + if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { + ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_HIDDEN); + } + if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { + ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_SYSTEM); + } + if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { + ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_ARCHIVE); + } + if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { + ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_IMMUTABLE); + } + if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { + ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_NOUNLINK); + } + if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { + ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_APPENDONLY); + } + if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { + ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_NODUMP); + } + if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { + ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_OPAQUE); + } + if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { + ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, + xoap->xoa_av_quarantined, zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); + } + if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { + ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_AV_MODIFIED); + } + if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { + zfs_sa_set_scanstamp(zp, xvap, tx); + XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); + } + if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { + ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_REPARSE); + } + if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { + ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_OFFLINE); + } + if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { + ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, + zp->z_pflags, tx); + XVA_SET_RTN(xvap, XAT_SPARSE); + } +} + +int +zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) +{ + return (zfs_zget_ext(zfsvfs, obj_num, zpp, 0)); +} + +int +zfs_zget_ext(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp, + int flags) +{ + dmu_object_info_t doi; + dmu_buf_t *db; + znode_t *zp; + znode_hold_t *zh; + struct vnode *vp = NULL; + sa_handle_t *hdl; + uint32_t vid; + int err; + + dprintf("+zget %llu\n", obj_num); + + *zpp = NULL; + +again: + zh = zfs_znode_hold_enter(zfsvfs, obj_num); + + err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); + if (err) { + zfs_znode_hold_exit(zfsvfs, zh); + return (err); + } + + dmu_object_info_from_db(db, &doi); + if (doi.doi_bonus_type != DMU_OT_SA && + (doi.doi_bonus_type != DMU_OT_ZNODE || + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t)))) { + sa_buf_rele(db, NULL); + 
zfs_znode_hold_exit(zfsvfs, zh); + return (SET_ERROR(EINVAL)); + } + + hdl = dmu_buf_get_user(db); + if (hdl != NULL) { + zp = sa_get_userdata(hdl); + + + /* + * Since "SA" does immediate eviction we + * should never find a sa handle that doesn't + * know about the znode. + */ + ASSERT3P(zp, !=, NULL); + + mutex_enter(&zp->z_lock); + + /* + * Since zp may disappear after we unlock below, + * we save a copy of vp and it's vid + */ + vid = zp->z_vid; + vp = ZTOV(zp); + + /* + * Since we do immediate eviction of the z_dbuf, we + * should never find a dbuf with a znode that doesn't + * know about the dbuf. + */ + ASSERT3U(zp->z_id, ==, obj_num); + + /* + * OS X can return the znode when the file is unlinked + * in order to support the sync of open-unlinked files + */ + if (!(flags & ZGET_FLAG_UNLINKED) && zp->z_unlinked) { + mutex_exit(&zp->z_lock); + sa_buf_rele(db, NULL); + zfs_znode_hold_exit(zfsvfs, zh); + return (ENOENT); + } + + mutex_exit(&zp->z_lock); + sa_buf_rele(db, NULL); + zfs_znode_hold_exit(zfsvfs, zh); + + /* + * We are racing zfs_znode_getvnode() and we got here first, we + * need to let it get ahead + */ + if (!vp) { + + // Wait until attached, if we can. + if ((flags & ZGET_FLAG_ASYNC) && + zfs_znode_asyncwait(zp) == 0) { + dprintf("%s: waited on z_vnode OK\n", __func__); + } else { + dprintf("%s: async racing attach\n", __func__); + // Could be zp is being torn down, idle a bit, + // and retry. This branch is rarely executed. + kpreempt(KPREEMPT_SYNC); + } + goto again; + } + + /* + * Due to vnode_create() -> zfs_fsync() -> zil_commit() -> + * zget() -> vnode_getwithvid() -> deadlock. Unsure why + * vnode_getwithvid() ends up sleeping in msleep() but + * vnode_get() does not. + * As we can deadlock here using vnode_getwithvid() we will use + * the simpler vnode_get() in the ASYNC cases. We verify the + * vids match below. + */ + if ((flags & ZGET_FLAG_ASYNC)) + err = vnode_get(vp); + else + err = vnode_getwithvid(vp, vid); + + if (err != 0) { + dprintf("ZFS: vnode_get() returned %d\n", err); + kpreempt(KPREEMPT_SYNC); + goto again; + } + + /* + * Since we had to drop all of our locks above, make sure + * that we have the vnode and znode we had before. + */ + mutex_enter(&zp->z_lock); + if ((vid != zp->z_vid) || (vp != ZTOV(zp))) { + mutex_exit(&zp->z_lock); + /* + * Release the wrong vp from vnode_getwithvid(). + */ + VN_RELE(vp); + dprintf("ZFS: the vids do not match part 1\n"); + goto again; + } + if (vnode_vid(vp) != zp->z_vid) + printf("ZFS: the vids do not match\n"); + mutex_exit(&zp->z_lock); + + *zpp = zp; + + return (0); + } // if vnode != NULL + + /* + * Not found create new znode/vnode + * but only if file exists. + * + * There is a small window where zfs_vget() could + * find this object while a file create is still in + * progress. This is checked for in zfs_znode_alloc() + * + * if zfs_znode_alloc() fails it will drop the hold on the + * bonus buffer. 
+ */ + + zp = NULL; + zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, + doi.doi_bonus_type, NULL); + if (zp == NULL) { + err = SET_ERROR(ENOENT); + zfs_znode_hold_exit(zfsvfs, zh); + dprintf("zget returning %d\n", err); + return (err); + } + + printf("zget create: %llu setting to %p\n", obj_num, zp); + *zpp = zp; + + // Spawn taskq to attach while we are locked + if (flags & ZGET_FLAG_ASYNC) { + zfs_znode_asyncgetvnode(zp, zfsvfs); + } + + zfs_znode_hold_exit(zfsvfs, zh); + + /* Attach a vnode to our new znode */ + if (!(flags & ZGET_FLAG_ASYNC)) { + zfs_znode_getvnode(zp, zfsvfs); + } + + dprintf("zget returning %d\n", err); + return (err); +} + + +int +zfs_rezget(znode_t *zp) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_object_info_t doi; + dmu_buf_t *db; + struct vnode *vp; + uint64_t obj_num = zp->z_id; + uint64_t mode, size; + sa_bulk_attr_t bulk[8]; + int err; + int count = 0; + uint64_t gen; + uint64_t projid = ZFS_DEFAULT_PROJID; + znode_hold_t *zh; + + if (zp->z_is_ctldir) + return (0); + + zh = zfs_znode_hold_enter(zfsvfs, obj_num); + + mutex_enter(&zp->z_acl_lock); + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + mutex_exit(&zp->z_acl_lock); + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } + + rw_exit(&zp->z_xattr_lock); + + ASSERT(zp->z_sa_hdl == NULL); + err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); + if (err) { + zfs_znode_hold_exit(zfsvfs, zh); + return (err); + } + + dmu_object_info_from_db(db, &doi); + if (doi.doi_bonus_type != DMU_OT_SA && + (doi.doi_bonus_type != DMU_OT_ZNODE || + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t)))) { + sa_buf_rele(db, NULL); + zfs_znode_hold_exit(zfsvfs, zh); + return (SET_ERROR(EINVAL)); + } + + zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); + size = zp->z_size; + + /* reload cached values */ + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, + &gen, sizeof (gen)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + &zp->z_size, sizeof (zp->z_size)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + &zp->z_links, sizeof (zp->z_links)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + &zp->z_pflags, + sizeof (zp->z_pflags)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + &zp->z_atime, sizeof (zp->z_atime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + &zp->z_uid, sizeof (zp->z_uid)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, + &zp->z_gid, sizeof (zp->z_gid)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + &mode, sizeof (mode)); + + if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { + zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); + return (SET_ERROR(EIO)); + } + + if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) { + err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), + &projid, 8); + if (err != 0 && err != ENOENT) { + zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); + return (SET_ERROR(err)); + } + } + + zp->z_projid = projid; + zp->z_mode = mode; + + if (gen != zp->z_gen) { + zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); + return (SET_ERROR(EIO)); + } + + /* + * XXXPJD: Not sure how is that possible, but under heavy + * zfs recv -F load it happens that z_gen is the same, but + * vnode type is different than znode type. 
This would mean + * that for example regular file was replaced with directory + * which has the same object number. + */ + vp = ZTOV(zp); + if (vp != NULL && + vnode_vtype(vp) != IFTOVT((mode_t)zp->z_mode)) { + zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); + return (EIO); + } + + zp->z_blksz = doi.doi_data_block_size; + if (vp != NULL) { + vn_pages_remove(vp, 0, 0); + if (zp->z_size != size) + vnode_pager_setsize(vp, zp->z_size); + } + + /* + * If the file has zero links, then it has been unlinked on the send + * side and it must be in the received unlinked set. + * We call zfs_znode_dmu_fini() now to prevent any accesses to the + * stale data and to prevent automatical removal of the file in + * zfs_zinactive(). The file will be removed either when it is removed + * on the send side and the next incremental stream is received or + * when the unlinked set gets processed. + */ + zp->z_unlinked = (zp->z_links == 0); + if (zp->z_unlinked) + zfs_znode_dmu_fini(zp); + + zfs_znode_hold_exit(zfsvfs, zh); + + return (0); +} + +void +zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + objset_t *os = zfsvfs->z_os; + uint64_t obj = zp->z_id; + uint64_t acl_obj = zfs_external_acl(zp); + znode_hold_t *zh; + + zh = zfs_znode_hold_enter(zfsvfs, obj); + if (acl_obj) { + VERIFY(!zp->z_is_sa); + VERIFY(0 == dmu_object_free(os, acl_obj, tx)); + } + VERIFY(0 == dmu_object_free(os, obj, tx)); + zfs_znode_dmu_fini(zp); + zfs_znode_hold_exit(zfsvfs, zh); +} + +void +zfs_zinactive(znode_t *zp) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + uint64_t z_id = zp->z_id; + znode_hold_t *zh; + + ASSERT(zp->z_sa_hdl); + + /* + * Don't allow a zfs_zget() while were trying to release this znode + */ + zh = zfs_znode_hold_enter(zfsvfs, z_id); + + mutex_enter(&zp->z_lock); + + /* + * If this was the last reference to a file with no links, remove + * the file from the file system unless the file system is mounted + * read-only. That can happen, for example, if the file system was + * originally read-write, the file was opened, then unlinked and + * the file system was made read-only before the file was finally + * closed. The file will remain in the unlinked set. + */ + if (zp->z_unlinked) { + ASSERT(!zfsvfs->z_issnap); + + if (!(vfs_isrdonly(zfsvfs->z_vfs)) && + !zfs_unlink_suspend_progress) { + mutex_exit(&zp->z_lock); + zfs_znode_hold_exit(zfsvfs, zh); + zfs_rmnode(zp); + return; + } + } + + mutex_exit(&zp->z_lock); + zfs_znode_dmu_fini(zp); + + zfs_znode_hold_exit(zfsvfs, zh); +} + +void +zfs_znode_free(znode_t *zp) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + + mutex_enter(&zfsvfs->z_znodes_lock); + zp->z_vnode = NULL; + zp->z_zfsvfs = NULL; + POINTER_INVALIDATE(&zp->z_zfsvfs); + list_remove(&zfsvfs->z_all_znodes, zp); /* XXX */ + mutex_exit(&zfsvfs->z_znodes_lock); + + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + if (zp->z_xattr_cached) { + nvlist_free(zp->z_xattr_cached); + zp->z_xattr_cached = NULL; + } + + ASSERT(zp->z_sa_hdl == NULL); + + kmem_cache_free(znode_cache, zp); +} + + +/* + * Prepare to update znode time stamps. + * + * IN: zp - znode requiring timestamp update + * flag - ATTR_MTIME, ATTR_CTIME, ATTR_ATIME flags + * have_tx - true of caller is creating a new txg + * + * OUT: zp - new atime (via underlying inode's i_atime) + * mtime - new mtime + * ctime - new ctime + * + * NOTE: The arguments are somewhat redundant. 
The following condition + * is always true: + * + * have_tx == !(flag & ATTR_ATIME) + */ +void +zfs_tstamp_update_setup_ext(znode_t *zp, uint_t flag, uint64_t mtime[2], + uint64_t ctime[2], boolean_t have_tx) +{ + timestruc_t now; + + gethrestime(&now); + + if (have_tx) { /* will sa_bulk_update happen really soon? */ + zp->z_atime_dirty = 0; + zp->z_seq++; + } else { + zp->z_atime_dirty = 1; + } + + if (flag & ATTR_ATIME) { + ZFS_TIME_ENCODE(&now, zp->z_atime); + } + + if (flag & ATTR_MTIME) { + ZFS_TIME_ENCODE(&now, mtime); + if (zp->z_zfsvfs->z_use_fuids) { + zp->z_pflags |= (ZFS_ARCHIVE | + ZFS_AV_MODIFIED); + } + } + + if (flag & ATTR_CTIME) { + ZFS_TIME_ENCODE(&now, ctime); + if (zp->z_zfsvfs->z_use_fuids) + zp->z_pflags |= ZFS_ARCHIVE; + } +} + +void +zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], + uint64_t ctime[2]) +{ + zfs_tstamp_update_setup_ext(zp, flag, mtime, ctime, B_TRUE); +} + +/* + * Grow the block size for a file. + * + * IN: zp - znode of file to free data in. + * size - requested block size + * tx - open transaction. + * + * NOTE: this function assumes that the znode is write locked. + */ +void +zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) +{ + int error; + u_longlong_t dummy; + + if (size <= zp->z_blksz) + return; + /* + * If the file size is already greater than the current blocksize, + * we will not grow. If there is more than one block in a file, + * the blocksize cannot change. + */ + if (zp->z_blksz && zp->z_size > zp->z_blksz) + return; + + error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, + zp->z_id, + size, 0, tx); + + if (error == ENOTSUP) + return; + ASSERT(error == 0); + + /* What blocksize did we actually get? */ + dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); +} + +#ifdef sun +/* + * This is a dummy interface used when pvn_vplist_dirty() should *not* + * be calling back into the fs for a putpage(). E.g.: when truncating + * a file, the pages being "thrown away* don't need to be written out. + */ +/* ARGSUSED */ +static int +zfs_no_putpage(struct vnode *vp, page_t *pp, u_offset_t *offp, size_t *lenp, + int flags, cred_t *cr) +{ + ASSERT(0); + return (0); +} +#endif /* sun */ + +/* + * Increase the file length + * + * IN: zp - znode of file to free data in. + * end - new end-of-file + * + * RETURN: 0 on success, error code on failure + */ +static int +zfs_extend(znode_t *zp, uint64_t end) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + dmu_tx_t *tx; + zfs_locked_range_t *lr; + uint64_t newblksz; + int error; + + /* + * We will change zp_size, lock the whole file. + */ + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + + /* + * Nothing to do if file already at desired length. + */ + if (end <= zp->z_size) { + zfs_rangelock_exit(lr); + return (0); + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + if (end > zp->z_blksz && + (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { + /* + * We are growing the file past the current block size. + */ + if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { + /* + * File's blocksize is already larger than the + * "recordsize" property. Only let it grow to + * the next power of 2. 
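+ * (Illustratively: a 96K block would grow to at most the next + * power of two, 128K, capped by the requested end offset.)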
+ */ + ASSERT(!ISP2(zp->z_blksz)); + newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); + } else { + newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); + } + dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); + } else { + newblksz = 0; + } + + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + zfs_rangelock_exit(lr); + return (error); + } + + if (newblksz) + zfs_grow_blocksize(zp, newblksz, tx); + + zp->z_size = end; + + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), + &zp->z_size, + sizeof (zp->z_size), tx)); + + vnode_pager_setsize(ZTOV(zp), end); + + zfs_rangelock_exit(lr); + + dmu_tx_commit(tx); + + return (0); +} + + +/* + * Free space in a file. + * + * IN: zp - znode of file to free data in. + * off - start of section to free. + * len - length of section to free. + * + * RETURN: 0 on success, error code on failure + */ +static int +zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_locked_range_t *lr; + int error; + + /* + * Lock the range being freed. + */ + lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); + + /* + * Nothing to do if file already at desired length. + */ + if (off >= zp->z_size) { + zfs_rangelock_exit(lr); + return (0); + } + + if (off + len > zp->z_size) + len = zp->z_size - off; + + error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); + + if (error == 0) { + /* + * In FreeBSD we cannot free block in the middle of a file, + * but only at the end of a file, so this code path should + * never happen. + */ + vnode_pager_setsize(ZTOV(zp), off); + } + +#ifdef _LINUX + /* + * Zero partial page cache entries. This must be done under a + * range lock in order to keep the ARC and page cache in sync. + */ + if (zp->z_is_mapped) { + loff_t first_page, last_page, page_len; + loff_t first_page_offset, last_page_offset; + + /* first possible full page in hole */ + first_page = (off + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + /* last page of hole */ + last_page = (off + len) >> PAGE_CACHE_SHIFT; + + /* offset of first_page */ + first_page_offset = first_page << PAGE_CACHE_SHIFT; + /* offset of last_page */ + last_page_offset = last_page << PAGE_CACHE_SHIFT; + + /* truncate whole pages */ + if (last_page_offset > first_page_offset) { + truncate_inode_pages_range(ZTOI(zp)->i_mapping, + first_page_offset, last_page_offset - 1); + } + + /* truncate sub-page ranges */ + if (first_page > last_page) { + /* entire punched area within a single page */ + zfs_zero_partial_page(zp, off, len); + } else { + /* beginning of punched area at the end of a page */ + page_len = first_page_offset - off; + if (page_len > 0) + zfs_zero_partial_page(zp, off, page_len); + + /* end of punched area at the beginning of a page */ + page_len = off + len - last_page_offset; + if (page_len > 0) + zfs_zero_partial_page(zp, last_page_offset, + page_len); + } + } +#endif + zfs_rangelock_exit(lr); + + return (error); +} + +/* + * Truncate a file + * + * IN: zp - znode of file to free data in. + * end - new end-of-file. + * + * RETURN: 0 on success, error code on failure + */ +static int +zfs_trunc(znode_t *zp, uint64_t end) +{ + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + struct vnode *vp = ZTOV(zp); + dmu_tx_t *tx; + zfs_locked_range_t *lr; + int error; + sa_bulk_attr_t bulk[2]; + int count = 0; + /* + * We will change zp_size, lock the whole file. + */ + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + + /* + * Nothing to do if file already at desired length. 
+ */ + if (end >= zp->z_size) { + zfs_rangelock_exit(lr); + return (0); + } + + error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, + DMU_OBJECT_END); + if (error) { + zfs_rangelock_exit(lr); + return (error); + } + + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + dmu_tx_mark_netfree(tx); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + zfs_rangelock_exit(lr); + return (error); + } + + zp->z_size = end; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + NULL, &zp->z_size, sizeof (zp->z_size)); + + if (end == 0) { + zp->z_pflags &= ~ZFS_SPARSE; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, 8); + } + VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); + + dmu_tx_commit(tx); + + /* + * Clear any mapped pages in the truncated region. This has to + * happen outside of the transaction to avoid the possibility of + * a deadlock with someone trying to push a page that we are + * about to invalidate. + */ + vnode_pager_setsize(vp, end); + + zfs_rangelock_exit(lr); + + return (0); +} + +/* + * Free space in a file + * + * IN: zp - znode of file to free data in. + * off - start of range + * len - end of range (0 => EOF) + * flag - current file open mode flags. + * log - TRUE if this action should be logged + * + * RETURN: 0 on success, error code on failure + */ +int +zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) +{ +// struct vnode *vp = ZTOV(zp); + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zilog_t *zilog = zfsvfs->z_log; + uint64_t mode; + uint64_t mtime[2], ctime[2]; + sa_bulk_attr_t bulk[3]; + int count = 0; + int error; + + if (vnode_isfifo(ZTOV(zp))) + return (0); + + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, + sizeof (mode))) != 0) + return (error); + + if (off > zp->z_size) { + error = zfs_extend(zp, off+len); + if (error == 0 && log) + goto log; + goto out; + } + + if (len == 0) { + error = zfs_trunc(zp, off); + } else { + if ((error = zfs_free_range(zp, off, len)) == 0 && + off + len > zp->z_size) + error = zfs_extend(zp, off+len); + } + if (error || !log) + goto out; +log: + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + zfs_sa_upgrade_txholds(tx, zp); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + goto out; + } + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, 8); + zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + ASSERT(error == 0); + + zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); + + dmu_tx_commit(tx); + + error = 0; + +out: + + return (error); +} + +void +zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) +{ + zfsvfs_t *zfsvfs; + uint64_t moid, obj, sa_obj, version; + uint64_t sense = ZFS_CASE_SENSITIVE; + uint64_t norm = 0; + nvpair_t *elem; + int size; + int error; + int i; + znode_t *rootzp = NULL; + vattr_t vattr; + znode_t *zp; + zfs_acl_ids_t acl_ids; + + /* + * First attempt to create master node. + */ + /* + * In an empty objset, there are no blocks to read and thus + * there can be no i/o errors (which we assert below). 
+ */ + moid = MASTER_NODE_OBJ; + error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + + /* + * Set starting attributes. + */ + version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); + elem = NULL; + while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { + /* For the moment we expect all zpl props to be uint64_ts */ + uint64_t val; + char *name; + + ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); + VERIFY(nvpair_value_uint64(elem, &val) == 0); + name = nvpair_name(elem); + if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { + if (val < version) + version = val; + } else { + error = zap_update(os, moid, name, 8, 1, &val, tx); + } + ASSERT(error == 0); + if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) + norm = val; + else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) + sense = val; + } + ASSERT(version != 0); + error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); + + /* + * Create zap object used for SA attribute registration + */ + + if (version >= ZPL_VERSION_SA) { + sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT(error == 0); + } else { + sa_obj = 0; + } + /* + * Create a delete queue. + */ + obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); + + error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); + ASSERT(error == 0); + + /* + * Create root znode. Create minimal znode/vnode/zfsvfs + * to allow zfs_mknode to work. + */ + VATTR_NULL(&vattr); + vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_TYPE; + vattr.va_type = VDIR; + vattr.va_mode = S_IFDIR|0755; + vattr.va_uid = crgetuid(cr); + vattr.va_gid = crgetgid(cr); + + rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); + ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); + rootzp->z_moved = 0; + rootzp->z_unlinked = 0; + rootzp->z_atime_dirty = 0; + rootzp->z_is_sa = USE_SA(version, os); + + rootzp->z_vnode = NULL; +#ifndef __APPLE__ + vnode.v_type = VDIR; + vnode.v_data = rootzp; + rootzp->z_vnode = &vnode; +#endif + + zfsvfs = kmem_alloc(sizeof (zfsvfs_t), KM_SLEEP); +#ifdef __APPLE__ + bzero(zfsvfs, sizeof (zfsvfs_t)); +#endif + zfsvfs->z_os = os; + zfsvfs->z_parent = zfsvfs; + zfsvfs->z_version = version; + zfsvfs->z_use_fuids = USE_FUIDS(version, os); + zfsvfs->z_use_sa = USE_SA(version, os); + zfsvfs->z_norm = norm; + + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zfsvfs->z_attr_table); + + ASSERT(error == 0); + + /* + * Fold case on file systems that are always or sometimes case + * insensitive. 
+ */ + if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) + zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; + + mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), + offsetof(znode_t, z_link_node)); + + size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX); + zfsvfs->z_hold_size = size; + zfsvfs->z_hold_trees = kmem_zalloc(sizeof (avl_tree_t) * size, + KM_SLEEP); + zfsvfs->z_hold_locks = kmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP); + for (i = 0; i != size; i++) { + avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare, + sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node)); + mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); + } + + rootzp->z_zfsvfs = zfsvfs; + VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, + cr, NULL, &acl_ids)); + zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); + ASSERT3P(zp, ==, rootzp); + error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); + ASSERT(error == 0); + zfs_acl_ids_free(&acl_ids); + POINTER_INVALIDATE(&rootzp->z_zfsvfs); + + sa_handle_destroy(rootzp->z_sa_hdl); + rootzp->z_sa_hdl = NULL; + rootzp->z_vnode = NULL; + kmem_cache_free(znode_cache, rootzp); + + for (i = 0; i != size; i++) { + avl_destroy(&zfsvfs->z_hold_trees[i]); + mutex_destroy(&zfsvfs->z_hold_locks[i]); + } + + /* + * Create shares directory + */ + + error = zfs_create_share_dir(zfsvfs, tx); + + ASSERT(error == 0); + + list_destroy(&zfsvfs->z_all_znodes); + mutex_destroy(&zfsvfs->z_znodes_lock); + + kmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size); + kmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size); + + kmem_free(zfsvfs, sizeof (zfsvfs_t)); +} + +#endif /* _KERNEL */ + +static int +zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) +{ + uint64_t sa_obj = 0; + int error; + + error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); + if (error != 0 && error != ENOENT) + return (error); + + error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); + return (error); +} +static int +zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, + dmu_buf_t **db, void *tag) +{ + dmu_object_info_t doi; + int error; + + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) + return (error); + + dmu_object_info_from_db(*db, &doi); + if (((doi.doi_bonus_type != DMU_OT_SA) && + (doi.doi_bonus_type != DMU_OT_ZNODE)) || + ((doi.doi_bonus_type == DMU_OT_ZNODE) && + (doi.doi_bonus_size < sizeof (znode_phys_t)))) { + sa_buf_rele(*db, tag); + return (SET_ERROR(ENOTSUP)); + } + + error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); + if (error != 0) { + sa_buf_rele(*db, tag); + return (error); + } + return (0); +} + +void +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) +{ + sa_handle_destroy(hdl); + sa_buf_rele(db, tag); +} + +/* + * Given an object number, return its parent object number and whether + * or not the object is an extended attribute directory. 
+ */ +static int +zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, + uint64_t *pobjp, int *is_xattrdir) +{ + uint64_t parent; + uint64_t pflags; + uint64_t mode; + uint64_t parent_mode; + sa_bulk_attr_t bulk[3]; + sa_handle_t *sa_hdl; + dmu_buf_t *sa_db; + int count = 0; + int error; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, + &parent, sizeof (parent)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, + &pflags, sizeof (pflags)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &mode, sizeof (mode)); + + if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) + return (error); + + /* + * When a link is removed its parent pointer is not changed and will + * be invalid. There are two cases where a link is removed but the + * file stays around, when it goes to the delete queue and when there + * are additional links. + */ + error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); + if (error != 0) + return (error); + + error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + if (error != 0) + return (error); + + *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); + + /* + * Extended attributes can be applied to files, directories, etc. + * Otherwise the parent must be a directory. + */ + if (!*is_xattrdir && !S_ISDIR(parent_mode)) + return ((EINVAL)); + + *pobjp = parent; + + return (0); +} + +/* + * Given an object number, return some zpl level statistics + */ +static int +zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, + zfs_stat_t *sb) +{ + sa_bulk_attr_t bulk[4]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &sb->zs_mode, sizeof (sb->zs_mode)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, + &sb->zs_gen, sizeof (sb->zs_gen)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, + &sb->zs_links, sizeof (sb->zs_links)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, + &sb->zs_ctime, sizeof (sb->zs_ctime)); + + return (sa_bulk_lookup(hdl, bulk, count)); +} + +static int +zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, + sa_attr_type_t *sa_table, char *buf, int len) +{ + sa_handle_t *sa_hdl; + sa_handle_t *prevhdl = NULL; + dmu_buf_t *prevdb = NULL; + dmu_buf_t *sa_db = NULL; + char *path = buf + len - 1; + int error; + + *path = '\0'; + sa_hdl = hdl; + + uint64_t deleteq_obj; + VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, + ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); + error = zap_lookup_int(osp, deleteq_obj, obj); + if (error == 0) { + return (ESTALE); + } else if (error != ENOENT) { + return (error); + } + error = 0; + + for (;;) { + uint64_t pobj = 0; + char component[MAXNAMELEN + 2]; + size_t complen; + int is_xattrdir = 0; + + if (prevdb) + zfs_release_sa_handle(prevhdl, prevdb, FTAG); + + if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, + &is_xattrdir)) != 0) + break; + + if (pobj == obj) { + if (path[0] != '/') + *--path = '/'; + break; + } + + component[0] = '/'; + if (is_xattrdir) { + (void) snprintf(component + 1, MAXNAMELEN+1, + ""); + } else { + error = zap_value_search(osp, pobj, obj, + ZFS_DIRENT_OBJ(-1ULL), + component + 1); + if (error != 0) + break; + } + + complen = strlen(component); + path -= complen; + ASSERT(path >= buf); + bcopy(component, path, complen); + obj = pobj; + + if (sa_hdl != hdl) { + prevhdl = sa_hdl; + prevdb = sa_db; + } + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); + if (error != 
0) { + sa_hdl = prevhdl; + sa_db = prevdb; + break; + } + } + + if (sa_hdl != NULL && sa_hdl != hdl) { + ASSERT(sa_db != NULL); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + } + + if (error == 0) + (void) memmove(buf, path, buf + len - path); + + return (error); +} + +int +zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) +{ + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} + +int +zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, + char *buf, int len) +{ + char *path = buf + len - 1; + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + *path = '\0'; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_stats_impl(hdl, sa_table, sb); + if (error != 0) { + zfs_release_sa_handle(hdl, db, FTAG); + return (error); + } + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} diff --git a/module/os/macos/zfs/zio_crypt.c b/module/os/macos/zfs/zio_crypt.c new file mode 100644 index 0000000000..7523844c07 --- /dev/null +++ b/module/os/macos/zfs/zio_crypt.c @@ -0,0 +1,1995 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This file is responsible for handling all of the details of generating + * encryption parameters and performing encryption and authentication. + * + * BLOCK ENCRYPTION PARAMETERS: + * Encryption /Authentication Algorithm Suite (crypt): + * The encryption algorithm, mode, and key length we are going to use. We + * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit + * keys. All authentication is currently done with SHA512-HMAC. + * + * Plaintext: + * The unencrypted data that we want to encrypt. + * + * Initialization Vector (IV): + * An initialization vector for the encryption algorithms. This is used to + * "tweak" the encryption algorithms so that two blocks of the same data are + * encrypted into different ciphertext outputs, thus obfuscating block patterns. + * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is + * never reused with the same encryption key. This value is stored unencrypted + * and must simply be provided to the decryption function. We use a 96 bit IV + * (as recommended by NIST) for all block encryption. For non-dedup blocks we + * derive the IV randomly. The first 64 bits of the IV are stored in the second + * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of + * blk_fill. 
This is safe because encrypted blocks can't use the upper 32 bits + * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count + * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of + * level 0 blocks is the number of allocated dnodes in that block. The on-disk + * format supports at most 2^15 slots per L0 dnode block, because the maximum + * block size is 16MB (2^24). In either case, for level 0 blocks this number + * will still be smaller than UINT32_MAX so it is safe to store the IV in the + * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count + * for the dnode code. + * + * Master key: + * This is the most important secret data of an encrypted dataset. It is used + * along with the salt to generate that actual encryption keys via HKDF. We + * do not use the master key to directly encrypt any data because there are + * theoretical limits on how much data can actually be safely encrypted with + * any encryption mode. The master key is stored encrypted on disk with the + * user's wrapping key. Its length is determined by the encryption algorithm. + * For details on how this is stored see the block comment in dsl_crypt.c + * + * Salt: + * Used as an input to the HKDF function, along with the master key. We use a + * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt + * can be used for encrypting many blocks, so we cache the current salt and the + * associated derived key in zio_crypt_t so we do not need to derive it again + * needlessly. + * + * Encryption Key: + * A secret binary key, generated from an HKDF function used to encrypt and + * decrypt data. + * + * Message Authenication Code (MAC) + * The MAC is an output of authenticated encryption modes such as AES-GCM and + * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted + * data on disk and return garbage to the application. Effectively, it is a + * checksum that can not be reproduced by an attacker. We store the MAC in the + * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated + * regular checksum of the ciphertext which can be used for scrubbing. + * + * OBJECT AUTHENTICATION: + * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because + * they contain some info that always needs to be readable. To prevent this + * data from being altered, we authenticate this data using SHA512-HMAC. This + * will produce a MAC (similar to the one produced via encryption) which can + * be used to verify the object was not modified. HMACs do not require key + * rotation or IVs, so we can keep up to the full 3 copies of authenticated + * data. + * + * ZIL ENCRYPTION: + * ZIL blocks have their bp written to disk ahead of the associated data, so we + * cannot store the MAC there as we normally do. For these blocks the MAC is + * stored in the embedded checksum within the zil_chain_t header. The salt and + * IV are generated for the block on bp allocation instead of at encryption + * time. In addition, ZIL blocks have some pieces that must be left in plaintext + * for claiming even though all of the sensitive user data still needs to be + * encrypted. The function zio_crypt_init_uios_zil() handles parsing which + * pieces of the block need to be encrypted. All data that is not encrypted is + * authenticated using the AAD mechanisms that the supported encryption modes + * provide for. 
In order to preserve the semantics of the ZIL for encrypted + * datasets, the ZIL is not protected at the objset level as described below. + * + * DNODE ENCRYPTION: + * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left + * in plaintext for scrubbing and claiming, but the bonus buffers might contain + * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing + * which which pieces of the block need to be encrypted. For more details about + * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). + * + * OBJECT SET AUTHENTICATION: + * Up to this point, everything we have encrypted and authenticated has been + * at level 0 (or -2 for the ZIL). If we did not do any further work the + * on-disk format would be susceptible to attacks that deleted or rearrannged + * the order of level 0 blocks. Ideally, the cleanest solution would be to + * maintain a tree of authentication MACs going up the bp tree. However, this + * presents a problem for raw sends. Send files do not send information about + * indirect blocks so there would be no convenient way to transfer the MACs and + * they cannot be recalculated on the receive side without the master key which + * would defeat one of the purposes of raw sends in the first place. Instead, + * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs + * from the level below. We also include some portable fields from blk_prop such + * as the lsize and compression algorithm to prevent the data from being + * misinterpretted. + * + * At the objset level, we maintain 2 seperate 256 bit MACs in the + * objset_phys_t. The first one is "portable" and is the logical root of the + * MAC tree maintianed in the metadnode's bps. The second, is "local" and is + * used as the root MAC for the user accounting objects, which are also not + * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload + * of the send file. The useraccounting code ensures that the useraccounting + * info is not present upon a receive, so the local MAC can simply be cleared + * out at that time. For more info about objset_phys_t authentication, see + * zio_crypt_do_objset_hmacs(). + * + * CONSIDERATIONS FOR DEDUP: + * In order for dedup to work, blocks that we want to dedup with one another + * need to use the same IV and encryption key, so that they will have the same + * ciphertext. Normally, one should never reuse an IV with the same encryption + * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both + * blocks. In this case, however, since we are using the same plaindata as + * well all that we end up with is a duplicate of the original ciphertext we + * already had. As a result, an attacker with read access to the raw disk will + * be able to tell which blocks are the same but this information is given away + * by dedup anyway. In order to get the same IVs and encryption keys for + * equivalent blocks of data we use an HMAC of the plaindata. We use an HMAC + * here so that a reproducible checksum of the plaindata is never available to + * the attacker. The HMAC key is kept alongside the master key, encrypted on + * disk. The first 64 bits of the HMAC are used in place of the random salt, and + * the next 96 bits are used as the IV. As a result of this mechanism, dedup + * will only work within a clone family since encrypted dedup requires use of + * the same master and HMAC keys. 
+ */ + +/* + * After encrypting many blocks with the same key we may start to run up + * against the theoretical limits of how much data can securely be encrypted + * with a single key using the supported encryption modes. The most obvious + * limitation is that our risk of generating 2 equivalent 96 bit IVs increases + * the more IVs we generate (which both GCM and CCM modes strictly forbid). + * This risk actually grows surprisingly quickly over time according to the + * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have + * generated n IVs with a cryptographically secure RNG, the approximate + * probability p(n) of a collision is given as: + * + * p(n) ~= e^(-n*(n-1)/(2*(2^96))) + * + * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] + * + * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion + * we must not write more than 398,065,730 blocks with the same encryption key. + * Therefore, we rotate our keys after 400,000,000 blocks have been written by + * generating a new random 64 bit salt for our HKDF encryption key generation + * function. + */ +#define ZFS_KEY_MAX_SALT_USES_DEFAULT 400000000 +#define ZFS_CURRENT_MAX_SALT_USES \ + (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) +unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; + +typedef struct blkptr_auth_buf { + uint64_t bab_prop; /* blk_prop - portable mask */ + uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */ + uint64_t bab_pad; /* reserved for future use */ +} blkptr_auth_buf_t; + +zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { + {"", ZC_TYPE_NONE, 0, "inherit"}, + {"", ZC_TYPE_NONE, 0, "on"}, + {"", ZC_TYPE_NONE, 0, "off"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"} +}; + +void +zio_crypt_key_destroy(zio_crypt_key_t *key) +{ + rw_destroy(&key->zk_salt_lock); + + /* free crypto templates */ + crypto_destroy_ctx_template(key->zk_current_tmpl); + crypto_destroy_ctx_template(key->zk_hmac_tmpl); + + /* zero out sensitive data */ + bzero(key, sizeof (zio_crypt_key_t)); +} + +int +zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) +{ + int ret; + crypto_mechanism_t mech; + uint_t keydata_len; + + ASSERT(key != NULL); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + bzero(key, sizeof (zio_crypt_key_t)); + + /* fill keydata buffers and salt with random data */ + ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t)); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_master_keydata, keydata_len); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + /* derive the current key from the master key */ + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, + keydata_len); + if (ret != 0) + goto error; + + /* initialize keys for the ICP */ + key->zk_current_key.ck_format = CRYPTO_KEY_RAW; + key->zk_current_key.ck_data = key->zk_current_keydata; + key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + 
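An illustrative aside, not part of the patch: the 400,000,000-block salt-rotation limit defined above (ZFS_KEY_MAX_SALT_USES_DEFAULT) can be sanity-checked against the birthday-bound approximation quoted in the same comment. A minimal standalone sketch in plain C (needs libm, e.g. cc check_salt_limit.c -lm); the file name and printed wording are hypothetical:

#include <math.h>
#include <stdio.h>

/*
 * p(n) ~= 1 - e^(-n*(n-1)/(2*2^96)), so for a small target probability p,
 * n ~= sqrt(2 * 2^96 * ln(1/(1 - p))).
 */
int
main(void)
{
	const double iv_space = ldexp(1.0, 96);	/* 2^96 possible 96-bit IVs */
	const double p = 1e-12;			/* one-in-a-trillion target */
	double n = sqrt(2.0 * iv_space * -log1p(-p));	/* -log1p(-p) == ln(1/(1-p)) */

	/*
	 * Prints a value just over 3.98e8, consistent with the ~398,065,730
	 * figure in the comment above and below the 400,000,000 default.
	 */
	printf("blocks per salt for p <= 1e-12: %.0f\n", n);
	return (0);
}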
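A second aside, also not part of the patch: the CONSIDERATIONS FOR DEDUP paragraph in the block comment above states that the first 64 bits of the plaintext HMAC stand in for the random salt and the next 96 bits become the IV. The sketch below shows only that split; the function name and the locally defined length constants (8-byte salt, 12-byte IV, 64-byte SHA512 digest) are illustrative and taken from that description. zio_crypt_generate_iv_salt_dedup(), added further down in this file, performs the same split on the real digest.

#include <stdint.h>
#include <string.h>

#define	DEDUP_SALT_LEN		8	/* 64-bit salt */
#define	DEDUP_IV_LEN		12	/* 96-bit IV */
#define	DEDUP_DIGEST_LEN	64	/* SHA512-HMAC output length */

/* Derive the dedup salt and IV from an HMAC of the plaintext block. */
void
dedup_salt_iv_from_digest(const uint8_t digest[DEDUP_DIGEST_LEN],
    uint8_t salt[DEDUP_SALT_LEN], uint8_t iv[DEDUP_IV_LEN])
{
	memcpy(salt, digest, DEDUP_SALT_LEN);
	memcpy(iv, digest + DEDUP_SALT_LEN, DEDUP_IV_LEN);
}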
key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; + key->zk_hmac_key.ck_data = &key->zk_hmac_key; + key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); + + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. + */ + mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, + &key->zk_hmac_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_hmac_tmpl = NULL; + + key->zk_crypt = crypt; + key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; + key->zk_salt_count = 0; + rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); + + return (0); + +error: + zio_crypt_key_destroy(key); + return (ret); +} + +static int +zio_crypt_key_change_salt(zio_crypt_key_t *key) +{ + int ret = 0; + uint8_t salt[ZIO_DATA_SALT_LEN]; + crypto_mechanism_t mech; + uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen; + + /* generate a new salt */ + ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + rw_enter(&key->zk_salt_lock, RW_WRITER); + + /* someone beat us to the salt rotation, just unlock and return */ + if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES) + goto out_unlock; + + /* derive the current key from the master key and the new salt */ + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); + if (ret != 0) + goto out_unlock; + + /* assign the salt and reset the usage count */ + bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN); + key->zk_salt_count = 0; + + /* destroy the old context template and create the new one */ + crypto_destroy_ctx_template(key->zk_current_tmpl); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + rw_exit(&key->zk_salt_lock); + + return (0); + +out_unlock: + rw_exit(&key->zk_salt_lock); +error: + return (ret); +} + +/* See comment above zfs_key_max_salt_uses definition for details */ +int +zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) +{ + int ret; + boolean_t salt_change; + + rw_enter(&key->zk_salt_lock, RW_READER); + + bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN); + salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >= + ZFS_CURRENT_MAX_SALT_USES); + + rw_exit(&key->zk_salt_lock); + + if (salt_change) { + ret = zio_crypt_key_change_salt(key); + if (ret != 0) + goto error; + } + + return (0); + +error: + return (ret); +} + +/* + * This function handles all encryption and decryption in zfs. When + * encrypting it expects puio to reference the plaintext and cuio to + * reference the cphertext. cuio must have enough space for the + * ciphertext + room for a MAC. datalen should be the length of the + * plaintext / ciphertext alone. 
+ */ +static int +zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, + crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, + uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len) +{ + int ret; + crypto_data_t plaindata, cipherdata; + CK_AES_CCM_PARAMS ccmp; + CK_AES_GCM_PARAMS gcmp; + crypto_mechanism_t mech; + zio_crypt_info_t crypt_info; + uint_t plain_full_len; + uint64_t maclen; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW); + + /* lookup the encryption info */ + crypt_info = zio_crypt_table[crypt]; + + /* the mac will always be the last iovec_t in the cipher uio */ + user_addr_t mac; + uio_getiov(cuio, uio_iovcnt(cuio) - 1, &mac, &maclen); + + ASSERT(maclen <= ZIO_DATA_MAC_LEN); + + /* setup encryption mechanism (same as crypt) */ + mech.cm_type = crypto_mech2id(crypt_info.ci_mechname); + + /* + * Strangely, the ICP requires that plain_full_len must include + * the MAC length when decrypting, even though the UIO does not + * need to have the extra space allocated. + */ + if (encrypt) { + plain_full_len = datalen; + } else { + plain_full_len = datalen + maclen; + } + + /* + * setup encryption params (currently only AES CCM and AES GCM + * are supported) + */ + if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) { + ccmp.ulNonceSize = ZIO_DATA_IV_LEN; + ccmp.ulAuthDataSize = auth_len; + ccmp.authData = authbuf; + ccmp.ulMACSize = maclen; + ccmp.nonce = ivbuf; + ccmp.ulDataSize = plain_full_len; + + mech.cm_param = (char *)(&ccmp); + mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); + } else { + gcmp.ulIvLen = ZIO_DATA_IV_LEN; + gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN); + gcmp.ulAADLen = auth_len; + gcmp.pAAD = authbuf; + gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen); + gcmp.pIv = ivbuf; + + mech.cm_param = (char *)(&gcmp); + mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); + } + + /* populate the cipher and plain data structs. 
*/ + plaindata.cd_format = CRYPTO_DATA_UIO; + plaindata.cd_offset = 0; + plaindata.cd_uio = puio; + plaindata.cd_miscdata = NULL; + plaindata.cd_length = plain_full_len; + + cipherdata.cd_format = CRYPTO_DATA_UIO; + cipherdata.cd_offset = 0; + cipherdata.cd_uio = cuio; + cipherdata.cd_miscdata = NULL; + cipherdata.cd_length = datalen + maclen; + + /* perform the actual encryption */ + if (encrypt) { + ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata, + NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + } else { + ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata, + NULL); + if (ret != CRYPTO_SUCCESS) { + ASSERT3U(ret, ==, CRYPTO_INVALID_MAC); + ret = SET_ERROR(ECKSUM); + goto error; + } + } + + return (0); + +error: + return (ret); +} + +int +zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) +{ + int ret; + uio_t *puio = NULL, *cuio = NULL; + uint64_t aad[3]; + uint64_t crypt = key->zk_crypt; + uint_t enc_len, keydata_len, aad_len; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + + /* generate iv for wrapping the master and hmac key */ + ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN); + if (ret != 0) + goto error; + + puio = uio_create(2, 0, UIO_SYSSPACE, UIO_READ); + if (puio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + cuio = uio_create(3, 0, UIO_SYSSPACE, UIO_WRITE); + if (cuio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + /* initialize uio_ts */ + VERIFY0(uio_addiov(puio, (user_addr_t)key->zk_master_keydata, + keydata_len)); + VERIFY0(uio_addiov(puio, (user_addr_t)key->zk_hmac_keydata, + SHA512_HMAC_KEYLEN)); + + VERIFY0(uio_addiov(cuio, (user_addr_t)keydata_out, keydata_len)); + VERIFY0(uio_addiov(cuio, (user_addr_t)hmac_keydata_out, + SHA512_HMAC_KEYLEN)); + VERIFY0(uio_addiov(cuio, (user_addr_t)mac, WRAPPING_MAC_LEN)); + + /* + * Although we don't support writing to the old format, we do + * support rewrapping the key so that the user can move and + * quarantine datasets on the old format. 
+ */ + if (key->zk_version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(key->zk_guid); + } else { + ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(key->zk_guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(key->zk_version); + } + + enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; + + /* encrypt the keys and store the resulting ciphertext and mac */ + ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, + puio, cuio, (uint8_t *)aad, aad_len); + if (ret != 0) + goto error; + + if (puio) uio_free(puio); + if (cuio) uio_free(cuio); + + return (0); + +error: + if (puio) uio_free(puio); + if (cuio) uio_free(cuio); + + return (ret); +} + +int +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key) +{ + int ret; + crypto_mechanism_t mech; + uio_t *puio = NULL, *cuio = NULL; + uint64_t aad[3]; + uint_t enc_len, keydata_len, aad_len; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); + + rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + + puio = uio_create(2, 0, UIO_SYSSPACE, UIO_WRITE); + if (puio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + cuio = uio_create(3, 0, UIO_SYSSPACE, UIO_READ); + if (cuio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + /* initialize uio_ts */ + VERIFY0(uio_addiov(puio, (user_addr_t)key->zk_master_keydata, + keydata_len)); + VERIFY0(uio_addiov(puio, (user_addr_t)key->zk_hmac_keydata, + SHA512_HMAC_KEYLEN)); + + VERIFY0(uio_addiov(cuio, (user_addr_t)keydata, keydata_len)); + VERIFY0(uio_addiov(cuio, (user_addr_t)hmac_keydata, + SHA512_HMAC_KEYLEN)); + VERIFY0(uio_addiov(cuio, (user_addr_t)mac, WRAPPING_MAC_LEN)); + + if (version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(guid); + } else { + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(version); + } + + enc_len = keydata_len + SHA512_HMAC_KEYLEN; + + /* decrypt the keys and store the result in the output buffers */ + ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, + puio, cuio, (uint8_t *)aad, aad_len); + if (ret != 0) + goto error; + + /* generate a fresh salt */ + ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + /* derive the current key from the master key */ + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, + keydata_len); + if (ret != 0) + goto error; + + /* initialize keys for ICP */ + key->zk_current_key.ck_format = CRYPTO_KEY_RAW; + key->zk_current_key.ck_data = key->zk_current_keydata; + key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; + key->zk_hmac_key.ck_data = key->zk_hmac_keydata; + key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); + + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. 
+ */ + mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, + &key->zk_hmac_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_hmac_tmpl = NULL; + + key->zk_crypt = crypt; + key->zk_version = version; + key->zk_guid = guid; + key->zk_salt_count = 0; + + if (puio) uio_free(puio); + if (cuio) uio_free(cuio); + + return (0); + +error: + if (puio) uio_free(puio); + if (cuio) uio_free(cuio); + zio_crypt_key_destroy(key); + return (ret); +} + +int +zio_crypt_generate_iv(uint8_t *ivbuf) +{ + int ret; + + /* randomly generate the IV */ + ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN); + if (ret != 0) + goto error; + + return (0); + +error: + bzero(ivbuf, ZIO_DATA_IV_LEN); + return (ret); +} + +int +zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, + uint8_t *digestbuf, uint_t digestlen) +{ + int ret; + crypto_mechanism_t mech; + crypto_data_t in_data, digest_data; + uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH]; + + ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH); + + /* initialize sha512-hmac mechanism and crypto data */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the crypto data */ + in_data.cd_format = CRYPTO_DATA_RAW; + in_data.cd_offset = 0; + in_data.cd_length = datalen; + in_data.cd_raw.iov_base = (char *)data; + in_data.cd_raw.iov_len = in_data.cd_length; + + digest_data.cd_format = CRYPTO_DATA_RAW; + digest_data.cd_offset = 0; + digest_data.cd_length = SHA512_DIGEST_LENGTH; + digest_data.cd_raw.iov_base = (char *)raw_digestbuf; + digest_data.cd_raw.iov_len = digest_data.cd_length; + + /* generate the hmac */ + ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl, + &digest_data, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + bcopy(raw_digestbuf, digestbuf, digestlen); + + return (0); + +error: + bzero(digestbuf, digestlen); + return (ret); +} + +int +zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt) +{ + int ret; + uint8_t digestbuf[SHA512_DIGEST_LENGTH]; + + ret = zio_crypt_do_hmac(key, data, datalen, + digestbuf, SHA512_DIGEST_LENGTH); + if (ret != 0) + return (ret); + + bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN); + bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN); + + return (0); +} + +/* + * The following functions are used to encode and decode encryption parameters + * into blkptr_t and zil_header_t. The ICP wants to use these parameters as + * byte strings, which normally means that these strings would not need to deal + * with byteswapping at all. However, both blkptr_t and zil_header_t may be + * byteswapped by lower layers and so we must "undo" that byteswap here upon + * decoding and encoding in a non-native byteorder. These functions require + * that the byteorder bit is correct before being called. 
+ */ +void +zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) +{ + uint64_t val64; + uint32_t val32; + + ASSERT(BP_IS_ENCRYPTED(bp)); + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); + bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, val32); + } else { + bcopy(salt, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); + + bcopy(iv, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); + + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, BSWAP_32(val32)); + } +} + +void +zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) +{ + uint64_t val64; + uint32_t val32; + + ASSERT(BP_IS_PROTECTED(bp)); + + /* for convenience, so callers don't need to check */ + if (BP_IS_AUTHENTICATED(bp)) { + bzero(salt, ZIO_DATA_SALT_LEN); + bzero(iv, ZIO_DATA_IV_LEN); + return; + } + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t)); + bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t)); + + val32 = (uint32_t)BP_GET_IV2(bp); + bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); + } else { + val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]); + bcopy(&val64, salt, sizeof (uint64_t)); + + val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]); + bcopy(&val64, iv, sizeof (uint64_t)); + + val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp)); + bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); + } +} + +void +zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) +{ + uint64_t val64; + + ASSERT(BP_USES_CRYPT(bp)); + ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], + sizeof (uint64_t)); + } else { + bcopy(mac, &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[2] = BSWAP_64(val64); + + bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[3] = BSWAP_64(val64); + } +} + +void +zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac) +{ + uint64_t val64; + + ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp)); + + /* for convenience, so callers don't need to check */ + if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { + bzero(mac, ZIO_DATA_MAC_LEN); + return; + } + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t)); + bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t), + sizeof (uint64_t)); + } else { + val64 = BSWAP_64(bp->blk_cksum.zc_word[2]); + bcopy(&val64, mac, sizeof (uint64_t)); + + val64 = BSWAP_64(bp->blk_cksum.zc_word[3]); + bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t)); + } +} + +void +zio_crypt_encode_mac_zil(void *data, uint8_t *mac) +{ + zil_chain_t *zilc = data; + + bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3], + sizeof (uint64_t)); +} + +void +zio_crypt_decode_mac_zil(const void *data, uint8_t *mac) +{ + /* + * The ZIL MAC is embedded in the block it protects, which will + * not have been byteswapped by the time this function has been called. + * As a result, we don't need to worry about byteswapping the MAC. 
+ */ + const zil_chain_t *zilc = data; + + bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t)); + bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t), + sizeof (uint64_t)); +} + +/* + * This routine takes a block of dnodes (src_abd) and copies only the bonus + * buffers to the same offsets in the dst buffer. datalen should be the size + * of both the src_abd and the dst buffer (not just the length of the bonus + * buffers). + */ +void +zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) +{ + uint_t i, max_dnp = datalen >> DNODE_SHIFT; + uint8_t *src; + dnode_phys_t *dnp, *sdnp, *ddnp; + + src = abd_borrow_buf_copy(src_abd, datalen); + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), + DN_MAX_BONUS_LEN(dnp)); + } + } + + abd_return_buf(src_abd, src, datalen); +} + +/* + * This function decides what fields from blk_prop are included in + * the on-disk various MAC algorithms. + */ +static void +zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) +{ + /* + * Version 0 did not properly zero out all non-portable fields + * as it should have done. We maintain this code so that we can + * do read-only imports of pools on this version. + */ + if (version == 0) { + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + return; + } + + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + + /* + * The hole_birth feature might set these fields even if this bp + * is a hole. We zero them out here to guarantee that raw sends + * will function with or without the feature. + */ + if (BP_IS_HOLE(bp)) { + bp->blk_prop = 0ULL; + return; + } + + /* + * At L0 we want to verify these fields to ensure that data blocks + * can not be reinterpretted. For instance, we do not want an attacker + * to trick us into returning raw lz4 compressed data to the user + * by modifying the compression bits. At higher levels, we cannot + * enforce this policy since raw sends do not convey any information + * about indirect blocks, so these values might be different on the + * receive side. Fortunately, this does not open any new attack + * vectors, since any alterations that can be made to a higher level + * bp must still verify the correct order of the layer below it. + */ + if (BP_GET_LEVEL(bp) != 0) { + BP_SET_BYTEORDER(bp, 0); + BP_SET_COMPRESS(bp, 0); + + /* + * psize cannot be set to zero or it will trigger + * asserts, but the value doesn't really matter as + * long as it is constant. + */ + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + } + + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); +} + +static void +zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, + blkptr_auth_buf_t *bab, uint_t *bab_len) +{ + blkptr_t tmpbp = *bp; + + if (should_bswap) + byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + + ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); + ASSERT0(BP_IS_EMBEDDED(&tmpbp)); + + zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); + + /* + * We always MAC blk_prop in LE to ensure portability. This + * must be done after decoding the mac, since the endianness + * will get zero'd out here. 
+ */ + zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); + bab->bab_prop = LE_64(tmpbp.blk_prop); + bab->bab_pad = 0ULL; + + /* version 0 did not include the padding */ + *bab_len = sizeof (blkptr_auth_buf_t); + if (version == 0) + *bab_len -= sizeof (uint64_t); +} + +static int +zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + int ret; + uint_t bab_len; + blkptr_auth_buf_t bab; + crypto_data_t cd; + + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + cd.cd_length = bab_len; + cd.cd_raw.iov_base = (char *)&bab; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + return (0); + +error: + return (ret); +} + +static void +zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + uint_t bab_len; + blkptr_auth_buf_t bab; + + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + SHA2Update(ctx, &bab, bab_len); +} + +static void +zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + uint_t bab_len; + blkptr_auth_buf_t bab; + + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + bcopy(&bab, *aadp, bab_len); + *aadp += bab_len; + *aad_len += bab_len; +} + +static int +zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, dnode_phys_t *dnp) +{ + int ret, i; + dnode_phys_t *adnp; + boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); + crypto_data_t cd; + uint8_t tmp_dncore[offsetof(dnode_phys_t, dn_blkptr)]; + + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + + /* authenticate the core dnode (masking out non-portable bits) */ + bcopy(dnp, tmp_dncore, sizeof (tmp_dncore)); + adnp = (dnode_phys_t *)tmp_dncore; + if (le_bswap) { + adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec); + adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen); + adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid); + adnp->dn_used = BSWAP_64(adnp->dn_used); + } + adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; + adnp->dn_used = 0; + + cd.cd_length = sizeof (tmp_dncore); + cd.cd_raw.iov_base = (char *)adnp; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + for (i = 0; i < dnp->dn_nblkptr; i++) { + ret = zio_crypt_bp_do_hmac_updates(ctx, version, + should_bswap, &dnp->dn_blkptr[i]); + if (ret != 0) + goto error; + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + ret = zio_crypt_bp_do_hmac_updates(ctx, version, + should_bswap, DN_SPILL_BLKPTR(dnp)); + if (ret != 0) + goto error; + } + + return (0); + +error: + return (ret); +} + +/* + * objset_phys_t blocks introduce a number of exceptions to the normal + * authentication process. objset_phys_t's contain 2 seperate HMACS for + * protecting the integrity of their data. The portable_mac protects the + * the metadnode. This MAC can be sent with a raw send and protects against + * reordering of data within the metadnode. The local_mac protects the user + * accounting objects which are not sent from one system to another. + * + * In addition, objset blocks are the only blocks that can be modified and + * written to disk without the key loaded under certain circumstances. 
During + * zil_claim() we need to be able to update the zil_header_t to complete + * claiming log blocks and during raw receives we need to write out the + * portable_mac from the send file. Both of these actions are possible + * because these fields are not protected by either MAC so neither one will + * need to modify the MACs without the key. However, when the modified blocks + * are written out they will be byteswapped into the host machine's native + * endianness which will modify fields protected by the MAC. As a result, MAC + * calculation for objset blocks works slightly differently from other block + * types. Where other block types MAC the data in whatever endianness is + * written to disk, objset blocks always MAC little endian version of their + * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP() + * and le_bswap indicates whether a byteswap is needed to get this block + * into little endian format. + */ +int +zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, + boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac) +{ + int ret; + crypto_mechanism_t mech; + crypto_context_t ctx; + crypto_data_t cd; + objset_phys_t *osp = data; + uint64_t intval; + boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); + uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH]; + uint8_t raw_local_mac[SHA512_DIGEST_LENGTH]; + + /* initialize HMAC mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + + /* calculate the portable MAC from the portable fields and metadnode */ + ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in the os_type */ + intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type); + cd.cd_length = sizeof (uint64_t); + cd.cd_raw.iov_base = (char *)&intval; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in the portable os_flags */ + intval = osp->os_flags; + if (should_bswap) + intval = BSWAP_64(intval); + intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK; + if (!ZFS_HOST_BYTEORDER) + intval = BSWAP_64(intval); + + cd.cd_length = sizeof (uint64_t); + cd.cd_raw.iov_base = (char *)&intval; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in fields from the metadnode */ + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_meta_dnode); + if (ret) + goto error; + + /* store the final digest in a temporary buffer and copy what we need */ + cd.cd_length = SHA512_DIGEST_LENGTH; + cd.cd_raw.iov_base = (char *)raw_portable_mac; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_final(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); + + /* + * The local MAC protects the user and group accounting. If these + * objects are not present, the local MAC is zeroed out. 
+ */ + if (osp->os_userused_dnode.dn_type == DMU_OT_NONE && + osp->os_groupused_dnode.dn_type == DMU_OT_NONE) { + bzero(local_mac, ZIO_OBJSET_MAC_LEN); + return (0); + } + + /* calculate the local MAC from the userused and groupused dnodes */ + ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in the non-portable os_flags */ + intval = osp->os_flags; + if (should_bswap) + intval = BSWAP_64(intval); + intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK; + if (!ZFS_HOST_BYTEORDER) + intval = BSWAP_64(intval); + + cd.cd_length = sizeof (uint64_t); + cd.cd_raw.iov_base = (char *)&intval; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in fields from the user accounting dnodes */ + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_userused_dnode); + if (ret) + goto error; + + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_groupused_dnode); + if (ret) + goto error; + + /* store the final digest in a temporary buffer and copy what we need */ + cd.cd_length = SHA512_DIGEST_LENGTH; + cd.cd_raw.iov_base = (char *)raw_local_mac; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_final(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN); + + return (0); + +error: + bzero(portable_mac, ZIO_OBJSET_MAC_LEN); + bzero(local_mac, ZIO_OBJSET_MAC_LEN); + return (ret); +} + +static void +zio_crypt_destroy_uio(uio_t *uio) +{ +#ifdef _KERNEL + if (uio) uio_free(uio); +#endif +} + +/* + * This function parses an uncompressed indirect block and returns a checksum + * of all the portable fields from all of the contained bps. The portable + * fields are the MAC and all of the fields from blk_prop except for the dedup, + * checksum, and psize bits. For an explanation of the purpose of this, see + * the comment block on object set authentication. + */ +static int +zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, + uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) +{ + blkptr_t *bp; + int i, epb = datalen >> SPA_BLKPTRSHIFT; + SHA2_CTX ctx; + uint8_t digestbuf[SHA512_DIGEST_LENGTH]; + + /* checksum all of the MACs from the layer below */ + SHA2Init(SHA512, &ctx); + for (i = 0, bp = buf; i < epb; i++, bp++) { + zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, + byteswap, bp); + } + SHA2Final(digestbuf, &ctx); + + if (generate) { + bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN); + return (0); + } + + if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0) + return (SET_ERROR(ECKSUM)); + + return (0); +} + +int +zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ + int ret; + + /* + * Unfortunately, callers of this function will not always have + * easy access to the on-disk format version. This info is + * normally found in the DSL Crypto Key, but the checksum-of-MACs + * is expected to be verifiable even when the key isn't loaded. + * Here, instead of doing a ZAP lookup for the version for each + * zio, we simply try both existing formats. 
+ */ + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, + datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); + if (ret == ECKSUM) { + ASSERT(!generate); + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, + buf, datalen, 0, byteswap, cksum); + } + + return (ret); +} + +int +zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ + int ret; + void *buf; + + buf = abd_borrow_buf_copy(abd, datalen); + ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen, + byteswap, cksum); + abd_return_buf(abd, buf, datalen); + + return (ret); +} + +/* + * Special case handling routine for encrypting / decrypting ZIL blocks. + * We do not check for the older ZIL chain because the encryption feature + * was not available before the newer ZIL chain was introduced. The goal + * here is to encrypt everything except the blkptr_t of a lr_write_t and + * the zil_chain_t header. Everything that is not encrypted is authenticated. + */ +static int +zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t **puio, + uio_t **cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, + boolean_t *no_crypt) +{ + int ret; + uint64_t txtype, lr_len; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + uio_t *srcuio = NULL, *dstuio = NULL; + uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp; + zil_chain_t *zilc; + lr_t *lr; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + /* cipherbuf always needs an extra iovec for the MAC */ + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + /* find the start and end record of the log block */ + zilc = (zil_chain_t *)src; + slrp = src + sizeof (zil_chain_t); + aadp = aadbuf; + blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + + /* calculate the number of encrypted iovecs we will need */ + for (; slrp < blkend; slrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + nr_iovecs++; + if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) + nr_iovecs++; + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + if (nr_src == 0) + nr_src = 1; + if (nr_dst == 0) + nr_dst = 1; + + /* allocate the uio to hold iovecs */ + if (nr_src != 0) { + srcuio = uio_create(nr_src, 0, UIO_SYSSPACE, UIO_READ); + if (srcuio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dstuio = uio_create(nr_dst, 0, UIO_SYSSPACE, UIO_WRITE); + if (dstuio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + /* + * Copy the plain zil header over and authenticate everything except + * the checksum that will store our MAC. If we are writing the data + * the embedded checksum will not have been calculated yet, so we don't + * authenticate that. 
+ */ + bcopy(src, dst, sizeof (zil_chain_t)); + bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t)); + aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t); + aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t); + + /* loop over records again, filling in iovecs */ + nr_iovecs = 0; + slrp = src + sizeof (zil_chain_t); + dlrp = dst + sizeof (zil_chain_t); + + for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + /* copy the common lr_t */ + bcopy(slrp, dlrp, sizeof (lr_t)); + bcopy(slrp, aadp, sizeof (lr_t)); + aadp += sizeof (lr_t); + aad_len += sizeof (lr_t); + + /* + * If this is a TX_WRITE record we want to encrypt everything + * except the bp if exists. If the bp does exist we want to + * authenticate it. + */ + if (txtype == TX_WRITE) { + crypt_len = sizeof (lr_write_t) - + sizeof (lr_t) - sizeof (blkptr_t); + + VERIFY0(uio_addiov(srcuio, + (user_addr_t)slrp + sizeof (lr_t), + crypt_len)); + VERIFY0(uio_addiov(dstuio, + (user_addr_t)dlrp + sizeof (lr_t), + crypt_len)); + + /* copy the bp now since it will not be encrypted */ + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + dlrp + sizeof (lr_write_t) - sizeof (blkptr_t), + sizeof (blkptr_t)); + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + aadp, sizeof (blkptr_t)); + aadp += sizeof (blkptr_t); + aad_len += sizeof (blkptr_t); + nr_iovecs++; + total_len += crypt_len; + + if (lr_len != sizeof (lr_write_t)) { + crypt_len = lr_len - sizeof (lr_write_t); + + VERIFY0(uio_addiov(srcuio, + (user_addr_t)slrp + sizeof (lr_write_t), + crypt_len)); + VERIFY0(uio_addiov(dstuio, + (user_addr_t)dlrp + sizeof (lr_write_t), + crypt_len)); + nr_iovecs++; + total_len += crypt_len; + } + } else { + crypt_len = lr_len - sizeof (lr_t); + VERIFY0(uio_addiov(srcuio, + (user_addr_t)slrp + sizeof (lr_t), + crypt_len)); + VERIFY0(uio_addiov(dstuio, + (user_addr_t)dlrp + sizeof (lr_t), + crypt_len)); + nr_iovecs++; + total_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + *puio = srcuio; + *cuio = dstuio; + } else { + *puio = dstuio; + *cuio = srcuio; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (srcuio) uio_free(srcuio); + if (dstuio) uio_free(dstuio); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + + return (ret); +} + +/* + * Special case handling routine for encrypting / decrypting dnode blocks. 
+ */ +static int +zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uio_t **puio, uio_t **cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + uint_t i, j, max_dnp = datalen >> DNODE_SHIFT; + struct uio *src_uio = NULL, *dst_uio = NULL; + uint8_t *src, *dst, *aadp; + dnode_phys_t *dnp, *adnp, *sdnp, *ddnp; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + aadp = aadbuf; + + /* + * Count the number of iovecs we will need to do the encryption by + * counting the number of bonus buffers that need to be encrypted. + */ + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + /* + * This block may still be byteswapped. However, all of the + * values we use are either uint8_t's (for which byteswapping + * is a noop) or a * != 0 check, which will work regardless + * of whether or not we byteswap. + */ + if (sdnp[i].dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) && + sdnp[i].dn_bonuslen != 0) { + nr_iovecs++; + } + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + if (nr_src == 0) + nr_src = 1; + if (nr_dst == 0) + nr_dst = 1; + + if (nr_src != 0) { + src_uio = uio_create(nr_src, 0, UIO_SYSSPACE, UIO_READ); + if (src_uio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + ASSERT(src_uio != NULL); + + if (nr_dst != 0) { + dst_uio = uio_create(nr_dst, 0, UIO_SYSSPACE, UIO_WRITE); + if (dst_uio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + ASSERT(dst_uio != NULL); + + nr_iovecs = 0; + + /* + * Iterate through the dnodes again, this time filling in the uios + * we allocated earlier. We also concatenate any data we want to + * authenticate onto aadbuf. + */ + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + + /* copy over the core fields and blkptrs (kept as plaintext) */ + bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp); + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]), + sizeof (blkptr_t)); + } + + /* + * Handle authenticated data. We authenticate everything in + * the dnode that can be brought over when we do a raw send. + * This includes all of the core fields as well as the MACs + * stored in the bp checksums and all of the portable bits + * from blk_prop. We include the dnode padding here in case it + * ever gets used in the future. Some dn_flags and dn_used are + * not portable so we mask those out values out of the + * authenticated data. + */ + crypt_len = offsetof(dnode_phys_t, dn_blkptr); + bcopy(dnp, aadp, crypt_len); + adnp = (dnode_phys_t *)aadp; + adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; + adnp->dn_used = 0; + aadp += crypt_len; + aad_len += crypt_len; + + for (j = 0; j < dnp->dn_nblkptr; j++) { + zio_crypt_bp_do_aad_updates(&aadp, &aad_len, + version, byteswap, &dnp->dn_blkptr[j]); + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + zio_crypt_bp_do_aad_updates(&aadp, &aad_len, + version, byteswap, DN_SPILL_BLKPTR(dnp)); + } + + /* + * If this bonus buffer needs to be encrypted, we prepare an + * iovec_t. 
The encryption / decryption functions will fill + * this in for us with the encrypted or decrypted data. + * Otherwise we add the bonus buffer to the authenticated + * data buffer and copy it over to the destination. The + * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that + * we can guarantee alignment with the AES block size + * (128 bits). + */ + crypt_len = DN_MAX_BONUS_LEN(dnp); + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + ASSERT3U(nr_iovecs, <, nr_src); + ASSERT3U(nr_iovecs, <, nr_dst); + VERIFY0(uio_addiov(src_uio, (user_addr_t)DN_BONUS(dnp), + crypt_len)); + VERIFY0(uio_addiov(dst_uio, + (user_addr_t)DN_BONUS(&ddnp[i]), + crypt_len)); + + nr_iovecs++; + total_len += crypt_len; + } else { + bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len); + bcopy(DN_BONUS(dnp), aadp, crypt_len); + aadp += crypt_len; + aad_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + *puio = src_uio; + *cuio = dst_uio; + } else { + *puio = dst_uio; + *cuio = src_uio; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + zio_crypt_destroy_uio(src_uio); + zio_crypt_destroy_uio(dst_uio); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + return (ret); +} + +static int +zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uio_t **puio, uio_t **cuio, + uint_t *enc_len) +{ + int ret = 0; + + *puio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); + *cuio = uio_create(2, 0, UIO_SYSSPACE, UIO_WRITE); + if (*puio == NULL || *cuio == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + VERIFY0(uio_addiov(*puio, (user_addr_t)plainbuf, datalen)); + VERIFY0(uio_addiov(*cuio, (user_addr_t)cipherbuf, datalen)); + + *enc_len = datalen; + + return (0); + +error: + zio_crypt_destroy_uio(*puio); + zio_crypt_destroy_uio(*cuio); + + *enc_len = 0; + return (ret); +} + + +/* + * This function builds up the plaintext (puio) and ciphertext (cuio) uios so + * that they can be used for encryption and decryption by zio_do_crypt_uio(). + * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks + * requiring special handling to parse out pieces that are to be encrypted. The + * authbuf is used by these special cases to store additional authenticated + * data (AAD) for the encryption modes. 
+ */ +static int +zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uint8_t *mac, uio_t **puio, uio_t **cuio, uint_t *enc_len, + uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + + ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE); + + /* route to handler */ + switch (ot) { + case DMU_OT_INTENT_LOG: + ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf, + datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, + no_crypt); + break; + case DMU_OT_DNODE: + ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, + cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, + auth_len, no_crypt); + break; + default: + ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, + datalen, puio, cuio, enc_len); + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + break; + } + + if (ret != 0) + goto error; + + ASSERT(puio != NULL); + ASSERT(cuio != NULL); + + /* populate the uios */ +#ifdef __APPLE__ + + VERIFY0(uio_addiov(*cuio, (user_addr_t)mac, ZIO_DATA_MAC_LEN)); + +#else // !APPLE + + puio->uio_segflg = UIO_SYSSPACE; + cuio->uio_segflg = UIO_SYSSPACE; + + mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]); + mac_iov->iov_base = mac; + mac_iov->iov_len = ZIO_DATA_MAC_LEN; + +#endif // !APPLE + + return (0); + +error: + return (ret); +} + +/* + * Primary encryption / decryption entrypoint for zio data. + */ +int +zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, + dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, + uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, + boolean_t *no_crypt) +{ + int ret; + boolean_t locked = B_FALSE; + uint64_t crypt = key->zk_crypt; + uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; + /* + * We have to delay the allocation call uio_create() until we know + * how many iovecs we want (as max). + */ + uio_t *puio = NULL, *cuio = NULL; + uint_t enc_len, auth_len; + uint8_t enc_keydata[MASTER_KEY_MAX_LEN]; + crypto_key_t tmp_ckey, *ckey = NULL; + crypto_ctx_template_t tmpl; + uint8_t *authbuf = NULL; + + /* create uios for encryption */ + ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, + cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, + &authbuf, &auth_len, no_crypt); + + if (ret != 0) + return (ret); + + /* + * If the needed key is the current one, just use it. Otherwise we + * need to generate a temporary one from the given salt + master key. + * If we are encrypting, we must return a copy of the current salt + * so that it can be stored in the blkptr_t. 
+ */ + rw_enter(&key->zk_salt_lock, RW_READER); + locked = B_TRUE; + + if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) { + ckey = &key->zk_current_key; + tmpl = key->zk_current_tmpl; + } else { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len); + if (ret != 0) + goto error; + + tmp_ckey.ck_format = CRYPTO_KEY_RAW; + tmp_ckey.ck_data = enc_keydata; + tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + ckey = &tmp_ckey; + tmpl = NULL; + } + + VERIFY(puio != NULL); + VERIFY(cuio != NULL); + + /* perform the encryption / decryption */ + ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, + puio, cuio, authbuf, auth_len); + + if (ret != 0) + goto error; + + if (locked) { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + } + + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(puio); + zio_crypt_destroy_uio(cuio); + + return (0); + +error: + if (locked) + rw_exit(&key->zk_salt_lock); + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + + zio_crypt_destroy_uio(puio); + zio_crypt_destroy_uio(cuio); + return (ret); +} + +/* + * Simple wrapper around zio_do_crypt_data() to work with abd's instead of + * linear buffers. + */ +int +zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, + boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, + uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) +{ + int ret; + void *ptmp, *ctmp; + + if (encrypt) { + ptmp = abd_borrow_buf_copy(pabd, datalen); + ctmp = abd_borrow_buf(cabd, datalen); + } else { + ptmp = abd_borrow_buf(pabd, datalen); + ctmp = abd_borrow_buf_copy(cabd, datalen); + } + + ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac, + datalen, ptmp, ctmp, no_crypt); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (0); + +error: + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (ret); +} + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* BEGIN CSTYLED */ +module_param(zfs_key_max_salt_uses, ulong, 0644); +MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " + "can be used for generating encryption keys before it is rotated"); +/* END CSTYLED */ +#endif diff --git a/module/os/macos/zfs/zvolIO.cpp b/module/os/macos/zfs/zvolIO.cpp new file mode 100644 index 0000000000..96a9aefe58 --- /dev/null +++ b/module/os/macos/zfs/zvolIO.cpp @@ -0,0 +1,1192 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2013-2020, Jorgen Lundman. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +/* + * ZVOL Device + */ +#define DEBUG +#if defined(DEBUG) || defined(ZFS_DEBUG) +#ifdef dprintf +#undef dprintf +#endif +#define dprintf(fmt, ...) do { \ + IOLog("zvolIO %s " fmt "\n", __func__, ##__VA_ARGS__); \ + _NOTE(CONSTCOND) } while (0) +#else +#ifndef dprintf +#define dprintf(fmt, ...) do { } while (0); +#endif +#endif /* if DEBUG or ZFS_DEBUG */ + +// #define dprintf IOLog + +// Define the superclass +#define super IOBlockStorageDevice + +#define ZVOL_BSIZE DEV_BSIZE + +static const char *ZVOL_PRODUCT_NAME_PREFIX = "ZVOL "; + +/* Wrapper for zvol_state pointer to IOKit device */ +typedef struct zvol_iokit { + net_lundman_zfs_zvol_device *dev; +} zvol_iokit_t; + +OSDefineMetaClassAndStructors(net_lundman_zfs_zvol_device, IOBlockStorageDevice) + +bool +net_lundman_zfs_zvol_device::init(zvol_state_t *c_zv, + OSDictionary *properties) +{ + zvol_iokit_t *iokitdev = NULL; + + dprintf("zvolIO_device:init\n"); + + if (!c_zv || c_zv->zv_zso->zvo_iokitdev != NULL) { + dprintf("zvol %s invalid c_zv\n", __func__); + return (false); + } + + if ((iokitdev = (zvol_iokit_t *)kmem_alloc(sizeof (zvol_iokit_t), + KM_SLEEP)) == NULL) { + printf("zvol %s wrapper alloc failed\n", __func__); + return (false); + } + + if (super::init(properties) == false) { + printf("zvol %s super init failed\n", __func__); + kmem_free(iokitdev, sizeof (zvol_iokit_t)); + return (false); + } + + /* Store reference to zvol_state_t in the iokitdev */ + zv = c_zv; + /* Store reference to iokitdev in zvol_state_t */ + iokitdev->dev = this; + + /* Assign to zv once completely initialized */ + zv->zv_zso->zvo_iokitdev = iokitdev; + + /* Apply the name from the full dataset path */ + if (strlen(zv->zv_name) != 0) { + setName(zv->zv_name); + } + + return (true); +} + +bool +net_lundman_zfs_zvol_device::attach(IOService* provider) +{ + OSDictionary *protocolCharacteristics = 0; + OSDictionary *deviceCharacteristics = 0; + OSDictionary *storageFeatures = 0; + OSBoolean *unmapFeature = 0; + const OSSymbol *propSymbol = 0; + OSString *dataString = 0; + OSNumber *dataNumber = 0; + + char product_name[strlen(ZVOL_PRODUCT_NAME_PREFIX) + MAXPATHLEN + 1]; + + if (!provider) { + dprintf("ZVOL attach missing provider\n"); + return (false); + } + + if (super::attach(provider) == false) + return (false); + + /* + * We want to set some additional properties for ZVOLs, in + * particular, "Virtual Device", and type "File" + * (or is Internal better?) + * + * Finally "Generic" type. 
+ * + * These properties are defined in *protocol* characteristics + */ + + protocolCharacteristics = OSDictionary::withCapacity(3); + + if (!protocolCharacteristics) { + IOLog("failed to create dict for protocolCharacteristics.\n"); + return (true); + } + + propSymbol = OSSymbol::withCString( + kIOPropertyPhysicalInterconnectTypeVirtual); + + if (!propSymbol) { + IOLog("could not create interconnect type string\n"); + return (true); + } + protocolCharacteristics->setObject( + kIOPropertyPhysicalInterconnectTypeKey, propSymbol); + + propSymbol->release(); + propSymbol = 0; + + propSymbol = OSSymbol::withCString(kIOPropertyInterconnectFileKey); + if (!propSymbol) { + IOLog("could not create interconnect location string\n"); + return (true); + } + protocolCharacteristics->setObject( + kIOPropertyPhysicalInterconnectLocationKey, propSymbol); + + propSymbol->release(); + propSymbol = 0; + + setProperty(kIOPropertyProtocolCharacteristicsKey, + protocolCharacteristics); + + protocolCharacteristics->release(); + protocolCharacteristics = 0; + + /* + * We want to set some additional properties for ZVOLs, in + * particular, physical block size (volblocksize) of the + * underlying ZVOL, and 'logical' block size presented by + * the virtual disk. Also set physical bytes per sector. + * + * These properties are defined in *device* characteristics + */ + + deviceCharacteristics = OSDictionary::withCapacity(3); + + if (!deviceCharacteristics) { + IOLog("failed to create dict for deviceCharacteristics.\n"); + return (true); + } + + /* Set this device to be an SSD, for priority and VM paging */ + propSymbol = OSSymbol::withCString( + kIOPropertyMediumTypeSolidStateKey); + if (!propSymbol) { + IOLog("could not create medium type string\n"); + return (true); + } + deviceCharacteristics->setObject(kIOPropertyMediumTypeKey, + propSymbol); + + propSymbol->release(); + propSymbol = 0; + + /* Set logical block size to ZVOL_BSIZE (512b) */ + dataNumber = OSNumber::withNumber(ZVOL_BSIZE, + 8 * sizeof (ZVOL_BSIZE)); + + deviceCharacteristics->setObject(kIOPropertyLogicalBlockSizeKey, + dataNumber); + + dprintf("logicalBlockSize %llu\n", + dataNumber->unsigned64BitValue()); + + dataNumber->release(); + dataNumber = 0; + + /* Set physical block size to match volblocksize property */ + dataNumber = OSNumber::withNumber(zv->zv_volblocksize, + 8 * sizeof (zv->zv_volblocksize)); + + deviceCharacteristics->setObject(kIOPropertyPhysicalBlockSizeKey, + dataNumber); + + dprintf("physicalBlockSize %llu\n", + dataNumber->unsigned64BitValue()); + + dataNumber->release(); + dataNumber = 0; + + /* Set physical bytes per sector to match volblocksize property */ + dataNumber = OSNumber::withNumber((uint64_t)(zv->zv_volblocksize), + 8 * sizeof (uint64_t)); + + deviceCharacteristics->setObject(kIOPropertyBytesPerPhysicalSectorKey, + dataNumber); + + dprintf("physicalBytesPerSector %llu\n", + dataNumber->unsigned64BitValue()); + + dataNumber->release(); + dataNumber = 0; + + /* Publish the Device / Media name */ + (void) snprintf(product_name, sizeof (product_name), "%s%s", + ZVOL_PRODUCT_NAME_PREFIX, zv->zv_name); + dataString = OSString::withCString(product_name); + deviceCharacteristics->setObject(kIOPropertyProductNameKey, dataString); + dataString->release(); + dataString = 0; + + /* Apply these characteristics */ + setProperty(kIOPropertyDeviceCharacteristicsKey, + deviceCharacteristics); + + deviceCharacteristics->release(); + deviceCharacteristics = 0; + + /* + * ZVOL unmap support + * + * These properties are defined in 
IOStorageFeatures + */ + + storageFeatures = OSDictionary::withCapacity(1); + if (!storageFeatures) { + IOLog("failed to create dictionary for storageFeatures.\n"); + return (true); + } + + /* Set unmap feature */ + unmapFeature = OSBoolean::withBoolean(true); + storageFeatures->setObject(kIOStorageFeatureUnmap, unmapFeature); + unmapFeature->release(); + unmapFeature = 0; + + /* Apply these storage features */ + setProperty(kIOStorageFeaturesKey, storageFeatures); + storageFeatures->release(); + storageFeatures = 0; + + + /* + * Set transfer limits: + * + * Maximum transfer size (bytes) + * Maximum transfer block count + * Maximum transfer block size (bytes) + * Maximum transfer segment count + * Maximum transfer segment size (bytes) + * Minimum transfer segment size (bytes) + * + * We will need to establish safe defaults for all / per volblocksize + * + * Example: setProperty(kIOMinimumSegmentAlignmentByteCountKey, 1, 1); + */ + + /* + * Finally "Generic" type, set as a device property. Tried setting this + * to the string "ZVOL" however the OS does not recognize it as a block + * storage device. This would probably be possible by extending the + * IOBlockStorage Device / Driver relationship. + */ + + setProperty(kIOBlockStorageDeviceTypeKey, + kIOBlockStorageDeviceTypeGeneric); + + return (true); +} + +int +net_lundman_zfs_zvol_device::renameDevice(void) +{ + OSDictionary *deviceDict; + OSString *nameStr; + char *newstr; + int len; + + /* Length of string and null terminating character */ + len = strlen(ZVOL_PRODUCT_NAME_PREFIX) + strlen(zv->zv_name) + 1; + newstr = (char *)kmem_alloc(len, KM_SLEEP); + if (!newstr) { + dprintf("%s string alloc failed\n", __func__); + return (ENOMEM); + } + + /* Append prefix and dsl name */ + snprintf(newstr, len, "%s%s", ZVOL_PRODUCT_NAME_PREFIX, zv->zv_name); + nameStr = OSString::withCString(newstr); + kmem_free(newstr, len); + + if (!nameStr) { + dprintf("%s couldn't allocate name string\n", __func__); + return (ENOMEM); + } + + /* Fetch current device characteristics dictionary */ + deviceDict = OSDynamicCast(OSDictionary, + getProperty(kIOPropertyDeviceCharacteristicsKey)); + if (!deviceDict || (deviceDict = + OSDictionary::withDictionary(deviceDict)) == NULL) { + dprintf("couldn't clone device characteristics\n"); + /* Allocate new dict */ + if (!deviceDict && + (deviceDict = OSDictionary::withCapacity(1)) == NULL) { + dprintf("%s OSDictionary alloc failed\n", __func__); + nameStr->release(); + return (ENOMEM); + } + + } + + /* Add or replace the product name */ + if (deviceDict->setObject(kIOPropertyProductNameKey, + nameStr) == false) { + dprintf("%s couldn't set product name\n", __func__); + nameStr->release(); + deviceDict->release(); + return (ENXIO); + } + nameStr->release(); + nameStr = 0; + + /* Set IORegistry property */ + if (setProperty(kIOPropertyDeviceCharacteristicsKey, + deviceDict) == false) { + dprintf("%s couldn't set IORegistry property\n", __func__); + deviceDict->release(); + return (ENXIO); + } + deviceDict->release(); + deviceDict = 0; + + /* Apply the name from the full dataset path */ + setName(zv->zv_name); + + return (0); +} + +int +net_lundman_zfs_zvol_device::offlineDevice(void) +{ + IOService *client; + + if ((client = this->getClient()) == NULL) { + return (ENOENT); + } + + /* Ask IOBlockStorageDevice to offline media */ + if (client->message(kIOMessageMediaStateHasChanged, + this, (void *)kIOMediaStateOffline) != kIOReturnSuccess) { + dprintf("%s failed\n", __func__); + return (ENXIO); + } + + return (0); +} + +int 
+net_lundman_zfs_zvol_device::onlineDevice(void) +{ + IOService *client; + + if ((client = this->getClient()) == NULL) { + return (ENOENT); + } + + /* Ask IOBlockStorageDevice to online media */ + if (client->message(kIOMessageMediaStateHasChanged, + this, (void *)kIOMediaStateOnline) != kIOReturnSuccess) { + dprintf("%s failed\n", __func__); + return (ENXIO); + } + + return (0); +} + +int +net_lundman_zfs_zvol_device::refreshDevice(void) +{ + IOService *client; + + if ((client = this->getClient()) == NULL) { + return (ENOENT); + } + + /* Ask IOBlockStorageDevice to reset the media params */ + if (client->message(kIOMessageMediaParametersHaveChanged, + this) != kIOReturnSuccess) { + dprintf("%s failed\n", __func__); + return (ENXIO); + } + + return (0); +} + +int +net_lundman_zfs_zvol_device::getBSDName(void) +{ + IORegistryEntry *ioregdevice = 0; + OSObject *bsdnameosobj = 0; + OSString* bsdnameosstr = 0; + + ioregdevice = OSDynamicCast(IORegistryEntry, this); + + if (!ioregdevice) + return (-1); + + bsdnameosobj = ioregdevice->getProperty(kIOBSDNameKey, + gIOServicePlane, kIORegistryIterateRecursively); + + if (!bsdnameosobj) + return (-1); + + bsdnameosstr = OSDynamicCast(OSString, bsdnameosobj); + + IOLog("zvol: bsd name is '%s'\n", + bsdnameosstr->getCStringNoCopy()); + + if (!zv) + return (-1); + + zv->zv_zso->zvo_bsdname[0] = 'r'; // for 'rdiskX'. + strlcpy(&zv->zv_zso->zvo_bsdname[1], + bsdnameosstr->getCStringNoCopy(), + sizeof (zv->zv_zso->zvo_bsdname)-1); + /* + * IOLog("name assigned '%s'\n", zv->zv_zso->zvo_bsdname); + */ + + return (0); +} + +void +net_lundman_zfs_zvol_device::detach(IOService *provider) +{ + super::detach(provider); +} + +void +net_lundman_zfs_zvol_device::clearState(void) +{ + zv = NULL; +} + +bool +net_lundman_zfs_zvol_device::handleOpen(IOService *client, + IOOptionBits options, void *argument) +{ + IOStorageAccess access = (uintptr_t)argument; + bool ret = false; + int openflags = 0; + + if (super::handleOpen(client, options, argument) == false) + return (false); + + /* Device terminating? */ + if (zv == NULL || + zv->zv_zso == NULL || + zv->zv_zso->zvo_iokitdev == NULL) + return (false); + + if (access & kIOStorageAccessReaderWriter) { + openflags = FWRITE | ZVOL_EXCL; + } else { + openflags = FREAD; + } + + /* + * Don't use 'zv' until it has been verified by zvol_os_open_zv() + * and returned as opened, then it holds an open count and can be + * used. + */ + if (zvol_os_open_zv(zv, zv->zv_zso->zvo_openflags, 0, NULL) == 0) { + ret = true; + } else { + openflags = FREAD; + if (zvol_os_open_zv(zv, FREAD /* ZVOL_EXCL */, 0, NULL) == 0) { + ret = true; + } + } + + if (ret) + zv->zv_zso->zvo_openflags = openflags; + + + dprintf("Open %s (openflags %llx)\n", (ret ? "done" : "failed"), + ret ? zv->zv_zso->zvo_openflags : 0); + + if (ret == false) + super::handleClose(client, options); + + return (ret); +} + +void +net_lundman_zfs_zvol_device::handleClose(IOService *client, + IOOptionBits options) +{ + super::handleClose(client, options); + + /* Terminating ? 
*/ + if (zv == NULL || + zv->zv_zso == NULL || + zv->zv_zso->zvo_iokitdev == NULL) + return; + + zvol_os_close_zv(zv, zv->zv_zso->zvo_openflags, 0, NULL); + +} + +IOReturn +net_lundman_zfs_zvol_device::doAsyncReadWrite( + IOMemoryDescriptor *buffer, UInt64 block, UInt64 nblks, + IOStorageAttributes *attributes, IOStorageCompletion *completion) +{ + IODirection direction; + IOByteCount actualByteCount; + struct iomem iomem; + iomem.buf = NULL; + + // Return errors for incoming I/O if we have been terminated. + if (isInactive() == true) { + dprintf("asyncReadWrite notActive fail\n"); + return (kIOReturnNotAttached); + } + + // These variables are set in zvol_first_open(), which should have been + // called already. + if (!zv->zv_dn) { + dprintf("asyncReadWrite no zvol dnode\n"); + return (kIOReturnNotAttached); + } + + // Ensure the start block is within the disk capacity. + if ((block)*(ZVOL_BSIZE) >= zv->zv_volsize) { + dprintf("asyncReadWrite start block outside volume\n"); + return (kIOReturnBadArgument); + } + + // Shorten the read, if beyond the end + if (((block + nblks)*(ZVOL_BSIZE)) > zv->zv_volsize) { + dprintf("asyncReadWrite block shortening needed\n"); + return (kIOReturnBadArgument); + } + + // Get the buffer direction, whether this is a read or a write. + direction = buffer->getDirection(); + if ((direction != kIODirectionIn) && (direction != kIODirectionOut)) { + dprintf("asyncReadWrite kooky direction\n"); + return (kIOReturnBadArgument); + } + + // dprintf("%s offset @block %llu numblocks %llu: blksz %u\n", + // direction == kIODirectionIn ? "Read" : "Write", + // block, nblks, (ZVOL_BSIZE)); + + /* Perform the read or write operation through the transport driver. */ + actualByteCount = (nblks*(ZVOL_BSIZE)); + + iomem.buf = buffer; + + /* Make sure we don't go away while the command is being executed */ + /* Open should be holding a retain */ + + if (direction == kIODirectionIn) { + + if (zvol_os_read_zv(zv, (block*(ZVOL_BSIZE)), + actualByteCount, &iomem)) { + + actualByteCount = 0; + } + + } else { + + if (zvol_os_write_zv(zv, (block*(ZVOL_BSIZE)), + actualByteCount, &iomem)) { + actualByteCount = 0; + } + + } + + /* Open should be holding a retain */ + iomem.buf = NULL; // overkill + + if (actualByteCount != nblks*(ZVOL_BSIZE)) + dprintf("Read/Write operation failed\n"); + + // Call the completion function. 
+ (completion->action)(completion->target, completion->parameter, + kIOReturnSuccess, actualByteCount); + + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::doDiscard(UInt64 block, UInt64 nblks) +{ + dprintf("doDiscard called with block, nblks (%llu, %llu)\n", + block, nblks); + uint64_t bytes = 0; + uint64_t off = 0; + + /* Convert block/nblks to offset/bytes */ + off = block * ZVOL_BSIZE; + bytes = nblks * ZVOL_BSIZE; + dprintf("calling zvol_unmap with offset, bytes (%llu, %llu)\n", + off, bytes); + + if (zvol_os_unmap(zv, off, bytes) == 0) + return (kIOReturnSuccess); + else + return (kIOReturnError); +} + + +IOReturn +net_lundman_zfs_zvol_device::doUnmap(IOBlockStorageDeviceExtent *extents, + UInt32 extentsCount, UInt32 options = 0) +{ + UInt32 i = 0; + IOReturn result; + + dprintf("doUnmap called with (%u) extents and options (%u)\n", + (uint32_t)extentsCount, (uint32_t)options); + + if (options > 0 || !extents) { + return (kIOReturnUnsupported); + } + + for (i = 0; i < extentsCount; i++) { + + result = doDiscard(extents[i].blockStart, + extents[i].blockCount); + + if (result != kIOReturnSuccess) { + return (result); + } + } + + return (kIOReturnSuccess); +} + +UInt32 +net_lundman_zfs_zvol_device::doGetFormatCapacities(UInt64* capacities, + UInt32 capacitiesMaxCount) const +{ + dprintf("formatCap\n"); + + /* + * Ensure that the array is sufficient to hold all our formats + * (we require one element). + */ + if ((capacities != NULL) && (capacitiesMaxCount < 1)) + return (0); + /* Error, return an array size of 0. */ + + /* + * The caller may provide a NULL array if it wishes to query the number + * of formats that we support. + */ + if (capacities != NULL) + capacities[0] = zv->zv_volsize; + + dprintf("returning capacity[0] size %llu\n", zv->zv_volsize); + + return (1); +} + +char * +net_lundman_zfs_zvol_device::getProductString(void) +{ + dprintf("getProduct %p\n", zv); + + if (zv) + return (zv->zv_name); + + return ((char *)"ZVolume"); +} + +IOReturn +net_lundman_zfs_zvol_device::reportBlockSize(UInt64 *blockSize) +{ + dprintf("reportBlockSize %llu\n", *blockSize); + + if (blockSize) *blockSize = (ZVOL_BSIZE); + + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::reportMaxValidBlock(UInt64 *maxBlock) +{ + dprintf("reportMaxValidBlock %llu\n", *maxBlock); + + if (maxBlock) *maxBlock = ((zv->zv_volsize / (ZVOL_BSIZE)) - 1); + + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::reportMediaState(bool *mediaPresent, + bool *changedState) +{ + dprintf("reportMediaState\n"); + if (mediaPresent) *mediaPresent = true; + if (changedState) *changedState = false; + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::reportPollRequirements(bool *pollRequired, + bool *pollIsExpensive) +{ + dprintf("reportPollReq\n"); + if (pollRequired) *pollRequired = false; + if (pollIsExpensive) *pollIsExpensive = false; + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::reportRemovability(bool *isRemovable) +{ + dprintf("reportRemova\n"); + if (isRemovable) *isRemovable = false; + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::doEjectMedia(void) +{ + dprintf("ejectMedia\n"); +/* XXX */ + // Only 10.6 needs special work to eject + // if ((version_major == 10) && (version_minor == 8)) + // destroyBlockStorageDevice(zvol); + // } + + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::doFormatMedia(UInt64 byteCapacity) +{ + dprintf("doFormat\n"); + return 
(kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::doLockUnlockMedia(bool doLock) +{ + dprintf("doLockUnlock\n"); + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::doSynchronizeCache(void) +{ + dprintf("doSync\n"); + if (zv && zv->zv_zilog) { + zil_commit(zv->zv_zilog, ZVOL_OBJ); + } + return (kIOReturnSuccess); +} + +char * +net_lundman_zfs_zvol_device::getVendorString(void) +{ + dprintf("getVendor\n"); + return ((char *)"ZVOL"); +} + +char * +net_lundman_zfs_zvol_device::getRevisionString(void) +{ + dprintf("getRevision\n"); + return ((char *)ZFS_META_VERSION); +} + +char * +net_lundman_zfs_zvol_device::getAdditionalDeviceInfoString(void) +{ + dprintf("getAdditional\n"); + return ((char *)"ZFS Volume"); +} + +IOReturn +net_lundman_zfs_zvol_device::reportEjectability(bool *isEjectable) +{ + dprintf("reportEjecta\n"); + /* + * Which do we prefer? If you eject it, you can't get volume back until + * you import it again. + */ + + if (isEjectable) *isEjectable = false; + return (kIOReturnSuccess); +} + +/* XXX deprecated function */ +IOReturn +net_lundman_zfs_zvol_device::reportLockability(bool *isLockable) +{ + dprintf("reportLocka\n"); + if (isLockable) *isLockable = true; + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::reportWriteProtection(bool *isWriteProtected) +{ + dprintf("reportWritePro: %d\n", *isWriteProtected); + + if (!isWriteProtected) + return (kIOReturnSuccess); + + if (zv && (zv->zv_flags & ZVOL_RDONLY)) + *isWriteProtected = true; + else + *isWriteProtected = false; + + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::getWriteCacheState(bool *enabled) +{ + dprintf("getCacheState\n"); + if (enabled) *enabled = true; + return (kIOReturnSuccess); +} + +IOReturn +net_lundman_zfs_zvol_device::setWriteCacheState(bool enabled) +{ + dprintf("setWriteCache\n"); + return (kIOReturnSuccess); +} + +extern "C" { + +/* C interfaces */ +int +zvolCreateNewDevice(zvol_state_t *zv) +{ + net_lundman_zfs_zvol_device *zvol; + ZFSPool *pool_proxy; + spa_t *spa; + dprintf("%s\n", __func__); + + /* We must have a valid zvol_state_t */ + if (!zv || !zv->zv_objset) { + dprintf("%s missing zv or objset\n", __func__); + return (EINVAL); + } + + /* We need the spa to get the pool proxy */ + if ((spa = dmu_objset_spa(zv->zv_objset)) == NULL) { + dprintf("%s couldn't get spa\n", __func__); + return (EINVAL); + } + if (spa->spa_iokit_proxy == NULL || + (pool_proxy = spa->spa_iokit_proxy->proxy) == NULL) { + dprintf("%s missing IOKit pool proxy\n", __func__); + return (EINVAL); + } + + zvol = new net_lundman_zfs_zvol_device; + + /* Validate creation, initialize and attach */ + if (!zvol || zvol->init(zv) == false || + zvol->attach(pool_proxy) == false) { + dprintf("%s device creation failed\n", __func__); + if (zvol) zvol->release(); + return (ENOMEM); + } + /* Start the service */ + if (zvol->start(pool_proxy) == false) { + dprintf("%s device start failed\n", __func__); + zvol->detach(pool_proxy); + zvol->release(); + return (ENXIO); + } + + /* Open pool_proxy provider */ + if (pool_proxy->open(zvol) == false) { + dprintf("%s open provider failed\n", __func__); + zvol->stop(pool_proxy); + zvol->detach(pool_proxy); + zvol->release(); + return (ENXIO); + } + /* Is retained by provider */ + zvol->release(); + zvol = 0; + + return (0); +} + +int +zvolRegisterDevice(zvol_state_t *zv) +{ + net_lundman_zfs_zvol_device *zvol; + OSDictionary *matching; + IOService *service = 0; + IOMedia *media = 0; + OSString *nameStr = 0, 
*bsdName = 0; + uint64_t timeout = (5ULL * kSecondScale); + bool ret = false; + + if (!zv || !zv->zv_zso->zvo_iokitdev || zv->zv_name[0] == 0) { + dprintf("%s missing zv, iokitdev, or name\n", __func__); + return (EINVAL); + } + + if ((zvol = zv->zv_zso->zvo_iokitdev->dev) == NULL) { + dprintf("%s couldn't get zvol device\n", __func__); + return (EINVAL); + } + + if (!zvol->getVendorString()) { + return (EINVAL); + } + + /* Create matching string and dictionary */ + { + char str[MAXNAMELEN]; + snprintf(str, MAXNAMELEN, "%s %s Media", + zvol->getVendorString(), zv->zv_name); + if ((nameStr = OSString::withCString(str)) == NULL) { + dprintf("%s problem with name string\n", __func__); + return (ENOMEM); + } + } + matching = IOService::serviceMatching("IOMedia"); + if (!matching || !matching->setObject(gIONameMatchKey, nameStr)) { + dprintf("%s couldn't get matching dictionary\n", __func__); + return (ENOMEM); + } + + /* Register device for service matching */ + zvol->registerService(kIOServiceAsynchronous); + + /* Wait for upper layer BSD client */ + dprintf("%s waiting for IOMedia\n", __func__); + /* Wait for up to 5 seconds */ + service = IOService::waitForMatchingService(matching, timeout); + dprintf("%s %s service\n", __func__, (service ? "got" : "no")); + + if (!service) { + dprintf("%s couldn't get matching service\n", __func__); + return (false); + } + + dprintf("%s casting to IOMedia\n", __func__); + media = OSDynamicCast(IOMedia, service); + + if (!media) { + dprintf("%s no IOMedia\n", __func__); + service->release(); + return (false); + } + + dprintf("%s getting IOBSDNameKey\n", __func__); + bsdName = OSDynamicCast(OSString, + media->getProperty(kIOBSDNameKey)); + + if (bsdName) { + const char *str = bsdName->getCStringNoCopy(); + dprintf("%s Got bsd name [%s]\n", + __func__, str); + zv->zv_zso->zvo_bsdname[0] = 'r'; + snprintf(zv->zv_zso->zvo_bsdname+1, + sizeof (zv->zv_zso->zvo_bsdname)-1, + "%s", str); + dprintf("%s zvol bsdname set to %s\n", __func__, + zv->zv_zso->zvo_bsdname); +// zvol_add_symlink(zv, zv->zv_zso->zvo_bsdname+1, +// zv->zv_zso->zvo_bsdname); + ret = true; + } else { + dprintf("%s couldn't get BSD Name\n", __func__); + } + + /* Release retain held by waitForMatchingService */ + service->release(); + + printf("%s complete\n", __func__); + return (ret); +} + +/* Struct passed in will be freed before returning */ +void * +zvolRemoveDevice(zvol_iokit_t *iokitdev) +{ + net_lundman_zfs_zvol_device *zvol; + dprintf("%s\n", __func__); + + if (!iokitdev) { + dprintf("%s missing argument\n", __func__); + return (NULL); + } + + zvol = iokitdev->dev; + /* Free the wrapper struct */ + kmem_free(iokitdev, sizeof (zvol_iokit_t)); + + if (zvol == NULL) { + dprintf("%s couldn't get IOKit handle\n", __func__); + return (NULL); + } + + /* Mark us as terminating */ + zvol->clearState(); + + return (zvol); +} + +/* + * zvolRemoveDevice continued.. + * terminate() will block and we can deadlock, so it is issued as a + * separate thread. Done from zvol_os.c as it is easier in C. 
+ */ +int +zvolRemoveDeviceTerminate(void *arg) +{ + net_lundman_zfs_zvol_device *zvol = (net_lundman_zfs_zvol_device *)arg; + + /* Terminate */ + if (zvol->terminate(kIOServiceTerminate|kIOServiceAsynchronous| + kIOServiceRequired) == false) { + IOLog("%s terminate failed\n", __func__); + } + + return (0); +} + +/* Called with zv->zv_name already updated */ +int +zvolRenameDevice(zvol_state_t *zv) +{ + net_lundman_zfs_zvol_device *zvol = NULL; + int error; + + if (!zv || strnlen(zv->zv_name, 1) == 0) { + dprintf("%s missing argument\n", __func__); + return (EINVAL); + } + + if ((zvol = zv->zv_zso->zvo_iokitdev->dev) == NULL) { + dprintf("%s couldn't get zvol device\n", __func__); + return (EINVAL); + } + + /* Set IORegistry name and property */ + if ((error = zvol->renameDevice()) != 0) { + dprintf("%s renameDevice error %d\n", __func__, error); + return (error); + } + + /* + * XXX This works, but if there is a volume mounted on + * the zvol at the time it is uncleanly ejected. + * We just need to add diskutil unmount to `zfs rename`, + * like zpool export. + */ + /* Inform clients of this device that name has changed */ + if (zvol->offlineDevice() != 0 || + zvol->onlineDevice() != 0) { + dprintf("%s media reset failed\n", __func__); + return (ENXIO); + } + + return (0); +} + +/* Called with zvol volsize already updated */ +int +zvolSetVolsize(zvol_state_t *zv) +{ + net_lundman_zfs_zvol_device *zvol; + int error; + + dprintf("%s\n", __func__); + + if (!zv || !zv->zv_zso->zvo_iokitdev) { + dprintf("%s invalid zvol\n", __func__); + return (EINVAL); + } + + /* Cast to correct type */ + if ((zvol = zv->zv_zso->zvo_iokitdev->dev) == NULL) { + dprintf("%s couldn't cast IOKit handle\n", __func__); + return (ENXIO); + } + /* + * XXX This works fine, even if volume is mounted, + * but only tested expanding the zvol and only with + * GPT/APM/MBR partition map (not volume on whole-zvol). + */ + /* Inform clients of this device that size has changed */ + if ((error = zvol->refreshDevice()) != 0) { + dprintf("%s refreshDevice error %d\n", __func__, error); + return (error); + } + + return (0); +} + +uint64_t +zvolIO_kit_read(struct iomem *iomem, uint64_t offset, + char *address, uint64_t len) +{ + IOByteCount done; + // IOLog("zvolIO_kit_read offset %p count %llx to offset %llx\n", + // address, len, offset); + ASSERT(iomem && address && len > 0); + + done = iomem->buf->writeBytes(offset, (void *)address, len); + + return (done); +} + +uint64_t +zvolIO_kit_write(struct iomem *iomem, uint64_t offset, + char *address, uint64_t len) +{ + IOByteCount done; + // IOLog("zvolIO_kit_write offset %p count %llx to offset %llx\n", + // address, len, offset); + ASSERT(iomem && address && len > 0); + + done = iomem->buf->readBytes(offset, (void *)address, len); + + return (done); +} + +} /* extern "C" */ diff --git a/module/os/macos/zfs/zvol_os.c b/module/os/macos/zfs/zvol_os.c new file mode 100644 index 0000000000..1c985598c4 --- /dev/null +++ b/module/os/macos/zfs/zvol_os.c @@ -0,0 +1,1024 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2020 by Jorgen Lundman. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static uint32_t zvol_major = ZVOL_MAJOR; + +unsigned int zvol_request_sync = 0; +unsigned int zvol_prefetch_bytes = (128 * 1024); +unsigned long zvol_max_discard_blocks = 16384; +unsigned int zvol_threads = 8; + +taskq_t *zvol_taskq; + +typedef struct zv_request { + zvol_state_t *zv; + + void (*zv_func)(void *); + void *zv_arg; + + taskq_ent_t ent; +} zv_request_t; + +int +dmu_read_iokit_dnode(dnode_t *dn, uint64_t *offset, + uint64_t position, uint64_t *size, struct iomem *iomem); +int +dmu_write_iokit_dnode(dnode_t *dn, uint64_t *offset, uint64_t position, + uint64_t *size, struct iomem *iomem, dmu_tx_t *tx); + + +static void +zvol_os_spawn_cb(void *param) +{ + zv_request_t *zvr = (zv_request_t *)param; + + zvr->zv_func(zvr->zv_arg); + + kmem_free(zvr, sizeof (zv_request_t)); +} + +static void +zvol_os_spawn(void (*func)(void *), void *arg) +{ + zv_request_t *zvr; + zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP); + zvr->zv_arg = arg; + zvr->zv_func = func; + + taskq_init_ent(&zvr->ent); + + taskq_dispatch_ent(zvol_taskq, + zvol_os_spawn_cb, zvr, 0, &zvr->ent); +} + +/* + * Given a path, return TRUE if path is a ZVOL. + */ +static boolean_t +zvol_os_is_zvol(const char *device) +{ +#if 0 + struct stat stbf; + + // Stat device, get major/minor, match zv + if (stat(device, &stbf) == 0) { + if (S_ISBLK(stbf.st_mode) || S_ISCHR(stbf.st_mode)) { + dev_t dev = makedevice(stbf.st_major, stbf.st_minor); + + zvol_state_t *zv; + zv = zvol_find_by_dev(dev); + if (zv != NULL) { + mutex_exit(&zv->zv_state_lock); + return (B_TRUE); + } + } + } +#endif + return (B_FALSE); +} + +/* + * Make sure zv is still in the list (not freed) and if it is + * grab the locks in the correct order. + * Can we rely on list_link_active() instead of looping list? 
+ */ +static int +zvol_os_verify_and_lock(zvol_state_t *node) +{ + zvol_state_t *zv; + + rw_enter(&zvol_state_lock, RW_READER); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv)) { + mutex_enter(&zv->zv_state_lock); + if (zv == node) { + + if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { + mutex_exit(&zv->zv_state_lock); + rw_enter(&zv->zv_suspend_lock, RW_READER); + mutex_enter(&zv->zv_state_lock); + } + rw_exit(&zvol_state_lock); + return (1); + } + mutex_exit(&zv->zv_state_lock); + } + rw_exit(&zvol_state_lock); + return (0); +} + +static void +zvol_os_register_device_cb(void *param) +{ + zvol_state_t *zv = (zvol_state_t *)param; + + if (zvol_os_verify_and_lock(zv) == 0) + return; + + zvolRegisterDevice(zv); + + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); +} + +int +zvol_os_write(dev_t dev, struct uio *uio, int p) +{ + return (ENOTSUP); +} + +int +zvol_os_read(dev_t dev, struct uio *uio, int p) +{ + return (ENOTSUP); +} + +int +zvol_os_write_zv(zvol_state_t *zv, uint64_t position, + uint64_t count, struct iomem *iomem) +{ + uint64_t volsize; + zfs_locked_range_t *lr; + int error = 0; + boolean_t sync; + uint64_t offset = 0; + uint64_t bytes; + uint64_t off; + + if (zv == NULL) + return (ENXIO); + + /* Some requests are just for flush and nothing else. */ + if (count == 0) + return (0); + + volsize = zv->zv_volsize; + if (count > 0 && + (position >= volsize)) + return (EIO); + + rw_enter(&zv->zv_suspend_lock, RW_READER); + + /* + * Open a ZIL if this is the first time we have written to this + * zvol. We protect zv->zv_zilog with zv_suspend_lock rather + * than zv_state_lock so that we don't need to acquire an + * additional lock in this path. + */ + if (zv->zv_zilog == NULL) { + rw_exit(&zv->zv_suspend_lock); + rw_enter(&zv->zv_suspend_lock, RW_WRITER); + if (zv->zv_zilog == NULL) { + zv->zv_zilog = zil_open(zv->zv_objset, + zvol_get_data); + zv->zv_flags |= ZVOL_WRITTEN_TO; + } + rw_downgrade(&zv->zv_suspend_lock); + } + + dprintf("zvol_write_iokit(position %llu offset " + "0x%llx bytes 0x%llx)\n", position, offset, count); + + sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); + + /* Lock the entire range */ + lr = zfs_rangelock_enter(&zv->zv_rangelock, position, count, + RL_WRITER); + + /* Iterate over (DMU_MAX_ACCESS/2) segments */ + while (count > 0 && (position + offset) < volsize) { + /* bytes for this segment */ + bytes = MIN(count, DMU_MAX_ACCESS >> 1); + off = offset; + dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); + + /* don't write past the end */ + if (bytes > volsize - (position + off)) + bytes = volsize - (position + off); + + dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + break; + } + + error = dmu_write_iokit_dnode(zv->zv_dn, &offset, + position, &bytes, iomem, tx); + + if (error == 0) { + count -= MIN(count, + (DMU_MAX_ACCESS >> 1)) + bytes; + zvol_log_write(zv, tx, offset, bytes, sync); + } + dmu_tx_commit(tx); + + if (error) + break; + } + zfs_rangelock_exit(lr); + + if (sync) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + + rw_exit(&zv->zv_suspend_lock); + + return (error); +} + +int +zvol_os_read_zv(zvol_state_t *zv, uint64_t position, + uint64_t count, struct iomem *iomem) +{ + uint64_t volsize; + zfs_locked_range_t *lr; + int error = 0; + uint64_t offset = 0; + + if (zv == NULL) + return (ENXIO); + + volsize = zv->zv_volsize; + if (count > 0 && + (position >= volsize)) + return (EIO); + + rw_enter(&zv->zv_suspend_lock, 
RW_READER);
+
+ lr = zfs_rangelock_enter(&zv->zv_rangelock, position, count,
+ RL_READER);
+
+ while (count > 0 && (position+offset) < volsize) {
+ uint64_t bytes = MIN(count, DMU_MAX_ACCESS >> 1);
+
+ /* don't read past the end */
+ if (bytes > volsize - (position + offset))
+ bytes = volsize - (position + offset);
+
+ dprintf("%s %llu offset %llu len %llu bytes %llu\n",
+ "zvol_read_iokit: position",
+ position, offset, count, bytes);
+
+ error = dmu_read_iokit_dnode(zv->zv_dn, &offset, position,
+ &bytes, iomem);
+
+ if (error) {
+ /* convert checksum errors into IO errors */
+ if (error == ECKSUM)
+ error = EIO;
+ break;
+ }
+ count -= MIN(count, DMU_MAX_ACCESS >> 1) - bytes;
+ }
+ zfs_rangelock_exit(lr);
+
+ rw_exit(&zv->zv_suspend_lock);
+ return (error);
+}
+
+int
+zvol_os_unmap(zvol_state_t *zv, uint64_t off, uint64_t bytes)
+{
+ zfs_locked_range_t *lr = NULL;
+ dmu_tx_t *tx = NULL;
+ int error = 0;
+ uint64_t end = off + bytes;
+
+ if (zv == NULL)
+ return (ENXIO);
+
+ /*
+ * XNU's wipefs_wipe() will issue one giant unmap for the entire
+ * device, for example:
+ * zfs create -V 8g BOOM/vol
+ * zvolIO doDiscard calling zvol_unmap with offset, bytes (0, 858992)
+ * Such an unmap would both take too long and be unnecessary. We will
+ * ignore any unmaps deemed "too large".
+ */
+ if ((off == 0ULL) &&
+ (zv->zv_volsize > (1ULL << 24)) && /* 16MB slop */
+ (bytes >= (zv->zv_volsize - (1ULL << 24))))
+ return (0);
+
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+ /*
+ * Open a ZIL if this is the first time we have written to this
+ * zvol. We protect zv->zv_zilog with zv_suspend_lock rather
+ * than zv_state_lock so that we don't need to acquire an
+ * additional lock in this path.
+ */
+ if (zv->zv_zilog == NULL) {
+ rw_exit(&zv->zv_suspend_lock);
+ rw_enter(&zv->zv_suspend_lock, RW_WRITER);
+ if (zv->zv_zilog == NULL) {
+ zv->zv_zilog = zil_open(zv->zv_objset,
+ zvol_get_data);
+ zv->zv_flags |= ZVOL_WRITTEN_TO;
+ }
+ rw_downgrade(&zv->zv_suspend_lock);
+ }
+
+ off = P2ROUNDUP(off, zv->zv_volblocksize);
+ end = P2ALIGN(end, zv->zv_volblocksize);
+
+ if (end > zv->zv_volsize) /* don't write past the end */
+ end = zv->zv_volsize;
+
+ if (off >= end) {
+ /* Return success; the caller does not need to know */
+ goto out;
+ }
+
+ bytes = end - off;
+ lr = zfs_rangelock_enter(&zv->zv_rangelock, off, bytes, RL_WRITER);
+
+ tx = dmu_tx_create(zv->zv_objset);
+
+ dmu_tx_mark_netfree(tx);
+
+ error = dmu_tx_assign(tx, TXG_WAIT);
+
+ if (error) {
+ dmu_tx_abort(tx);
+ } else {
+
+ zvol_log_truncate(zv, tx, off, bytes, B_TRUE);
+
+ dmu_tx_commit(tx);
+
+ error = dmu_free_long_range(zv->zv_objset,
+ ZVOL_OBJ, off, bytes);
+ }
+
+ zfs_rangelock_exit(lr);
+
+ if (error == 0) {
+ /*
+ * If the 'sync' property is set to 'always' then
+ * treat this as a synchronous operation
+ * (i.e. commit to zil).
+ */
+ if (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) {
+ zil_commit(zv->zv_zilog, ZVOL_OBJ);
+ }
+ }
+
+out:
+ rw_exit(&zv->zv_suspend_lock);
+ return (error);
+}
+
+int
+zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
+{
+ zv->zv_volsize = volsize;
+ return (0);
+}
+
+static void
+zvol_os_clear_private_cb(void *param)
+{
+ zvolRemoveDeviceTerminate(param);
+}
+
+static void
+zvol_os_clear_private(zvol_state_t *zv)
+{
+ void *term;
+
+ printf("%s\n", __func__);
+ /* We can do all removal work, except call terminate.
*/ + term = zvolRemoveDevice(zv->zv_zso->zvo_iokitdev); + if (term == NULL) + return; + + zv->zv_zso->zvo_iokitdev = NULL; + + /* Call terminate in the background */ + zvol_os_spawn(zvol_os_clear_private_cb, term); + +} + +/* + * Find a zvol_state_t given the full major+minor dev_t. If found, + * return with zv_state_lock taken, otherwise, return (NULL) without + * taking zv_state_lock. + */ +static zvol_state_t * +zvol_os_find_by_dev(dev_t dev) +{ + zvol_state_t *zv; + + printf("%s\n", __func__); + + rw_enter(&zvol_state_lock, RW_READER); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv)) { + mutex_enter(&zv->zv_state_lock); + if (zv->zv_zso->zvo_dev == dev) { + rw_exit(&zvol_state_lock); + return (zv); + } + mutex_exit(&zv->zv_state_lock); + } + rw_exit(&zvol_state_lock); + + return (NULL); +} + +void +zvol_os_validate_dev(zvol_state_t *zv) +{ + ASSERT3U(MINOR(zv->zv_zso->zvo_dev) & ZVOL_MINOR_MASK, ==, 0); +} + +/* + * Allocate memory for a new zvol_state_t and setup the required + * request queue and generic disk structures for the block device. + */ +static zvol_state_t * +zvol_os_alloc(dev_t dev, const char *name) +{ + zvol_state_t *zv; + struct zvol_state_os *zso; + uint64_t volmode; + + printf("%s\n", __func__); + if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0) + return (NULL); + + printf("%s 2\n", __func__); + if (volmode == ZFS_VOLMODE_DEFAULT) + volmode = zvol_volmode; + + printf("%s 3\n", __func__); + if (volmode == ZFS_VOLMODE_NONE) + return (NULL); + + printf("%s 4\n", __func__); + zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); + zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP); + zv->zv_zso = zso; + + list_link_init(&zv->zv_next); + mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); + + zv->zv_open_count = 0; + strlcpy(zv->zv_name, name, MAXNAMELEN); + + zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL); + rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL); + + return (zv); +#if 0 +out_kmem: + kmem_free(zso, sizeof (struct zvol_state_os)); + kmem_free(zv, sizeof (zvol_state_t)); + return (NULL); +#endif +} + +/* + * Cleanup then free a zvol_state_t which was created by zvol_alloc(). + * At this time, the structure is not opened by anyone, is taken off + * the zvol_state_list, and has its private data set to NULL. + * The zvol_state_lock is dropped. + * + */ +static void +zvol_os_free(zvol_state_t *zv) +{ + printf("%s\n", __func__); + + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count == 0); + + rw_destroy(&zv->zv_suspend_lock); + zfs_rangelock_fini(&zv->zv_rangelock); + + mutex_destroy(&zv->zv_state_lock); + + kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); + kmem_free(zv, sizeof (zvol_state_t)); +} + + + +/* + * Create a block device minor node and setup the linkage between it + * and the specified volume. Once this function returns the block + * device is live and ready for use. + */ +static int +zvol_os_create_minor(const char *name) +{ + zvol_state_t *zv; + objset_t *os; + dmu_object_info_t *doi; + uint64_t volsize; + unsigned minor = 0; + int error = 0; + uint64_t hash = zvol_name_hash(name); + + printf("%s\n", __func__); + + if (zvol_inhibit_dev) + return (0); + + // minor? 
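+ /* If a zvol_state_t already exists for this name, bail with EEXIST */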
+ zv = zvol_find_by_name_hash(name, hash, RW_NONE); + if (zv) { + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + mutex_exit(&zv->zv_state_lock); + return (SET_ERROR(EEXIST)); + } + + doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); + + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os); + if (error) + goto out_doi; + + error = dmu_object_info(os, ZVOL_OBJ, doi); + if (error) + goto out_dmu_objset_disown; + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); + if (error) + goto out_dmu_objset_disown; + + zv = zvol_os_alloc(makedevice(zvol_major, minor), name); + if (zv == NULL) { + error = SET_ERROR(EAGAIN); + goto out_dmu_objset_disown; + } + zv->zv_hash = hash; + + if (dmu_objset_is_snapshot(os)) + zv->zv_flags |= ZVOL_RDONLY; + + zv->zv_volblocksize = doi->doi_data_block_size; + zv->zv_volsize = volsize; + zv->zv_objset = os; + + // set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9); + + if (spa_writeable(dmu_objset_spa(os))) { + if (zil_replay_disable) + zil_destroy(dmu_objset_zil(os), B_FALSE); + else + zil_replay(os, zv, zvol_replay_vector); + } + + /* Create the IOKit zvol while owned */ + if ((error = zvolCreateNewDevice(zv)) != 0) { + dprintf("%s zvolCreateNewDevice error %d\n", + __func__, error); + } + + zv->zv_objset = NULL; +out_dmu_objset_disown: + dmu_objset_disown(os, B_TRUE, FTAG); +out_doi: + kmem_free(doi, sizeof (dmu_object_info_t)); + + if (error == 0) { + rw_enter(&zvol_state_lock, RW_WRITER); + zvol_insert(zv); + rw_exit(&zvol_state_lock); + + /* Register (async) IOKit zvol after disown and unlock */ + /* The callback will release the mutex */ + zvol_os_spawn(zvol_os_register_device_cb, zv); + + } else { + + } + + printf("%s complete\n", __func__); + return (error); +} + + +static void zvol_os_rename_device_cb(void *param) +{ + zvol_state_t *zv = (zvol_state_t *)param; + if (zvol_os_verify_and_lock(zv) == 0) + return; + zvolRenameDevice(zv); + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); +} + +static void +zvol_os_rename_minor(zvol_state_t *zv, const char *newname) +{ + // int readonly = get_disk_ro(zv->zv_zso->zvo_disk); + + ASSERT(RW_LOCK_HELD(&zvol_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + + strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); + + /* move to new hashtable entry */ + zv->zv_hash = zvol_name_hash(zv->zv_name); + hlist_del(&zv->zv_hlink); + hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash)); + + zvol_os_spawn(zvol_os_rename_device_cb, zv); + + /* + * The block device's read-only state is briefly changed causing + * a KOBJ_CHANGE uevent to be issued. This ensures udev detects + * the name change and fixes the symlinks. This does not change + * ZVOL_RDONLY in zv->zv_flags so the actual read-only state never + * changes. This would normally be done using kobject_uevent() but + * that is a GPL-only symbol which is why we need this workaround. 
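+ * (Comment inherited from the Linux implementation; on macOS the
+ * set_disk_ro() calls below remain commented out.)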
*/ + // set_disk_ro(zv->zv_zso->zvo_disk, !readonly); + // set_disk_ro(zv->zv_zso->zvo_disk, readonly); +} + +static void +zvol_os_set_disk_ro(zvol_state_t *zv, int flags) +{ + // set_disk_ro(zv->zv_zso->zvo_disk, flags); +} + +static void +zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity) +{ + // set_capacity(zv->zv_zso->zvo_disk, capacity); +} + +int +zvol_os_open_zv(zvol_state_t *zv, int flag, int otyp, struct proc *p) +{ + int error = 0; + + printf("%s\n", __func__); + + /* + * make sure zvol is not suspended during first open + * (hold zv_suspend_lock) and respect proper lock acquisition + * ordering - zv_suspend_lock before zv_state_lock + */ + if (zvol_os_verify_and_lock(zv) == 0) + return (SET_ERROR(ENOENT)); + + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock)); + + if (zv->zv_open_count == 0) { + error = zvol_first_open(zv, !(flag & FWRITE)); + if (error) + goto out_mutex; + } + + if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { + error = EROFS; + goto out_open_count; + } + + zv->zv_open_count++; + + mutex_exit(&zv->zv_state_lock); + + rw_exit(&zv->zv_suspend_lock); + + return (0); + +out_open_count: + if (zv->zv_open_count == 0) + zvol_last_close(zv); + +out_mutex: + mutex_exit(&zv->zv_state_lock); + + rw_exit(&zv->zv_suspend_lock); + if (error == EINTR) { + error = ERESTART; + schedule(); + } + return (SET_ERROR(error)); +} + +int +zvol_os_open(dev_t devp, int flag, int otyp, struct proc *p) +{ + zvol_state_t *zv; + int error = 0; + + printf("%s\n", __func__); + + if (!getminor(devp)) + return (0); + + zv = zvol_os_find_by_dev(devp); + if (zv == NULL) { + return (SET_ERROR(ENXIO)); + } + + error = zvol_os_open_zv(zv, flag, otyp, p); + + mutex_exit(&zv->zv_state_lock); + return (SET_ERROR(error)); +} + +int +zvol_os_close_zv(zvol_state_t *zv, int flag, int otyp, struct proc *p) +{ + printf("%s\n", __func__); + + if (zvol_os_verify_and_lock(zv) == 0) + return (SET_ERROR(ENOENT)); + + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock)); + + zv->zv_open_count--; + + if (zv->zv_open_count == 0) + zvol_last_close(zv); + + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); + + return (0); +} + +int +zvol_os_close(dev_t dev, int flag, int otyp, struct proc *p) +{ + zvol_state_t *zv; + int error = 0; + + printf("%s\n", __func__); + + if (!getminor(dev)) + return (0); + + zv = zvol_os_find_by_dev(dev); + if (zv == NULL) { + return (SET_ERROR(ENXIO)); + } + + error = zvol_os_close_zv(zv, flag, otyp, p); + + mutex_exit(&zv->zv_state_lock); + return (0); +} + +void +zvol_os_strategy(struct buf *bp) +{ + +} + +int +zvol_os_get_volume_blocksize(dev_t dev) +{ + /* XNU can only handle two sizes. 
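+ * Report the logical size (DEV_BSIZE) here; the volblocksize is
+ * reported separately via DKIOCGETPHYSICALBLOCKSIZE.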
*/ + return (DEV_BSIZE); +} + +int +zvol_os_ioctl(dev_t dev, unsigned long cmd, caddr_t data, int isblk, + cred_t *cr, int *rvalp) +{ + int error = 0; + u_int32_t *f; + u_int64_t *o; + zvol_state_t *zv = NULL; + + printf("%s\n", __func__); + + if (!getminor(dev)) + return (ENXIO); + + zv = zvol_os_find_by_dev(dev); + + if (zv == NULL) { + dprintf("zv is NULL\n"); + return (ENXIO); + } + + f = (u_int32_t *)data; + o = (u_int64_t *)data; + + switch (cmd) { + + case DKIOCGETMAXBLOCKCOUNTREAD: + dprintf("DKIOCGETMAXBLOCKCOUNTREAD\n"); + *o = 32; + break; + + case DKIOCGETMAXBLOCKCOUNTWRITE: + dprintf("DKIOCGETMAXBLOCKCOUNTWRITE\n"); + *o = 32; + break; + case DKIOCGETMAXSEGMENTCOUNTREAD: + dprintf("DKIOCGETMAXSEGMENTCOUNTREAD\n"); + *o = 32; + break; + + case DKIOCGETMAXSEGMENTCOUNTWRITE: + dprintf("DKIOCGETMAXSEGMENTCOUNTWRITE\n"); + *o = 32; + break; + + case DKIOCGETBLOCKSIZE: + dprintf("DKIOCGETBLOCKSIZE: %llu\n", + zv->zv_volblocksize); + *f = zv->zv_volblocksize; + break; + + case DKIOCSETBLOCKSIZE: + dprintf("DKIOCSETBLOCKSIZE %u\n", *f); + + if (!isblk) { + /* We can only do this for a block device */ + error = ENODEV; + break; + } + + if (zvol_check_volblocksize(zv->zv_name, + (uint64_t)*f)) { + error = EINVAL; + break; + } + + /* set the new block size */ + zv->zv_volblocksize = (uint64_t)*f; + dprintf("setblocksize changed: %llu\n", + zv->zv_volblocksize); + break; + + case DKIOCISWRITABLE: + dprintf("DKIOCISWRITABLE\n"); + if (zv && (zv->zv_flags & ZVOL_RDONLY)) + *f = 0; + else + *f = 1; + break; +#ifdef DKIOCGETBLOCKCOUNT32 + case DKIOCGETBLOCKCOUNT32: + dprintf("DKIOCGETBLOCKCOUNT32: %u\n", + (uint32_t)(zv->zv_volsize / zv->zv_volblocksize)); + *f = (uint32_t)(zv->zv_volsize / zv->zv_volblocksize); + break; +#endif + + case DKIOCGETBLOCKCOUNT: + dprintf("DKIOCGETBLOCKCOUNT: %llu\n", + zv->zv_volsize / zv->zv_volblocksize); + *o = (uint64_t)zv->zv_volsize / zv->zv_volblocksize; + break; + + case DKIOCGETBASE: + dprintf("DKIOCGETBASE\n"); + /* + * What offset should we say? 
+ * 0 is ok for FAT but to HFS + */ + *o = zv->zv_volblocksize * 0; + break; + + case DKIOCGETPHYSICALBLOCKSIZE: + dprintf("DKIOCGETPHYSICALBLOCKSIZE\n"); + *f = zv->zv_volblocksize; + break; + +#ifdef DKIOCGETTHROTTLEMASK + case DKIOCGETTHROTTLEMASK: + dprintf("DKIOCGETTHROTTLEMASK\n"); + *o = 0; + break; +#endif + + case DKIOCGETMAXBYTECOUNTREAD: + *o = SPA_MAXBLOCKSIZE; + break; + + case DKIOCGETMAXBYTECOUNTWRITE: + *o = SPA_MAXBLOCKSIZE; + break; +#ifdef DKIOCUNMAP + case DKIOCUNMAP: + dprintf("DKIOCUNMAP\n"); + *f = 1; + break; +#endif + + case DKIOCGETFEATURES: + *f = 0; + break; + +#ifdef DKIOCISSOLIDSTATE + case DKIOCISSOLIDSTATE: + dprintf("DKIOCISSOLIDSTATE\n"); + *f = 0; + break; +#endif + + case DKIOCISVIRTUAL: + *f = 1; + break; + + case DKIOCGETMAXSEGMENTBYTECOUNTREAD: + *o = 32 * zv->zv_volblocksize; + break; + + case DKIOCGETMAXSEGMENTBYTECOUNTWRITE: + *o = 32 * zv->zv_volblocksize; + break; + + case DKIOCSYNCHRONIZECACHE: + dprintf("DKIOCSYNCHRONIZECACHE\n"); + break; + + default: + dprintf("unknown ioctl: ENOTTY\n"); + error = ENOTTY; + break; + } + + mutex_exit(&zv->zv_state_lock); + + return (SET_ERROR(error)); +} + +const static zvol_platform_ops_t zvol_macos_ops = { + .zv_free = zvol_os_free, + .zv_rename_minor = zvol_os_rename_minor, + .zv_create_minor = zvol_os_create_minor, + .zv_update_volsize = zvol_os_update_volsize, + .zv_clear_private = zvol_os_clear_private, + .zv_is_zvol = zvol_os_is_zvol, + .zv_set_disk_ro = zvol_os_set_disk_ro, + .zv_set_capacity = zvol_os_set_capacity, +}; + +int +zvol_init(void) +{ + int threads = MIN(MAX(zvol_threads, 1), 1024); + + zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri, + threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); + if (zvol_taskq == NULL) { + return (-ENOMEM); + } + + zvol_init_impl(); + zvol_register_ops(&zvol_macos_ops); + return (0); +} + +void +zvol_fini(void) +{ + zvol_fini_impl(); + taskq_destroy(zvol_taskq); +} diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 7eb6b2ff9c..af74b28d1e 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -221,7 +221,8 @@ zfs_mod_supported_feature(const char *name) * The equivalent _can_ be done on FreeBSD by way of the sysctl * tree, but this has not been done yet. */ -#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD) || defined(__FreeBSD__) || defined(__APPLE__) +#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD) || \ + defined(__FreeBSD__) || defined(__APPLE__) return (B_TRUE); #else return (zfs_mod_supported(ZFS_SYSFS_POOL_FEATURES, name)); diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 1bce6f55f6..6b000471bc 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -515,8 +515,9 @@ zfs_prop_init(void) "RSNAPS"); #ifdef __APPLE__ - zprop_register_index(ZFS_PROP_BROWSE, "com.apple.browse", 1,PROP_INHERIT, - ZFS_TYPE_FILESYSTEM, "on | off", "COM.APPLE.BROWSE", boolean_table); + zprop_register_index(ZFS_PROP_BROWSE, "com.apple.browse", 1, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "COM.APPLE.BROWSE", + boolean_table); zprop_register_index(ZFS_PROP_IGNOREOWNER, "com.apple.ignoreowner", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "COM.APPLE.IGNOREOWNER", boolean_table); @@ -648,7 +649,7 @@ zfs_prop_init(void) * that we don't have to change the values of the zfs_prop_t enum, or * have NULL pointers in the zfs_prop_table[]. 
*/ -#if defined (__FreeBSD__) || defined (__APPLE__) +#if defined(__FreeBSD__) || defined(__APPLE__) zprop_register_impl(ZFS_PROP_ACLTYPE, "acltype", PROP_TYPE_INDEX, ZFS_ACLTYPE_OFF, NULL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c index bc02716cb0..debec63433 100644 --- a/module/zcommon/zprop_common.c +++ b/module/zcommon/zprop_common.c @@ -77,7 +77,8 @@ zfs_mod_supported_prop(const char *name, zfs_type_t type) * The equivalent _can_ be done on FreeBSD by way of the sysctl * tree, but this has not been done yet. */ -#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD) || defined(__FreeBSD__) || defined(__APPLE__) +#if defined(_KERNEL) || defined(LIB_ZPOOL_BUILD) || \ + defined(__FreeBSD__) || defined(__APPLE__) return (B_TRUE); #else return (zfs_mod_supported(type == ZFS_TYPE_POOL ? diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index db5ae8d8c8..af9b3cc230 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1385,7 +1385,7 @@ extern uint64_t zvolIO_kit_write(struct iomem *iomem, uint64_t offset, int dmu_read_iokit_dnode(dnode_t *dn, uint64_t *offset, - uint64_t position, uint64_t *size, struct iomem *iomem) + uint64_t position, uint64_t *size, struct iomem *iomem) { int err; @@ -1551,12 +1551,12 @@ dmu_write_iokit_dnode(dnode_t *dn, uint64_t *offset, uint64_t position, int err = 0; int i; - err = dmu_buf_hold_array_by_dnode(dn, *offset+position, *size, + err = dmu_buf_hold_array_by_dnode(dn, *offset+position, *size, FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); if (err) return (err); - while(*size > 0) { + while (*size > 0) { for (i = 0; i < numbufs; i++) { int tocpy; @@ -1569,7 +1569,8 @@ dmu_write_iokit_dnode(dnode_t *dn, uint64_t *offset, uint64_t position, bufoff = (position + *offset) - db->db_offset; tocpy = (int)MIN(db->db_size - bufoff, *size); - ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); + ASSERT(i == 0 || i == numbufs-1 || + tocpy == db->db_size); if (tocpy == db->db_size) dmu_buf_will_fill(db, tx); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 68a4d9d907..2101d8b135 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1930,8 +1930,8 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, } -#if !defined (__OPTIMIZE__) && defined (__APPLE__) -#warning "dsl_scan_visitbp is known to panic during scrubs without optimize (-O)" +#if !defined(__OPTIMIZE__) && defined(__APPLE__) && defined(_KERNEL) +#warning "dsl_scan_visitbp will panic during scrubs without optimize (-O)" #endif /* diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9d6854380c..37ea7124d0 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1262,7 +1262,7 @@ spa_activate(spa_t *spa, spa_mode_t mode) spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); -#if defined (_KERNEL) && defined (__APPLE__) +#if defined(_KERNEL) && defined(__APPLE__) spa_activate_os(spa); #endif @@ -1400,7 +1400,7 @@ spa_deactivate(spa_t *spa) spa->spa_did = 0; } -#if defined (_KERNEL) && defined (__APPLE__) +#if defined(_KERNEL) && defined(__APPLE__) spa_deactivate_os(spa); #endif @@ -5923,7 +5923,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); spa->spa_load_state = SPA_LOAD_NONE; -#if defined (__APPLE__) && defined (_KERNEL) +#if defined(__APPLE__) && defined(_KERNEL) spa_create_os(spa); #endif @@ -6112,7 +6112,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, 
uint64_t flags) zvol_create_minors_recursive(pool); -#if defined (__APPLE__) && defined (_KERNEL) +#if defined(__APPLE__) && defined(_KERNEL) spa_create_os(spa); #endif @@ -6352,7 +6352,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, } export_spa: -#if defined (__APPLE__) && defined (_KERNEL) +#if defined(__APPLE__) && defined(_KERNEL) spa_export_os(spa); #endif diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index f453caf5c8..79f467b2b7 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -1084,7 +1084,8 @@ zfs_ereport_snapshot_post(const char *subclass, spa_t *spa, const char *name) subclass, spa, NULL, NULL, NULL, 0, 0); - if (ereport == NULL) return; + if (ereport == NULL) + return; VERIFY0(nvlist_add_string(ereport, "snapshot_name", name)); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index dea9ffe4ca..f1e8d0cd51 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -257,14 +257,16 @@ zfs_xattr_owner_unlinked(znode_t *zp) if (tzp != zp) zrele(tzp); #elif __APPLE__ + VERIFY(ZTOV(zp) != NULL); if (VN_HOLD(ZTOV(zp)) == 0) { /* - * if zp is XATTR node, keep walking up via z_xattr_parent until we - * get the owner + * if zp is XATTR node, keep walking up via z_xattr_parent + * until we get the owner */ while (zp->z_pflags & ZFS_XATTR) { ASSERT3U(zp->z_xattr_parent, !=, 0); - if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) { + if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, + &dzp) != 0) { unlinked = 1; break; } diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 5b215c0470..83be2ab817 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -71,7 +71,7 @@ zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, bzero(vap, sizeof (*vap)); vap->va_mask = (uint_t)mask; vap->va_mode = mode; -#if defined (__FreeBSD__) || defined (__APPLE__) +#if defined(__FreeBSD__) || defined(__APPLE__) vap->va_type = IFTOVT(mode); #endif vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index 9f67112148..e7ca31fcba 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -66,9 +66,9 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { {"ZPL_DACL_ACES", 0, SA_ACL, 0}, {"ZPL_DXATTR", 0, SA_UINT8_ARRAY, 0}, {"ZPL_PROJID", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, -#if defined (__APPLE__) - {"ZPL_ADDTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, - {"ZPL_DOCUMENTID", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, +#if defined(__APPLE__) + {"ZPL_ADDTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, + {"ZPL_DOCUMENTID", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, #endif {NULL, 0, 0, 0} }; diff --git a/scripts/cmd-macos.sh b/scripts/cmd-macos.sh new file mode 100755 index 0000000000..a80592b3a5 --- /dev/null +++ b/scripts/cmd-macos.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +if test x"$#" = "x0" ; then + printf "You need to supply a command.\n" + exit 1 +fi + +cmd=$1 +shift + +READLINK=`which greadlink 2>/dev/null` +if test x$READLINK = "x" ; then + READLINK=`which readlink 2>/dev/null` +fi + +if ! test x$READLINK = "x" ; then + $READLINK -f . > /dev/null 2>&1 + if ! test x$? = "x0" ; then + unset READLINK + else + CANONICALIZE="$READLINK -f" + fi +fi + +if test x$READLINK = "x" ; then + REALPATH=`which grealpath 2>/dev/null` + if test x$REALPATH = "x" ; then + REALPATH=`which realpath 2>/dev/null` + fi + if test x$REALPATH = "x" ; then + CANONICALIZE=readlink + else + CANONICALIZE=$REALPATH + fi +fi + +topdir=`dirname "$($CANONICALIZE "$0")"` + +if test x$topdir = x"." 
; then + if ! test -f zfs.release.in ; then + printf "cd into the zfs source directory or install GNU readlink or realpath.\n" + printf "Homebrew: brew install coreutils\n" + printf "MacPorts: port install coreutils\n" + printf "Gentoo Prefix: emerge sys-apps/coreutils\n" + exit 1 + fi +fi + +topdir=$topdir/../ + +for lib in nvpair uutil zpool zfs zfs_core diskmgt; do + export DYLD_LIBRARY_PATH=$topdir/lib/lib${lib}/.libs:$DYLD_LIBRARY_PATH +done +for c in zdb zfs zpool ztest; do + export PATH=${topdir}/cmd/${c}/.libs:$PATH +done + +#echo PATH=$PATH +#echo DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH +exec ${topdir}/cmd/$cmd/.libs/$cmd "$@" diff --git a/scripts/debug-macos.sh b/scripts/debug-macos.sh new file mode 100755 index 0000000000..1f049df437 --- /dev/null +++ b/scripts/debug-macos.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +if test x"$#" = "x0" ; then + printf "You need to supply a command.\n" + exit 1 +fi + +cmd=$1 +shift + +READLINK=`which greadlink 2>/dev/null` +if test x$READLINK = "x" ; then + READLINK=`which readlink 2>/dev/null` +fi + +if ! test x$READLINK = "x" ; then + $READLINK -f . > /dev/null 2>&1 + if ! test x$? = "x0" ; then + unset READLINK + else + CANONICALIZE="$READLINK -f" + fi +fi + +if test x$READLINK = "x" ; then + REALPATH=`which grealpath 2>/dev/null` + if test x$REALPATH = "x" ; then + REALPATH=`which realpath 2>/dev/null` + fi + if test x$REALPATH = "x" ; then + CANONICALIZE=readlink + else + CANONICALIZE=$REALPATH + fi +fi + +topdir=`dirname "$($CANONICALIZE "$0")"` + +if test x$topdir = x"." ; then + if ! test -f zfs.release.in ; then + printf "cd into the zfs source directory or install GNU readlink or realpath.\n" + printf "Homebrew: brew install coreutils\n" + printf "MacPorts: port install coreutils\n" + printf "Gentoo Prefix: emerge sys-apps/coreutils\n" + exit 1 + fi +fi + +topdir=$topdir/../ + +for lib in nvpair uutil zpool zfs zfs_core diskmgt; do + export DYLD_LIBRARY_PATH=$topdir/lib/lib${lib}/.libs:$DYLD_LIBRARY_PATH +done +for c in zdb zfs zpool ztest; do + export PATH=${topdir}/cmd/${c}/.libs:$PATH +done + +#echo PATH=$PATH +#echo DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH +exec lldb ${topdir}/cmd/$cmd/.libs/$cmd "$@" diff --git a/scripts/load_macos.sh b/scripts/load_macos.sh new file mode 100755 index 0000000000..2e8a1880b7 --- /dev/null +++ b/scripts/load_macos.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Expected to be run from the root of the source tree, as root; +# ./scripts/load_macos.sh +# +# Copies compiled zfs.kext to /tmp/ and prepares the requirements +# for load. +# + +rsync -ar module/os/macos/zfs/zfs.kext/ /tmp/zfs.kext/ + +chown -R root:wheel /tmp/zfs.kext + +kextload -v /tmp/zfs.kext || kextutil /tmp/zfs.kext + +# log stream --source --predicate 'sender == "zfs"' --style compact +
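+#
+# Verify it loaded, and unload when finished testing:
+# kextstat | grep -i zfs
+# kextunload /tmp/zfs.kext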