diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index a2c47d65a5aa..15bea3c22cc4 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -2,15 +2,9 @@ dnl # dnl # Handle differences in kernel FPU code. dnl # dnl # Kernel -dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD. -dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD -dnl # -dnl # 5.0: As an optimization SIMD operations performed by kernel -dnl # threads can skip saving and restoring their FPU context. -dnl # Wrappers have been introduced to determine the running -dnl # context and use either the SIMD or generic implementation. +dnl # 5.0: Wrappers have been introduced to save/restore the FPU state. dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels. -dnl # HAVE_KERNEL_FPU_INITIALIZED +dnl # HAVE_KERNEL_FPU_INTERNAL dnl # dnl # 4.2: Use __kernel_fpu_{begin,end}() dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU @@ -61,22 +55,40 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ __kernel_fpu_end(); ], [], [$ZFS_META_LICENSE]) - ZFS_LINUX_TEST_SRC([fpu_initialized], [ - #include - #include - ],[ - struct fpu *fpu = &current->thread.fpu; - if (fpu->initialized) { return (0); }; - ]) + ZFS_LINUX_TEST_SRC([fpu_internal], [ + #if defined(__x86_64) || defined(__x86_64__) || \ + defined(__i386) || defined(__i386__) + #if !defined(__x86) + #define __x86 + #endif + #endif - ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [ - #include - #include + #if !defined(__x86) + #error Unsupported architecture + #endif - #if !defined(TIF_NEED_FPU_LOAD) - #error "TIF_NEED_FPU_LOAD undefined" + #ifdef HAVE_KERNEL_FPU_API_HEADER + #include + #include + #else + #include + #include + #endif + + #if !defined(XSTATE_XSAVE) + #error XSTATE_XSAVE not defined #endif - ],[]) + + #if !defined(XSTATE_XRESTORE) + #error XSTATE_XRESTORE not defined + #endif + ],[ + struct fpu *fpu = &current->thread.fpu; + union fpregs_state *st = &fpu->state; + struct fregs_state *fr __attribute__ ((unused)) = &st->fsave; + struct fxregs_state 
*fxr __attribute__ ((unused)) = &st->fxsave; + struct xregs_state *xr __attribute__ ((unused)) = &st->xsave; + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_FPU], [ @@ -104,25 +116,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) ],[ - dnl # - dnl # Linux 5.0 kernel - dnl # - ZFS_LINUX_TEST_RESULT([fpu_initialized], [ - AC_MSG_RESULT(fpu.initialized) - AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1, - [kernel fpu.initialized exists]) + ZFS_LINUX_TEST_RESULT([fpu_internal], [ + AC_MSG_RESULT(internal) + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, + [kernel fpu internal]) ],[ - dnl # - dnl # Linux 5.2 kernel - dnl # - ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [ - AC_MSG_RESULT(TIF_NEED_FPU_LOAD) - AC_DEFINE( - HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1, - [kernel TIF_NEED_FPU_LOAD exists]) - ],[ - AC_MSG_RESULT(unavailable) - ]) + AC_MSG_RESULT(unavailable) ]) ]) ]) diff --git a/include/os/linux/kernel/linux/simd.h b/include/os/linux/kernel/linux/simd.h index 1f6574a90e49..ce317d52e604 100644 --- a/include/os/linux/kernel/linux/simd.h +++ b/include/os/linux/kernel/linux/simd.h @@ -33,9 +33,10 @@ #else #define kfpu_allowed() 0 -#define kfpu_initialize(tsk) do {} while (0) #define kfpu_begin() do {} while (0) #define kfpu_end() do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) #endif #endif /* _LINUX_SIMD_H */ diff --git a/include/os/linux/kernel/linux/simd_aarch64.h b/include/os/linux/kernel/linux/simd_aarch64.h index ac530d920015..50937e97ced1 100644 --- a/include/os/linux/kernel/linux/simd_aarch64.h +++ b/include/os/linux/kernel/linux/simd_aarch64.h @@ -27,9 +27,10 @@ * * Kernel fpu methods: * kfpu_allowed() - * kfpu_initialize() * kfpu_begin() * kfpu_end() + * kfpu_init() + * kfpu_fini() */ #ifndef _LINUX_SIMD_AARCH64_H @@ -43,9 +44,10 @@ #include #define kfpu_allowed() 1 -#define kfpu_initialize(tsk) do {} while (0) #define kfpu_begin() kernel_neon_begin() #define kfpu_end() kernel_neon_end() +#define kfpu_init() 0 
+#define kfpu_fini() ((void) 0) #endif /* __aarch64__ */ diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h index c59ba4174d97..5614ad1907d8 100644 --- a/include/os/linux/kernel/linux/simd_x86.h +++ b/include/os/linux/kernel/linux/simd_x86.h @@ -27,9 +27,10 @@ * * Kernel fpu methods: * kfpu_allowed() - * kfpu_initialize() * kfpu_begin() * kfpu_end() + * kfpu_init() + * kfpu_fini() * * SIMD support: * @@ -99,7 +100,8 @@ #if defined(KERNEL_EXPORTS_X86_FPU) #define kfpu_allowed() 1 -#define kfpu_initialize(tsk) do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) #if defined(HAVE_UNDERSCORE_KERNEL_FPU) #define kfpu_begin() \ @@ -126,45 +128,100 @@ #endif #else /* defined(KERNEL_EXPORTS_X86_FPU) */ + /* * When the kernel_fpu_* symbols are unavailable then provide our own - * versions which allow the FPU to be safely used in kernel threads. - * In practice, this is not a significant restriction for ZFS since the - * vast majority of SIMD operations are performed by the IO pipeline. + * versions which allow the FPU to be safely used. */ +#if defined(HAVE_KERNEL_FPU_INTERNAL) + +extern union fpregs_state **zfs_kfpu_fpregs; /* - * Returns non-zero if FPU operations are allowed in the current context. + * Initialize per-cpu variables to store FPU state. 
*/ -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \ - test_thread_flag(TIF_NEED_FPU_LOAD)) -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \ - current->thread.fpu.initialized) -#else -#define kfpu_allowed() 0 -#endif +static inline void +kfpu_fini(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (zfs_kfpu_fpregs[cpu] != NULL) { + kfree(zfs_kfpu_fpregs[cpu]); + } + } + + kfree(zfs_kfpu_fpregs); +} + +static inline int +kfpu_init(void) +{ + int cpu; + + zfs_kfpu_fpregs = kzalloc(num_possible_cpus() * + sizeof (union fpregs_state *), GFP_KERNEL); + if (zfs_kfpu_fpregs == NULL) + return (-ENOMEM); + + for_each_possible_cpu(cpu) { + zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (union fpregs_state), + GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); + if (zfs_kfpu_fpregs[cpu] == NULL) { + kfpu_fini(); + return (-ENOMEM); + } + } + + return (0); +} + +#define kfpu_allowed() 1 +#define ex_handler_fprestore ex_handler_default + +/* + * FPU save and restore instructions. 
+ */ +#define __asm __asm__ __volatile__ +#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr))) +#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr))) +#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr))) +#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) +#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr))) +#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr))) +#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \ + : : [addr] "m" (rval)); static inline void -kfpu_initialize(void) +kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) { - WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); + uint32_t low, hi; + int err; -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) - __fpu_invalidate_fpregs_state(&current->thread.fpu); - set_thread_flag(TIF_NEED_FPU_LOAD); -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) - __fpu_invalidate_fpregs_state(&current->thread.fpu); - current->thread.fpu.initialized = 1; -#endif + low = mask; + hi = mask >> 32; + XSTATE_XSAVE(addr, low, hi, err); + WARN_ON_ONCE(err); } static inline void -kfpu_begin(void) +kfpu_save_fxsr(struct fxregs_state *addr) { - WARN_ON_ONCE(!kfpu_allowed()); + if (IS_ENABLED(CONFIG_X86_32)) + kfpu_fxsave(addr); + else + kfpu_fxsaveq(addr); +} +static inline void +kfpu_save_fsave(struct fregs_state *addr) +{ + kfpu_fnsave(addr); +} + +static inline void +kfpu_begin(void) +{ /* * Preemption and interrupts must be disabled for the critical * region where the FPU state is being modified. @@ -172,50 +229,90 @@ kfpu_begin(void) preempt_disable(); local_irq_disable(); -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) /* * The current FPU registers need to be preserved by kfpu_begin() - * and restored by kfpu_end(). This is required because we can - * not call __cpu_invalidate_fpregs_state() to invalidate the - * per-cpu FPU state and force them to be restored during a - * context switch. + * and restored by kfpu_end(). 
They are stored in a dedicated + * per-cpu variable, not in the task struct; this allows any user + * FPU state to be correctly preserved and restored. */ - copy_fpregs_to_fpstate(&current->thread.fpu); -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) + union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; + + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_save_xsave(&state->xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_save_fxsr(&state->fxsave); + } else { + kfpu_save_fsave(&state->fsave); + } +} + +static inline void +kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + XSTATE_XRESTORE(addr, low, hi); +} + +static inline void +kfpu_restore_fxsr(struct fxregs_state *addr) +{ /* - * There is no need to preserve and restore the FPU registers. - * They will always be restored from the task's stored FPU state - * when switching contexts. + * On AuthenticAMD K7 and K8 processors the fxrstor instruction only + * restores the _x87 FOP, FIP, and FDP registers when an exception + * is pending. Clean the _x87 state to force the restore. 
*/ - WARN_ON_ONCE(current->thread.fpu.initialized == 0); -#endif + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) + kfpu_fxsr_clean(addr); + + if (IS_ENABLED(CONFIG_X86_32)) { + kfpu_fxrstor(addr); + } else { + kfpu_fxrstorq(addr); + } +} + +static inline void +kfpu_restore_fsave(struct fregs_state *addr) +{ + kfpu_frstor(addr); } static inline void kfpu_end(void) { -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) - union fpregs_state *state = &current->thread.fpu.state; - int error; + union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()]; - if (use_xsave()) { - error = copy_kernel_to_xregs_err(&state->xsave, -1); - } else if (use_fxsr()) { - error = copy_kernel_to_fxregs_err(&state->fxsave); + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_restore_xsave(&state->xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_restore_fxsr(&state->fxsave); } else { - error = copy_kernel_to_fregs_err(&state->fsave); + kfpu_restore_fsave(&state->fsave); } - WARN_ON_ONCE(error); -#endif local_irq_enable(); preempt_enable(); } -#endif /* defined(HAVE_KERNEL_FPU) */ + +#else + +/* + * FPU support is unavailable. + */ +#define kfpu_allowed() 0 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) + +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ +#endif /* defined(KERNEL_EXPORTS_X86_FPU) */ /* * Linux kernel provides an interface for CPU feature testing. 
*/ + /* * Detect register set support */ diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h index c3d165c8b124..a029127914b2 100644 --- a/include/sys/zio_crypt.h +++ b/include/sys/zio_crypt.h @@ -107,11 +107,11 @@ void zio_crypt_key_destroy(zio_crypt_key_t *key); int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key); int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); -int zio_crypt_key_wrap(spa_t *spa, crypto_key_t *cwkey, zio_crypt_key_t *key, - uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); -int zio_crypt_key_unwrap(spa_t *spa, crypto_key_t *cwkey, uint64_t crypt, - uint64_t version, uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, - uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key); +int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key); int zio_crypt_generate_iv(uint8_t *ivbuf); int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *ivbuf, uint8_t *salt); @@ -132,11 +132,11 @@ int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *digestbuf, uint_t digestlen); int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac); -int zio_do_crypt_data(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, +int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, boolean_t *no_crypt); -int zio_do_crypt_abd(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, +int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, 
dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt); diff --git a/lib/libspl/include/sys/simd.h b/lib/libspl/include/sys/simd.h index 6a2b3a0226f6..b25e476a33b8 100644 --- a/lib/libspl/include/sys/simd.h +++ b/lib/libspl/include/sys/simd.h @@ -34,9 +34,10 @@ #include #define kfpu_allowed() 1 -#define kfpu_initialize(tsk) do {} while (0) #define kfpu_begin() do {} while (0) #define kfpu_end() do {} while (0) +#define kfpu_init() 0 +#define kfpu_fini() ((void) 0) /* * CPUID feature tests for user-space. diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index b60b16cc01d3..7a60ba99451c 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -295,9 +295,8 @@ aes_impl_get_ops(void) /* * Initialize all supported implementations. */ -/* ARGSUSED */ void -aes_impl_init(void *arg) +aes_impl_init(void) { aes_impl_ops_t *curr_impl; int i, c; diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 1fb8e256a52b..24ec2606bfd0 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -703,9 +703,8 @@ gcm_impl_get_ops() /* * Initialize all supported implementations. 
*/ -/* ARGSUSED */ void -gcm_impl_init(void *arg) +gcm_impl_init(void) { gcm_impl_ops_t *curr_impl; int i, c; diff --git a/module/icp/include/aes/aes_impl.h b/module/icp/include/aes/aes_impl.h index 329e32a8e661..a0b82ade4559 100644 --- a/module/icp/include/aes/aes_impl.h +++ b/module/icp/include/aes/aes_impl.h @@ -198,7 +198,7 @@ extern const aes_impl_ops_t aes_aesni_impl; /* * Initializes fastest implementation */ -void aes_impl_init(void *arg); +void aes_impl_init(void); /* * Returns optimal allowed AES implementation diff --git a/module/icp/include/modes/gcm_impl.h b/module/icp/include/modes/gcm_impl.h index dff372ef8ba2..28c8f63a7d46 100644 --- a/module/icp/include/modes/gcm_impl.h +++ b/module/icp/include/modes/gcm_impl.h @@ -61,7 +61,7 @@ extern const gcm_impl_ops_t gcm_pclmulqdq_impl; /* * Initializes fastest implementation */ -void gcm_impl_init(void *arg); +void gcm_impl_init(void); /* * Returns optimal allowed GCM implementation diff --git a/module/icp/io/aes.c b/module/icp/io/aes.c index 4b2dbd6e170e..788bcef7d1e2 100644 --- a/module/icp/io/aes.c +++ b/module/icp/io/aes.c @@ -206,35 +206,9 @@ aes_mod_init(void) { int ret; -#if defined(_KERNEL) - /* - * Determine the fastest available implementation. The benchmarks - * are run in dedicated kernel threads to allow Linux 5.0+ kernels - * to use SIMD operations. If for some reason this isn't possible, - * fallback to the generic implementations. See the comment in - * linux/simd_x86.h for additional details. Additionally, this has - * the benefit of allowing them to be run in parallel. 
- */ - taskqid_t aes_id = taskq_dispatch(system_taskq, aes_impl_init, - NULL, TQ_SLEEP); - taskqid_t gcm_id = taskq_dispatch(system_taskq, gcm_impl_init, - NULL, TQ_SLEEP); - - if (aes_id != TASKQID_INVALID) { - taskq_wait_id(system_taskq, aes_id); - } else { - aes_impl_init(NULL); - } - - if (gcm_id != TASKQID_INVALID) { - taskq_wait_id(system_taskq, gcm_id); - } else { - gcm_impl_init(NULL); - } -#else - aes_impl_init(NULL); - gcm_impl_init(NULL); -#endif + /* Determine the fastest available implementation. */ + aes_impl_init(); + gcm_impl_init(); if ((ret = mod_install(&modlinkage)) != 0) return (ret); diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 2e6280084fd6..8910c109eb48 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -28,7 +28,6 @@ #include #include #include -#include int spl_taskq_thread_bind = 0; module_param(spl_taskq_thread_bind, int, 0644); @@ -854,7 +853,6 @@ taskq_thread(void *args) sigfillset(&blocked); sigprocmask(SIG_BLOCK, &blocked, NULL); flush_signals(current); - kfpu_initialize(); tsd_set(taskq_tsd, tq); spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index 29de9252a48a..0352a31ea835 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -27,7 +27,6 @@ #include #include #include -#include /* * Thread interfaces @@ -55,7 +54,6 @@ thread_generic_wrapper(void *arg) args = tp->tp_args; set_current_state(tp->tp_state); set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri)); - kfpu_initialize(); kmem_free(tp->tp_name, tp->tp_name_size); kmem_free(tp, sizeof (thread_priv_t)); diff --git a/module/os/linux/zfs/zio_crypt.c b/module/os/linux/zfs/zio_crypt.c index 5b4aa664c893..96dabe55a138 100644 --- a/module/os/linux/zfs/zio_crypt.c +++ b/module/os/linux/zfs/zio_crypt.c @@ -25,8 +25,6 @@ #include #include #include -#include -#include #include 
#include @@ -376,7 +374,7 @@ zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) * plaintext / ciphertext alone. */ static int -zio_do_crypt_uio_impl(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, +zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len) { @@ -476,75 +474,9 @@ zio_do_crypt_uio_impl(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, return (ret); } -typedef struct crypt_uio_arg { - boolean_t cu_encrypt; - uint64_t cu_crypt; - crypto_key_t *cu_key; - crypto_ctx_template_t cu_tmpl; - uint8_t *cu_ivbuf; - uint_t cu_datalen; - uio_t *cu_puio; - uio_t *cu_cuio; - uint8_t *cu_authbuf; - uint_t cu_auth_len; - int cu_error; -} crypt_uio_arg_t; - -static void -zio_do_crypt_uio_func(void *arg) -{ - crypt_uio_arg_t *cu = (crypt_uio_arg_t *)arg; - - cu->cu_error = zio_do_crypt_uio_impl(cu->cu_encrypt, cu->cu_crypt, - cu->cu_key, cu->cu_tmpl, cu->cu_ivbuf, cu->cu_datalen, - cu->cu_puio, cu->cu_cuio, cu->cu_authbuf, cu->cu_auth_len); -} - -static int -zio_do_crypt_uio(spa_t *spa, boolean_t encrypt, uint64_t crypt, - crypto_key_t *key, crypto_ctx_template_t tmpl, uint8_t *ivbuf, - uint_t datalen, uio_t *puio, uio_t *cuio, uint8_t *authbuf, - uint_t auth_len) -{ - int error; - - /* - * Dispatch to the I/O pipeline as required by the context in order - * to take advantage of the SIMD optimization when available. 
- */ - if (kfpu_allowed()) { - error = zio_do_crypt_uio_impl(encrypt, crypt, key, tmpl, - ivbuf, datalen, puio, cuio, authbuf, auth_len); - } else { - crypt_uio_arg_t *cu; - - cu = kmem_alloc(sizeof (*cu), KM_SLEEP); - cu->cu_encrypt = encrypt; - cu->cu_crypt = crypt; - cu->cu_key = key; - cu->cu_tmpl = tmpl; - cu->cu_ivbuf = ivbuf; - cu->cu_datalen = datalen; - cu->cu_puio = puio; - cu->cu_cuio = cuio; - cu->cu_authbuf = authbuf; - cu->cu_auth_len = auth_len; - cu->cu_error = 0; - - spa_taskq_dispatch_sync(spa, - encrypt ? ZIO_TYPE_WRITE : ZIO_TYPE_READ, - ZIO_TASKQ_ISSUE, zio_do_crypt_uio_func, cu, TQ_SLEEP); - - error = cu->cu_error; - kmem_free(cu, sizeof (*cu)); - } - - return (error); -} - int -zio_crypt_key_wrap(spa_t *spa, crypto_key_t *cwkey, zio_crypt_key_t *key, - uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) +zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) { int ret; uio_t puio, cuio; @@ -601,7 +533,7 @@ zio_crypt_key_wrap(spa_t *spa, crypto_key_t *cwkey, zio_crypt_key_t *key, cuio.uio_segflg = UIO_SYSSPACE; /* encrypt the keys and store the resulting ciphertext and mac */ - ret = zio_do_crypt_uio(spa, B_TRUE, crypt, cwkey, NULL, iv, enc_len, + ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -612,33 +544,12 @@ zio_crypt_key_wrap(spa_t *spa, crypto_key_t *cwkey, zio_crypt_key_t *key, return (ret); } -static void -zio_crypt_create_ctx_templates(void *arg) -{ - zio_crypt_key_t *key = (zio_crypt_key_t *)arg; - crypto_mechanism_t mech; - int ret; - - mech.cm_type = crypto_mech2id( - zio_crypt_table[key->zk_crypt].ci_mechname); - - ret = crypto_create_ctx_template(&mech, &key->zk_current_key, - &key->zk_current_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_current_tmpl = NULL; - - mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); - ret = 
crypto_create_ctx_template(&mech, &key->zk_hmac_key, - &key->zk_hmac_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_hmac_tmpl = NULL; -} - int -zio_crypt_key_unwrap(spa_t *spa, crypto_key_t *cwkey, uint64_t crypt, - uint64_t version, uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, - uint8_t *iv, uint8_t *mac, zio_crypt_key_t *key) +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key) { + crypto_mechanism_t mech; uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; @@ -685,7 +596,7 @@ zio_crypt_key_unwrap(spa_t *spa, crypto_key_t *cwkey, uint64_t crypt, cuio.uio_segflg = UIO_SYSSPACE; /* decrypt the keys and store the result in the output buffers */ - ret = zio_do_crypt_uio(spa, B_FALSE, crypt, cwkey, NULL, iv, enc_len, + ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -711,18 +622,27 @@ zio_crypt_key_unwrap(spa_t *spa, crypto_key_t *cwkey, uint64_t crypt, key->zk_hmac_key.ck_data = key->zk_hmac_keydata; key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. + */ + mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, + &key->zk_hmac_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_hmac_tmpl = NULL; + key->zk_crypt = crypt; key->zk_version = version; key->zk_guid = guid; key->zk_salt_count = 0; - /* - * Initialize the crypto templates in the context they will be - * primarily used. 
It's ok if this fails, it's just an optimization. - */ - spa_taskq_dispatch_sync(spa, ZIO_TYPE_READ, ZIO_TASKQ_ISSUE, - zio_crypt_create_ctx_templates, key, TQ_SLEEP); - return (0); error: @@ -1941,7 +1861,7 @@ zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, * Primary encryption / decryption entrypoint for zio data. */ int -zio_do_crypt_data(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, +zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, boolean_t *no_crypt) @@ -2028,8 +1948,8 @@ zio_do_crypt_data(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, goto error; /* perform the encryption / decryption in software */ - ret = zio_do_crypt_uio(spa, encrypt, key->zk_crypt, ckey, tmpl, iv, - enc_len, &puio, &cuio, authbuf, auth_len); + ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, + &puio, &cuio, authbuf, auth_len); if (ret != 0) goto error; @@ -2065,10 +1985,9 @@ zio_do_crypt_data(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, * linear buffers. 
*/ int -zio_do_crypt_abd(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, - dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, - uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, - boolean_t *no_crypt) +zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, + boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, + uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) { int ret; void *ptmp, *ctmp; @@ -2081,7 +2000,7 @@ zio_do_crypt_abd(spa_t *spa, boolean_t encrypt, zio_crypt_key_t *key, ctmp = abd_borrow_buf_copy(cabd, datalen); } - ret = zio_do_crypt_data(spa, encrypt, key, ot, byteswap, salt, iv, mac, + ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac, datalen, ptmp, ctmp, no_crypt); if (ret != 0) goto error; diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index c14c95fa2285..9523f4d484c7 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -726,7 +726,7 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) * Initialize and benchmark all supported implementations. */ static void -fletcher_4_benchmark(void *arg) +fletcher_4_benchmark(void) { fletcher_4_ops_t *curr_impl; int i, c; @@ -769,20 +769,10 @@ fletcher_4_benchmark(void *arg) void fletcher_4_init(void) { -#if defined(_KERNEL) - /* - * For 5.0 and latter Linux kernels the fletcher 4 benchmarks are - * run in a kernel threads. This is needed to take advantage of the - * SIMD functionality, see linux/simd_x86.h for details. - */ - taskqid_t id = taskq_dispatch(system_taskq, fletcher_4_benchmark, - NULL, TQ_SLEEP); - if (id != TASKQID_INVALID) { - taskq_wait_id(system_taskq, id); - } else { - fletcher_4_benchmark(NULL); - } + /* Determine the fastest available implementation. 
*/ + fletcher_4_benchmark(); +#if defined(_KERNEL) /* Install kstats for all implementations */ fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); @@ -795,8 +785,6 @@ fletcher_4_init(void) fletcher_4_kstat_addr); kstat_install(fletcher_4_kstat); } -#else - fletcher_4_benchmark(NULL); #endif /* Finish initialization */ diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index c42f046daa07..991f7ced882f 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -865,10 +865,23 @@ zfs_prop_align_right(zfs_prop_t prop) #endif #if defined(_KERNEL) + +#include + +#if defined(HAVE_KERNEL_FPU_INTERNAL) +union fpregs_state **zfs_kfpu_fpregs; +EXPORT_SYMBOL(zfs_kfpu_fpregs); +#endif /* HAVE_KERNEL_FPU_INTERNAL */ + static int __init zcommon_init(void) { + int error = kfpu_init(); + if (error) + return (error); + fletcher_4_init(); + return (0); } @@ -876,6 +889,7 @@ static void __exit zcommon_fini(void) { fletcher_4_fini(); + kfpu_fini(); } module_init(zcommon_init); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index c1ad8785df69..e191263dd907 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -8552,7 +8552,7 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, if (ret != 0) goto error; - ret = zio_do_crypt_abd(spa, B_TRUE, &dck->dck_key, + ret = zio_do_crypt_abd(B_TRUE, &dck->dck_key, hdr->b_crypt_hdr.b_ot, bswap, hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_iv, mac, psize, to_write, eabd, &no_crypt); diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 327d3ee91f38..1545af53af70 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -601,8 +601,8 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, * Unwrap the keys. If there is an error return EACCES to indicate * an authentication failure. 
*/ - ret = zio_crypt_key_unwrap(mos->os_spa, &wkey->wk_key, crypt, version, - guid, raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid, + raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); if (ret != 0) { ret = SET_ERROR(EACCES); goto error; @@ -1221,7 +1221,6 @@ dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) { zio_crypt_key_t *key = &dck->dck_key; dsl_wrapping_key_t *wkey = dck->dck_wkey; - objset_t *mos = tx->tx_pool->dp_meta_objset; uint8_t keydata[MASTER_KEY_MAX_LEN]; uint8_t hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -1231,13 +1230,14 @@ dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS); /* encrypt and store the keys along with the IV and MAC */ - VERIFY0(zio_crypt_key_wrap(mos->os_spa, &dck->dck_wkey->wk_key, key, - iv, mac, keydata, hmac_keydata)); + VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac, + keydata, hmac_keydata)); /* update the ZAP with the obtained values */ - dsl_crypto_key_sync_impl(mos, dck->dck_obj, key->zk_crypt, - wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, hmac_keydata, - wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, tx); + dsl_crypto_key_sync_impl(tx->tx_pool->dp_meta_objset, dck->dck_obj, + key->zk_crypt, wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, + hmac_keydata, wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, + tx); } typedef struct spa_keystore_change_key_args { @@ -2846,8 +2846,8 @@ spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb, } /* call lower level function to perform encryption / decryption */ - ret = zio_do_crypt_data(spa, encrypt, &dck->dck_key, ot, bswap, salt, - iv, mac, datalen, plainbuf, cipherbuf, no_crypt); + ret = zio_do_crypt_data(encrypt, &dck->dck_key, ot, bswap, salt, iv, + mac, datalen, plainbuf, cipherbuf, no_crypt); /* * Handle injected decryption faults. 
Unfortunately, we cannot inject diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index 4e5fcbdafc37..c62a6eb5877f 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -445,7 +445,7 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn) * Initialize and benchmark all supported implementations. */ static void -benchmark_raidz(void *arg) +benchmark_raidz(void) { raidz_impl_ops_t *curr_impl; int i, c; @@ -515,20 +515,10 @@ benchmark_raidz(void *arg) void vdev_raidz_math_init(void) { -#if defined(_KERNEL) - /* - * For 5.0 and latter Linux kernels the fletcher 4 benchmarks are - * run in a kernel threads. This is needed to take advantage of the - * SIMD functionality, see include/linux/simd_x86.h for details. - */ - taskqid_t id = taskq_dispatch(system_taskq, benchmark_raidz, - NULL, TQ_SLEEP); - if (id != TASKQID_INVALID) { - taskq_wait_id(system_taskq, id); - } else { - benchmark_raidz(NULL); - } + /* Determine the fastest available implementation. */ + benchmark_raidz(); +#if defined(_KERNEL) /* Install kstats for all implementations */ raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); @@ -541,8 +531,6 @@ vdev_raidz_math_init(void) raidz_math_kstat_addr); kstat_install(raidz_math_kstat); } -#else - benchmark_raidz(NULL); #endif /* Finish initialization */