Skip to content

Commit

Permalink
Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
Browse files Browse the repository at this point in the history
Contrary to initial testing we cannot rely on these kernels to
invalidate the per-cpu FPU state and restore the FPU registers.
Nor can we guarantee that the kernel won't modify the FPU state
which we saved in the task struct.

Therefore, the kfpu_begin() and kfpu_end() functions have been
updated to save and restore the FPU state using our own dedicated
per-cpu FPU state variables.

This has the additional advantage of allowing us to use the FPU
again in user threads.  So we remove the code which was added to
use task queues to ensure some functions ran in kernel threads.

Signed-off-by: Brian Behlendorf <[email protected]>
Issue openzfs#9346
  • Loading branch information
behlendorf committed Oct 10, 2019
1 parent f3dc4a8 commit f95fa71
Show file tree
Hide file tree
Showing 19 changed files with 271 additions and 294 deletions.
77 changes: 38 additions & 39 deletions config/kernel-fpu.m4
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,9 @@ dnl #
dnl # Handle differences in kernel FPU code.
dnl #
dnl # Kernel
dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD.
dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD
dnl #
dnl # 5.0: As an optimization SIMD operations performed by kernel
dnl # threads can skip saving and restoring their FPU context.
dnl # Wrappers have been introduced to determine the running
dnl # context and use either the SIMD or generic implementation.
dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels.
dnl # HAVE_KERNEL_FPU_INITIALIZED
dnl # HAVE_KERNEL_FPU_INTERNAL
dnl #
dnl # 4.2: Use __kernel_fpu_{begin,end}()
dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
Expand Down Expand Up @@ -61,22 +55,40 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
__kernel_fpu_end();
], [], [$ZFS_META_LICENSE])
ZFS_LINUX_TEST_SRC([fpu_initialized], [
#include <linux/module.h>
#include <linux/sched.h>
],[
struct fpu *fpu = &current->thread.fpu;
if (fpu->initialized) { return (0); };
])
ZFS_LINUX_TEST_SRC([fpu_internal], [
#if defined(__x86_64) || defined(__x86_64__) || \
defined(__i386) || defined(__i386__)
#if !defined(__x86)
#define __x86
#endif
#endif
ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [
#include <linux/module.h>
#include <asm/thread_info.h>
#if !defined(__x86)
#error Unsupported architecture
#endif
#if !defined(TIF_NEED_FPU_LOAD)
#error "TIF_NEED_FPU_LOAD undefined"
#ifdef HAVE_KERNEL_FPU_API_HEADER
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
#else
#include <asm/i387.h>
#include <asm/xcr.h>
#endif
#if !defined(XSTATE_XSAVE)
#error XSTATE_XSAVE not defined
#endif
],[])
#if !defined(XSTATE_XRESTORE)
#error XSTATE_XRESTORE not defined
#endif
],[
struct fpu *fpu = &current->thread.fpu;
union fpregs_state *st = &fpu->state;
struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
])
])

AC_DEFUN([ZFS_AC_KERNEL_FPU], [
Expand Down Expand Up @@ -104,25 +116,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
[kernel exports FPU functions])
],[
dnl #
dnl # Linux 5.0 kernel
dnl #
ZFS_LINUX_TEST_RESULT([fpu_initialized], [
AC_MSG_RESULT(fpu.initialized)
AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1,
[kernel fpu.initialized exists])
ZFS_LINUX_TEST_RESULT([fpu_internal], [
AC_MSG_RESULT(internal)
AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
[kernel fpu internal])
],[
dnl #
dnl # Linux 5.2 kernel
dnl #
ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [
AC_MSG_RESULT(TIF_NEED_FPU_LOAD)
AC_DEFINE(
HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1,
[kernel TIF_NEED_FPU_LOAD exists])
],[
AC_MSG_RESULT(unavailable)
])
AC_MSG_RESULT(unavailable)
])
])
])
Expand Down
3 changes: 2 additions & 1 deletion include/os/linux/kernel/linux/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@
#else

/* SIMD unavailable on this platform: every kfpu operation is a no-op. */
#define	kfpu_allowed()		0
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)

#endif
#endif /* _LINUX_SIMD_H */
6 changes: 4 additions & 2 deletions include/os/linux/kernel/linux/simd_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@
*
* Kernel fpu methods:
* kfpu_allowed()
* kfpu_initialize()
* kfpu_begin()
* kfpu_end()
* kfpu_init()
* kfpu_fini()
*/

#ifndef _LINUX_SIMD_AARCH64_H
Expand All @@ -43,9 +44,10 @@
#include <asm/neon.h>

/*
 * aarch64: defer to the kernel's NEON API; no private per-cpu FPU
 * state is needed, so init/fini are no-ops.
 */
#define	kfpu_allowed()		1
#define	kfpu_begin()		kernel_neon_begin()
#define	kfpu_end()		kernel_neon_end()
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)

#endif /* __aarch64__ */

Expand Down
195 changes: 146 additions & 49 deletions include/os/linux/kernel/linux/simd_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@
*
* Kernel fpu methods:
* kfpu_allowed()
* kfpu_initialize()
* kfpu_begin()
* kfpu_end()
* kfpu_init()
* kfpu_fini()
*
* SIMD support:
*
Expand Down Expand Up @@ -99,7 +100,8 @@
#if defined(KERNEL_EXPORTS_X86_FPU)

/*
 * The kernel exports its FPU begin/end API, so no private per-cpu
 * state management is required; init/fini are no-ops.
 */
#define	kfpu_allowed()		1
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)

#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
#define kfpu_begin() \
Expand All @@ -126,96 +128,191 @@
#endif

#else /* defined(KERNEL_EXPORTS_X86_FPU) */

/*
* When the kernel_fpu_* symbols are unavailable then provide our own
* versions which allow the FPU to be safely used in kernel threads.
* In practice, this is not a significant restriction for ZFS since the
* vast majority of SIMD operations are performed by the IO pipeline.
* versions which allow the FPU to be safely used.
*/
#if defined(HAVE_KERNEL_FPU_INTERNAL)

extern union fpregs_state **zfs_kfpu_fpregs;

/*
 * Free the dedicated per-cpu FPU state buffers.  Tolerates a partially
 * populated array so kfpu_init() can use it for error unwinding: NULL
 * slots are skipped (kfree(NULL) would be a no-op regardless).
 * NOTE(review): assumes zfs_kfpu_fpregs itself is non-NULL -- only call
 * after kfpu_init() has at least allocated the pointer array.
 */
static inline void
kfpu_fini(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (zfs_kfpu_fpregs[cpu] != NULL) {
			kfree(zfs_kfpu_fpregs[cpu]);
		}
	}

	kfree(zfs_kfpu_fpregs);
}

/*
 * Allocate one zeroed FPU state buffer per possible CPU.  These
 * dedicated per-cpu buffers let kfpu_begin()/kfpu_end() save and
 * restore the FPU registers without relying on the state stored in
 * the task struct, which the kernel may modify underneath us.
 *
 * Returns 0 on success or -ENOMEM on allocation failure; any partial
 * allocations are released via kfpu_fini() before returning.
 *
 * NOTE(review): XSAVE requires a 64-byte aligned save area -- confirm
 * kmalloc_node() provides sufficient alignment for union fpregs_state.
 */
static inline int
kfpu_init(void)
{
	int cpu;

	/* Pointer array indexed by CPU id, sized for all possible CPUs. */
	zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
	    sizeof (union fpregs_state *), GFP_KERNEL);
	if (zfs_kfpu_fpregs == NULL)
		return (-ENOMEM);

	/* Allocate each buffer on its CPU's local NUMA node. */
	for_each_possible_cpu(cpu) {
		zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (union fpregs_state),
		    GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
		if (zfs_kfpu_fpregs[cpu] == NULL) {
			kfpu_fini();
			return (-ENOMEM);
		}
	}

	return (0);
}

#define kfpu_allowed() 1
#define ex_handler_fprestore ex_handler_default

/*
* FPU save and restore instructions.
*/
#define __asm __asm__ __volatile__
#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
: : [addr] "m" (rval));

static inline void
kfpu_initialize(void)
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
{
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
uint32_t low, hi;
int err;

#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
__fpu_invalidate_fpregs_state(&current->thread.fpu);
set_thread_flag(TIF_NEED_FPU_LOAD);
#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
__fpu_invalidate_fpregs_state(&current->thread.fpu);
current->thread.fpu.initialized = 1;
#endif
low = mask;
hi = mask >> 32;
XSTATE_XSAVE(addr, low, hi, err);
WARN_ON_ONCE(err);
}

static inline void
kfpu_begin(void)
kfpu_save_fxsr(struct fxregs_state *addr)
{
WARN_ON_ONCE(!kfpu_allowed());
if (IS_ENABLED(CONFIG_X86_32))
kfpu_fxsave(addr);
else
kfpu_fxsaveq(addr);
}

/*
 * Save the x87 FPU state with legacy fnsave (pre-FXSR CPUs).  The
 * kfpu_fnsave() macro includes an fwait so the store has completed
 * before execution continues.
 */
static inline void
kfpu_save_fsave(struct fregs_state *addr)
{
	kfpu_fnsave(addr);
}

static inline void
kfpu_begin(void)
{
/*
* Preemption and interrupts must be disabled for the critical
* region where the FPU state is being modified.
*/
preempt_disable();
local_irq_disable();

#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
/*
* The current FPU registers need to be preserved by kfpu_begin()
* and restored by kfpu_end(). This is required because we can
* not call __cpu_invalidate_fpregs_state() to invalidate the
* per-cpu FPU state and force them to be restored during a
* context switch.
* and restored by kfpu_end(). They are stored in a dedicated
* per-cpu variable, not in the task struct, this allows any user
* FPU state to be correctly preserved and restored.
*/
copy_fpregs_to_fpstate(&current->thread.fpu);
#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];

if (static_cpu_has(X86_FEATURE_XSAVE)) {
kfpu_save_xsave(&state->xsave, ~0);
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
kfpu_save_fxsr(&state->fxsave);
} else {
kfpu_save_fsave(&state->fsave);
}
}

/*
 * Restore extended processor state from 'addr' via the kernel's
 * XSTATE_XRESTORE() macro.  The 64-bit component mask is split into
 * EDX:EAX halves as the instruction encoding requires.
 */
static inline void
kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	XSTATE_XRESTORE(addr, low, hi);
}

/*
 * Restore the FPU/SSE state saved by kfpu_save_fxsr().
 */
static inline void
kfpu_restore_fxsr(struct fxregs_state *addr)
{
	/*
	 * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
	 * restores the _x87 FOP, FIP, and FDP registers when an exception
	 * is pending.  Clean the _x87 state to force the restore.
	 */
	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
		kfpu_fxsr_clean(addr);

	if (IS_ENABLED(CONFIG_X86_32)) {
		kfpu_fxrstor(addr);
	} else {
		kfpu_fxrstorq(addr);
	}
}

/*
 * Restore the x87 FPU state saved by kfpu_save_fsave() using the
 * legacy frstor instruction (pre-FXSR CPUs).
 */
static inline void
kfpu_restore_fsave(struct fregs_state *addr)
{
	kfpu_frstor(addr);
}

static inline void
kfpu_end(void)
{
#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
union fpregs_state *state = &current->thread.fpu.state;
int error;
union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];

if (use_xsave()) {
error = copy_kernel_to_xregs_err(&state->xsave, -1);
} else if (use_fxsr()) {
error = copy_kernel_to_fxregs_err(&state->fxsave);
if (static_cpu_has(X86_FEATURE_XSAVE)) {
kfpu_restore_xsave(&state->xsave, ~0);
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
kfpu_restore_fxsr(&state->fxsave);
} else {
error = copy_kernel_to_fregs_err(&state->fsave);
kfpu_restore_fsave(&state->fsave);
}
WARN_ON_ONCE(error);
#endif

local_irq_enable();
preempt_enable();
}
#endif /* defined(HAVE_KERNEL_FPU) */

#else

/*
* FPU support is unavailable.
*/
#define kfpu_allowed() 0
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
#endif /* defined(KERNEL_EXPORTS_X86_FPU) */

/*
* Linux kernel provides an interface for CPU feature testing.
*/

/*
* Detect register set support
*/
Expand Down
Loading

0 comments on commit f95fa71

Please sign in to comment.