diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 new file mode 100644 index 000000000000..1c5690969d48 --- /dev/null +++ b/config/kernel-fpu.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # 4.2 API change +dnl # asm/i387.h is replaced by asm/fpu/api.h +dnl # +AC_DEFUN([ZFS_AC_KERNEL_FPU], [ + AC_MSG_CHECKING([whether asm/fpu/api.h exists]) + ZFS_LINUX_TRY_COMPILE([ + #include + #include + ],[ + __kernel_fpu_begin(); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FPU_API_H, 1, [kernel has interface]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 9a52baed7335..c7996f5012d5 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -1,5 +1,5 @@ dnl # -dnl # Default ZFS kernel configuration +dnl # Default ZFS kernel configuration dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL @@ -91,6 +91,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_FOLLOW_DOWN_ONE ZFS_AC_KERNEL_MAKE_REQUEST_FN ZFS_AC_KERNEL_GENERIC_IO_ACCT + ZFS_AC_KERNEL_FPU AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ" diff --git a/config/toolchain-simd.m4 b/config/toolchain-simd.m4 new file mode 100644 index 000000000000..0f8c1f2d9fe8 --- /dev/null +++ b/config/toolchain-simd.m4 @@ -0,0 +1,172 @@ +dnl # +dnl # Checks if host toolchain supports SIMD instructions +dnl # +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [ + case "$host_cpu" in + x86_64 | x86 | i686) + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2 + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3 + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3 + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1 + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2 + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2 + ;; + esac +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE], [ + AC_MSG_CHECKING([whether host toolchain supports SSE]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + __asm__ __volatile__("xorps %xmm0, %xmm1"); + } + ]])], [ + AC_DEFINE([HAVE_SSE], 1, [Define if host toolchain supports SSE]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2 +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2], [ + AC_MSG_CHECKING([whether host toolchain supports SSE2]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + __asm__ __volatile__("pxor %xmm0, %xmm1"); + } + ]])], [ + AC_DEFINE([HAVE_SSE2], 1, [Define if host toolchain supports SSE2]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3 +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3], [ + AC_MSG_CHECKING([whether host toolchain supports SSE3]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + char v[16]; + __asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0])); + } + ]])], [ + AC_DEFINE([HAVE_SSE3], 1, [Define if host toolchain supports SSE3]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3 +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3], [ + AC_MSG_CHECKING([whether host toolchain supports SSSE3]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + __asm__ __volatile__("pshufb %xmm0,%xmm1"); + } + ]])], [ + AC_DEFINE([HAVE_SSSE3], 1, [Define if host toolchain supports SSSE3]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1 +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1], [ + AC_MSG_CHECKING([whether host toolchain supports SSE4.1]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + __asm__ __volatile__("pmaxsb %xmm0,%xmm1"); + } + ]])], [ + AC_DEFINE([HAVE_SSE4_1], 1, [Define if host toolchain supports SSE4.1]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2 +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2], [ + AC_MSG_CHECKING([whether host toolchain supports SSE4.2]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + __asm__ __volatile__("pcmpgtq %xmm0, %xmm1"); + } + ]])], [ + AC_DEFINE([HAVE_SSE4_2], 1, [Define if host toolchain supports SSE4.2]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX], [ + AC_MSG_CHECKING([whether host toolchain supports AVX]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([[ + void main() + { + char v[32]; + __asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0])); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_AVX], 1, [Define if host toolchain supports AVX]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2 +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [ + AC_MSG_CHECKING([whether host toolchain supports AVX2]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([ + [ + void main() + { + __asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2"); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_AVX2], 1, [Define if host toolchain supports AVX2]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index f93c5b5d4cca..a8bccf4af66c 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -63,6 +63,7 @@ AC_DEFUN([ZFS_AC_DEBUG_DMU_TX], [ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [ ZFS_AC_CONFIG_ALWAYS_NO_UNUSED_BUT_SET_VARIABLE ZFS_AC_CONFIG_ALWAYS_NO_BOOL_COMPARE + ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD ]) AC_DEFUN([ZFS_AC_CONFIG], [ diff --git a/include/linux/Makefile.am b/include/linux/Makefile.am index 595d1db01128..30f726892099 100644 --- a/include/linux/Makefile.am +++ b/include/linux/Makefile.am @@ -6,7 +6,8 @@ KERNEL_H = \ $(top_srcdir)/include/linux/vfs_compat.h \ $(top_srcdir)/include/linux/blkdev_compat.h \ $(top_srcdir)/include/linux/utsname_compat.h \ - $(top_srcdir)/include/linux/kmap_compat.h + $(top_srcdir)/include/linux/kmap_compat.h \ + $(top_srcdir)/include/linux/simd_x86.h USER_H = diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h new file mode 100644 index 000000000000..6aa51144c546 --- /dev/null +++ b/include/linux/simd_x86.h @@ -0,0 +1,388 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (C) 2016 Gvozden Neskovic . + */ + +/* + * USER API: + * + * Kernel fpu methods: + * kfpu_begin() + * kfpu_end() + * + * SIMD support: + * + * Following functions should be called to determine whether CPU feature + * is supported. All functions are usable in kernel and user space. + * If a SIMD algorithm is using more than one instruction set + * all relevant feature test functions should be called. + * + * Supported features: + * zfs_sse_available() + * zfs_sse2_available() + * zfs_sse3_available() + * zfs_ssse3_available() + * zfs_sse4_1_available() + * zfs_sse4_2_available() + * zfs_avx_available() + * zfs_avx2_available() + * zfs_bmi1_available() + * zfs_bmi2_available() + */ + +#ifndef _SIMD_X86_H +#define _SIMD_X86_H + +#include + +/* only for __x86 */ +#if defined(__x86) + +#include + +#if defined(_KERNEL) +#include +#else +#include +#endif + +#if defined(_KERNEL) +#if defined(HAVE_FPU_API_H) +#include +#include +#define kfpu_begin() \ +{ \ + preempt_disable(); \ + __kernel_fpu_begin(); \ +} +#define kfpu_end() \ +{ \ + __kernel_fpu_end(); \ + preempt_enable(); \ +} +#else +#include +#include +#define kfpu_begin() kernel_fpu_begin() +#define kfpu_end() kernel_fpu_end() +#endif /* defined(HAVE_FPU_API_H) */ +#else +/* + * fpu dummy methods for userspace + */ +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#endif /* defined(_KERNEL) */ + +/* + * CPUID feature tests for user-space. Linux kernel provides an interface for + * CPU feature testing. + */ +#if !defined(_KERNEL) + +/* + * x86 registers used implicitly by CPUID + */ +typedef enum cpuid_regs { + EAX = 0, + EBX, + ECX, + EDX, + CPUID_REG_CNT = 4 +} cpuid_regs_t; + +/* + * List of instruction sets identified by CPUID + */ +typedef enum cpuid_inst_sets { + SSE = 0, + SSE2, + SSE3, + SSSE3, + SSE4_1, + SSE4_2, + OSXSAVE, + AVX, + AVX2, + BMI1, + BMI2 +} cpuid_inst_sets_t; + +/* + * Instruction set descriptor. + */ +typedef struct cpuid_feature_desc { + uint32_t leaf; /* CPUID leaf */ + uint32_t subleaf; /* CPUID subleaf */ + uint32_t flag; /* bit mask of the feature */ + cpuid_regs_t reg; /* which CPUID return register to test */ +} cpuid_feature_desc_t; + +/* + * Descriptions of supported instruction sets + */ +static const cpuid_feature_desc_t cpuid_features[] = { + [SSE] = {1U, 0U, 1U << 25, EDX }, + [SSE2] = {1U, 0U, 1U << 26, EDX }, + [SSE3] = {1U, 0U, 1U << 0, ECX }, + [SSSE3] = {1U, 0U, 1U << 9, ECX }, + [SSE4_1] = {1U, 0U, 1U << 19, ECX }, + [SSE4_2] = {1U, 0U, 1U << 20, ECX }, + [OSXSAVE] = {1U, 0U, 1U << 27, ECX }, + [AVX] = {1U, 0U, 1U << 28, ECX }, + [AVX2] = {7U, 0U, 1U << 5, EBX }, + [BMI1] = {7U, 0U, 1U << 3, EBX }, + [BMI2] = {7U, 0U, 1U << 8, EBX } +}; + +/* + * Check if OS supports AVX and AVX2 by checking XCR0 + * Only call this function if CPUID indicates that AVX feature is + * supported by the CPU, otherwise it might be an illegal instruction. + */ +static inline uint64_t +xgetbv(uint32_t index) +{ + uint32_t eax, edx; + /* xgetbv - instruction byte code */ + __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index)); + + return ((((uint64_t)edx)<<32) | (uint64_t)eax); +} + +/* + * Check if CPU supports a feature + */ +static inline boolean_t +__cpuid_check_feature(const cpuid_feature_desc_t *desc) +{ + uint32_t r[CPUID_REG_CNT]; + + if (__get_cpuid_max(0, NULL) >= desc->leaf) { + /* + * __cpuid_count is needed to properly check + * for AVX2. It is a macro, so return parameters + * are passed by value. + */ + __cpuid_count(desc->leaf, desc->subleaf, + r[EAX], r[EBX], r[ECX], r[EDX]); + return (!!(r[desc->reg] & desc->flag)); + } + return (B_FALSE); +} + +#define CPUID_FEATURE_CHECK(name, id) \ +static inline boolean_t \ +__cpuid_has_ ## name(void)\ +{ \ + return (__cpuid_check_feature(&cpuid_features[id])); \ +} + +/* + * Define functions for user-space CPUID features testing + */ +CPUID_FEATURE_CHECK(sse, SSE); +CPUID_FEATURE_CHECK(sse2, SSE2); +CPUID_FEATURE_CHECK(sse3, SSE3); +CPUID_FEATURE_CHECK(ssse3, SSSE3); +CPUID_FEATURE_CHECK(sse4_1, SSE4_1); +CPUID_FEATURE_CHECK(sse4_2, SSE4_2); +CPUID_FEATURE_CHECK(avx, AVX); +CPUID_FEATURE_CHECK(avx2, AVX2); +CPUID_FEATURE_CHECK(osxsave, OSXSAVE); +CPUID_FEATURE_CHECK(bmi1, BMI1); +CPUID_FEATURE_CHECK(bmi2, BMI2); + +#endif /* !defined(_KERNEL) */ + +/* + * Detect ymm register set support + */ +static inline boolean_t +__ymm_enabled(void) +{ + static const uint64_t XSTATE_SSE_AVX = 0x2 | 0x4; + boolean_t has_osxsave; + uint64_t xcr0; + +#if defined(_KERNEL) && defined(X86_FEATURE_OSXSAVE) + has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE); +#elif defined(_KERNEL) && !defined(X86_FEATURE_OSXSAVE) + has_osxsave = B_FALSE; +#else + has_osxsave = __cpuid_has_osxsave(); +#endif + + if (!has_osxsave) + return (B_FALSE); + + xcr0 = xgetbv(0); + return ((xcr0 & XSTATE_SSE_AVX) == XSTATE_SSE_AVX); +} + +/* + * Check if SSE instruction set is available + */ +static inline boolean_t +zfs_sse_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM)); +#else + return (__cpuid_has_sse()); +#endif +} + +/* + * Check if SSE2 instruction set is available + */ +static inline boolean_t +zfs_sse2_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM2)); +#else + return (__cpuid_has_sse2()); +#endif +} + +/* + * Check if SSE3 instruction set is available + */ +static inline boolean_t +zfs_sse3_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM3)); +#else + return (__cpuid_has_sse3()); +#endif +} + +/* + * Check if SSSE3 instruction set is available + */ +static inline boolean_t +zfs_ssse3_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_SSSE3)); +#else + return (__cpuid_has_ssse3()); +#endif +} + +/* + * Check if SSE4.1 instruction set is available + */ +static inline boolean_t +zfs_sse4_1_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM4_1)); +#else + return (__cpuid_has_sse4_1()); +#endif +} + +/* + * Check if SSE4.2 instruction set is available + */ +static inline boolean_t +zfs_sse4_2_available(void) +{ +#if defined(_KERNEL) + return (!!boot_cpu_has(X86_FEATURE_XMM4_2)); +#else + return (__cpuid_has_sse4_2()); +#endif +} + +/* + * Check if AVX instruction set is available + */ +static inline boolean_t +zfs_avx_available(void) +{ + boolean_t has_avx; +#if defined(_KERNEL) + has_avx = !!boot_cpu_has(X86_FEATURE_AVX); +#else + has_avx = __cpuid_has_avx(); +#endif + + return (has_avx && __ymm_enabled()); +} + +/* + * Check if AVX2 instruction set is available + */ +static inline boolean_t +zfs_avx2_available(void) +{ + boolean_t has_avx2; +#if defined(_KERNEL) && defined(X86_FEATURE_AVX2) + has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2); +#elif defined(_KERNEL) && !defined(X86_FEATURE_AVX2) + has_avx2 = B_FALSE; +#else + has_avx2 = __cpuid_has_avx2(); +#endif + + return (has_avx2 && __ymm_enabled()); +} + +/* + * Check if BMI1 instruction set is available + */ +static inline boolean_t +zfs_bmi1_available(void) +{ +#if defined(_KERNEL) && defined(X86_FEATURE_BMI1) + return (!!boot_cpu_has(X86_FEATURE_BMI1)); +#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI1) + return (B_FALSE); +#else + return (__cpuid_has_bmi1()); +#endif +} + +/* + * Check if BMI2 instruction set is available + */ +static inline boolean_t +zfs_bmi2_available(void) +{ +#if defined(_KERNEL) && defined(X86_FEATURE_BMI2) + return (!!boot_cpu_has(X86_FEATURE_BMI2)); +#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI2) + return (B_FALSE); +#else + return (__cpuid_has_bmi2()); +#endif +} + +#endif /* defined(__x86) */ + +#endif /* _SIMD_X86_H */