forked from openzfs/zfs
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support for vectorized algorithms on x86
This is the initial support for x86 vectorized implementations of ZFS parity and checksum algorithms. For the compilation phase, the configure step checks whether the toolchain supports the relevant instruction sets. Each implementation must ensure that its code is not passed to the compiler if the relevant instruction set is not supported. For this purpose, the following new defines are provided when the corresponding instruction set is supported: - HAVE_SSE, - HAVE_SSE2, - HAVE_SSE3, - HAVE_SSSE3, - HAVE_SSE4_1, - HAVE_SSE4_2, - HAVE_AVX, - HAVE_AVX2. For detecting whether an instruction set can be used at runtime, the following functions are provided in include/linux/simd_x86.h: - zfs_sse_available() - zfs_sse2_available() - zfs_sse3_available() - zfs_ssse3_available() - zfs_sse4_1_available() - zfs_sse4_2_available() - zfs_avx_available() - zfs_avx2_available() - zfs_bmi1_available() - zfs_bmi2_available() These functions should be called once, on module load or initialization. They are safe to use from both user and kernel space. If an implementation uses more than a single instruction set, both compiler and runtime support for all relevant instruction sets should be checked. Kernel FPU methods: - kfpu_begin() - kfpu_end() Use __get_cpuid_max and __cpuid_count from <cpuid.h>. Both gcc and clang have support for these. They also handle the ebx register in case it is used for PIC code.
- Loading branch information
Showing
6 changed files
with
583 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
dnl #
dnl # 4.2 API change
dnl # asm/i387.h is replaced by asm/fpu/api.h
dnl #
dnl # Compiles a kernel test snippet that includes <asm/fpu/api.h> and
dnl # calls __kernel_fpu_begin(); on success HAVE_FPU_API_H is defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
	AC_MSG_CHECKING([whether asm/fpu/api.h exists])
	ZFS_LINUX_TRY_COMPILE([
		#include <linux/kernel.h>
		#include <asm/fpu/api.h>
	], [
		__kernel_fpu_begin();
	], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_FPU_API_H], 1,
		    [kernel has <asm/fpu/api.h> interface])
	], [
		AC_MSG_RESULT([no])
	])
])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
dnl #
dnl # Checks if host toolchain supports SIMD instructions
dnl #
dnl # The individual probes are run only when $host_cpu names an x86
dnl # processor.  Besides x86_64 and x86 this accepts amd64, the spelling
dnl # used for 64-bit x86 on some systems, and every 32-bit iN86 variant
dnl # rather than i686 alone.  On any other CPU no HAVE_<ISA> macro is
dnl # defined.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
	case "$host_cpu" in
		amd64 | x86_64 | x86 | i?86)
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
			;;
	esac
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
dnl #
dnl # Defines HAVE_SSE when the host toolchain can compile and link a
dnl # program containing an SSE instruction (xorps).  The probe is only
dnl # linked, never run, so it tests the toolchain and not the build CPU.
dnl # main is declared as int main(void) so that a strict or -Werror
dnl # compiler does not reject the probe for an unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE], [
	AC_MSG_CHECKING([whether host toolchain supports SSE])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			__asm__ __volatile__("xorps %xmm0, %xmm1");
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_SSE], 1, [Define if host toolchain supports SSE])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
dnl #
dnl # Defines HAVE_SSE2 when the host toolchain can compile and link a
dnl # program containing an SSE2 instruction (pxor on xmm registers).
dnl # The probe is only linked, never run.  main is declared as
dnl # int main(void) so that a strict or -Werror compiler does not reject
dnl # the probe for an unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2], [
	AC_MSG_CHECKING([whether host toolchain supports SSE2])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			__asm__ __volatile__("pxor %xmm0, %xmm1");
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_SSE2], 1, [Define if host toolchain supports SSE2])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
dnl #
dnl # Defines HAVE_SSE3 when the host toolchain can compile and link a
dnl # program containing an SSE3 instruction (lddqu from a memory
dnl # operand).  The probe is only linked, never run, so the contents of
dnl # the buffer are irrelevant.  main is declared as int main(void) so
dnl # that a strict or -Werror compiler does not reject the probe for an
dnl # unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3], [
	AC_MSG_CHECKING([whether host toolchain supports SSE3])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			char v[16];
			__asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0]));
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_SSE3], 1, [Define if host toolchain supports SSE3])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
dnl #
dnl # Defines HAVE_SSSE3 when the host toolchain can compile and link a
dnl # program containing an SSSE3 instruction (pshufb on xmm registers).
dnl # The probe is only linked, never run.  main is declared as
dnl # int main(void) so that a strict or -Werror compiler does not reject
dnl # the probe for an unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3], [
	AC_MSG_CHECKING([whether host toolchain supports SSSE3])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			__asm__ __volatile__("pshufb %xmm0,%xmm1");
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_SSSE3], 1, [Define if host toolchain supports SSSE3])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
dnl #
dnl # Defines HAVE_SSE4_1 when the host toolchain can compile and link a
dnl # program containing an SSE4.1 instruction (pmaxsb).  The probe is
dnl # only linked, never run.  main is declared as int main(void) so that
dnl # a strict or -Werror compiler does not reject the probe for an
dnl # unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1], [
	AC_MSG_CHECKING([whether host toolchain supports SSE4.1])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			__asm__ __volatile__("pmaxsb %xmm0,%xmm1");
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_SSE4_1], 1, [Define if host toolchain supports SSE4.1])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
dnl #
dnl # Defines HAVE_SSE4_2 when the host toolchain can compile and link a
dnl # program containing an SSE4.2 instruction (pcmpgtq).  The probe is
dnl # only linked, never run.  main is declared as int main(void) so that
dnl # a strict or -Werror compiler does not reject the probe for an
dnl # unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2], [
	AC_MSG_CHECKING([whether host toolchain supports SSE4.2])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			__asm__ __volatile__("pcmpgtq %xmm0, %xmm1");
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_SSE4_2], 1, [Define if host toolchain supports SSE4.2])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
dnl #
dnl # Defines HAVE_AVX when the host toolchain can compile and link a
dnl # program containing an AVX instruction (vmovdqa into a ymm
dnl # register).  The probe is only linked, never run, so neither the
dnl # contents nor the alignment of the buffer matter.  main is declared
dnl # as int main(void) so that a strict or -Werror compiler does not
dnl # reject the probe for an unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX], [
	AC_MSG_CHECKING([whether host toolchain supports AVX])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			char v[32];
			__asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0]));
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_AVX], 1, [Define if host toolchain supports AVX])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||
dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
dnl #
dnl # Defines HAVE_AVX2 when the host toolchain can compile and link a
dnl # program containing an AVX2 instruction (vpshufb on ymm registers).
dnl # The probe is only linked, never run.  main is declared as
dnl # int main(void) so that a strict or -Werror compiler does not reject
dnl # the probe for an unrelated reason.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [
	AC_MSG_CHECKING([whether host toolchain supports AVX2])
	AC_LINK_IFELSE([AC_LANG_SOURCE([[
		int main(void)
		{
			__asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2");
			return (0);
		}
	]])], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_AVX2], 1, [Define if host toolchain supports AVX2])
	], [
		AC_MSG_RESULT([no])
	])
])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.