Skip to content

Commit

Permalink
Disable RVV runtime dispatch for clang. Fixes #2227
Browse files (browse the repository at this point in the history)
Public Clang <= 18 still appears to require compiler flags for RVV.
Also split HWY_HAVE_RUNTIME_DISPATCH into multiple macros to enable
overriding parts of the logic.

PiperOrigin-RevId: 639667053
  • Loading branch information
jan-wassenberg authored and copybara-github committed Jun 3, 2024
1 parent 457c891 commit 0971a67
Showing 1 changed file with 32 additions and 12 deletions.
44 changes: 32 additions & 12 deletions hwy/detect_targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,23 +596,43 @@
#endif
#endif // HWY_HAVE_AUXV

// Runtime dispatch switch for RISC-V vector (RVV). A value defined before this
// point is kept as-is, so builds can override the detection below.
#if !defined(HWY_HAVE_RUNTIME_DISPATCH_RVV)
// riscv_vector.h in GCC 13.3 and Clang 16-18 is only usable when RVV compiler
// flags are passed, see https://github.com/llvm/llvm-project/issues/56592.
// GCC 14.1 is not affected, hence only GCC 14+ is enabled here.
#if !HWY_ARCH_RISCV || HWY_COMPILER_GCC_ACTUAL < 1400
#define HWY_HAVE_RUNTIME_DISPATCH_RVV 0
#else
#define HWY_HAVE_RUNTIME_DISPATCH_RVV 1
#endif
#endif  // !defined(HWY_HAVE_RUNTIME_DISPATCH_RVV)

// Runtime dispatch switch for 64-bit Arm on Apple platforms. A value defined
// before this point is kept as-is, so builds can override the detection below.
#if !defined(HWY_HAVE_RUNTIME_DISPATCH_APPLE)
// Disabled unless the target is Arm A64 on an Apple OS and the compiler is
// either GCC or Clang 17+.
#if !(HWY_ARCH_ARM_A64 && HWY_OS_APPLE) || \
    !(HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
#define HWY_HAVE_RUNTIME_DISPATCH_APPLE 0
#else
#define HWY_HAVE_RUNTIME_DISPATCH_APPLE 1
#endif
#endif  // !defined(HWY_HAVE_RUNTIME_DISPATCH_APPLE)

// Runtime dispatch switch for Arm, PPC and S390X on Linux. A value defined
// before this point is kept as-is, so builds can override the detection below.
#if !defined(HWY_HAVE_RUNTIME_DISPATCH_LINUX)
// Enabled only when all of the following hold: Linux with HWY_HAVE_AUXV set,
// one of the listed architectures, and GCC or Clang 17+ as the compiler.
#if HWY_OS_LINUX && HWY_HAVE_AUXV && \
    (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X) && \
    (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700)
#define HWY_HAVE_RUNTIME_DISPATCH_LINUX 1
#else
#define HWY_HAVE_RUNTIME_DISPATCH_LINUX 0
#endif
#endif  // !defined(HWY_HAVE_RUNTIME_DISPATCH_LINUX)

// Master switch for runtime dispatch. Allow opting out, and without a
// guarantee of success, opting-in: a value defined before this point is kept.
#ifndef HWY_HAVE_RUNTIME_DISPATCH
// Clang, GCC and MSVC allow OS-independent runtime dispatch on x86. All other
// platforms are decided solely by the individually overridable
// HWY_HAVE_RUNTIME_DISPATCH_{RVV,APPLE,LINUX} macros defined above, so that
// overriding one of them (or the RVV/Clang exclusion) actually takes effect.
// The #else branch guarantees the macro is always defined afterwards.
#if HWY_ARCH_X86 || HWY_HAVE_RUNTIME_DISPATCH_RVV || \
    HWY_HAVE_RUNTIME_DISPATCH_APPLE || HWY_HAVE_RUNTIME_DISPATCH_LINUX
#define HWY_HAVE_RUNTIME_DISPATCH 1
#else
#define HWY_HAVE_RUNTIME_DISPATCH 0
#endif
#endif  // HWY_HAVE_RUNTIME_DISPATCH

// AVX3_DL is not widely available yet. To reduce code size and compile time,
Expand Down

0 comments on commit 0971a67

Please sign in to comment.