From bbdf3192331e911fcdfbf6b60644a5b983872187 Mon Sep 17 00:00:00 2001 From: Jan Wassenberg Date: Wed, 10 Apr 2024 22:47:43 -0700 Subject: [PATCH] Re-enable (#3) native support of dynamic dispatch on Clang. Refs #838 PiperOrigin-RevId: 623714267 --- hwy/contrib/math/math-inl.h | 3 ++- hwy/contrib/math/math_test.cc | 8 ++++---- hwy/detect_targets.h | 13 +++++-------- hwy/ops/arm_sve-inl.h | 8 +++++--- hwy/ops/set_macros-inl.h | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hwy/contrib/math/math-inl.h b/hwy/contrib/math/math-inl.h index 10d7c176bf..5687f669a0 100644 --- a/hwy/contrib/math/math-inl.h +++ b/hwy/contrib/math/math-inl.h @@ -331,7 +331,8 @@ HWY_NOINLINE V CallTanh(const D d, VecArg x) { template HWY_INLINE void SinCos(D d, V x, V& s, V& c); template -HWY_NOINLINE V CallSinCos(const D d, VecArg x, VecArg& s, VecArg& c) { +HWY_NOINLINE void CallSinCos(const D d, VecArg x, VecArg& s, + VecArg& c) { SinCos(d, x, s, c); } diff --git a/hwy/contrib/math/math_test.cc b/hwy/contrib/math/math_test.cc index 02eb4abe0f..f2d2d18198 100644 --- a/hwy/contrib/math/math_test.cc +++ b/hwy/contrib/math/math_test.cc @@ -119,7 +119,7 @@ HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T), const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected); max_ulp = HWY_MAX(max_ulp, ulp); if (ulp > max_error_ulp) { - fprintf(stderr, "%s: %s(%f) expected %f actual %f ulp %g max ulp %u\n", + fprintf(stderr, "%s: %s(%f) expected %E actual %E ulp %g max ulp %u\n", hwy::TypeName(T(), Lanes(d)).c_str(), name, value, expected, actual, static_cast(ulp), static_cast(max_error_ulp)); @@ -181,14 +181,14 @@ constexpr uint64_t ACosh32ULP() { template static Vec SinCosSin(const D d, VecArg> x) { Vec s, c; - SinCos(d, x, s, c); + CallSinCos(d, x, s, c); return s; } template static Vec SinCosCos(const D d, VecArg> x) { Vec s, c; - SinCos(d, x, s, c); + CallSinCos(d, x, s, c); return c; } @@ -390,7 +390,7 @@ struct TestAtan2 { if (!AllTrue(d, ok)) { const size_t mismatch = static_cast(FindKnownFirstTrue(d, Not(ok))); - fprintf(stderr, "Mismatch for i=%d expected %f actual %f\n", + fprintf(stderr, "Mismatch for i=%d expected %E actual %E\n", static_cast(i + mismatch), expected[i + mismatch], ExtractLane(actual, mismatch)); HWY_ASSERT(0); diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h index f3571bcf7a..b75c0eb85f 100644 --- a/hwy/detect_targets.h +++ b/hwy/detect_targets.h @@ -554,14 +554,11 @@ // Clang, GCC and MSVC allow runtime dispatch on x86. #if HWY_ARCH_X86 #define HWY_HAVE_RUNTIME_DISPATCH 1 -// On Arm, PPC, S390X, and RISC-V: GCC and Clang 16+ do, and we require Linux -// to detect CPU capabilities. Currently require opt-in for Clang on Arm -// because it is experimental. -#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \ - (HWY_COMPILER_GCC_ACTUAL || \ - (HWY_COMPILER_CLANG >= 1600 && \ - (!HWY_ARCH_ARM || defined(HWY_ENABLE_CLANG_ARM_DISPATCH)))) && \ - HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H) +// On Arm, PPC, S390X, and RISC-V: GCC and Clang 17+ do, and we require Linux +// to detect CPU capabilities. +#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \ + (HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_OS_LINUX && \ + !defined(TOOLCHAIN_MISS_SYS_AUXV_H) #define HWY_HAVE_RUNTIME_DISPATCH 1 #else #define HWY_HAVE_RUNTIME_DISPATCH 0 diff --git a/hwy/ops/arm_sve-inl.h b/hwy/ops/arm_sve-inl.h index 211c3653c9..64d70c0213 100644 --- a/hwy/ops/arm_sve-inl.h +++ b/hwy/ops/arm_sve-inl.h @@ -3257,13 +3257,15 @@ HWY_API TFromV ExtractLane(V v, size_t i) { } // ------------------------------ InsertLane (IfThenElse) -template -HWY_API V InsertLane(const V v, size_t i, TFromV t) { +template +HWY_API V InsertLane(const V v, size_t i, T t) { + static_assert(sizeof(TFromV) == sizeof(T), "Lane size mismatch"); const DFromV d; const RebindToSigned di; using TI = TFromD; const svbool_t is_i = detail::EqN(Iota(di, 0), static_cast(i)); - return IfThenElse(RebindMask(d, is_i), Set(d, t), v); + return IfThenElse(RebindMask(d, is_i), + Set(d, hwy::ConvertScalarTo>(t)), v); } // ------------------------------ DupEven diff --git a/hwy/ops/set_macros-inl.h b/hwy/ops/set_macros-inl.h index ef172e7a28..104c2ea1f4 100644 --- a/hwy/ops/set_macros-inl.h +++ b/hwy/ops/set_macros-inl.h @@ -416,7 +416,7 @@ #if HWY_TARGET == HWY_NEON_WITHOUT_AES // Do not define HWY_TARGET_STR (no pragma). #else -#define HWY_TARGET_STR "+crypto" +#define HWY_TARGET_STR "+aes" #endif // HWY_TARGET == HWY_NEON_WITHOUT_AES #endif // HWY_ARCH_ARM_V7