Skip to content

Commit

Permalink
Re-enable (#3) native support of dynamic dispatch on Clang. Refs #838
Browse files: browse the repository at this point in the history
PiperOrigin-RevId: 623382838
  • Loading branch information
jan-wassenberg authored and copybara-github committed Apr 10, 2024
1 parent 701c328 commit a0ecd4e
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 17 deletions.
3 changes: 2 additions & 1 deletion hwy/contrib/math/math-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,8 @@ HWY_NOINLINE V CallTanh(const D d, VecArg<V> x) {
// Forward declaration of SinCos; its results are handed back through the
// reference parameters `s` and `c`.
template <class D, class V>
HWY_INLINE void SinCos(D d, V x, V& s, V& c);
// Non-inlined wrapper that forwards its arguments unchanged to SinCos.
// NOTE(review): this text is a diff rendered without +/- markers. The
// signature returning `V` appears to be the removed line (the body has no
// return statement); the two-line `void` signature below it is presumably
// its replacement.
template <class D, class V>
HWY_NOINLINE V CallSinCos(const D d, VecArg<V> x, VecArg<V>& s, VecArg<V>& c) {
HWY_NOINLINE void CallSinCos(const D d, VecArg<V> x, VecArg<V>& s,
VecArg<V>& c) {
SinCos(d, x, s, c);
}

Expand Down
8 changes: 4 additions & 4 deletions hwy/contrib/math/math_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T),
const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected);
max_ulp = HWY_MAX(max_ulp, ulp);
if (ulp > max_error_ulp) {
fprintf(stderr, "%s: %s(%f) expected %f actual %f ulp %g max ulp %u\n",
fprintf(stderr, "%s: %s(%f) expected %E actual %E ulp %g max ulp %u\n",
hwy::TypeName(T(), Lanes(d)).c_str(), name, value, expected,
actual, static_cast<double>(ulp),
static_cast<uint32_t>(max_error_ulp));
Expand Down Expand Up @@ -181,14 +181,14 @@ constexpr uint64_t ACosh32ULP() {
// Test adapter with a single-output signature: runs the combined sin/cos
// routine on `x` and returns only `s`; `c` is computed and discarded.
// NOTE(review): diff view without +/- markers. The direct `SinCos` call looks
// like the removed line and the `CallSinCos` call its replacement, so only
// one of the two calls exists in either version of the file.
template <class D>
static Vec<D> SinCosSin(const D d, VecArg<Vec<D>> x) {
Vec<D> s, c;
SinCos(d, x, s, c);
CallSinCos(d, x, s, c);
return s;
}

// Test adapter with a single-output signature: runs the combined sin/cos
// routine on `x` and returns only `c`; `s` is computed and discarded.
// NOTE(review): diff view without +/- markers. The direct `SinCos` call looks
// like the removed line and the `CallSinCos` call its replacement, so only
// one of the two calls exists in either version of the file.
template <class D>
static Vec<D> SinCosCos(const D d, VecArg<Vec<D>> x) {
Vec<D> s, c;
SinCos(d, x, s, c);
CallSinCos(d, x, s, c);
return c;
}

Expand Down Expand Up @@ -390,7 +390,7 @@ struct TestAtan2 {
if (!AllTrue(d, ok)) {
const size_t mismatch =
static_cast<size_t>(FindKnownFirstTrue(d, Not(ok)));
fprintf(stderr, "Mismatch for i=%d expected %f actual %f\n",
fprintf(stderr, "Mismatch for i=%d expected %E actual %E\n",
static_cast<int>(i + mismatch), expected[i + mismatch],
ExtractLane(actual, mismatch));
HWY_ASSERT(0);
Expand Down
13 changes: 5 additions & 8 deletions hwy/detect_targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,14 +553,11 @@
// Clang, GCC and MSVC allow runtime dispatch on x86.
#if HWY_ARCH_X86
#define HWY_HAVE_RUNTIME_DISPATCH 1
// On Arm, PPC, S390X, and RISC-V: GCC and Clang 16+ do, and we require Linux
// to detect CPU capabilities. Currently require opt-in for Clang on Arm
// because it is experimental.
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
(HWY_COMPILER_GCC_ACTUAL || \
(HWY_COMPILER_CLANG >= 1600 && \
(!HWY_ARCH_ARM || defined(HWY_ENABLE_CLANG_ARM_DISPATCH)))) && \
HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
// On Arm, PPC, S390X, and RISC-V: GCC and Clang 17+ do, and we require Linux
// to detect CPU capabilities.
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RVV) && \
(HWY_COMPILER_GCC_ACTUAL || HWY_COMPILER_CLANG >= 1700) && HWY_OS_LINUX && \
!defined(TOOLCHAIN_MISS_SYS_AUXV_H)
#define HWY_HAVE_RUNTIME_DISPATCH 1
#else
#define HWY_HAVE_RUNTIME_DISPATCH 0
Expand Down
8 changes: 5 additions & 3 deletions hwy/ops/arm_sve-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3257,13 +3257,15 @@ HWY_API TFromV<V> ExtractLane(V v, size_t i) {
}

// ------------------------------ InsertLane (IfThenElse)
// Returns `v` with lane `i` replaced by `t`. A mask is built by comparing an
// Iota sequence starting at 0 (in the signed lane type) against `i`, and
// IfThenElse then picks the broadcast of `t` where the mask is set and `v`
// elsewhere.
// NOTE(review): diff view without +/- markers. The `template <class V>`
// header taking `TFromV<V> t` and the one-line `return` using `Set(d, t)`
// appear to be the removed lines. The replacement takes any `T`,
// static_asserts that it has the same size as the lane type, and converts
// `t` with hwy::ConvertScalarTo before broadcasting.
template <class V>
HWY_API V InsertLane(const V v, size_t i, TFromV<V> t) {
template <class V, typename T>
HWY_API V InsertLane(const V v, size_t i, T t) {
static_assert(sizeof(TFromV<V>) == sizeof(T), "Lane size mismatch");
const DFromV<V> d;
const RebindToSigned<decltype(d)> di;
using TI = TFromD<decltype(di)>;
// Mask that is true only in the lane whose index equals i.
const svbool_t is_i = detail::EqN(Iota(di, 0), static_cast<TI>(i));
return IfThenElse(RebindMask(d, is_i), Set(d, t), v);
return IfThenElse(RebindMask(d, is_i),
Set(d, hwy::ConvertScalarTo<TFromV<V>>(t)), v);
}

// ------------------------------ DupEven
Expand Down
2 changes: 1 addition & 1 deletion hwy/ops/set_macros-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@
// Selects the target-feature string for this NEON target: none for
// HWY_NEON_WITHOUT_AES, otherwise a string naming the AES-related feature.
// NOTE(review): diff view without +/- markers. This hunk is listed as one
// addition and one deletion, so "+crypto" is presumably the removed
// definition and "+aes" its replacement; only one #define exists in either
// version of the file.
#if HWY_TARGET == HWY_NEON_WITHOUT_AES
// Do not define HWY_TARGET_STR (no pragma).
#else
#define HWY_TARGET_STR "+crypto"
#define HWY_TARGET_STR "+aes"
#endif  // HWY_TARGET == HWY_NEON_WITHOUT_AES

#endif // HWY_ARCH_ARM_V7
Expand Down

0 comments on commit a0ecd4e

Please sign in to comment.