From fcf710171a4fa3dc226e2ff68b504b076cc2e6c3 Mon Sep 17 00:00:00 2001 From: Lu Yahan Date: Wed, 10 Apr 2024 14:27:34 +0800 Subject: [PATCH] Replace HWY_ARCH_RVV with HWY_ARCH_RISCV --- hwy/abort.cc | 2 +- hwy/aligned_allocator.cc | 2 +- hwy/base.h | 6 +++--- hwy/contrib/sort/vqsort.cc | 2 +- hwy/detect_compiler_arch.h | 8 +++++--- hwy/detect_targets.h | 4 ++-- hwy/foreach_target.h | 2 +- hwy/ops/emu128-inl.h | 2 +- hwy/targets.cc | 6 +++--- hwy/targets.h | 10 +++++----- hwy/tests/convert_test.cc | 8 ++++---- hwy/tests/test_util-inl.h | 4 ++-- hwy/timer-inl.h | 2 +- hwy/timer.cc | 2 +- 14 files changed, 31 insertions(+), 29 deletions(-) diff --git a/hwy/abort.cc b/hwy/abort.cc index 972aba3505..ce4d5016e5 100644 --- a/hwy/abort.cc +++ b/hwy/abort.cc @@ -60,7 +60,7 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) fflush(stderr); // Now terminate the program: -#if HWY_ARCH_RVV +#if HWY_ARCH_RISCV exit(1); // trap/abort just freeze Spike. #elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC // Facilitates breaking into a debugger, but don't use this in non-debug diff --git a/hwy/aligned_allocator.cc b/hwy/aligned_allocator.cc index 1b87e50ed1..b88a64e24e 100644 --- a/hwy/aligned_allocator.cc +++ b/hwy/aligned_allocator.cc @@ -27,7 +27,7 @@ namespace hwy { namespace { -#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 +#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 // Not actually an upper bound on the size, but this value prevents crossing a // 4K boundary (relevant on Andes). constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096); diff --git a/hwy/base.h b/hwy/base.h index 0e6f8624a9..2d1802be6b 100644 --- a/hwy/base.h +++ b/hwy/base.h @@ -370,7 +370,7 @@ HWY_API void ZeroBytes(void* to, size_t num_bytes) { #if HWY_ARCH_X86 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64; // AVX-512 -#elif HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && \ +#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \ __riscv_v_intrinsic >= 11000 // Not actually an upper bound on the size. static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096; @@ -386,7 +386,7 @@ static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16; // exceed the stack size. #if HWY_ARCH_X86 #define HWY_ALIGN_MAX alignas(64) -#elif HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && \ +#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \ __riscv_v_intrinsic >= 11000 #define HWY_ALIGN_MAX alignas(8) // only elements need be aligned #else @@ -1046,7 +1046,7 @@ HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From& val) { // RVV with f16 extension supports _Float16 and f16 vector ops. If set, implies // HWY_HAVE_FLOAT16. -#if HWY_ARCH_RVV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600 +#if HWY_ARCH_RISCV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600 #define HWY_RVV_HAVE_F16_VEC 1 #else #define HWY_RVV_HAVE_F16_VEC 0 diff --git a/hwy/contrib/sort/vqsort.cc b/hwy/contrib/sort/vqsort.cc index da8ca64b39..6fab0bbbea 100644 --- a/hwy/contrib/sort/vqsort.cc +++ b/hwy/contrib/sort/vqsort.cc @@ -23,7 +23,7 @@ // unavailable on Android and non-Linux RVV, we assume that those systems lack // getrandom. Note that the only supported sources of entropy are getrandom or // Windows, thus VQSORT_SECURE_SEED=0 when this is 0 and we are not on Windows. -#if defined(ANDROID) || defined(__ANDROID__) || (HWY_ARCH_RVV && !HWY_OS_LINUX) +#if defined(ANDROID) || defined(__ANDROID__) || (HWY_ARCH_RISCV && !HWY_OS_LINUX) #define VQSORT_GETRANDOM 0 #endif diff --git a/hwy/detect_compiler_arch.h b/hwy/detect_compiler_arch.h index 98545d3d3e..5e680aad12 100644 --- a/hwy/detect_compiler_arch.h +++ b/hwy/detect_compiler_arch.h @@ -233,10 +233,12 @@ #endif #ifdef __riscv -#define HWY_ARCH_RVV 1 +#define HWY_ARCH_RISCV 1 #else -#define HWY_ARCH_RVV 0 +#define HWY_ARCH_RISCV 0 #endif +// DEPRECATED names; please use HWY_ARCH_RISCV instead. +#define HWY_ARCH_RVV HWY_ARCH_RISCV #if defined(__s390x__) #define HWY_ARCH_S390X 1 @@ -247,7 +249,7 @@ // It is an error to detect multiple architectures at the same time, but OK to // detect none of the above. #if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \ - HWY_ARCH_WASM + HWY_ARCH_RVV + HWY_ARCH_S390X) > 1 + HWY_ARCH_WASM + HWY_ARCH_RISCV + HWY_ARCH_S390X) > 1 #error "Must not detect more than one architecture" #endif diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h index 372173dbcf..0f28bf30a5 100644 --- a/hwy/detect_targets.h +++ b/hwy/detect_targets.h @@ -505,7 +505,7 @@ #endif // RVV requires intrinsics 0.11 or later, see #1156. -#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 +#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 #define HWY_BASELINE_RVV HWY_RVV #else #define HWY_BASELINE_RVV 0 @@ -556,7 +556,7 @@ // On Arm, PPC, S390X, and RISC-V: GCC and Clang 16+ do, and we require Linux // to detect CPU capabilities. Currently require opt-in for Clang on Arm // because it is experimental. -#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RVV) && \ +#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \ (HWY_COMPILER_GCC_ACTUAL || \ (HWY_COMPILER_CLANG >= 1600 && \ (!HWY_ARCH_ARM || defined(HWY_ENABLE_CLANG_ARM_DISPATCH)))) && \ diff --git a/hwy/foreach_target.h b/hwy/foreach_target.h index c07e86a4d6..649e55f80b 100644 --- a/hwy/foreach_target.h +++ b/hwy/foreach_target.h @@ -295,7 +295,7 @@ #endif #endif -// ------------------------------ HWY_ARCH_RVV +// ------------------------------ HWY_ARCH_RISCV #if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV) #undef HWY_TARGET diff --git a/hwy/ops/emu128-inl.h b/hwy/ops/emu128-inl.h index 312e15c1ac..47345236a4 100644 --- a/hwy/ops/emu128-inl.h +++ b/hwy/ops/emu128-inl.h @@ -2081,7 +2081,7 @@ HWY_API VFromD ConcatEven(D d, VFromD hi, VFromD lo) { // 2023-11-23: workaround for incorrect codegen (reduction_test fails for // SumsOf2 because PromoteOddTo, which uses ConcatOdd, returns zero). -#if HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128 && HWY_COMPILER_CLANG +#if HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128 && HWY_COMPILER_CLANG #define HWY_EMU128_CONCAT_INLINE HWY_NOINLINE #else #define HWY_EMU128_CONCAT_INLINE HWY_API diff --git a/hwy/targets.cc b/hwy/targets.cc index a3429acd34..bb2a2fbcf8 100644 --- a/hwy/targets.cc +++ b/hwy/targets.cc @@ -31,7 +31,7 @@ #include #endif // HWY_COMPILER_MSVC -#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RVV) && \ +#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \ HWY_OS_LINUX // sys/auxv.h does not always include asm/hwcap.h, or define HWCAP*, hence we // still include this directly. See #1199. @@ -498,7 +498,7 @@ int64_t DetectTargets() { return bits; } } // namespace s390x -#elif HWY_ARCH_RVV && HWY_HAVE_RUNTIME_DISPATCH +#elif HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH namespace rvv { #ifndef HWCAP_RVV @@ -537,7 +537,7 @@ int64_t DetectTargets() { bits |= ppc::DetectTargets(); #elif HWY_ARCH_S390X && HWY_HAVE_RUNTIME_DISPATCH bits |= s390x::DetectTargets(); -#elif HWY_ARCH_RVV && HWY_HAVE_RUNTIME_DISPATCH +#elif HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH bits |= rvv::DetectTargets(); #else diff --git a/hwy/targets.h b/hwy/targets.h index f4a74877b1..0bc13b5010 100644 --- a/hwy/targets.h +++ b/hwy/targets.h @@ -29,7 +29,7 @@ #include "hwy/detect_targets.h" #include "hwy/highway_export.h" -#if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX) +#if !HWY_ARCH_RISCV && !defined(HWY_NO_LIBCXX) #include #endif @@ -141,7 +141,7 @@ static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) { return "WASM_EMU256"; #endif -#if HWY_ARCH_RVV +#if HWY_ARCH_RISCV case HWY_RVV: return "RVV"; #endif @@ -237,7 +237,7 @@ static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) { HWY_CHOOSE_NEON(func_name), /* NEON */ \ HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */ -#elif HWY_ARCH_RVV +#elif HWY_ARCH_RISCV // See HWY_ARCH_X86 above for details. #define HWY_MAX_DYNAMIC_TARGETS 9 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV @@ -324,7 +324,7 @@ struct ChosenTarget { private: // TODO(janwas): remove RVV once is available -#if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX) +#if HWY_ARCH_RISCV || defined(HWY_NO_LIBCXX) int64_t LoadMask() const { return mask_; } void StoreMask(int64_t mask) { mask_ = mask; } @@ -334,7 +334,7 @@ struct ChosenTarget { void StoreMask(int64_t mask) { mask_.store(mask); } std::atomic mask_{1}; // Initialized to 1 so GetIndex() returns 0. -#endif // HWY_ARCH_RVV +#endif // HWY_ARCH_RISCV }; // For internal use (e.g. by FunctionCache and DisableTargets). diff --git a/hwy/tests/convert_test.cc b/hwy/tests/convert_test.cc index 7901ca55be..793b2ab900 100644 --- a/hwy/tests/convert_test.cc +++ b/hwy/tests/convert_test.cc @@ -1009,7 +1009,7 @@ class TestNonFiniteF2IConvertTo { // TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v: // target/riscv/translate.c:213 in void decode_save_opc(DisasContext *): // ctx->insn_start != NULL. -#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128) +#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128) if (sizeof(TTo) > sizeof(TF)) { return; } @@ -1154,7 +1154,7 @@ struct TestF2IPromoteTo { // TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v: // target/riscv/translate.c:213 in void decode_save_opc(DisasContext *): // ctx->insn_start != NULL. -#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128) +#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128) return; #endif @@ -1251,7 +1251,7 @@ struct TestF2IPromoteUpperLowerTo { // TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v: // target/riscv/translate.c:213 in void decode_save_opc(DisasContext *): // ctx->insn_start != NULL. -#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128) +#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128) return; #endif @@ -1395,7 +1395,7 @@ class TestNonFiniteF2IPromoteUpperLowerTo { // TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v: // target/riscv/translate.c:213 in void decode_save_opc(DisasContext *): // ctx->insn_start != NULL. -#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128) +#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128) return; #endif diff --git a/hwy/tests/test_util-inl.h b/hwy/tests/test_util-inl.h index b10bcfdbab..11a7d5052e 100644 --- a/hwy/tests/test_util-inl.h +++ b/hwy/tests/test_util-inl.h @@ -864,7 +864,7 @@ void ForUIF163264(const Func& func) { // For tests that involve loops, adjust the trip count so that emulated tests // finish quickly (but always at least 2 iterations to ensure some diversity). constexpr size_t AdjustedReps(size_t max_reps) { -#if HWY_ARCH_RVV +#if HWY_ARCH_RISCV return HWY_MAX(max_reps / 32, 2); #elif HWY_IS_DEBUG_BUILD return HWY_MAX(max_reps / 8, 2); @@ -880,7 +880,7 @@ constexpr size_t AdjustedReps(size_t max_reps) { // Same as above, but the loop trip count will be 1 << max_pow2. constexpr size_t AdjustedLog2Reps(size_t max_pow2) { // If "negative" (unsigned wraparound), use original. -#if HWY_ARCH_RVV +#if HWY_ARCH_RISCV return HWY_MIN(max_pow2 - 4, max_pow2); #elif HWY_IS_DEBUG_BUILD return HWY_MIN(max_pow2 - 1, max_pow2); diff --git a/hwy/timer-inl.h b/hwy/timer-inl.h index 2e082fce01..d9a9fa84fb 100644 --- a/hwy/timer-inl.h +++ b/hwy/timer-inl.h @@ -139,7 +139,7 @@ inline Ticks Start() { // "memory" avoids reordering. rdx = TSC >> 32. // "cc" = flags modified by SHL. : "rdx", "memory", "cc"); -#elif HWY_ARCH_RVV +#elif HWY_ARCH_RISCV asm volatile("fence; rdtime %0" : "=r"(t)); #elif defined(_WIN32) || defined(_WIN64) LARGE_INTEGER counter; diff --git a/hwy/timer.cc b/hwy/timer.cc index 129d36eb28..73e8ae39fa 100644 --- a/hwy/timer.cc +++ b/hwy/timer.cc @@ -136,7 +136,7 @@ HWY_DLLEXPORT bool HaveTimerStop(char* cpu100) { HWY_DLLEXPORT double InvariantTicksPerSecond() { #if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__) return static_cast(__ppc_get_timebase_freq()); -#elif HWY_ARCH_X86 || HWY_ARCH_RVV || (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC) +#elif HWY_ARCH_X86 || HWY_ARCH_RISCV || (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC) // We assume the x86 TSC is invariant; it is on all recent Intel/AMD CPUs. static const double freq = MeasureNominalClockRate(); return freq;