Skip to content

Commit

Permalink
Merge pull request #2073 from luyahan:fix-rvv
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 623416155
  • Loading branch information
copybara-github committed Apr 10, 2024
2 parents b15ba1e + fcf7101 commit 701c328
Show file tree
Hide file tree
Showing 14 changed files with 31 additions and 29 deletions.
2 changes: 1 addition & 1 deletion hwy/abort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
fflush(stderr);

// Now terminate the program:
#if HWY_ARCH_RVV
#if HWY_ARCH_RISCV
exit(1); // trap/abort just freeze Spike.
#elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC
// Facilitates breaking into a debugger, but don't use this in non-debug
Expand Down
2 changes: 1 addition & 1 deletion hwy/aligned_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
namespace hwy {
namespace {

#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
// Not actually an upper bound on the size, but this value prevents crossing a
// 4K boundary (relevant on Andes).
constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
Expand Down
6 changes: 3 additions & 3 deletions hwy/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ HWY_API void ZeroBytes(void* to, size_t num_bytes) {

#if HWY_ARCH_X86
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64; // AVX-512
#elif HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && \
#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
__riscv_v_intrinsic >= 11000
// Not actually an upper bound on the size.
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096;
Expand All @@ -386,7 +386,7 @@ static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16;
// exceed the stack size.
#if HWY_ARCH_X86
#define HWY_ALIGN_MAX alignas(64)
#elif HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && \
#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
__riscv_v_intrinsic >= 11000
#define HWY_ALIGN_MAX alignas(8) // only elements need be aligned
#else
Expand Down Expand Up @@ -1046,7 +1046,7 @@ HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From& val) {

// RVV with f16 extension supports _Float16 and f16 vector ops. If set, implies
// HWY_HAVE_FLOAT16.
#if HWY_ARCH_RVV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600
#if HWY_ARCH_RISCV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600
#define HWY_RVV_HAVE_F16_VEC 1
#else
#define HWY_RVV_HAVE_F16_VEC 0
Expand Down
2 changes: 1 addition & 1 deletion hwy/contrib/sort/vqsort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
// unavailable on Android and non-Linux RVV, we assume that those systems lack
// getrandom. Note that the only supported sources of entropy are getrandom or
// Windows, thus VQSORT_SECURE_SEED=0 when this is 0 and we are not on Windows.
#if defined(ANDROID) || defined(__ANDROID__) || (HWY_ARCH_RVV && !HWY_OS_LINUX)
#if defined(ANDROID) || defined(__ANDROID__) || (HWY_ARCH_RISCV && !HWY_OS_LINUX)
#define VQSORT_GETRANDOM 0
#endif

Expand Down
8 changes: 5 additions & 3 deletions hwy/detect_compiler_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,12 @@
#endif

#ifdef __riscv
#define HWY_ARCH_RVV 1
#define HWY_ARCH_RISCV 1
#else
#define HWY_ARCH_RVV 0
#define HWY_ARCH_RISCV 0
#endif
// DEPRECATED names; please use HWY_ARCH_RISCV instead.
#define HWY_ARCH_RVV HWY_ARCH_RISCV

#if defined(__s390x__)
#define HWY_ARCH_S390X 1
Expand All @@ -247,7 +249,7 @@
// It is an error to detect multiple architectures at the same time, but OK to
// detect none of the above.
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_ARM_OLD + \
HWY_ARCH_WASM + HWY_ARCH_RVV + HWY_ARCH_S390X) > 1
HWY_ARCH_WASM + HWY_ARCH_RISCV + HWY_ARCH_S390X) > 1
#error "Must not detect more than one architecture"
#endif

Expand Down
4 changes: 2 additions & 2 deletions hwy/detect_targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@
#endif

// RVV requires intrinsics 0.11 or later, see #1156.
#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
#define HWY_BASELINE_RVV HWY_RVV
#else
#define HWY_BASELINE_RVV 0
Expand Down Expand Up @@ -556,7 +556,7 @@
// On Arm, PPC, S390X, and RISC-V: GCC and Clang 16+ do, and we require Linux
// to detect CPU capabilities. Currently require opt-in for Clang on Arm
// because it is experimental.
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RVV) && \
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
(HWY_COMPILER_GCC_ACTUAL || \
(HWY_COMPILER_CLANG >= 1600 && \
(!HWY_ARCH_ARM || defined(HWY_ENABLE_CLANG_ARM_DISPATCH)))) && \
Expand Down
2 changes: 1 addition & 1 deletion hwy/foreach_target.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@
#endif
#endif

// ------------------------------ HWY_ARCH_RVV
// ------------------------------ HWY_ARCH_RISCV

#if (HWY_TARGETS & HWY_RVV) && (HWY_STATIC_TARGET != HWY_RVV)
#undef HWY_TARGET
Expand Down
2 changes: 1 addition & 1 deletion hwy/ops/emu128-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2081,7 +2081,7 @@ HWY_API VFromD<D> ConcatEven(D d, VFromD<D> hi, VFromD<D> lo) {

// 2023-11-23: workaround for incorrect codegen (reduction_test fails for
// SumsOf2 because PromoteOddTo, which uses ConcatOdd, returns zero).
#if HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128 && HWY_COMPILER_CLANG
#if HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128 && HWY_COMPILER_CLANG
#define HWY_EMU128_CONCAT_INLINE HWY_NOINLINE
#else
#define HWY_EMU128_CONCAT_INLINE HWY_API
Expand Down
6 changes: 3 additions & 3 deletions hwy/targets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include <cpuid.h>
#endif // HWY_COMPILER_MSVC

#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RVV) && \
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
HWY_OS_LINUX
// sys/auxv.h does not always include asm/hwcap.h, or define HWCAP*, hence we
// still include this directly. See #1199.
Expand Down Expand Up @@ -498,7 +498,7 @@ int64_t DetectTargets() {
return bits;
}
} // namespace s390x
#elif HWY_ARCH_RVV && HWY_HAVE_RUNTIME_DISPATCH
#elif HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
namespace rvv {

#ifndef HWCAP_RVV
Expand Down Expand Up @@ -537,7 +537,7 @@ int64_t DetectTargets() {
bits |= ppc::DetectTargets();
#elif HWY_ARCH_S390X && HWY_HAVE_RUNTIME_DISPATCH
bits |= s390x::DetectTargets();
#elif HWY_ARCH_RVV && HWY_HAVE_RUNTIME_DISPATCH
#elif HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
bits |= rvv::DetectTargets();

#else
Expand Down
10 changes: 5 additions & 5 deletions hwy/targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#include "hwy/detect_targets.h"
#include "hwy/highway_export.h"

#if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX)
#if !HWY_ARCH_RISCV && !defined(HWY_NO_LIBCXX)
#include <atomic>
#endif

Expand Down Expand Up @@ -141,7 +141,7 @@ static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
return "WASM_EMU256";
#endif

#if HWY_ARCH_RVV
#if HWY_ARCH_RISCV
case HWY_RVV:
return "RVV";
#endif
Expand Down Expand Up @@ -237,7 +237,7 @@ static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
HWY_CHOOSE_NEON(func_name), /* NEON */ \
HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */

#elif HWY_ARCH_RVV
#elif HWY_ARCH_RISCV
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS 9
#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
Expand Down Expand Up @@ -324,7 +324,7 @@ struct ChosenTarget {

private:
// TODO(janwas): remove RVV once <atomic> is available
#if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX)
#if HWY_ARCH_RISCV || defined(HWY_NO_LIBCXX)
int64_t LoadMask() const { return mask_; }
void StoreMask(int64_t mask) { mask_ = mask; }

Expand All @@ -334,7 +334,7 @@ struct ChosenTarget {
void StoreMask(int64_t mask) { mask_.store(mask); }

std::atomic<int64_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
#endif // HWY_ARCH_RVV
#endif // HWY_ARCH_RISCV
};

// For internal use (e.g. by FunctionCache and DisableTargets).
Expand Down
8 changes: 4 additions & 4 deletions hwy/tests/convert_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ class TestNonFiniteF2IConvertTo {
// TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v:
// target/riscv/translate.c:213 in void decode_save_opc(DisasContext *):
// ctx->insn_start != NULL.
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128)
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128)
if (sizeof(TTo) > sizeof(TF)) {
return;
}
Expand Down Expand Up @@ -1154,7 +1154,7 @@ struct TestF2IPromoteTo {
// TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v:
// target/riscv/translate.c:213 in void decode_save_opc(DisasContext *):
// ctx->insn_start != NULL.
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128)
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128)
return;
#endif

Expand Down Expand Up @@ -1251,7 +1251,7 @@ struct TestF2IPromoteUpperLowerTo {
// TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v:
// target/riscv/translate.c:213 in void decode_save_opc(DisasContext *):
// ctx->insn_start != NULL.
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128)
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128)
return;
#endif

Expand Down Expand Up @@ -1395,7 +1395,7 @@ class TestNonFiniteF2IPromoteUpperLowerTo {
// TODO(janwas): workaround for QEMU 7.2 crash on vfwcvt_rtz_x_f_v:
// target/riscv/translate.c:213 in void decode_save_opc(DisasContext *):
// ctx->insn_start != NULL.
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RVV && HWY_TARGET == HWY_EMU128)
#if HWY_TARGET == HWY_RVV || (HWY_ARCH_RISCV && HWY_TARGET == HWY_EMU128)
return;
#endif

Expand Down
4 changes: 2 additions & 2 deletions hwy/tests/test_util-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ void ForUIF163264(const Func& func) {
// For tests that involve loops, adjust the trip count so that emulated tests
// finish quickly (but always at least 2 iterations to ensure some diversity).
constexpr size_t AdjustedReps(size_t max_reps) {
#if HWY_ARCH_RVV
#if HWY_ARCH_RISCV
return HWY_MAX(max_reps / 32, 2);
#elif HWY_IS_DEBUG_BUILD
return HWY_MAX(max_reps / 8, 2);
Expand All @@ -880,7 +880,7 @@ constexpr size_t AdjustedReps(size_t max_reps) {
// Same as above, but the loop trip count will be 1 << max_pow2.
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
// If "negative" (unsigned wraparound), use original.
#if HWY_ARCH_RVV
#if HWY_ARCH_RISCV
return HWY_MIN(max_pow2 - 4, max_pow2);
#elif HWY_IS_DEBUG_BUILD
return HWY_MIN(max_pow2 - 1, max_pow2);
Expand Down
2 changes: 1 addition & 1 deletion hwy/timer-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ inline Ticks Start() {
// "memory" avoids reordering. rdx = TSC >> 32.
// "cc" = flags modified by SHL.
: "rdx", "memory", "cc");
#elif HWY_ARCH_RVV
#elif HWY_ARCH_RISCV
asm volatile("fence; rdtime %0" : "=r"(t));
#elif defined(_WIN32) || defined(_WIN64)
LARGE_INTEGER counter;
Expand Down
2 changes: 1 addition & 1 deletion hwy/timer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ HWY_DLLEXPORT bool HaveTimerStop(char* cpu100) {
HWY_DLLEXPORT double InvariantTicksPerSecond() {
#if HWY_ARCH_PPC && defined(__GLIBC__) && defined(__powerpc64__)
return static_cast<double>(__ppc_get_timebase_freq());
#elif HWY_ARCH_X86 || HWY_ARCH_RVV || (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC)
#elif HWY_ARCH_X86 || HWY_ARCH_RISCV || (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC)
// We assume the x86 TSC is invariant; it is on all recent Intel/AMD CPUs.
static const double freq = MeasureNominalClockRate();
return freq;
Expand Down

0 comments on commit 701c328

Please sign in to comment.