From f6518663878bbef442af566a3e91e3502d087f2c Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sat, 11 Jun 2022 16:59:49 -0400 Subject: [PATCH] Prefix Float16 intrinsics --- src/APInt-C.cpp | 6 ++-- src/julia.expmap | 6 ---- src/julia_internal.h | 14 +++++++-- src/runtime_intrinsics.c | 64 +++++++++++++++++++++++----------------- 4 files changed, 52 insertions(+), 38 deletions(-) diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp index bc0a62e21dd3e..f06d4362bf958 100644 --- a/src/APInt-C.cpp +++ b/src/APInt-C.cpp @@ -316,7 +316,7 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) { void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) { double Val; if (numbits == 16) - Val = __gnu_h2f_ieee(*(uint16_t*)pa); + Val = julia__gnu_h2f_ieee(*(uint16_t*)pa); else if (numbits == 32) Val = *(float*)pa; else if (numbits == 64) @@ -391,7 +391,7 @@ void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar val = a.roundToDouble(true); } if (onumbits == 16) - *(uint16_t*)pr = __gnu_f2h_ieee(val); + *(uint16_t*)pr = julia__gnu_f2h_ieee(val); else if (onumbits == 32) *(float*)pr = val; else if (onumbits == 64) @@ -408,7 +408,7 @@ void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar val = a.roundToDouble(false); } if (onumbits == 16) - *(uint16_t*)pr = __gnu_f2h_ieee(val); + *(uint16_t*)pr = julia__gnu_f2h_ieee(val); else if (onumbits == 32) *(float*)pr = val; else if (onumbits == 64) diff --git a/src/julia.expmap b/src/julia.expmap index 4b4a792de52b9..41299aa808572 100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -38,12 +38,6 @@ environ; __progname; - /* compiler run-time intrinsics */ - __gnu_h2f_ieee; - __extendhfsf2; - __gnu_f2h_ieee; - __truncdfhf2; - local: *; }; diff --git a/src/julia_internal.h b/src/julia_internal.h index 60583f2240aea..384ad9ccb0189 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1543,8 +1543,18 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; #define JL_GC_ASSERT_LIVE(x) (void)(x) #endif -float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT; -uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT; +//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 89c9449e55920..ea912b61ac4c3 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -15,9 +15,6 @@ const unsigned int host_char_bit = 8; // float16 intrinsics -// TODO: use LLVM's compiler-rt on all platforms (Xcode already links compiler-rt) - -#if !defined(_OS_DARWIN_) static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT { @@ -188,22 +185,17 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT return h; } -JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param) +JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) { return half_to_float(param); } -JL_DLLEXPORT float __extendhfsf2(uint16_t param) -{ - return half_to_float(param); -} - -JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param) +JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) { return float_to_half(param); } -JL_DLLEXPORT uint16_t __truncdfhf2(double param) +JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) { float res = (float)param; uint32_t resi; @@ -225,7 +217,25 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param) return float_to_half(res); } -#endif +//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) { return (double)julia__gnu_h2f_ieee(n); } +//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) { return (int32_t)julia__gnu_h2f_ieee(n); } +//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) { return (int64_t)julia__gnu_h2f_ieee(n); } +//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) { return (uint32_t)julia__gnu_h2f_ieee(n); } +//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) { return (uint64_t)julia__gnu_h2f_ieee(n); } +//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) { return julia__gnu_f2h_ieee((float)n); } +//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) { return julia__gnu_f2h_ieee((float)n); } +//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) { return julia__gnu_f2h_ieee((float)n); } +//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) { return julia__gnu_f2h_ieee((float)n); } +//HANDLE_LIBCALL(F16, F128, __extendhftf2) +//HANDLE_LIBCALL(F16, F80, __extendhfxf2) +//HANDLE_LIBCALL(F80, F16, __truncxfhf2) +//HANDLE_LIBCALL(F128, F16, __trunctfhf2) +//HANDLE_LIBCALL(PPCF128, F16, __trunctfhf2) +//HANDLE_LIBCALL(F16, I128, __fixhfti) +//HANDLE_LIBCALL(F16, I128, __fixunshfti) +//HANDLE_LIBCALL(I128, F16, __floattihf) +//HANDLE_LIBCALL(I128, F16, __floatuntihf) + // run time version of bitcast intrinsic JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v) @@ -551,9 +561,9 @@ static inline unsigned select_by_size(unsigned sz) JL_NOTSAFEPOINT } #define fp_select(a, func) \ - sizeof(a) == sizeof(float) ? func##f((float)a) : func(a) + sizeof(a) <= sizeof(float) ? func##f((float)a) : func(a) #define fp_select2(a, b, func) \ - sizeof(a) == sizeof(float) ? func##f(a, b) : func(a, b) + sizeof(a) <= sizeof(float) ? func##f(a, b) : func(a, b) // fast-function generators // @@ -597,11 +607,11 @@ static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ { \ uint16_t a = *(uint16_t*)pa; \ - float A = __gnu_h2f_ieee(a); \ + float A = julia__gnu_h2f_ieee(a); \ if (osize == 16) { \ float R; \ OP(&R, A); \ - *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \ } else { \ OP((uint16_t*)pr, A); \ } \ @@ -625,11 +635,11 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr) { \ uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ - float A = __gnu_h2f_ieee(a); \ - float B = __gnu_h2f_ieee(b); \ + float A = julia__gnu_h2f_ieee(a); \ + float B = julia__gnu_h2f_ieee(b); \ runtime_nbits = 16; \ float R = OP(A, B); \ - *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \ } // float or integer inputs, bool output @@ -650,8 +660,8 @@ static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEP { \ uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ - float A = __gnu_h2f_ieee(a); \ - float B = __gnu_h2f_ieee(b); \ + float A = julia__gnu_h2f_ieee(a); \ + float B = julia__gnu_h2f_ieee(b); \ runtime_nbits = 16; \ return OP(A, B); \ } @@ -691,12 +701,12 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc, uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ uint16_t c = *(uint16_t*)pc; \ - float A = __gnu_h2f_ieee(a); \ - float B = __gnu_h2f_ieee(b); \ - float C = __gnu_h2f_ieee(c); \ + float A = julia__gnu_h2f_ieee(a); \ + float B = julia__gnu_h2f_ieee(b); \ + float C = julia__gnu_h2f_ieee(c); \ runtime_nbits = 16; \ float R = OP(A, B, C); \ - *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \ } @@ -1318,7 +1328,7 @@ static inline int fpiseq##nbits(c_type a, c_type b) JL_NOTSAFEPOINT { \ fpiseq_n(float, 32) fpiseq_n(double, 64) #define fpiseq(a,b) \ - sizeof(a) == sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b) + sizeof(a) <= sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b) bool_fintrinsic(eq,eq_float) bool_fintrinsic(ne,ne_float) @@ -1367,7 +1377,7 @@ cvt_iintrinsic(LLVMFPtoUI, fptoui) if (!(osize < 8 * sizeof(a))) \ jl_error("fptrunc: output bitsize must be < input bitsize"); \ else if (osize == 16) \ - *(uint16_t*)pr = __gnu_f2h_ieee(a); \ + *(uint16_t*)pr = julia__gnu_f2h_ieee(a); \ else if (osize == 32) \ *(float*)pr = a; \ else if (osize == 64) \