From f6518663878bbef442af566a3e91e3502d087f2c Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Sat, 11 Jun 2022 16:59:49 -0400
Subject: [PATCH] Prefix Float16 intrinsics

---
 src/APInt-C.cpp          |  6 ++--
 src/julia.expmap         |  6 ----
 src/julia_internal.h     | 14 +++++++--
 src/runtime_intrinsics.c | 64 +++++++++++++++++++++++-----------------
 4 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp
index bc0a62e21dd3e..f06d4362bf958 100644
--- a/src/APInt-C.cpp
+++ b/src/APInt-C.cpp
@@ -316,7 +316,7 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) {
 void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) {
     double Val;
     if (numbits == 16)
-        Val = __gnu_h2f_ieee(*(uint16_t*)pa);
+        Val = julia__gnu_h2f_ieee(*(uint16_t*)pa);
     else if (numbits == 32)
         Val = *(float*)pa;
     else if (numbits == 64)
@@ -391,7 +391,7 @@ void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar
         val = a.roundToDouble(true);
     }
     if (onumbits == 16)
-        *(uint16_t*)pr = __gnu_f2h_ieee(val);
+        *(uint16_t*)pr = julia__gnu_f2h_ieee(val);
     else if (onumbits == 32)
         *(float*)pr = val;
     else if (onumbits == 64)
@@ -408,7 +408,7 @@ void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar
         val = a.roundToDouble(false);
     }
     if (onumbits == 16)
-        *(uint16_t*)pr = __gnu_f2h_ieee(val);
+        *(uint16_t*)pr = julia__gnu_f2h_ieee(val);
     else if (onumbits == 32)
         *(float*)pr = val;
     else if (onumbits == 64)
diff --git a/src/julia.expmap b/src/julia.expmap
index 4b4a792de52b9..41299aa808572 100644
--- a/src/julia.expmap
+++ b/src/julia.expmap
@@ -38,12 +38,6 @@
     environ;
     __progname;
 
-    /* compiler run-time intrinsics */
-    __gnu_h2f_ieee;
-    __extendhfsf2;
-    __gnu_f2h_ieee;
-    __truncdfhf2;
-
   local:
     *;
 };
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 60583f2240aea..384ad9ccb0189 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -1543,8 +1543,18 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
   #define JL_GC_ASSERT_LIVE(x) (void)(x)
 #endif
 
-float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
-uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
+JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT;
+//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
 }
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 89c9449e55920..ea912b61ac4c3 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -15,9 +15,6 @@
 const unsigned int host_char_bit = 8;
 
 // float16 intrinsics
-// TODO: use LLVM's compiler-rt on all platforms (Xcode already links compiler-rt)
-
-#if !defined(_OS_DARWIN_)
 
 static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT
 {
@@ -188,22 +185,17 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT
     return h;
 }
 
-JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
+JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param)
 {
     return half_to_float(param);
 }
 
-JL_DLLEXPORT float __extendhfsf2(uint16_t param)
-{
-    return half_to_float(param);
-}
-
-JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
+JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param)
 {
     return float_to_half(param);
 }
 
-JL_DLLEXPORT uint16_t __truncdfhf2(double param)
+JL_DLLEXPORT uint16_t julia__truncdfhf2(double param)
 {
     float res = (float)param;
     uint32_t resi;
@@ -225,7 +217,25 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param)
     return float_to_half(res);
 }
 
-#endif
+//JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) { return (double)julia__gnu_h2f_ieee(n); }
+//JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) { return (int32_t)julia__gnu_h2f_ieee(n); }
+//JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) { return (int64_t)julia__gnu_h2f_ieee(n); }
+//JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) { return (uint32_t)julia__gnu_h2f_ieee(n); }
+//JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) { return (uint64_t)julia__gnu_h2f_ieee(n); }
+//JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) { return julia__gnu_f2h_ieee((float)n); }
+//JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) { return julia__gnu_f2h_ieee((float)n); }
+//JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) { return julia__gnu_f2h_ieee((float)n); }
+//JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) { return julia__gnu_f2h_ieee((float)n); }
+//HANDLE_LIBCALL(F16, F128, __extendhftf2)
+//HANDLE_LIBCALL(F16, F80, __extendhfxf2)
+//HANDLE_LIBCALL(F80, F16, __truncxfhf2)
+//HANDLE_LIBCALL(F128, F16, __trunctfhf2)
+//HANDLE_LIBCALL(PPCF128, F16, __trunctfhf2)
+//HANDLE_LIBCALL(F16, I128, __fixhfti)
+//HANDLE_LIBCALL(F16, I128, __fixunshfti)
+//HANDLE_LIBCALL(I128, F16, __floattihf)
+//HANDLE_LIBCALL(I128, F16, __floatuntihf)
+
 
 // run time version of bitcast intrinsic
 JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v)
@@ -551,9 +561,9 @@ static inline unsigned select_by_size(unsigned sz) JL_NOTSAFEPOINT
     }
 
 #define fp_select(a, func) \
-    sizeof(a) == sizeof(float) ? func##f((float)a) : func(a)
+    sizeof(a) <= sizeof(float) ? func##f((float)a) : func(a)
 #define fp_select2(a, b, func) \
-    sizeof(a) == sizeof(float) ? func##f(a, b) : func(a, b)
+    sizeof(a) <= sizeof(float) ? func##f(a, b) : func(a, b)
 
 // fast-function generators //
 
@@ -597,11 +607,11 @@ static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
 static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
 { \
     uint16_t a = *(uint16_t*)pa; \
-    float A = __gnu_h2f_ieee(a); \
+    float A = julia__gnu_h2f_ieee(a); \
     if (osize == 16) { \
         float R; \
         OP(&R, A); \
-        *(uint16_t*)pr = __gnu_f2h_ieee(R); \
+        *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
     } else { \
         OP((uint16_t*)pr, A); \
     } \
@@ -625,11 +635,11 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr)
 { \
     uint16_t a = *(uint16_t*)pa; \
     uint16_t b = *(uint16_t*)pb; \
-    float A = __gnu_h2f_ieee(a); \
-    float B = __gnu_h2f_ieee(b); \
+    float A = julia__gnu_h2f_ieee(a); \
+    float B = julia__gnu_h2f_ieee(b); \
     runtime_nbits = 16; \
     float R = OP(A, B); \
-    *(uint16_t*)pr = __gnu_f2h_ieee(R); \
+    *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
 }
 
 // float or integer inputs, bool output
@@ -650,8 +660,8 @@ static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEP
 { \
     uint16_t a = *(uint16_t*)pa; \
     uint16_t b = *(uint16_t*)pb; \
-    float A = __gnu_h2f_ieee(a); \
-    float B = __gnu_h2f_ieee(b); \
+    float A = julia__gnu_h2f_ieee(a); \
+    float B = julia__gnu_h2f_ieee(b); \
     runtime_nbits = 16; \
     return OP(A, B); \
 }
@@ -691,12 +701,12 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc,
     uint16_t a = *(uint16_t*)pa; \
     uint16_t b = *(uint16_t*)pb; \
     uint16_t c = *(uint16_t*)pc; \
-    float A = __gnu_h2f_ieee(a); \
-    float B = __gnu_h2f_ieee(b); \
-    float C = __gnu_h2f_ieee(c); \
+    float A = julia__gnu_h2f_ieee(a); \
+    float B = julia__gnu_h2f_ieee(b); \
+    float C = julia__gnu_h2f_ieee(c); \
     runtime_nbits = 16; \
     float R = OP(A, B, C); \
-    *(uint16_t*)pr = __gnu_f2h_ieee(R); \
+    *(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
 }
 
 
@@ -1318,7 +1328,7 @@ static inline int fpiseq##nbits(c_type a, c_type b) JL_NOTSAFEPOINT { \
 fpiseq_n(float, 32)
 fpiseq_n(double, 64)
 #define fpiseq(a,b) \
-    sizeof(a) == sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b)
+    sizeof(a) <= sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b)
 
 bool_fintrinsic(eq,eq_float)
 bool_fintrinsic(ne,ne_float)
@@ -1367,7 +1377,7 @@ cvt_iintrinsic(LLVMFPtoUI, fptoui)
         if (!(osize < 8 * sizeof(a))) \
             jl_error("fptrunc: output bitsize must be < input bitsize"); \
         else if (osize == 16) \
-            *(uint16_t*)pr = __gnu_f2h_ieee(a); \
+            *(uint16_t*)pr = julia__gnu_f2h_ieee(a); \
         else if (osize == 32) \
             *(float*)pr = a; \
         else if (osize == 64) \