diff --git a/base/atomics.jl b/base/atomics.jl index b9a3289b68944..930b164512c54 100644 --- a/base/atomics.jl +++ b/base/atomics.jl @@ -323,7 +323,7 @@ const llvmtypes = IdDict{Any,String}( Int32 => "i32", UInt32 => "i32", Int64 => "i64", UInt64 => "i64", Int128 => "i128", UInt128 => "i128", - Float16 => "i16", # half + Float16 => "half", Float32 => "float", Float64 => "double", ) diff --git a/base/float.jl b/base/float.jl index 62dc732d107d5..db755d52fb206 100644 --- a/base/float.jl +++ b/base/float.jl @@ -45,13 +45,8 @@ A not-a-number value of type [`Float64`](@ref). const NaN = NaN64 ## conversions to floating-point ## -Float16(x::Integer) = convert(Float16, convert(Float32, x)) -for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - @eval promote_rule(::Type{Float16}, ::Type{$t}) = Float16 -end -promote_rule(::Type{Float16}, ::Type{Bool}) = Float16 -for t1 in (Float32, Float64) +for t1 in (Float16, Float32, Float64) for st in (Int8, Int16, Int32, Int64) @eval begin (::Type{$t1})(x::($st)) = sitofp($t1, x) @@ -65,7 +60,6 @@ for t1 in (Float32, Float64) end end end -(::Type{T})(x::Float16) where {T<:Integer} = T(Float32(x)) Bool(x::Real) = x==0 ? false : x==1 ? 
true : throw(InexactError(:Bool, Bool, x)) @@ -73,6 +67,8 @@ promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64 promote_rule(::Type{Float64}, ::Type{Int128}) = Float64 promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32 promote_rule(::Type{Float32}, ::Type{Int128}) = Float32 +promote_rule(::Type{Float16}, ::Type{UInt128}) = Float16 +promote_rule(::Type{Float16}, ::Type{Int128}) = Float16 function Float64(x::UInt128) x == 0 && return 0.0 @@ -134,115 +130,16 @@ function Float32(x::Int128) reinterpret(Float32, s | d + y) end -function Float16(val::Float32) - f = reinterpret(UInt32, val) - if isnan(val) - t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16)) - return reinterpret(Float16, t ⊻ ((f >> 0xd) % UInt16)) - end - i = (f >> 23) & 0x1ff + 1 - sh = shifttable[i] - f &= 0x007fffff - h::UInt16 = basetable[i] + (f >> sh) - # round - # NOTE: we maybe should ignore NaNs here, but the payload is - # getting truncated anyway so "rounding" it might not matter - nextbit = (f >> (sh-1)) & 1 - if nextbit != 0 - # Round halfway to even or check lower bits - if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 - h += 1 - end - end - reinterpret(Float16, h) -end - -function Float32(val::Float16) - local ival::UInt32 = reinterpret(UInt16, val) - local sign::UInt32 = (ival & 0x8000) >> 15 - local exp::UInt32 = (ival & 0x7c00) >> 10 - local sig::UInt32 = (ival & 0x3ff) >> 0 - local ret::UInt32 - - if exp == 0 - if sig == 0 - sign = sign << 31 - ret = sign | exp | sig - else - n_bit = 1 - bit = 0x0200 - while (bit & sig) == 0 - n_bit = n_bit + 1 - bit = bit >> 1 - end - sign = sign << 31 - exp = (-14 - n_bit + 127) << 23 - sig = ((sig & (~bit)) << n_bit) << (23 - 10) - ret = sign | exp | sig - end - elseif exp == 0x1f - if sig == 0 # Inf - if sign == 0 - ret = 0x7f800000 - else - ret = 0xff800000 - end - else # NaN - ret = 0x7fc00000 | (sign<<31) | (sig<<(23-10)) - end - else - sign = sign << 31 - exp = (exp - 15 + 127) << 23 - sig = sig << (23 - 10) - ret = sign | exp | sig - end - 
return reinterpret(Float32, ret) -end - -# Float32 -> Float16 algorithm from: -# "Fast Half Float Conversion" by Jeroen van der Zijp -# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf - -const basetable = Vector{UInt16}(uninitialized, 512) -const shifttable = Vector{UInt8}(uninitialized, 512) - -for i = 0:255 - e = i - 127 - if e < -24 # Very small numbers map to zero - basetable[i|0x000+1] = 0x0000 - basetable[i|0x100+1] = 0x8000 - shifttable[i|0x000+1] = 24 - shifttable[i|0x100+1] = 24 - elseif e < -14 # Small numbers map to denorms - basetable[i|0x000+1] = (0x0400>>(-e-14)) - basetable[i|0x100+1] = (0x0400>>(-e-14)) | 0x8000 - shifttable[i|0x000+1] = -e-1 - shifttable[i|0x100+1] = -e-1 - elseif e <= 15 # Normal numbers just lose precision - basetable[i|0x000+1] = ((e+15)<<10) - basetable[i|0x100+1] = ((e+15)<<10) | 0x8000 - shifttable[i|0x000+1] = 13 - shifttable[i|0x100+1] = 13 - elseif e < 128 # Large numbers map to Infinity - basetable[i|0x000+1] = 0x7C00 - basetable[i|0x100+1] = 0xFC00 - shifttable[i|0x000+1] = 24 - shifttable[i|0x100+1] = 24 - else # Infinity and NaN's stay Infinity and NaN's - basetable[i|0x000+1] = 0x7C00 - basetable[i|0x100+1] = 0xFC00 - shifttable[i|0x000+1] = 13 - shifttable[i|0x100+1] = 13 - end -end +Float16(x::UInt128) = convert(Float16, Float32(x)) +Float16(x::Int128) = convert(Float16, Float32(x)) -#convert(::Type{Float16}, x::Float32) = fptrunc(Float16, x) +Float16(x::Float32) = fptrunc(Float16, x) +Float16(x::Float64) = fptrunc(Float16, x) Float32(x::Float64) = fptrunc(Float32, x) -Float16(x::Float64) = Float16(Float32(x)) -#convert(::Type{Float32}, x::Float16) = fpext(Float32, x) +Float32(x::Float16) = fpext(Float32, x) Float64(x::Float32) = fpext(Float64, x) -Float64(x::Float16) = Float64(Float32(x)) +Float64(x::Float16) = fpext(Float64, x) AbstractFloat(x::Bool) = Float64(x) AbstractFloat(x::Int8) = Float64(x) @@ -293,14 +190,14 @@ function unsafe_trunc end for Ti in (Int8, Int16, Int32, Int64) @eval begin - 
unsafe_trunc(::Type{$Ti}, x::Float16) = unsafe_trunc($Ti, Float32(x)) + unsafe_trunc(::Type{$Ti}, x::Float16) = fptosi($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float32) = fptosi($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float64) = fptosi($Ti, x) end end for Ti in (UInt8, UInt16, UInt32, UInt64) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float16) = unsafe_trunc($Ti, Float32(x)) + unsafe_trunc(::Type{$Ti}, x::Float16) = fptoui($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float32) = fptoui($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float64) = fptoui($Ti, x) end @@ -339,37 +236,36 @@ unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x)) # matches convert methods # also determines floor, ceil, round +trunc(::Type{Signed}, x::Float16) = trunc(Int,x) trunc(::Type{Signed}, x::Float32) = trunc(Int,x) trunc(::Type{Signed}, x::Float64) = trunc(Int,x) +trunc(::Type{Unsigned}, x::Float16) = trunc(UInt,x) trunc(::Type{Unsigned}, x::Float32) = trunc(UInt,x) trunc(::Type{Unsigned}, x::Float64) = trunc(UInt,x) +trunc(::Type{Integer}, x::Float16) = trunc(Int,x) trunc(::Type{Integer}, x::Float32) = trunc(Int,x) trunc(::Type{Integer}, x::Float64) = trunc(Int,x) -trunc(::Type{T}, x::Float16) where {T<:Integer} = trunc(T, Float32(x)) # fallbacks floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,floor(x)) -floor(::Type{T}, x::Float16) where {T<:Integer} = floor(T, Float32(x)) ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,ceil(x)) -ceil(::Type{T}, x::Float16) where {T<:Integer} = ceil(T, Float32(x)) round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x)) -round(::Type{T}, x::Float16) where {T<:Integer} = round(T, Float32(x)) trunc(x::Float64) = trunc_llvm(x) trunc(x::Float32) = trunc_llvm(x) -trunc(x::Float16) = Float16(trunc(Float32(x))) +trunc(x::Float16) = trunc_llvm(x) floor(x::Float64) = floor_llvm(x) floor(x::Float32) = floor_llvm(x) -floor(x::Float16) = Float16(floor(Float32(x))) +floor(x::Float16) = floor_llvm(x) ceil(x::Float64) = 
ceil_llvm(x) ceil(x::Float32) = ceil_llvm(x) -ceil(x::Float16) = Float16( ceil(Float32(x))) +ceil(x::Float16) = ceil_llvm(x) round(x::Float64) = rint_llvm(x) round(x::Float32) = rint_llvm(x) -round(x::Float16) = Float16(round(Float32(x))) +round(x::Float16) = rint_llvm(x) ## floating point promotions ## promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 @@ -384,36 +280,30 @@ _default_type(T::Union{Type{Real},Type{AbstractFloat}}) = Float64 ## floating point arithmetic ## -(x::Float64) = neg_float(x) -(x::Float32) = neg_float(x) --(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) ⊻ 0x8000) +-(x::Float16) = neg_float(x) -for op in (:+, :-, :*, :/, :\, :^) - @eval ($op)(a::Float16, b::Float16) = Float16(($op)(Float32(a), Float32(b))) -end ++(x::Float16, y::Float16) = add_float(x, y) +(x::Float32, y::Float32) = add_float(x, y) +(x::Float64, y::Float64) = add_float(x, y) +-(x::Float16, y::Float16) = sub_float(x, y) -(x::Float32, y::Float32) = sub_float(x, y) -(x::Float64, y::Float64) = sub_float(x, y) +*(x::Float16, y::Float16) = mul_float(x, y) *(x::Float32, y::Float32) = mul_float(x, y) *(x::Float64, y::Float64) = mul_float(x, y) +/(x::Float16, y::Float16) = div_float(x, y) /(x::Float32, y::Float32) = div_float(x, y) /(x::Float64, y::Float64) = div_float(x, y) muladd(x::Float32, y::Float32, z::Float32) = muladd_float(x, y, z) muladd(x::Float64, y::Float64, z::Float64) = muladd_float(x, y, z) -function muladd(a::Float16, b::Float16, c::Float16) - Float16(muladd(Float32(a), Float32(b), Float32(c))) -end +muladd(x::Float16, y::Float16, z::Float16) = muladd_float(x, y, z) # TODO: faster floating point div? # TODO: faster floating point fld? # TODO: faster floating point mod? 
-for func in (:div,:fld,:cld,:rem,:mod) - @eval begin - $func(a::Float16,b::Float16) = Float16($func(Float32(a),Float32(b))) - end -end - +rem(x::Float16, y::Float16) = rem_float(x, y) rem(x::Float32, y::Float32) = rem_float(x, y) rem(x::Float64, y::Float64) = rem_float(x, y) @@ -431,33 +321,25 @@ function mod(x::T, y::T) where T<:AbstractFloat end ## floating point comparisons ## -function ==(x::Float16, y::Float16) - ix = reinterpret(UInt16,x) - iy = reinterpret(UInt16,y) - if (ix|iy)&0x7fff > 0x7c00 #isnan(x) || isnan(y) - return false - end - if (ix|iy)&0x7fff == 0x0000 - return true - end - return ix == iy -end +==(x::Float16, y::Float16) = eq_float(x, y) ==(x::Float32, y::Float32) = eq_float(x, y) ==(x::Float64, y::Float64) = eq_float(x, y) +!=(x::Float16, y::Float16) = ne_float(x, y) !=(x::Float32, y::Float32) = ne_float(x, y) !=(x::Float64, y::Float64) = ne_float(x, y) +<( x::Float16, y::Float16) = lt_float(x, y) <( x::Float32, y::Float32) = lt_float(x, y) <( x::Float64, y::Float64) = lt_float(x, y) +<=(x::Float16, y::Float16) = le_float(x, y) <=(x::Float32, y::Float32) = le_float(x, y) <=(x::Float64, y::Float64) = le_float(x, y) +isequal(x::Float16, y::Float16) = fpiseq(x, y) isequal(x::Float32, y::Float32) = fpiseq(x, y) isequal(x::Float64, y::Float64) = fpiseq(x, y) +isless( x::Float16, y::Float16) = fpislt(x, y) isless( x::Float32, y::Float32) = fpislt(x, y) isless( x::Float64, y::Float64) = fpislt(x, y) -for op in (:<, :<=, :isless) - @eval ($op)(a::Float16, b::Float16) = ($op)(Float32(a), Float32(b)) -end # Exact Float (Tf) vs Integer (Ti) comparisons # Assumes: @@ -512,7 +394,7 @@ end <=(x::Union{Int32,UInt32}, y::Float32) = Float64(x)<=Float64(y) -abs(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) & 0x7fff) +abs(x::Float16) = abs_float(x) abs(x::Float32) = abs_float(x) abs(x::Float64) = abs_float(x) @@ -648,7 +530,7 @@ such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`. 
prevfloat(x::AbstractFloat) = nextfloat(x,-1) for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - for Tf in (Float32, Float64) + for Tf in (Float16, Float32, Float64) if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf) # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or @@ -825,6 +707,7 @@ eps(::AbstractFloat) ## byte order swaps for arbitrary-endianness serialization/deserialization ## +bswap(x::Float16) = bswap_int(x) bswap(x::Float32) = bswap_int(x) bswap(x::Float64) = bswap_int(x) diff --git a/base/math.jl b/base/math.jl index 9fe4c3427d9dd..80e28362653f8 100644 --- a/base/math.jl +++ b/base/math.jl @@ -723,6 +723,8 @@ end end z end +@inline ^(x::Float16, y::Float16) = Float16(Float32(x)^Float32(y)) + @inline ^(x::Float64, y::Integer) = ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, Float64(y)) @inline ^(x::Float32, y::Integer) = ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, Float32(y)) @inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ y) diff --git a/base/rtlib/RTLIB.jl b/base/rtlib/RTLIB.jl new file mode 100644 index 0000000000000..c544fbb3903d9 --- /dev/null +++ b/base/rtlib/RTLIB.jl @@ -0,0 +1,130 @@ +module RTLIB + +import Core.Intrinsics: ne_float, bitcast, fpext, fptrunc + +function register(f, rtype, argt, name) + ccall(:jl_extern_c, Nothing, (Any, Any, Any, Ptr{UInt8}), + f, rtype, argt, name) +end + +isnan(x::Float32) = ne_float(x,x) + +const basetable = Vector{UInt16}(uninitialized, 512) +const shifttable = Vector{UInt8}(uninitialized, 512) + +# Trunc +function truncsfhf2(val::Float32) + f = bitcast(UInt32, val) + if isnan(val) + t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16)) + return bitcast(Float16, t ⊻ ((f >> 0xd) % UInt16)) + end + i = (f >> 23) & 0x1ff + 1 + sh = shifttable[i] + f &= 0x007fffff + h::UInt16 = basetable[i] + (f >> sh) + # round + # NOTE: we maybe should ignore NaNs 
here, but the payload is + # getting truncated anyway so "rounding" it might not matter + nextbit = (f >> (sh-1)) & 1 + if nextbit != 0 + # Round halfway to even or check lower bits + if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 + h += 1 + end + end + return bitcast(Float16, h) +end +register(truncsfhf2, Float16, Tuple{Float32}, "__truncsfhf2") +register(truncsfhf2, Float16, Tuple{Float32}, "__gnu_f2h_ieee") + +function truncdfhf2(x::Float64) + return truncsfhf2(fptrunc(Float32, x)) +end +register(truncdfhf2, Float16, Tuple{Float64}, "__truncdfhf2") + +# Extend +function extendhfsf2(val::Float16) + local ival::UInt32 = bitcast(UInt16, val) + local sign::UInt32 = (ival & 0x8000) >> 15 + local exp::UInt32 = (ival & 0x7c00) >> 10 + local sig::UInt32 = (ival & 0x3ff) >> 0 + local ret::UInt32 + + if exp == 0 + if sig == 0 + sign = sign << 31 + ret = sign | exp | sig + else + n_bit = 1 + bit = 0x0200 + while (bit & sig) == 0 + n_bit = n_bit + 1 + bit = bit >> 1 + end + sign = sign << 31 + exp = (-14 - n_bit + 127) << 23 + sig = ((sig & (~bit)) << n_bit) << (23 - 10) + ret = sign | exp | sig + end + elseif exp == 0x1f + if sig == 0 # Inf + if sign == 0 + ret = 0x7f800000 + else + ret = 0xff800000 + end + else # NaN + ret = 0x7fc00000 | (sign<<31) | (sig<<(23-10)) + end + else + sign = sign << 31 + exp = (exp - 15 + 127) << 23 + sig = sig << (23 - 10) + ret = sign | exp | sig + end + return bitcast(Float32, ret) +end +register(extendhfsf2, Float32, Tuple{Float16}, "__extendhfsf2") +register(extendhfsf2, Float32, Tuple{Float16}, "__gnu_h2f_ieee") + +function extendhfdf2(x::Float16) + return fpext(Float64, extendhfsf2(x)) +end +register(extendhfdf2, Float64, Tuple{Float16}, "__extendhfdf2") + +# Float32 -> Float16 algorithm from: +# "Fast Half Float Conversion" by Jeroen van der Zijp +# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf + +for i = 0:255 + e = i - 127 + if e < -24 # Very small numbers map to zero + basetable[i|0x000+1] = 0x0000 + basetable[i|0x100+1] = 
0x8000 + shifttable[i|0x000+1] = 24 + shifttable[i|0x100+1] = 24 + elseif e < -14 # Small numbers map to denorms + basetable[i|0x000+1] = (0x0400>>(-e-14)) + basetable[i|0x100+1] = (0x0400>>(-e-14)) | 0x8000 + shifttable[i|0x000+1] = -e-1 + shifttable[i|0x100+1] = -e-1 + elseif e <= 15 # Normal numbers just lose precision + basetable[i|0x000+1] = ((e+15)<<10) + basetable[i|0x100+1] = ((e+15)<<10) | 0x8000 + shifttable[i|0x000+1] = 13 + shifttable[i|0x100+1] = 13 + elseif e < 128 # Large numbers map to Infinity + basetable[i|0x000+1] = 0x7C00 + basetable[i|0x100+1] = 0xFC00 + shifttable[i|0x000+1] = 24 + shifttable[i|0x100+1] = 24 + else # Infinity and NaN's stay Infinity and NaN's + basetable[i|0x000+1] = 0x7C00 + basetable[i|0x100+1] = 0xFC00 + shifttable[i|0x000+1] = 13 + shifttable[i|0x100+1] = 13 + end +end + +end diff --git a/base/sysimg.jl b/base/sysimg.jl index 766a2f707da5e..8c5e4e622c29b 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -186,6 +186,8 @@ include("namedtuple.jl") include("hashing.jl") include("rounding.jl") using .Rounding +include("rtlib/RTLIB.jl") +using .RTLIB include("float.jl") include("twiceprecision.jl") include("complex.jl") diff --git a/src/cgutils.cpp b/src/cgutils.cpp index acc630914db82..b8f3061d5c218 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -507,6 +507,8 @@ static Type *bitstype_to_llvm(jl_value_t *bt) return T_int32; if (bt == (jl_value_t*)jl_int64_type) return T_int64; + if (bt == (jl_value_t*)jl_float16_type) + return T_float16; if (bt == (jl_value_t*)jl_float32_type) return T_float32; if (bt == (jl_value_t*)jl_float64_type) diff --git a/src/codegen.cpp b/src/codegen.cpp index bcd91afce5d1b..5393bff053bfe 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -157,8 +157,6 @@ extern void _chkstk(void); #define __alignof__ __alignof #endif -#define DISABLE_FLOAT16 - // llvm state JL_DLLEXPORT LLVMContext jl_LLVMContext; static bool nested_compile = false; diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 
ca7e0f45db92b..f17a251013b31 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -109,10 +109,8 @@ static Type *FLOATT(Type *t) return T_float64; if (nb == 32) return T_float32; -#ifndef DISABLE_FLOAT16 if (nb == 16) return T_float16; -#endif if (nb == 128) return T_float128; return NULL; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 4b4b82869d3e9..de6078deccc0b 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -336,6 +336,28 @@ void NotifyDebugger(jit_code_entry *JITCodeEntry) } // ------------------------ END OF TEMPORARY COPY FROM LLVM ----------------- +// Resolve llvm libcalls to the implementations in base/rtlib +static uint64_t resolve_libcalls(const char *name) +{ + static void *sys_hdl = jl_load_dynamic_library_e("sys", JL_RTLD_LOCAL); + static const char *const prefix = "__"; + if (!sys_hdl) { + jl_printf(JL_STDERR, "WARNING: Unable to load sysimage\n"); + return 0; + } + if (strncmp(name, prefix, strlen(prefix)) != 0) + return 0; +#if defined(_OS_DARWIN_) + // jl_dlsym_e expects an unmangled 'C' symbol name, + // so iff we are on Darwin we strip the leading '_' off. + static const char *const mangled_prefix = "___"; + if (strncmp(name, mangled_prefix, strlen(mangled_prefix)) == 0) { + ++name; + } +#endif + return (uintptr_t)jl_dlsym_e(sys_hdl, name); +} + #if defined(_OS_LINUX_) || defined(_OS_WINDOWS_) || defined(_OS_FREEBSD_) // Resolve non-lock free atomic functions in the libatomic1 library. // This is the library that provides support for c11/c++11 atomic operations. @@ -584,6 +606,8 @@ void JuliaOJIT::addModule(std::unique_ptr M) if (uint64_t addr = resolve_atomic(Name.c_str())) return JL_SymbolInfo(addr, JITSymbolFlags::Exported); #endif + if (uint64_t addr = resolve_libcalls(Name.c_str())) + return JL_SymbolInfo(addr, JITSymbolFlags::Exported); // Return failure code return JL_SymbolInfo(nullptr); },