Faster Float64^Float64 (JuliaLang#42271)

Co-authored-by: Kristoffer Carlsson <[email protected]>
LilithHafner · Mar 8, 2022 · 37556d9 · 37556d9
1 parent f0b9e13
commit 37556d9
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 6 deletions.
diff --git a/base/math.jl b/base/math.jl
@@ -942,11 +942,15 @@ end
 @inline function ^(x::Float64, y::Float64)
     yint = unsafe_trunc(Int, y) # Note, this is actually safe since julia freezes the result
     y == yint && return x^yint
-    z = ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y)
-    if isnan(z) & !isnan(x+y)
-        throw_exp_domainerror(x)
-    end
-    z
+    x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
+    x == 1 && return 1.0
+    !isfinite(x) && return x*(y>0)
+    x==0 && return abs(y)*Inf*(!(y>0))
+    logxhi,logxlo = Base.Math._log_ext(x)
+    xyhi = logxhi*y
+    xylo = logxlo*y
+    hi = xyhi+xylo
+    return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
 end
 @inline function ^(x::T, y::T) where T <: Union{Float16, Float32}
     yint = unsafe_trunc(Int64, y) # Note, this is actually safe since julia freezes the result

diff --git a/base/special/exp.jl b/base/special/exp.jl
@@ -227,6 +227,31 @@ end
     twopk = Int64(k) << 52
     return reinterpret(T, twopk + reinterpret(Int64, small_part))
 end
+# Computes base^(x+xlo). Used for pow.
+@inline function exp_impl(x::Float64, xlo::Float64, base)
+    T = Float64
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU, jL = table_unpack(N&255 + 1)
+    very_small = muladd(jU, expm1b_kernel(base, r), jL)
+    small_part =  muladd(jU,xlo,very_small) + jU
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x >= MAX_EXP(base, T) && return Inf
+        x <= MIN_EXP(base, T) && return 0.0
+        if k <= -53
+            # The UInt64 forces promotion. (Only matters for 32 bit systems.)
+            twopk = (k + UInt64(53)) << 52
+            return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
+        end
+        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+    end
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
 @inline function exp_impl_fast(x::Float64, base)
     T = Float64
     N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))

diff --git a/base/special/log.jl b/base/special/log.jl
@@ -409,3 +409,51 @@ function log1p(x::Float32)
     end
 end
 
+
+@inline function log_ext_kernel(x_hi::Float64, x_lo::Float64)
+    c1hi = 0.666666666666666629659233
+    hi_order =  evalpoly(x_hi, (0.400000000000000077715612, 0.285714285714249172087875,
+                                0.222222222230083560345903, 0.181818180850050775676507,
+                                0.153846227114512262845736, 0.13332981086846273921509,
+                                0.117754809412463995466069, 0.103239680901072952701192,
+                                0.116255524079935043668677))
+    res_hi = hi_order * x_hi
+    res_lo = fma(x_lo, hi_order, fma(hi_order, x_hi, -res_hi))
+    ans_hi = c1hi + res_hi
+    ans_lo = ((c1hi - ans_hi) + res_hi) + (res_lo + 3.80554962542412056336616e-17)
+    return ans_hi, ans_lo
+end
+
+# Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision
+# Implimentation adapted from SLEEFPirates.jl
+# Does not normalize results.
+# Must be caused with positive finite arguments
+function _log_ext(d::Float64)
+    m, e = significand(d), exponent(d)
+    if m > 1.5
+        m *= 0.5
+        e += 1.0
+    end
+    # x = (m-1)/(m+1)
+    mp1hi = m + 1.0
+    mp1lo = m + (1.0 - mp1hi)
+    invy = inv(mp1hi)
+    xhi = (m - 1.0) * invy
+    xlo = fma(-xhi, mp1lo, fma(-xhi, mp1hi, m - 1.0)) * invy
+    x2hi = xhi * xhi
+    x2lo = muladd(xhi, xlo * 2.0, fma(xhi, xhi, -x2hi))
+    thi, tlo  = log_ext_kernel(x2hi, x2lo)
+
+    shi = 0.6931471805582987 * e
+    xhi2 = xhi * 2.0
+    shinew = muladd(xhi, 2.0, shi)
+    slo = muladd(1.6465949582897082e-12, e, muladd(xlo, 2.0, (((shi - shinew) + xhi2))))
+    shi = shinew
+    x3hi = x2hi * xhi
+    x3lo = muladd(x2hi, xlo, muladd(xhi, x2lo,fma(x2hi, xhi, -x3hi)))
+    x3thi = x3hi * thi
+    x3tlo = muladd(x3hi, tlo, muladd(x3lo, thi, fma(x3hi, thi, -x3thi)))
+    anshi = x3thi + shi
+    anslo = slo + x3tlo - ((anshi - shi) - x3thi)
+    return anshi, anslo
+end
diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
@@ -36,7 +36,7 @@ julia> (-1 + 2im)^2
 -3 - 4im
 
 julia> (-1 + 2im)^2.5
-2.729624464784009 - 6.9606644595719im
+2.7296244647840084 - 6.960664459571898im
 
 julia> (-1 + 2im)^(1 + 1im)
 -0.27910381075826657 + 0.08708053414102428im