From 7d6dfe1ec94dbf5ba52a2109c3c3f8c0ef555c43 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Thu, 8 Apr 2021 15:04:14 -0500 Subject: [PATCH] faster expm1 (#37440) Faster and slightly more accurate (and all Julia). --- base/math.jl | 31 +++++++------- base/special/exp.jl | 85 +++++++++++++++++++++++++++++++++++++- base/special/hyperbolic.jl | 16 +------ 3 files changed, 98 insertions(+), 34 deletions(-) diff --git a/base/math.jl b/base/math.jl index 42f10760ed4fb..b66a058e5ae9e 100644 --- a/base/math.jl +++ b/base/math.jl @@ -241,6 +241,20 @@ macro evalpoly(z, p...) :(evalpoly($zesc, ($(pesc...),))) end +# polynomial evaluation using compensated summation. +# much more accurate, especially when lo can be combined with other rounding errors +@inline function exthorner(x, p::Tuple) + hi, lo = p[end], zero(x) + for i in length(p)-1:-1:1 + pi = p[i] + prod = hi*x + err = fma(hi, x, -prod) + hi = pi+prod + lo = fma(lo, x, prod - (hi - pi) + err) + end + return hi, lo +end + """ rad2deg(x) @@ -365,23 +379,6 @@ Compute the inverse hyperbolic sine of `x`. """ asinh(x::Number) -""" - expm1(x) - -Accurately compute ``e^x-1``. It avoids the loss of precision involved in the direct -evaluation of exp(x)-1 for small values of x. -# Examples -```jldoctest -julia> expm1(1e-16) -1.0e-16 - -julia> exp(1e-16) - 1 -0.0 -``` -""" -expm1(x) -expm1(x::Float64) = ccall((:expm1,libm), Float64, (Float64,), x) -expm1(x::Float32) = ccall((:expm1f,libm), Float32, (Float32,), x) # utility for converting NaN return to DomainError # the branch in nan_dom_err prevents its callers from inlining, so be sure to force it diff --git a/base/special/exp.jl b/base/special/exp.jl index 5dffcde385812..e10fdf243055c 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -205,7 +205,7 @@ for (func, base) in (:exp2=>Val(2), :exp=>Val(:ℯ), :exp10=>Val(10)) @eval begin function ($func)(x::T) where T<:Float64 N_float = muladd(x, LogBo256INV($base, T), MAGIC_ROUND_CONST(T)) - N = reinterpret(uinttype(T), N_float) % Int32 + N = reinterpret(UInt64, N_float) % Int32 N_float -= MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV($base, T)) r = muladd(N_float, LogBo256U($base, T), x) r = muladd(N_float, LogBo256L($base, T), r) @@ -273,7 +273,6 @@ julia> exp(1.0) ``` """ exp(x::Real) - """ exp2(x) @@ -314,3 +313,85 @@ exp10(x) reinterpret(Float64, (exponent_bias(Float64) + (x % Int64)) << (significand_bits(Float64) % UInt)) end end + +# min and max arguments by base and type +MAX_EXP(::Type{Float64}) = 709.7827128933845 # log 2^1023*(2-2^-52) +MIN_EXP(::Type{Float64}) = -37.42994775023705 # log 2^-54 +MAX_EXP(::Type{Float32}) = 88.72284f0 # log 2^127 *(2-2^-23) +MIN_EXP(::Type{Float32}) = -17.32868f0 # log 2^-25 + +Ln2INV(::Type{Float64}) = 1.4426950408889634 +Ln2(::Type{Float64}) = -0.6931471805599453 + +# log(.75) <= x <= log(1.25) +function expm1_small(x::Float32) + p = evalpoly(x, (0.16666666f0, 0.041666627f0, 0.008333682f0, + 0.0013908712f0, 0.0001933096f0)) + p2 = exthorner(x, (1f0, .5f0, p)) + return fma(x, p2[1], x*p2[2]) +end +@inline function expm1_small(x::Float64) + p = evalpoly(x, (0.16666666666666632, 0.04166666666666556, 0.008333333333401227, + 0.001388888889068783, 0.00019841269447671544, 2.480157691845342e-5, + 2.7558212415361945e-6, 2.758218402815439e-7, 2.4360682937111612e-8)) + p2 = exthorner(x, (1.0, .5, p)) + return fma(x, p2[1], x*p2[2]) +end + +@inline function expm1(x::Float64) + T=Float64 + if -0.2876820724517809 <= x <= 0.22314355131420976 + return expm1_small(x) + elseif !(abs(x)<=MIN_EXP(Float64)) + isnan(x) && return x + x > MAX_EXP(Float64) && return Inf + x < MIN_EXP(Float64) && return -1.0 + end + + N_float = muladd(x, LogBo256INV(Val(:ℯ), T), MAGIC_ROUND_CONST(T)) + N = reinterpret(UInt64, N_float) % Int32 + N_float -= MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(Val(:ℯ), T)) + r = muladd(N_float, LogBo256U(Val(:ℯ), T), x) + r = muladd(N_float, LogBo256L(Val(:ℯ), T), r) + k = Int64(N >> 8) + jU, jL = table_unpack(N&255 +1) + p = expm1b_kernel(Val(:ℯ), r) + twopk = reinterpret(Float64, (1023+k) << 52) + twopnk = reinterpret(Float64, (1023-k) << 52) + k>=106 && return reinterpret(Float64, (1022+k) << 52)*(jU + muladd(jU, p, jL))*2 + k>=53 && return twopk*(jU + muladd(jU, p, (jL-twopnk))) + k<=-2 && return twopk*(jU + muladd(jU, p, jL))-1 + return twopk*((jU-twopnk) + fma(jU, p, jL)) +end + +@inline function expm1(x::Float32) + if -0.2876821f0 <=x <= 0.22314355f0 + return expm1_small(x) + end + x = Float64(x) + N_float = round(x*Ln2INV(Float64)) + N = unsafe_trunc(UInt64, N_float) + r = muladd(N_float, Ln2(Float64), x) + hi = evalpoly(r, (1.0, .5, 0.16666667546642386, 0.041666183019487026, + 0.008332997481506921, 0.0013966479175977883, 0.0002004037059220124)) + small_part = r*hi + twopk = reinterpret(Float64, (N+1023) << 52) + x > MAX_EXP(Float32) && return Inf32 + return Float32(muladd(twopk, small_part, twopk-1.0)) +end + +""" + expm1(x) + +Accurately compute ``e^x-1``. It avoids the loss of precision involved in the direct +evaluation of exp(x)-1 for small values of x. +# Examples +```jldoctest +julia> expm1(1e-16) +1.0e-16 + +julia> exp(1e-16) - 1 +0.0 +``` +""" +expm1(x) diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl index 1fee6e2879220..f6ff43d900b1b 100644 --- a/base/special/hyperbolic.jl +++ b/base/special/hyperbolic.jl @@ -14,20 +14,6 @@ # is preserved. # ==================================================== -@inline function exthorner(x, p::Tuple) - # polynomial evaluation using compensated summation. - # much more accurate, especially when lo can be combined with other rounding errors - hi, lo = p[end], zero(x) - for i in length(p)-1:-1:1 - pi = p[i] - prod = hi*x - err = fma(hi, x, -prod) - hi = pi+prod - lo = fma(lo, x, prod - (hi - pi) + err) - end - return hi, lo -end - # Hyperbolic functions # sinh methods H_SMALL_X(::Type{Float64}) = 2.0^-28 @@ -112,7 +98,7 @@ function cosh(x::T) where T<:Union{Float32,Float64} # return cosh(x) = = (exp(x) + exp(-x))/2 # e) H_LARGE_X <= x # return cosh(x) = exp(x/2)/2 * exp(x/2) - # Note that this branch automatically deals with Infs and NaNs + # Note that this branch automatically deals with Infs and NaNs absx = abs(x) if absx <= COSH_SMALL_X(T)