Skip to content

Commit

Permalink
only use accurate powf function
Browse files Browse the repository at this point in the history
The powi intrinsic optimization over calling powf is that it is inaccurate.
We don't need that.

When it is equally accurate (e.g. tiny constant powers),
LLVM will already recognize and optimize any call to a function named `powf`,
and produce the same speedup.

fix #19872
  • Loading branch information
vtjnash committed Jan 27, 2017
1 parent 8a75199 commit e577f8e
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 96 deletions.
6 changes: 3 additions & 3 deletions base/fastmath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ module FastMath

export @fastmath

import Core.Intrinsics: powi_llvm, sqrt_llvm_fast, neg_float_fast,
import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast

Expand Down Expand Up @@ -243,8 +243,8 @@ end

# builtins

pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, Int32(y))
pow_fast{T<:FloatTypes}(x::T, y::Int32) = Base.powi_llvm(x, y)
pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32", llvmcall, Float32, (Float32, Int32), x, y)
pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Float64, Int32), x, y)

# TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan
# checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic
Expand Down
1 change: 0 additions & 1 deletion base/inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,6 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc)
add_tfunc(trunc_llvm, 1, 1, math_tfunc)
add_tfunc(rint_llvm, 1, 1, math_tfunc)
add_tfunc(sqrt_llvm, 1, 1, math_tfunc)
add_tfunc(powi_llvm, 2, 2, math_tfunc)
add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc)
## same-type comparisons ##
cmp_tfunc(x::ANY, y::ANY) = Bool
Expand Down
23 changes: 11 additions & 12 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ using Base: sign_mask, exponent_mask, exponent_one, exponent_bias,
exponent_half, exponent_max, exponent_raw_max, fpinttype,
significand_mask, significand_bits, exponent_bits

using Core.Intrinsics: sqrt_llvm, powi_llvm
using Core.Intrinsics: sqrt_llvm

# non-type specific math functions

Expand Down Expand Up @@ -308,6 +308,8 @@ exp10(x::Float32) = 10.0f0^x
exp10(x::Integer) = exp10(float(x))

# utility for converting NaN return to DomainError
# the branch in nan_dom_err prevents its callers from inlining, so be sure to force it
# until the heuristics can be improved
@inline nan_dom_err(f, x) = isnan(f) & !isnan(x) ? throw(DomainError()) : f

# functions that return NaN on non-NaN argument for domain error
Expand Down Expand Up @@ -414,9 +416,9 @@ log1p(x)
for f in (:sin, :cos, :tan, :asin, :acos, :acosh, :atanh, :log, :log2, :log10,
:lgamma, :log1p)
@eval begin
($f)(x::Float64) = nan_dom_err(ccall(($(string(f)),libm), Float64, (Float64,), x), x)
($f)(x::Float32) = nan_dom_err(ccall(($(string(f,"f")),libm), Float32, (Float32,), x), x)
($f)(x::Real) = ($f)(float(x))
@inline ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)),libm), Float64, (Float64,), x), x)
@inline ($f)(x::Float32) = nan_dom_err(ccall(($(string(f, "f")), libm), Float32, (Float32,), x), x)
@inline ($f)(x::Real) = ($f)(float(x))
end
end

Expand Down Expand Up @@ -677,14 +679,11 @@ function modf(x::Float64)
f, _modf_temp[]
end

^(x::Float64, y::Float64) = nan_dom_err(ccall((:pow,libm), Float64, (Float64,Float64), x, y), x+y)
^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y)

^(x::Float64, y::Integer) = x^Int32(y)
^(x::Float64, y::Int32) = powi_llvm(x, y)
^(x::Float32, y::Integer) = x^Int32(y)
^(x::Float32, y::Int32) = powi_llvm(x, y)
^(x::Float16, y::Integer) = Float16(Float32(x)^y)
@inline ^(x::Float64, y::Float64) = nan_dom_err(ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y), x + y)
@inline ^(x::Float32, y::Float32) = nan_dom_err(ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, y), x + y)
@inline ^(x::Float64, y::Integer) = x ^ Float64(y)
@inline ^(x::Float32, y::Integer) = x ^ Float32(y)
@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ Float32(y))

function angle_restrict_symm(theta)
const P1 = 4 * 7.8539812564849853515625e-01
Expand Down
8 changes: 6 additions & 2 deletions base/special/exp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ exp_small_thres(::Type{Float32}) = 2.0f0^-13
Compute the natural base exponential of `x`, in other words ``e^x``.
"""
function exp{T<:Union{Float32,Float64}}(x::T)
^ = Base.FastMath.pow_fast
# we don't need the accurate power function here since we only compute 2^c,
# where c is a constant

xa = reinterpret(Unsigned, x) & ~sign_mask(T)
xsb = signbit(x)

Expand Down Expand Up @@ -114,13 +118,13 @@ function exp{T<:Union{Float32,Float64}}(x::T)
# scale back
if k > -significand_bits(T)
# multiply by 2.0 first to prevent overflow, which helps extends the range
k == exponent_max(T) && return y*T(2.0)*T(2.0)^(exponent_max(T) - 1)
k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
return y*twopk
else
# add significand_bits(T) + 1 to lift the range outside the subnormals
twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
return y*twopk*T(2.0)^(-significand_bits(T) - 1)
return y * twopk * T(2.0)^(-significand_bits(T) - 1)
end
elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
# Taylor approximation for small x
Expand Down
23 changes: 0 additions & 23 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,6 @@ static Function *jldlsym_func;
static Function *jlnewbits_func;
static Function *jltypeassert_func;
static Function *jldepwarnpi_func;
#if JL_LLVM_VERSION < 30600
static Function *jlpow_func;
static Function *jlpowf_func;
#endif
//static Function *jlgetnthfield_func;
static Function *jlgetnthfieldchecked_func;
//static Function *jlsetnthfield_func;
Expand Down Expand Up @@ -5985,25 +5981,6 @@ static void init_julia_llvm_env(Module *m)
"jl_gc_diff_total_bytes", m);
add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes);

#if JL_LLVM_VERSION < 30600
Type *powf_type[2] = { T_float32, T_float32 };
jlpowf_func = Function::Create(FunctionType::get(T_float32, powf_type, false),
Function::ExternalLinkage,
"powf", m);
add_named_global(jlpowf_func, &powf, false);

Type *pow_type[2] = { T_float64, T_float64 };
jlpow_func = Function::Create(FunctionType::get(T_float64, pow_type, false),
Function::ExternalLinkage,
"pow", m);
add_named_global(jlpow_func,
#ifdef _COMPILER_MICROSOFT_
static_cast<double (*)(double, double)>(&pow),
#else
&pow,
#endif
false);
#endif
std::vector<Type*> array_owner_args(0);
array_owner_args.push_back(T_pjlvalue);
jlarray_data_owner_func =
Expand Down
28 changes: 0 additions & 28 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
float_func[rint_llvm] = true;
float_func[sqrt_llvm] = true;
float_func[sqrt_llvm_fast] = true;
float_func[powi_llvm] = true;
}

extern "C"
Expand Down Expand Up @@ -915,33 +914,6 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
return mark_julia_type(ans, false, x.typ, ctx);
}

case powi_llvm: {
const jl_cgval_t &x = argv[0];
const jl_cgval_t &y = argv[1];
if (!jl_is_bitstype(x.typ) || !jl_is_bitstype(y.typ) || jl_datatype_size(y.typ) != 4)
return emit_runtime_call(f, argv, nargs, ctx);
Type *xt = FLOATT(bitstype_to_llvm(x.typ));
Type *yt = T_int32;
if (!xt)
return emit_runtime_call(f, argv, nargs, ctx);

Value *xv = emit_unbox(xt, x, x.typ);
Value *yv = emit_unbox(yt, y, y.typ);
#if JL_LLVM_VERSION >= 30600
Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt));
#if JL_LLVM_VERSION >= 30700
Value *ans = builder.CreateCall(powi, {xv, yv});
#else
Value *ans = builder.CreateCall2(powi, xv, yv);
#endif
#else
// issue #6506
Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? jlpow_func : jlpowf_func),
xv, builder.CreateSIToFP(yv, xt));
#endif
return mark_julia_type(ans, false, x.typ, ctx);
}

default: {
assert(nargs >= 1 && "invalid nargs for intrinsic call");
const jl_cgval_t &xinfo = argv[0];
Expand Down
1 change: 0 additions & 1 deletion src/intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@
ADD_I(trunc_llvm, 1) \
ADD_I(rint_llvm, 1) \
ADD_I(sqrt_llvm, 1) \
ADD_I(powi_llvm, 2) \
ALIAS(sqrt_llvm_fast, sqrt_llvm) \
/* pointer access */ \
ADD_I(pointerref, 3) \
Expand Down
1 change: 0 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,6 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b);
JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
Expand Down
25 changes: 0 additions & 25 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -956,31 +956,6 @@ un_fintrinsic(trunc_float,trunc_llvm)
un_fintrinsic(rint_float,rint_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm)

JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b)
{
jl_ptls_t ptls = jl_get_ptls_states();
jl_value_t *ty = jl_typeof(a);
if (!jl_is_bitstype(ty))
jl_error("powi_llvm: a is not a bitstype");
if (!jl_is_bitstype(jl_typeof(b)) || jl_datatype_size(jl_typeof(b)) != 4)
jl_error("powi_llvm: b is not a 32-bit bitstype");
int sz = jl_datatype_size(ty);
jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);
void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
switch (sz) {
/* choose the right size c-type operation */
case 4:
*(float*)pr = powf(*(float*)pa, (float)jl_unbox_int32(b));
break;
case 8:
*(double*)pr = pow(*(double*)pa, (double)jl_unbox_int32(b));
break;
default:
jl_error("powi_llvm: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
}
return newv;
}

JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_value_t *b)
{
JL_TYPECHK(isfalse, bool, isfalse);
Expand Down
7 changes: 7 additions & 0 deletions test/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,13 @@ end
end
end

@testset "issue #19872" begin
f19872(x) = x ^ 3
@test issubnormal(2.0 ^ (-1024))
@test f19872(2.0) === 8.0
@test !issubnormal(0.0)
end

@test Base.Math.f32(complex(1.0,1.0)) == complex(Float32(1.),Float32(1.))
@test Base.Math.f16(complex(1.0,1.0)) == complex(Float16(1.),Float16(1.))

Expand Down

0 comments on commit e577f8e

Please sign in to comment.