JuliaLang · vtjnash · Feb 24, 2017 · Feb 22, 2017 · Feb 22, 2017 · Feb 23, 2017
diff --git a/base/fastmath.jl b/base/fastmath.jl
@@ -23,7 +23,7 @@ module FastMath
 
 export @fastmath
 
-import Core.Intrinsics: powi_llvm, sqrt_llvm_fast, neg_float_fast,
+import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
     add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
     eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
 
@@ -243,8 +243,8 @@ end
 
 # builtins
 
-pow_fast(x::FloatTypes, y::Integer) = pow_fast(x, Int32(y))
-pow_fast(x::FloatTypes, y::Int32) = Base.powi_llvm(x, y)
+pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32", llvmcall, Float32, (Float32, Int32), x, y)
+pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Float64, Int32), x, y)
 
 # TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan
 # checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic

diff --git a/base/float.jl b/base/float.jl
@@ -812,14 +812,6 @@ fpinttype(::Type{Float64}) = UInt64
 fpinttype(::Type{Float32}) = UInt32
 fpinttype(::Type{Float16}) = UInt16
 
-@pure significand_bits{T<:AbstractFloat}(::Type{T}) = trailing_ones(significand_mask(T))
-@pure exponent_bits{T<:AbstractFloat}(::Type{T}) = sizeof(T)*8 - significand_bits(T) - 1
-@pure exponent_bias{T<:AbstractFloat}(::Type{T}) = Int(exponent_one(T) >> significand_bits(T))
-# maximum float exponent
-@pure exponent_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T)
-# maximum float exponent without bias
-@pure exponent_raw_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T))
-
 ## TwicePrecision utilities
 # The numeric constants are half the number of bits in the mantissa
 for (F, T, n) in ((Float16, UInt16, 5), (Float32, UInt32, 12), (Float64, UInt64, 26))

diff --git a/base/inference.jl b/base/inference.jl
@@ -468,7 +468,6 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc)
 add_tfunc(trunc_llvm, 1, 1, math_tfunc)
 add_tfunc(rint_llvm, 1, 1, math_tfunc)
 add_tfunc(sqrt_llvm, 1, 1, math_tfunc)
-add_tfunc(powi_llvm, 2, 2, math_tfunc)
 add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc)
     ## same-type comparisons ##
 cmp_tfunc(x::ANY, y::ANY) = Bool

diff --git a/base/intfuncs.jl b/base/intfuncs.jl
@@ -199,16 +199,19 @@ end
 # to enable compile-time optimizations specialized to p.
 # However, we still need a fallback that calls the general ^.
 # To avoid ambiguities for methods that dispatch on the
-# first argument, we dispatch the fallback via internal_pow:
-^(x, p) = internal_pow(x, p)
-internal_pow{p}(x, ::Type{Val{p}}) = x^p
+# first argument, we dispatch the fallback via internal_pow.
+# We mark these @inline since if the target is marked @inline,
+# we want to make sure that gets propagated,
+# even if it is over the inlining threshold.
+@inline ^(x, p) = internal_pow(x, p)
+@inline internal_pow{p}(x, ::Type{Val{p}}) = x^p
 
 # inference.jl has complicated logic to inline x^2 and x^3 for
 # numeric types.  In terms of Val we can do it much more simply:
-internal_pow(x::Number, ::Type{Val{0}}) = one(x)
-internal_pow(x::Number, ::Type{Val{1}}) = x
-internal_pow(x::Number, ::Type{Val{2}}) = x*x
-internal_pow(x::Number, ::Type{Val{3}}) = x*x*x
+@inline internal_pow(x::Number, ::Type{Val{0}}) = one(x)
+@inline internal_pow(x::Number, ::Type{Val{1}}) = x
+@inline internal_pow(x::Number, ::Type{Val{2}}) = x*x
+@inline internal_pow(x::Number, ::Type{Val{3}}) = x*x*x
 
 # b^p mod m
 

diff --git a/base/math.jl b/base/math.jl
@@ -20,13 +20,23 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
              max, min, minmax, ^, exp2, muladd, rem,
              exp10, expm1, log1p
 
-using Base: sign_mask, exponent_mask, exponent_one, exponent_bias,
-            exponent_half, exponent_max, exponent_raw_max, fpinttype,
-            significand_mask, significand_bits, exponent_bits
+using Base: sign_mask, exponent_mask, exponent_one,
+            exponent_half, fpinttype, significand_mask
 
-using Core.Intrinsics: sqrt_llvm, powi_llvm
+using Core.Intrinsics: sqrt_llvm
+
+const IEEEFloat = Union{Float16, Float32, Float64}
+
+for T in (Float16, Float32, Float64)
+    @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T)))
+    @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1)
+    @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T)))
+    # maximum float exponent
+    @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T))
+    # maximum float exponent without bias
+    @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)))
+end
 
-const IEEEFloat = Union{Float16,Float32,Float64}
 # non-type specific math functions
 
 """
@@ -229,8 +239,6 @@ for f in (:cbrt, :sinh, :cosh, :tanh, :atan, :asinh, :exp2, :expm1)
         ($f)(x::Real) = ($f)(float(x))
     end
 end
-# pure julia exp function
-include("special/exp.jl")
 exp(x::Real) = exp(float(x))
 
 # fallback definitions to prevent infinite loop from $f(x::Real) def above
@@ -286,6 +294,8 @@ exp10(x::Float32) = 10.0f0^x
 exp10(x::Integer) = exp10(float(x))
 
 # utility for converting NaN return to DomainError
+# the branch in nan_dom_err prevents its callers from inlining, so be sure to force it
+# until the heuristics can be improved
 @inline nan_dom_err(f, x) = isnan(f) & !isnan(x) ? throw(DomainError()) : f
 
 # functions that return NaN on non-NaN argument for domain error
@@ -403,9 +413,9 @@ log1p(x)
 for f in (:sin, :cos, :tan, :asin, :acos, :acosh, :atanh, :log, :log2, :log10,
           :lgamma, :log1p)
     @eval begin
-        ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)),libm), Float64, (Float64,), x), x)
-        ($f)(x::Float32) = nan_dom_err(ccall(($(string(f,"f")),libm), Float32, (Float32,), x), x)
-        ($f)(x::Real) = ($f)(float(x))
+        @inline ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)), libm), Float64, (Float64,), x), x)
+        @inline ($f)(x::Float32) = nan_dom_err(ccall(($(string(f, "f")), libm), Float32, (Float32,), x), x)
+        @inline ($f)(x::Real) = ($f)(float(x))
     end
 end
 
@@ -683,15 +693,12 @@ function modf(x::Float64)
     f, _modf_temp[]
 end
 
-^(x::Float64, y::Float64) = nan_dom_err(ccall((:pow,libm),  Float64, (Float64,Float64), x, y), x+y)
-^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y)
-
-^(x::Float64, y::Integer) = x^Int32(y)
-^(x::Float64, y::Int32) = powi_llvm(x, y)
-^(x::Float32, y::Integer) = x^Int32(y)
-^(x::Float32, y::Int32) = powi_llvm(x, y)
-^(x::Float16, y::Integer) = Float16(Float32(x)^y)
-^{p}(x::Float16, ::Type{Val{p}}) = Float16(Float32(x)^Val{p})
+@inline ^(x::Float64, y::Float64) = nan_dom_err(ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y), x + y)
+@inline ^(x::Float32, y::Float32) = nan_dom_err(ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, y), x + y)
+@inline ^(x::Float64, y::Integer) = x ^ Float64(y)
+@inline ^(x::Float32, y::Integer) = x ^ Float32(y)
+@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ Float32(y))
+@inline ^{p}(x::Float16, ::Type{Val{p}}) = Float16(Float32(x) ^ Val{p})
 
 function angle_restrict_symm(theta)
     const P1 = 4 * 7.8539812564849853515625e-01
@@ -951,6 +958,7 @@ end
 cbrt(a::Float16) = Float16(cbrt(Float32(a)))
 
 # More special functions
+include("special/exp.jl")
 include("special/trig.jl")
 include("special/gamma.jl")
 

diff --git a/base/replutil.jl b/base/replutil.jl
@@ -219,28 +219,29 @@ showerror(io::IO, ex::InitError) = showerror(io, ex, [])
 function showerror(io::IO, ex::DomainError, bt; backtrace=true)
     print(io, "DomainError:")
     for b in bt
-        code = StackTraces.lookup(b)[1]
-        if !code.from_c
-            if code.func == :nan_dom_err
+        for code in StackTraces.lookup(b)
+            if code.from_c
+                continue
+            elseif code.func === :nan_dom_err
                 continue
             elseif code.func in (:log, :log2, :log10, :sqrt)
                 print(io, "\n$(code.func) will only return a complex result if called ",
                     "with a complex argument. Try $(string(code.func))(complex(x)).")
-            elseif (code.func == :^ && code.file == Symbol("intfuncs.jl")) ||
-                    code.func == :power_by_squaring #3024
+            elseif (code.func === :^ &&
+                    (code.file === Symbol("intfuncs.jl") || code.file === Symbol(joinpath(".", "intfuncs.jl")))) ||
+                   code.func === :power_by_squaring #3024
                 print(io, "\nCannot raise an integer x to a negative power -n. ",
                     "\nMake x a float by adding a zero decimal (e.g. 2.0^-n instead ",
                     "of 2^-n), or write 1/x^n, float(x)^-n, or (x//1)^-n.")
-            elseif code.func == :^ &&
-                    (code.file == Symbol("promotion.jl") || code.file == Symbol("math.jl") ||
-                    code.file == Symbol(joinpath(".","promotion.jl")) ||
-                    code.file == Symbol(joinpath(".","math.jl")))
+            elseif code.func === :^ &&
+                    (code.file === Symbol("math.jl") || code.file === Symbol(joinpath(".", "math.jl")))
                 print(io, "\nExponentiation yielding a complex result requires a complex ",
                     "argument.\nReplace x^y with (x+0im)^y, Complex(x)^y, or similar.")
             end
-            break
+            @goto showbacktrace
         end
     end
+    @label showbacktrace
     backtrace && show_backtrace(io, bt)
     nothing
 end

diff --git a/base/special/exp.jl b/base/special/exp.jl
@@ -56,8 +56,8 @@ MINEXP(::Type{Float32}) = -103.97207708f0            # log 2^-150
 @inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3)
 
 # for values smaller than this threshold just use a Taylor expansion
-exp_small_thres(::Type{Float64}) = 2.0^-28
-exp_small_thres(::Type{Float32}) = 2.0f0^-13
+@eval exp_small_thres(::Type{Float64}) = $(2.0^-28)
+@eval exp_small_thres(::Type{Float32}) = $(2.0f0^-13)
 
 """
     exp(x)
@@ -114,13 +114,13 @@ function exp{T<:Union{Float32,Float64}}(x::T)
         # scale back
         if k > -significand_bits(T)
             # multiply by 2.0 first to prevent overflow, which helps extends the range
-            k == exponent_max(T) && return y*T(2.0)*T(2.0)^(exponent_max(T) - 1)
+            k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
             twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
             return y*twopk
         else
             # add significand_bits(T) + 1 to lift the range outside the subnormals
             twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
-            return y*twopk*T(2.0)^(-significand_bits(T) - 1)
+            return y * twopk * T(2.0)^(-significand_bits(T) - 1)
 k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) 
 return y * twopk * T(2.0)^(-significand_bits(T) - 1) 
 @pure exponent_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) 
 # scale back 
 k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1) 
 return y * twopk * T(2.0)^(-significand_bits(T) - 1) 
 @pure exponent_max{T<:AbstractFloat}(::Type{T}) = Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) 
 # scale back 
         end
     elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
         # Taylor approximation for small x

diff --git a/src/codegen.cpp b/src/codegen.cpp
@@ -397,10 +397,6 @@ static Function *jldlsym_func;
 static Function *jlnewbits_func;
 static Function *jltypeassert_func;
 static Function *jldepwarnpi_func;
-#if JL_LLVM_VERSION < 30600
-static Function *jlpow_func;
-static Function *jlpowf_func;
-#endif
 //static Function *jlgetnthfield_func;
 static Function *jlgetnthfieldchecked_func;
 //static Function *jlsetnthfield_func;
@@ -6857,25 +6853,6 @@ static void init_julia_llvm_env(Module *m)
                          "jl_gc_diff_total_bytes", m);
     add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes);
 
-#if JL_LLVM_VERSION < 30600
-    Type *powf_type[2] = { T_float32, T_float32 };
-    jlpowf_func = Function::Create(FunctionType::get(T_float32, powf_type, false),
-                                   Function::ExternalLinkage,
-                                   "powf", m);
-    add_named_global(jlpowf_func, &powf, false);
-
-    Type *pow_type[2] = { T_float64, T_float64 };
-    jlpow_func = Function::Create(FunctionType::get(T_float64, pow_type, false),
-                                  Function::ExternalLinkage,
-                                  "pow", m);
-    add_named_global(jlpow_func,
-#ifdef _COMPILER_MICROSOFT_
-        static_cast<double (*)(double, double)>(&pow),
-#else
-        &pow,
-#endif
-        false);
-#endif
     std::vector<Type*> array_owner_args(0);
     array_owner_args.push_back(T_pjlvalue);
     jlarray_data_owner_func =

diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
@@ -71,7 +71,6 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
     float_func[rint_llvm] = true;
     float_func[sqrt_llvm] = true;
     float_func[sqrt_llvm_fast] = true;
-    float_func[powi_llvm] = true;
 }
 
 extern "C"
@@ -851,33 +850,6 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
         return mark_julia_type(ans, false, x.typ, ctx);
     }
 
-    case powi_llvm: {
-        const jl_cgval_t &x = argv[0];
-        const jl_cgval_t &y = argv[1];
-        if (!jl_is_primitivetype(x.typ) || !jl_is_primitivetype(y.typ) || jl_datatype_size(y.typ) != 4)
-            return emit_runtime_call(f, argv, nargs, ctx);
-        Type *xt = FLOATT(bitstype_to_llvm(x.typ));
-        Type *yt = T_int32;
-        if (!xt)
-            return emit_runtime_call(f, argv, nargs, ctx);
-
-        Value *xv = emit_unbox(xt, x, x.typ);
-        Value *yv = emit_unbox(yt, y, y.typ);
-#if JL_LLVM_VERSION >= 30600
-        Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt));
-#if JL_LLVM_VERSION >= 30700
-        Value *ans = builder.CreateCall(powi, {xv, yv});
-#else
-        Value *ans = builder.CreateCall2(powi, xv, yv);
-#endif
-#else
-        // issue #6506
-        Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? jlpow_func : jlpowf_func),
-                xv, builder.CreateSIToFP(yv, xt));
-#endif
-        return mark_julia_type(ans, false, x.typ, ctx);
-    }
-
     default: {
         assert(nargs >= 1 && "invalid nargs for intrinsic call");
         const jl_cgval_t &xinfo = argv[0];

diff --git a/src/intrinsics.h b/src/intrinsics.h
@@ -91,7 +91,6 @@
     ADD_I(trunc_llvm, 1) \
     ADD_I(rint_llvm, 1) \
     ADD_I(sqrt_llvm, 1) \
-    ADD_I(powi_llvm, 2) \
     ALIAS(sqrt_llvm_fast, sqrt_llvm) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \

diff --git a/src/julia_internal.h b/src/julia_internal.h
@@ -680,7 +680,6 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);

diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
@@ -925,31 +925,6 @@ un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm)
 
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *ty = jl_typeof(a);
-    if (!jl_is_primitivetype(ty))
-        jl_error("powi_llvm: a is not a primitive type");
-    if (!jl_is_primitivetype(jl_typeof(b)) || jl_datatype_size(jl_typeof(b)) != 4)
-        jl_error("powi_llvm: b is not a 32-bit primitive type");
-    int sz = jl_datatype_size(ty);
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);
-    void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
-    switch (sz) {
-    /* choose the right size c-type operation */
-    case 4:
-        *(float*)pr = powf(*(float*)pa, (float)jl_unbox_int32(b));
-        break;
-    case 8:
-        *(double*)pr = pow(*(double*)pa, (double)jl_unbox_int32(b));
-        break;
-    default:
-        jl_error("powi_llvm: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
-    }
-    return newv;
-}
-
 JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_value_t *b)
 {
     JL_TYPECHK(isfalse, bool, isfalse);

diff --git a/test/math.jl b/test/math.jl
@@ -590,6 +590,18 @@ end
     end
 end
 
+@testset "issue #19872" begin
+    f19872a(x) = x ^ 5
+    f19872b(x) = x ^ (-1024)
+    @test 0 < f19872b(2.0) < 1e-300
+    @test issubnormal(2.0 ^ (-1024))
+    @test issubnormal(f19872b(2.0))
+    @test !issubnormal(f19872b(0.0))
+    @test f19872a(2.0) === 32.0
+    @test !issubnormal(f19872a(2.0))
+    @test !issubnormal(0.0)
+end
+
 # no domain error is thrown for negative values
 @test invoke(cbrt, Tuple{AbstractFloat}, -1.0) == -1.0