RFC: Remove sqrt_llvm intrinsic

This is more of a "do we want to move in this direction?" RFC. As mentioned in #43786, we currently have three implementations of these intrinsics: (1) the code generated by LLVM for the intrinsic; (2) the code LLVM uses for constant folding the intrinsic; and (3) our own runtime intrinsic used by the interpreter. This basically removes the third one, which will be required if we want to do something about #26434 because we just forward these to libm. Of course, we'll still have to do something to teach LLVM how to constant fold these in a manner compatible with what will actually end up running, but that's a separate issue.
JuliaLang · Jan 19, 2022 · 3e0b827 · 3e0b827
1 parent 8ec5580
commit 3e0b827
Show file tree

Hide file tree

Showing 7 changed files with 2 additions and 12 deletions.
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
@@ -618,7 +618,6 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction)
              f === Intrinsics.pointerset || # this one is never effect-free
              f === Intrinsics.llvmcall ||   # this one is never effect-free
              f === Intrinsics.arraylen ||   # this one is volatile
-             f === Intrinsics.sqrt_llvm ||  # this one may differ at runtime (by a few ulps)
              f === Intrinsics.sqrt_llvm_fast ||  # this one may differ at runtime (by a few ulps)
              f === Intrinsics.have_fma ||  # this one depends on the runtime environment
              f === Intrinsics.cglobal)  # cglobal lookup answer changes at runtime

diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
@@ -167,7 +167,6 @@ add_tfunc(ceil_llvm, 1, 1, math_tfunc, 10)
 add_tfunc(floor_llvm, 1, 1, math_tfunc, 10)
 add_tfunc(trunc_llvm, 1, 1, math_tfunc, 10)
 add_tfunc(rint_llvm, 1, 1, math_tfunc, 10)
-add_tfunc(sqrt_llvm, 1, 1, math_tfunc, 20)
 add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc, 20)
     ## same-type comparisons ##
 cmp_tfunc(@nospecialize(x), @nospecialize(y)) = Bool

diff --git a/base/math.jl b/base/math.jl
@@ -25,8 +25,6 @@ using .Base: sign_mask, exponent_mask, exponent_one,
             significand_bits, exponent_bits, exponent_bias,
             exponent_max, exponent_raw_max
 
-using Core.Intrinsics: sqrt_llvm
-
 using .Base: IEEEFloat
 
 @noinline function throw_complex_domainerror(f::Symbol, x)
@@ -586,6 +584,8 @@ Stacktrace:
 """
 log1p(x)
 
+sqrt_llvm(x::Float32) = ccall("llvm.sqrt.f32", llvmcall, Float32, (Float32,), x)
+sqrt_llvm(x::Float64) = ccall("llvm.sqrt.f64", llvmcall, Float64, (Float64,), x)
 @inline function sqrt(x::Union{Float32,Float64})
     x < zero(x) && throw_complex_domainerror(:sqrt, x)
     sqrt_llvm(x)

diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
@@ -65,7 +65,6 @@ static void jl_init_intrinsic_functions_codegen(void)
     float_func[floor_llvm] = true;
     float_func[trunc_llvm] = true;
     float_func[rint_llvm] = true;
-    float_func[sqrt_llvm] = true;
     float_func[sqrt_llvm_fast] = true;
 }
 
@@ -1485,10 +1484,6 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, makeArrayRef(t));
         return ctx.builder.CreateCall(rintintr, x);
     }
-    case sqrt_llvm: {
-        FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
-        return ctx.builder.CreateCall(sqrtintr, x);
-    }
     case sqrt_llvm_fast: {
         FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
         return math_builder(ctx, true)().CreateCall(sqrtintr, x);

diff --git a/src/intrinsics.h b/src/intrinsics.h
@@ -86,7 +86,6 @@
     ADD_I(floor_llvm, 1) \
     ADD_I(trunc_llvm, 1) \
     ADD_I(rint_llvm, 1) \
-    ADD_I(sqrt_llvm, 1) \
     ADD_I(sqrt_llvm_fast, 1) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \

diff --git a/src/julia_internal.h b/src/julia_internal.h
@@ -1233,7 +1233,6 @@ JL_DLLEXPORT jl_value_t *jl_ceil_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
-JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_sqrt_llvm_fast(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);

diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
@@ -1445,7 +1445,6 @@ un_fintrinsic(ceil_float,ceil_llvm)
 un_fintrinsic(floor_float,floor_llvm)
 un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
-un_fintrinsic(sqrt_float,sqrt_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm_fast)
 
 JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)