From e24624541d8399a7e088a68079df1a12f3dd92dd Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Fri, 29 Jun 2018 00:15:33 -0400 Subject: [PATCH] some updates and fixes to inlining cost - Instead of always inlining functions marked at-inline, increase the cost threshold 20x - Don't inline functions inferred not to return - statement_cost no longer needs to look at nested Exprs in general - Fix cost of `:copyast` --- base/array.jl | 8 +- base/compiler/abstractinterpretation.jl | 5 +- base/compiler/optimize.jl | 124 +++++++++++++----------- base/compiler/ssair/inlining.jl | 2 + base/compiler/tfuncs.jl | 4 +- base/compiler/validation.jl | 4 +- base/deprecated.jl | 5 +- base/strings/string.jl | 1 + test/compiler/compiler.jl | 2 +- test/show.jl | 6 +- 10 files changed, 91 insertions(+), 70 deletions(-) diff --git a/base/array.jl b/base/array.jl index 77591d333d5bf..08b3570c5c106 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1666,7 +1666,9 @@ julia> findnext(isodd, A, CartesianIndex(1, 1)) CartesianIndex(1, 1) ``` """ -function findnext(testf::Function, A, start) +@inline findnext(testf::Function, A, start) = findnext_internal(testf, A, start) + +function findnext_internal(testf::Function, A, start) l = last(keys(A)) i = start while i <= l @@ -1854,7 +1856,9 @@ julia> findprev(isodd, A, CartesianIndex(1, 2)) CartesianIndex(2, 1) ``` """ -function findprev(testf::Function, A, start) +@inline findprev(testf::Function, A, start) = findprev_internal(testf, A, start) + +function findprev_internal(testf::Function, A, start) i = start while i >= first(keys(A)) testf(A[i]) && return i diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 5b5e6dac121fd..61770df95c2b4 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -129,8 +129,9 @@ function abstract_call_method_with_const_args(@nospecialize(f), argtypes::Vector code === nothing && return Any code = code::MethodInstance # decide if it's likely to be worthwhile - cache_inlineable = false - if isdefined(code, :inferred) + declared_inline = isdefined(method, :source) && ccall(:jl_ast_flag_inlineable, Bool, (Any,), method.source) + cache_inlineable = declared_inline + if isdefined(code, :inferred) && !cache_inlineable cache_inf = code.inferred if !(cache_inf === nothing) cache_src_inferred = ccall(:jl_ast_flag_inferred, Bool, (Any,), cache_inf) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index d7c81db97445a..ff95de0ae45ef 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -212,14 +212,21 @@ function optimize(opt::OptimizationState, @nospecialize(result)) else force_noinline = true end + if !opt.src.inlineable && result === Union{} + force_noinline = true + end end if force_noinline opt.src.inlineable = false - elseif !opt.src.inlineable && isa(def, Method) + elseif isa(def, Method) bonus = 0 if result ⊑ Tuple && !isbitstype(widenconst(result)) bonus = opt.params.inline_tupleret_bonus end + if opt.src.inlineable + # For functions declared @inline, increase the cost threshold 20x + bonus += opt.params.inline_cost_threshold*19 + end opt.src.inlineable = isinlineable(def, opt, bonus) end nothing @@ -270,86 +277,87 @@ isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T) function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params) head = ex.head - if is_meta_expr_head(head) || head == :copyast # not sure if copyast is right + if is_meta_expr_head(head) return 0 - end - argcost = 0 - for a in ex.args - if a isa Expr - argcost = plus_saturate(argcost, statement_cost(a, -1, src, spvals, slottypes, params)) - end - end - if head == :return || head == :(=) - return argcost - end - extyp = line == -1 ? Any : src.ssavaluetypes[line] - if head == :call + elseif head === :call ftyp = argextype(ex.args[1], src, spvals, slottypes) - if isa(ftyp, Type) - return argcost + f = singleton_type(ftyp) + if isa(f, IntrinsicFunction) + iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 + if !isassigned(T_IFUNC_COST, iidx) + # unknown/unhandled intrinsic + return params.inline_nonleaf_penalty + end + return T_IFUNC_COST[iidx] end - if isa(ftyp, Const) - f = (ftyp::Const).val - if isa(f, IntrinsicFunction) - iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 - if !isassigned(T_IFUNC_COST, iidx) - # unknown/unhandled intrinsic - return plus_saturate(argcost, params.inline_nonleaf_penalty) - end - return plus_saturate(argcost, T_IFUNC_COST[iidx]) + if isa(f, Builtin) + # The efficiency of operations like a[i] and s.b + # depend strongly on whether the result can be + # inferred, so check the type of ex + if f === Main.Core.getfield || f === Main.Core.tuple + # we might like to penalize non-inferrability, but + # tuple iteration/destructuring makes that impossible + # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty) + return 0 + elseif f === Main.Core.arrayref && length(ex.args) >= 3 + atyp = argextype(ex.args[3], src, spvals, slottypes) + return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty end - if isa(f, Builtin) - # The efficiency of operations like a[i] and s.b - # depend strongly on whether the result can be - # inferred, so check the type of ex - if f == Main.Core.getfield || f == Main.Core.tuple - # we might like to penalize non-inferrability, but - # tuple iteration/destructuring makes that - # impossible - # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty) - return argcost - elseif f == Main.Core.arrayref && length(ex.args) >= 3 - atyp = argextype(ex.args[3], src, spvals, slottypes) - return plus_saturate(argcost, isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty) - end - fidx = findfirst(x->x===f, T_FFUNC_KEY) - if fidx === nothing - # unknown/unhandled builtin or anonymous function - # Use the generic cost of a direct function call - return plus_saturate(argcost, 20) - end - return plus_saturate(argcost, T_FFUNC_COST[fidx]) + fidx = findfirst(x->x===f, T_FFUNC_KEY) + if fidx === nothing + # unknown/unhandled builtin or anonymous function + # Use the generic cost of a direct function call + return 20 end + return T_FFUNC_COST[fidx] end - return plus_saturate(argcost, params.inline_nonleaf_penalty) - elseif head == :foreigncall || head == :invoke + return params.inline_nonleaf_penalty + elseif head === :foreigncall || head === :invoke # Calls whose "return type" is Union{} do not actually return: # they are errors. Since these are not part of the typical # run-time of the function, we omit them from # consideration. This way, non-inlined error branches do not # prevent inlining. - return extyp == Union{} ? 0 : plus_saturate(20, argcost) - elseif head == :llvmcall - return plus_saturate(10, argcost) # a wild guess at typical cost - elseif head == :enter + extyp = line == -1 ? Any : src.ssavaluetypes[line] + return extyp === Union{} ? 0 : 20 + elseif head === :return + a = ex.args[1] + if a isa Expr + return statement_cost(a, -1, src, spvals, slottypes, params) + end + return 0 + elseif head === :(=) + if ex.args[1] isa GlobalRef + cost = 20 + else + cost = 0 + end + a = ex.args[2] + if a isa Expr + cost = plus_saturate(cost, statement_cost(a, -1, src, spvals, slottypes, params)) + end + return cost + elseif head === :copyast + return 100 + elseif head === :enter # try/catch is a couple function calls, # but don't inline functions with try/catch # since these aren't usually performance-sensitive functions, # and llvm is more likely to miscompile them when these functions get large return typemax(Int) - elseif head == :gotoifnot + elseif head === :gotoifnot target = ex.args[2]::Int # loops are generally always expensive # but assume that forward jumps are already counted for from # summing the cost of the not-taken branch - return target < line ? plus_saturate(40, argcost) : argcost + return target < line ? 40 : 0 end - return argcost + return 0 end function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params, cost_threshold::Integer=params.inline_cost_threshold) - bodycost = 0 + bodycost::Int = 0 for line = 1:length(body) stmt = body[line] if stmt isa Expr @@ -363,9 +371,9 @@ function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, continue end bodycost = plus_saturate(bodycost, thiscost) - bodycost == typemax(Int) && return false + bodycost > cost_threshold && return false end - return bodycost <= cost_threshold + return true end function is_known_call(e::Expr, @nospecialize(func), src, spvals::SimpleVector, slottypes::Vector{Any} = empty_slottypes) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 5556108fc1bad..8cf537296de65 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -585,6 +585,8 @@ end function singleton_type(@nospecialize(ft)) if isa(ft, Const) return ft.val + elseif ft isa DataType && isdefined(ft, :instance) + return ft.instance end return nothing end diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index df72955368341..a244ecfb363c4 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -188,7 +188,9 @@ add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10) add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10) ## other, misc intrinsics ## add_tfunc(Core.Intrinsics.llvmcall, 3, INT_INF, - (@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 10) + # TODO: Lower this inlining cost. We currently need to prevent inlining llvmcall + # to avoid issues with its IR. + (@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 1000) cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid} cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr) cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr) diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl index 25111131bf5aa..b66832747dc59 100644 --- a/base/compiler/validation.jl +++ b/base/compiler/validation.jl @@ -211,7 +211,7 @@ is_valid_lvalue(@nospecialize(x)) = isa(x, Slot) || isa(x, GlobalRef) function is_valid_argument(@nospecialize(x)) if isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) || - (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck, :copyast))) || + (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck))) || isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) || isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing return true @@ -223,7 +223,7 @@ end function is_valid_rvalue(@nospecialize(x)) is_valid_argument(x) && return true - if isa(x, Expr) && x.head in (:new, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin) + if isa(x, Expr) && x.head in (:new, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast) return true end return false diff --git a/base/deprecated.jl b/base/deprecated.jl index 3ca57a97587e4..7547ccc54610d 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -735,8 +735,9 @@ end @deprecate findprev(A, v, i::Integer) something(findprev(isequal(v), A, i), 0) @deprecate findlast(A, v) something(findlast(isequal(v), A), 0) # to fix ambiguities introduced by deprecations -findnext(pred::Function, A, i::Integer) = invoke(findnext, Tuple{Function, Any, Any}, pred, A, i) -findprev(pred::Function, A, i::Integer) = invoke(findprev, Tuple{Function, Any, Any}, pred, A, i) +# TODO: also remove find*_internal in array.jl +findnext(pred::Function, A, i::Integer) = findnext_internal(pred, A, i) +findprev(pred::Function, A, i::Integer) = findprev_internal(pred, A, i) # also remove deprecation warnings in find* functions in array.jl, sparse/sparsematrix.jl, # and sparse/sparsevector.jl. diff --git a/base/strings/string.jl b/base/strings/string.jl index 00d350ad329ff..83ef6304f17fa 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -80,6 +80,7 @@ pointer(s::String) = unsafe_convert(Ptr{UInt8}, s) pointer(s::String, i::Integer) = pointer(s)+(i-1) ncodeunits(s::String) = Core.sizeof(s) +sizeof(s::String) = Core.sizeof(s) codeunit(s::String) = UInt8 @inline function codeunit(s::String, i::Integer) diff --git a/test/compiler/compiler.jl b/test/compiler/compiler.jl index 93917154f5c52..a3a2b19fe6553 100644 --- a/test/compiler/compiler.jl +++ b/test/compiler/compiler.jl @@ -860,7 +860,7 @@ function break_21369() local fr while true fr = Base.StackTraces.lookup(bt[i])[end] - if !fr.from_c + if !fr.from_c && fr.func !== :error break end i += 1 diff --git a/test/show.jl b/test/show.jl index 395327a567f80..a354af1f139e1 100644 --- a/test/show.jl +++ b/test/show.jl @@ -1269,10 +1269,12 @@ function compute_annotations(f, types) join((strip(string(a, " "^(max_loc_method-length(a)), b)) for (a, b) in zip(la, lb)), '\n') end -g_line() = leaf() # Deliberately not implemented to end up as a leaf after inlining +@noinline leaffunc() = print() + +@inline g_line() = leaffunc() # Test that separate instances of the same function do not get merged -function f_line() +@inline function f_line() g_line() g_line() g_line()