From 67ec007c104d13d52ce52bd889a731a89e9c5db0 Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Wed, 11 May 2022 06:59:44 +0000
Subject: [PATCH] Eager finalizer insertion

This is a variant of the eager-finalization idea (e.g. as seen in
#44056), but with a focus on the mechanism of finalizer insertion,
since I need a similar pass downstream. Integration of EscapeAnalysis
is left to #44056.

My motivation for this change is somewhat different. In particular, I
want to be able to insert finalizer calls such that I can subsequently
SROA the mutable object. This requires a couple of design points that
are more stringent than those of the pass from #44056, so I decided to
prototype them as an independent PR. The primary things I need here
that are not seen in #44056 are:

- The ability to forgo finalizer registration with the runtime entirely
  (requires additional legality analysis)
- The ability to inline the registered finalizer at the deallocation
  point (to enable subsequent SROA)

To this end, adding a finalizer is promoted to a builtin that is
recognized by inference and inlining (such that inference can produce
an inferred version of the finalizer for inlining).

The current status is that this fixes the minimal example I wanted to
have work, but does not yet extend to the motivating case I had.
Nevertheless, I felt that this was a good checkpoint to synchronize
with other efforts along these lines. A few standalone sketches of the
new pieces are appended after the diff for reference.

Currently working demo:

```
julia> const total_deallocations = Ref{Int}(0)
Base.RefValue{Int64}(0)

julia> mutable struct DoAlloc
           function DoAlloc()
               this = new()
               Core._add_finalizer(this, function(this)
                   global total_deallocations[] += 1
               end)
               return this
           end
       end

julia> function foo()
           for i = 1:1000
               DoAlloc()
           end
       end
foo (generic function with 1 method)

julia> @code_llvm foo()
;  @ REPL[3]:1 within `foo`
define void @julia_foo_111() #0 {
top:
  %.promoted = load i64, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:2 within `foo`
  %0 = add i64 %.promoted, 1000
;  @ REPL[3] within `foo`
  store i64 %0, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:4 within `foo`
  ret void
}
```
---
 base/compiler/abstractinterpretation.jl |  32 +++++-
 base/compiler/optimize.jl               |   5 +-
 base/compiler/ssair/inlining.jl         | 136 ++++++++++++++++------
 base/compiler/ssair/ir.jl               |  60 +++++-----
 base/compiler/ssair/passes.jl           | 145 ++++++++++++++++++++++--
 base/compiler/ssair/show.jl             |   2 +
 base/compiler/stmtinfo.jl               |  11 ++
 base/compiler/tfuncs.jl                 |   2 +
 base/compiler/types.jl                  |  27 ++++-
 base/gcutils.jl                         |  17 +--
 src/builtin_proto.h                     |   2 +
 src/builtins.c                          |   9 ++
 src/codegen.cpp                         |   3 +-
 src/gc.c                                |   6 +-
 src/julia_internal.h                    |   1 +
 src/staticdata.c                        |   2 +-
 test/compiler/inline.jl                 |  86 ++++++++++++++
 17 files changed, 447 insertions(+), 99 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 1449b911ee629c..d9714e581e607c 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1589,6 +1589,16 @@ function invoke_rewrite(xs::Vector{Any})
     return newxs
 end
 
+function abstract_add_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+    if length(argtypes) == 3
+        tt = argtypes[3]
+        finalizer_argvec = Any[argtypes[3], argtypes[2]]
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1)
+        return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects))
+    end
+    return CallMeta(Nothing, Effects(), false)
+end
+
 # call where the function is known exactly
 function
abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo, sv::InferenceState, @@ -1603,6 +1613,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), return abstract_invoke(interp, arginfo, sv) elseif f === modifyfield! return abstract_modifyfield!(interp, argtypes, sv) + elseif f === Core._add_finalizer + return abstract_add_finalizer(interp, argtypes, sv) end rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods) return CallMeta(rt, builtin_effects(f, argtypes, rt), false) @@ -1998,7 +2010,8 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), effects.effect_free ? ALWAYS_TRUE : TRISTATE_UNKNOWN, effects.nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN, effects.terminates_globally ? ALWAYS_TRUE : TRISTATE_UNKNOWN, - #=nonoverlayed=#true + #=nonoverlayed=#true, + TRISTATE_UNKNOWN )) else tristate_merge!(sv, EFFECTS_UNKNOWN) @@ -2089,6 +2102,19 @@ function abstract_eval_global(M::Module, s::Symbol, frame::InferenceState) return ty end +function abstract_eval_global_assignment(interp::AbstractInterpreter, frame::InferenceState, lhs::GlobalRef, @nospecialize(rhs)) + M = lhs.mod + s = lhs.name + nothrow = false + if isdefined(M, s) && !isconst(M, s) + ty = ccall(:jl_binding_type, Any, (Any, Any), M, s) + nothrow = ty === nothing || rhs ⊑ ty + end + tristate_merge!(frame, Effects(EFFECTS_TOTAL, + effect_free=TRISTATE_UNKNOWN, + nothrow=nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN)) +end + abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.src) function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) typ = (src.ssavaluetypes::Vector{Any})[s.id] @@ -2321,9 +2347,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) if isa(lhs, SlotNumber) changes = StateUpdate(lhs, VarState(t, false), changes, false) elseif isa(lhs, GlobalRef) - tristate_merge!(frame, Effects(EFFECTS_TOTAL, - effect_free=TRISTATE_UNKNOWN, - nothrow=TRISTATE_UNKNOWN)) + abstract_eval_global_assignment(interp, frame, lhs, t) elseif !isa(lhs, SSAValue) tristate_merge!(frame, EFFECTS_UNKNOWN) end diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 3c9e9cf4c21d6a..0dc6c7857b0361 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -27,6 +27,9 @@ const IR_FLAG_THROW_BLOCK = 0x01 << 3 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. 
const IR_FLAG_EFFECT_FREE = 0x01 << 4 +# This statement was proven not to throw +const IR_FLAG_NOTHROW = 0x01 << 5 + const TOP_TUPLE = GlobalRef(Core, :tuple) @@ -542,7 +545,7 @@ function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) # @timeit "verify 2" verify_ir(ir) @timeit "compact 2" ir = compact!(ir) - @timeit "SROA" ir = sroa_pass!(ir) + @timeit "SROA" ir = sroa_pass!(ir, sv.inlining) @timeit "ADCE" ir = adce_pass!(ir) @timeit "type lift" ir = type_lift_pass!(ir) @timeit "compact 3" ir = compact!(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index f07757eafc6e10..15f2e125d4ba87 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -306,21 +306,17 @@ function finish_cfg_inline!(state::CFGInliningState) end end -function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, - linetable::Vector{LineInfoNode}, item::InliningTodo, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) - # Ok, do the inlining here - spec = item.spec::ResolvedInliningSpec - sparam_vals = item.mi.sparam_vals - def = item.mi.def::Method +function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode, + inlinee::Method, + inlined_at::Int32) + coverage = coverage_enabled(inlinee.module) linetable_offset::Int32 = length(linetable) # Append the linetable of the inlined function to our line table - inlined_at = compact.result[idx][:line] topline::Int32 = linetable_offset + Int32(1) - coverage = coverage_enabled(def.module) coverage_by_path = JLOptions().code_coverage == 3 - push!(linetable, LineInfoNode(def.module, def.name, def.file, def.line, inlined_at)) - oldlinetable = spec.ir.linetable + push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at)) + oldlinetable = inlinee_ir.linetable + extra_coverage_line = 0 for oldline in 1:length(oldlinetable) entry = oldlinetable[oldline] if !coverage && coverage_by_path && is_file_tracked(entry.file) @@ -339,8 +335,25 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector end push!(linetable, newentry) end - if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline - insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline)) + if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline + extra_coverage_line = topline + end + return linetable_offset, extra_coverage_line +end + +function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, + linetable::Vector{LineInfoNode}, item::InliningTodo, + boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + # Ok, do the inlining here + spec = item.spec::ResolvedInliningSpec + sparam_vals = item.mi.sparam_vals + def = item.mi.def::Method + inlined_at = compact.result[idx][:line] + linetable_offset::Int32 = length(linetable) + topline::Int32 = linetable_offset + Int32(1) + linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, item.spec.ir, def, inlined_at) + if extra_coverage_line != 0 + insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) end if def.isva nargs_def = Int(def.nargs::Int32) @@ -847,12 +860,8 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) return compileable_specialization(et, match, effects) end - if isa(src, IRCode) - src = copy(src) - 
end - et !== nothing && push!(et, mi) - return InliningTodo(mi, src, effects) + return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) end function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8) @@ -874,7 +883,8 @@ function validate_sparams(sparams::SimpleVector) end function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, - flag::UInt8, state::InliningState) + flag::UInt8, state::InliningState, + do_resolve::Bool = true) method = match.method spec_types = match.spec_types @@ -908,7 +918,7 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, todo = InliningTodo(mi, match, argtypes) # If we don't have caches here, delay resolving this MethodInstance # until the batch inlining step (or an external post-processing pass) - state.mi_cache === nothing && return todo + do_resolve && state.mi_cache === nothing && return todo return resolve_todo(todo, state, flag) end @@ -916,15 +926,15 @@ function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) return InliningTodo(mi, ResolvedInliningSpec(ir, linear_inline_eligible(ir), effects)) end -function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Array{UInt8, 1}}, effects::Effects) - if !isa(src, CodeInfo) - src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo - end +function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1}) + src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo + return retrieve_ir_for_inlining(mi, src) +end - @timeit "inline IR inflation" begin; - return InliningTodo(mi, inflate_ir(src, mi)::IRCode, effects) - end +retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = @timeit "inline IR inflation" begin; + inflate_ir(src, mi)::IRCode end +retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir) function handle_single_case!( ir::IRCode, idx::Int, stmt::Expr, @@ -1206,7 +1216,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto end end - if sig.f !== Core.invoke && is_builtin(sig) + if sig.f !== Core.invoke && sig.f !== Core._add_finalizer && is_builtin(sig) # No inlining for builtins (other invoke/apply/typeassert) return nothing end @@ -1223,9 +1233,10 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto end # TODO inline non-`isdispatchtuple`, union-split callsites? 
-function analyze_single_call!( - ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) +function compute_inlining_cases( + infos::Vector{MethodMatchInfo}, flag::UInt8, + sig::Signature, state::InliningState, + do_resolve::Bool = true) argtypes = sig.argtypes cases = InliningCase[] local any_fully_covered = false @@ -1242,7 +1253,7 @@ function analyze_single_call!( continue end for match in meth - handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true) + handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve) any_fully_covered |= match.fully_covers end end @@ -1252,8 +1263,18 @@ function analyze_single_call!( filter!(case::InliningCase->isdispatchtuple(case.sig), cases) end - handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases, - handled_all_cases & any_fully_covered, todo, state.params) + return cases, handled_all_cases & any_fully_covered +end + +function analyze_single_call!( + ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, + sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) + + r = compute_inlining_cases(infos, flag, sig, state) + r === nothing && return nothing + cases, all_covered = r + handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, + all_covered, todo, state.params) end # similar to `analyze_single_call!`, but with constant results @@ -1305,14 +1326,15 @@ end function handle_match!( match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool = false) + cases::Vector{InliningCase}, allow_abstract::Bool = false, + do_resolve::Bool = true) spec_types = match.spec_types allow_abstract || isdispatchtuple(spec_types) || return false # we may see duplicated dispatch signatures here when a signature gets widened # during abstract interpretation: for the purpose of inlining, we can just skip # processing this dispatch candidate _any(case->case.sig === spec_types, cases) && return true - item = analyze_method!(match, argtypes, flag, state) + item = analyze_method!(match, argtypes, flag, state, do_resolve) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true @@ -1427,6 +1449,48 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) continue end + # Handle finalizer + if sig.f === Core._add_finalizer + if isa(info, FinalizerInfo) + # Only inline finalizers that are known nothrow and notls. 
+ # This avoids having to set up state for finalizer isolation + (is_nothrow(info.effects) && is_notls(info.effects)) || continue + + info = info.info + if isa(info, MethodMatchInfo) + infos = MethodMatchInfo[info] + elseif isa(info, UnionSplitInfo) + infos = info.matches + else + continue + end + + ft = argextype(stmt.args[3], ir) + has_free_typevars(ft) && return nothing + f = singleton_type(ft) + argtypes = Vector{Any}(undef, 2) + argtypes[1] = ft + argtypes[2] = argextype(stmt.args[2], ir) + sig = Signature(f, ft, argtypes) + + cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false) + length(cases) == 0 && continue + if all_covered && length(cases) == 1 + if isa(cases[1], InliningCase) + case1 = cases[1].item + if isa(case1, InliningTodo) + push!(stmt.args, true) + push!(stmt.args, case1.mi) + elseif isa(case1, InvokeCase) + push!(stmt.args, false) + push!(stmt.args, case1.invoke) + end + end + end + continue + end + end + # if inference arrived here with constant-prop'ed result(s), # we can perform a specialized analysis for just this case if isa(info, ConstCallInfo) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 2f1359e4002aea..5907e76caed7a6 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -166,36 +166,6 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue} # SSA-indexed nodes - -struct NewInstruction - stmt::Any - type::Any - info::Any - # If nothing, copy the line from previous statement - # in the insertion location - line::Union{Int32, Nothing} - flag::UInt8 - - ## Insertion options - - # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). - # Don't bother redoing so on insertion. - effect_free_computed::Bool - NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), - line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = - new(stmt, type, info, line, flag, effect_free_computed) -end -NewInstruction(@nospecialize(stmt), @nospecialize(type)) = - NewInstruction(stmt, type, nothing) -NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = - NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) - -effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) -non_effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) - - struct InstructionStream inst::Vector{Any} type::Vector{Any} @@ -295,6 +265,36 @@ function add!(new::NewNodeStream, pos::Int, attach_after::Bool) end copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info)) +struct NewInstruction + stmt::Any + type::Any + info::Any + # If nothing, copy the line from previous statement + # in the insertion location + line::Union{Int32, Nothing} + flag::UInt8 + + ## Insertion options + + # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). + # Don't bother redoing so on insertion. 
+ effect_free_computed::Bool + NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), + line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = + new(stmt, type, info, line, flag, effect_free_computed) +end +NewInstruction(@nospecialize(stmt), @nospecialize(type)) = + NewInstruction(stmt, type, nothing) +NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = + NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) +NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) = + NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? meta[:line] : line, meta[:flag], true) + +effect_free(inst::NewInstruction) = + NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) +non_effect_free(inst::NewInstruction) = + NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) + struct IRCode stmts::InstructionStream argtypes::Vector{Any} diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index c2597363df2824..9f3aacbc65326f 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -14,6 +14,7 @@ GetfieldUse(idx::Int) = SSAUse(:getfield, idx) PreserveUse(idx::Int) = SSAUse(:preserve, idx) NoPreserve() = SSAUse(:nopreserve, 0) IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx) +AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx) """ du::SSADefUse @@ -735,7 +736,7 @@ its argument). In a case when all usages are fully eliminated, `struct` allocation may also be erased as a result of succeeding dead code elimination. """ -function sroa_pass!(ir::IRCode) +function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() @@ -744,7 +745,7 @@ function sroa_pass!(ir::IRCode) for ((_, idx), stmt) in compact # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement) isa(stmt, Expr) || continue - is_setfield = is_isdefined = false + is_setfield = is_isdefined = is_add_finalizer = false field_ordering = :unspecified if is_known_call(stmt, setfield!, compact) 4 <= length(stmt.args) <= 5 || continue @@ -767,6 +768,13 @@ function sroa_pass!(ir::IRCode) field_ordering = argextype(stmt.args[4], compact) widenconst(field_ordering) === Bool && (field_ordering = :unspecified) end + elseif is_known_call(stmt, Core._add_finalizer, compact) + 3 <= length(stmt.args) <= 5 || continue + # Inlining performs legality checks on the finalizer to determine + # whether or not we may inline it. If so, it appends extra arguments + # at the end of the intrinsic. Detect that here. 
+ length(stmt.args) == 5 || continue + is_add_finalizer = true elseif isexpr(stmt, :foreigncall) nccallargs = length(stmt.args[3]::SimpleVector) preserved = Int[] @@ -824,9 +832,10 @@ function sroa_pass!(ir::IRCode) # analyze this `getfield` / `isdefined` / `setfield!` call - field = try_compute_field_stmt(compact, stmt) - field === nothing && continue - + if !is_add_finalizer + field = try_compute_field_stmt(compact, stmt) + field === nothing && continue + end val = stmt.args[2] struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) @@ -864,14 +873,16 @@ function sroa_pass!(ir::IRCode) push!(defuse.defs, idx) elseif is_isdefined push!(defuse.uses, IsdefinedUse(idx)) + elseif is_add_finalizer + push!(defuse.uses, AddFinalizerUse(idx)) else push!(defuse.uses, GetfieldUse(idx)) end union!(mid, intermediaries) end continue - elseif is_setfield - continue # invalid `setfield!` call, but just ignore here + elseif is_setfield || is_add_finalizer + continue # invalid `setfield!` or `_add_finalizer` call, but just ignore here elseif is_isdefined continue # TODO? end @@ -921,7 +932,7 @@ function sroa_pass!(ir::IRCode) used_ssas = copy(compact.used_ssas) simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1) ir = complete(compact) - sroa_mutables!(ir, defuses, used_ssas, lazydomtree) + sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining) return ir else simple_dce!(compact) @@ -929,7 +940,60 @@ function sroa_pass!(ir::IRCode) end end -function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree) +function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState) + code = get(inlining.mi_cache, mi, nothing) + if code isa CodeInstance + if use_const_api(code) + # No code in the function - Nothing to do + inlining.et !== nothing && push!(inlining.et, mi) + return true + end + src = code.inferred + else + src = code + end + + src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[]) + src === nothing && return false + src = retrieve_ir_for_inlining(mi, src) + + # For now: Require finalizer to only have one basic block + length(src.cfg.blocks) == 1 || return false + + # Ok, we're committed to inlining the finalizer + inlining.et !== nothing && push!(inlining.et, mi) + + linetable_offset, extra_coverage_line = ir_inline_linetable!(ir.linetable, src, mi.def, ir[SSAValue(idx)][:line]) + if extra_coverage_line != 0 + insert_node!(ir, idx, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) + end + + # TODO: Use the actual inliner here rather than open coding this special + # purpose inliner. 
+ spvals = mi.sparam_vals + ssa_rename = Vector{Any}(undef, length(src.stmts)) + for idx′ = 1:length(src.stmts) + urs = userefs(src[SSAValue(idx′)][:inst]) + for ur in urs + if isa(ur[], SSAValue) + ur[] = ssa_rename[ur[].id] + elseif isa(ur[], Argument) + ur[] = argexprs[ur[].n] + elseif isexpr(ur[], :static_parameter) + ur[] = spvals[ur[].args[1]] + end + end + # TODO: Scan newly added statement into the sroa defuse struct + stmt = urs[] + isa(stmt, ReturnNode) && continue + inst = src[SSAValue(idx′)] + ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, inst; line = inst[:line] + linetable_offset), true) + end + return true +end + +is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0 +function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) # Check if there are any uses we did not account for. If so, the variable @@ -952,9 +1016,72 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # error at runtime, but is not illegal to have in the IR. ismutabletype(typ) || continue typ = typ::DataType + # First check for any add_finalizer calls + add_finalizer_idx = nothing + for use in defuse.uses + if use.kind === :add_finalizer + # For now: Only allow one add_finalizer per allocation + add_finalizer_idx !== nothing && @goto skip + add_finalizer_idx = use.idx + end + end + if add_finalizer_idx !== nothing + # For now: Require that all uses and defs are in the same basic block, + # so that live range calculations are easy. + bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)] + minval = typemax(Int) + maxval = 0 + + check_in_range(defuse) = check_in_range(defuse.idx) + function check_in_range(didx::Int) + didx in bb.stmts || return false + if didx < minval + minval = didx + end + if didx > maxval + maxval = didx + end + return true + end + + check_in_range(idx) || continue + _all(check_in_range, defuse.uses) || continue + _all(check_in_range, defuse.defs) || continue + + # For now: Require all statements in the basic block range to be + # nothrow. + all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval) + all_nothrow || continue + + # Ok, finalizer rewrite is legal. 
add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst]
+            argexprs = Any[add_finalizer_stmt.args[3], add_finalizer_stmt.args[2]]
+            may_inline = add_finalizer_stmt.args[4]::Bool
+            mi = add_finalizer_stmt.args[5]::Union{MethodInstance, Nothing}
+            if may_inline && mi !== nothing
+                if try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[5], inlining)
+                    @goto done_finalizer
+                end
+                mi = compileable_specialization(inlining.et, mi, Effects()).invoke
+            end
+            if mi !== nothing
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:invoke, mi, argexprs...), Nothing),
+                    true)
+            else
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:call, argexprs...), Nothing),
+                    true)
+            end
+            @label done_finalizer
+            # Erase call to add_finalizer
+            ir[SSAValue(add_finalizer_idx)][:inst] = nothing
+            continue
+        end
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
         all_eliminated = all_forwarded = true
+        has_add_finalizer = false
         for use in defuse.uses
             if use.kind === :preserve
                 for du in fielddefuse
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index f4c826a45156fd..4811125926c2c7 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -802,6 +802,8 @@ function Base.show(io::IO, e::Core.Compiler.Effects)
     printstyled(io, string(tristate_letter(e.nothrow), 'n'); color=tristate_color(e.nothrow))
     print(io, ',')
     printstyled(io, string(tristate_letter(e.terminates), 't'); color=tristate_color(e.terminates))
+    print(io, ',')
+    printstyled(io, string(tristate_letter(e.notls), 's'); color=tristate_color(e.notls))
     print(io, ')')
     e.nonoverlayed || printstyled(io, '′'; color=:red)
 end
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 3f9a562061a12c..99f39563946159 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -183,4 +183,15 @@ struct ReturnTypeCallInfo
     info::Any
 end
 
+"""
+    info::FinalizerInfo
+
+Represents the information of a potential call to the finalizer on the given
+object type.
+"""
+struct FinalizerInfo
+    info::Any
+    effects::Effects
+end
+
 @specialize
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 87df43ec92224b..90c10c7e1aea38 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -559,6 +559,8 @@ add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_elt
 add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
 add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0)
+add_tfunc(Core._add_finalizer, 2, 2, (@nospecialize args...)->Nothing, 5)
+
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index e594c233353d92..12efbf8e7207eb 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -45,6 +45,11 @@ The effects are composed of the following set of different properties:
 - `terminates::TriState`: this method is guaranteed to terminate
 - `nonoverlayed::Bool`: indicates that any methods that may be called within this method
   are not defined in an [overlayed method table](@ref OverlayMethodTable)
+- `notls::TriState`: this method does not access any state bound to the current
+  task and may thus be moved to a different task without changing observable
+  behavior. Note that this currently implies `noyield` as well, since
+  yielding modifies the state of the current task, though this may be split
+  in the future.
 See [`Base.@assume_effects`](@ref) for more detailed explanation on the definitions of these properties.
 
 Along the abstract interpretation, `Effects` at each statement are analyzed locally and
@@ -67,6 +72,7 @@ struct Effects
     nothrow::TriState
     terminates::TriState
     nonoverlayed::Bool
+    notls::TriState
     # This effect is currently only tracked in inference and modified
     # :consistent before caching. We may want to track it in the future.
     inbounds_taints_consistency::Bool
@@ -76,20 +82,22 @@ function Effects(
     effect_free::TriState,
     nothrow::TriState,
     terminates::TriState,
-    nonoverlayed::Bool)
+    nonoverlayed::Bool,
+    notls::TriState)
     return Effects(
         consistent,
         effect_free,
         nothrow,
         terminates,
         nonoverlayed,
+        notls,
         false)
 end
 
-const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true)
-const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, TRISTATE_UNKNOWN, ALWAYS_TRUE, true)
-const EFFECTS_UNKNOWN = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, true) # mostly unknown, but it's not overlayed at least (e.g. it's not a call)
-const EFFECTS_UNKNOWN′ = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, false) # unknown, really
+const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true, ALWAYS_TRUE)
+const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, TRISTATE_UNKNOWN, ALWAYS_TRUE, true, ALWAYS_TRUE)
+const EFFECTS_UNKNOWN = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, true, TRISTATE_UNKNOWN) # mostly unknown, but it's not overlayed at least (e.g.
it's not a call) +const EFFECTS_UNKNOWN′ = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, false, TRISTATE_UNKNOWN) # unknown, really function Effects(e::Effects = EFFECTS_UNKNOWN′; consistent::TriState = e.consistent, @@ -97,6 +105,7 @@ function Effects(e::Effects = EFFECTS_UNKNOWN′; nothrow::TriState = e.nothrow, terminates::TriState = e.terminates, nonoverlayed::Bool = e.nonoverlayed, + notls::TriState = e.notls, inbounds_taints_consistency::Bool = e.inbounds_taints_consistency) return Effects( consistent, @@ -104,6 +113,7 @@ function Effects(e::Effects = EFFECTS_UNKNOWN′; nothrow, terminates, nonoverlayed, + notls, inbounds_taints_consistency) end @@ -111,6 +121,7 @@ is_consistent(effects::Effects) = effects.consistent === ALWAYS_TRUE is_effect_free(effects::Effects) = effects.effect_free === ALWAYS_TRUE is_nothrow(effects::Effects) = effects.nothrow === ALWAYS_TRUE is_terminates(effects::Effects) = effects.terminates === ALWAYS_TRUE +is_notls(effects::Effects) = effects.notls === ALWAYS_TRUE is_nonoverlayed(effects::Effects) = effects.nonoverlayed is_concrete_eval_eligible(effects::Effects) = @@ -132,7 +143,8 @@ function encode_effects(e::Effects) (e.effect_free.state << 2) | (e.nothrow.state << 4) | (e.terminates.state << 6) | - (UInt32(e.nonoverlayed) << 8) + (UInt32(e.nonoverlayed) << 8) | + (UInt32(e.notls.state) << 9) end function decode_effects(e::UInt32) return Effects( @@ -141,6 +153,7 @@ function decode_effects(e::UInt32) TriState((e >> 4) & 0x03), TriState((e >> 6) & 0x03), _Bool( (e >> 8) & 0x01), + TriState((e >> 9) & 0x03), false) end @@ -155,6 +168,8 @@ function tristate_merge(old::Effects, new::Effects) tristate_merge( old.terminates, new.terminates), old.nonoverlayed & new.nonoverlayed, + tristate_merge( + old.notls, new.notls), old.inbounds_taints_consistency | new.inbounds_taints_consistency) end diff --git a/base/gcutils.jl b/base/gcutils.jl index d17301a1be9b07..6918f547caa030 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -4,6 +4,12 @@ ==(w::WeakRef, v) = isequal(w.value, v) ==(w, v::WeakRef) = isequal(w, v.value) +function _check_mutable(@nospecialize(o)) @noinline + if !ismutable(o) + error("objects of type ", typeof(o), " cannot be finalized") + end +end + """ finalizer(f, x) @@ -42,18 +48,13 @@ end ``` """ function finalizer(@nospecialize(f), @nospecialize(o)) - if !ismutable(o) - error("objects of type ", typeof(o), " cannot be finalized") - end - ccall(:jl_gc_add_finalizer_th, Cvoid, (Ptr{Cvoid}, Any, Any), - Core.getptls(), o, f) + _check_mutable(o) + Core._add_finalizer(o, f) return o end function finalizer(f::Ptr{Cvoid}, o::T) where T @inline - if !ismutable(o) - error("objects of type ", typeof(o), " cannot be finalized") - end + _check_mutable(o) ccall(:jl_gc_add_ptr_finalizer, Cvoid, (Ptr{Cvoid}, Any, Ptr{Cvoid}), Core.getptls(), o, f) return o diff --git a/src/builtin_proto.h b/src/builtin_proto.h index c820751ab56e23..94f07a71642034 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -57,6 +57,7 @@ DECLARE_BUILTIN(_typevar); DECLARE_BUILTIN(donotdelete); DECLARE_BUILTIN(getglobal); DECLARE_BUILTIN(setglobal); +DECLARE_BUILTIN(_add_finalizer); JL_CALLABLE(jl_f_invoke_kwsorter); #ifdef DEFINE_BUILTIN_GLOBALS @@ -73,6 +74,7 @@ JL_CALLABLE(jl_f_get_binding_type); JL_CALLABLE(jl_f_set_binding_type); JL_CALLABLE(jl_f_donotdelete); JL_CALLABLE(jl_f_setglobal); +JL_CALLABLE(jl_f__add_finalizer); #ifdef __cplusplus } diff --git a/src/builtins.c b/src/builtins.c index 90dc0ec6a0e5c4..8d9bc01c8416bc 100644 --- 
a/src/builtins.c +++ b/src/builtins.c @@ -1591,6 +1591,14 @@ JL_CALLABLE(jl_f_donotdelete) return jl_nothing; } +JL_CALLABLE(jl_f__add_finalizer) +{ + JL_NARGS(_add_finalizer, 2, 3); + jl_task_t *ct = jl_current_task; + jl_gc_add_finalizer_(ct->ptls, args[0], args[1]); + return jl_nothing; +} + static int equiv_field_types(jl_value_t *old, jl_value_t *ft) { size_t nf = jl_svec_len(ft); @@ -1961,6 +1969,7 @@ void jl_init_primitives(void) JL_GC_DISABLED jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody); add_builtin_func("_equiv_typedef", jl_f__equiv_typedef); jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete); + add_builtin_func("_add_finalizer", jl_f__add_finalizer); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); diff --git a/src/codegen.cpp b/src/codegen.cpp index fdf422bb07a7d4..70a892d8e00416 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1125,7 +1125,8 @@ static const auto &builtin_func_map() { { jl_f_arrayset_addr, new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} }, { jl_f_arraysize_addr, new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} }, { jl_f_apply_type_addr, new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} }, - { jl_f_donotdelete_addr, new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} } + { jl_f_donotdelete_addr, new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }, + { jl_f__add_finalizer, new JuliaFunction{XSTR(jl_f__add_finalizer), get_func_sig, get_func_attrs} } }; return builtins; } diff --git a/src/gc.c b/src/gc.c index e299661db87d42..2d34df0edcd88b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -488,7 +488,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) run_finalizers(ct); } -static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); arraylist_t *a = &ptls->finalizers; @@ -518,7 +518,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT { - gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); } JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT @@ -527,7 +527,7 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); } else { - gc_add_finalizer_(ptls, v, f); + jl_gc_add_finalizer_(ptls, v, f); } } diff --git a/src/julia_internal.h b/src/julia_internal.h index 02130ef963198f..be5716be997288 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -466,6 +466,7 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT; void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT; void jl_gc_run_all_finalizers(jl_task_t *ct); void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task); +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT; void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; diff --git a/src/staticdata.c b/src/staticdata.c index 27fbb0fb336cf1..a6a6b3d8e62ee6 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -314,7 +314,7 @@ 
static const jl_fptr_args_t id_to_fptrs[] = {
     &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype,
     &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type,
     &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete,
-    &jl_f_getglobal, &jl_f_setglobal,
+    &jl_f_getglobal, &jl_f_setglobal, &jl_f__add_finalizer,
     NULL };
 
 typedef struct {
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 4f2e8f8783f584..75eed6dd772e5f 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -1279,3 +1279,89 @@ end
 # Test that inlining doesn't accidentally delete a bad return_type call
 f_bad_return_type() = Core.Compiler.return_type(+, 1, 2)
 @test_throws MethodError f_bad_return_type()
+
+# Test that we can inline a finalizer for a struct that does not otherwise escape
+global total_deallocations::Int = 0
+
+mutable struct DoAllocNoEscape
+    function DoAllocNoEscape()
+        finalizer(new()) do this
+            global total_deallocations += 1
+        end
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscape()
+        end
+    end
+    @test count(isnew, src.code) == 0
+end
+
+# Test that finalizer elision doesn't cause a throw to be inlined into a function
+# that shouldn't have it
+const finalizer_should_throw = Ref{Bool}(true)
+mutable struct DoAllocFinalizerThrows
+    function DoAllocFinalizerThrows()
+        finalizer(new()) do this
+            finalizer_should_throw[] && error("Unexpected finalizer throw")
+        end
+    end
+end
+
+function f_finalizer_throws()
+    prev = GC.enable(false)
+    for i = 1:100
+        DoAllocFinalizerThrows()
+    end
+    finalizer_should_throw[] = false
+    GC.enable(prev)
+    GC.gc()
+    return true
+end
+
+@test f_finalizer_throws()
+
+# Test finalizers with static parameters
+global last_finalizer_type::Type = Any
+mutable struct DoAllocNoEscapeSparam{T}
+    x::T
+    function finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T}
+        global total_deallocations += 1
+        global last_finalizer_type = T
+    end
+    function DoAllocNoEscapeSparam{T}(x::T) where {T}
+        finalizer(finalizer_sparam, new{T}(x))
+    end
+end
+DoAllocNoEscapeSparam(x::T) where {T} = DoAllocNoEscapeSparam{T}(x)
+
+let src = code_typed1(Tuple{Any}) do x
+        for i = 1:1000
+            DoAllocNoEscapeSparam(x)
+        end
+    end
+    # This requires more inlining enhancements. For now just make sure this
+    # doesn't error.
+    @test count(isnew, src.code) in (0, 1) # == 0
+end
+
+# Test noinline finalizer
+@noinline function noinline_finalizer(d)
+    global total_deallocations += 1
+end
+mutable struct DoAllocNoEscapeNoInline
+    function DoAllocNoEscapeNoInline()
+        finalizer(noinline_finalizer, new())
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscapeNoInline()
+        end
+    end
+    @test count(isnew, src.code) == 1
+    @test count(isinvoke(:noinline_finalizer), src.code) == 1
+end
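
Appendix: standalone sketches (not part of the patch).

As a quick end-to-end sanity check that does not require reading LLVM IR, the effect of the optimization can also be observed by measuring allocations directly. This is a hedged sketch: `NoEscape`, `finalize_count`, and `churn` are illustrative names invented for this example, and the zero-allocation result is what I would expect on a build with this patch (an unpatched build should report a nonzero count):

```julia
# Illustrative end-to-end check; names here are made up for the example.
const finalize_count = Ref{Int}(0)   # a Ref keeps the finalizer allocation-free

mutable struct NoEscape
    function NoEscape()
        this = new()
        # `finalizer` now registers through the `Core._add_finalizer` builtin
        finalizer(this) do _
            finalize_count[] += 1
        end
        return this
    end
end

function churn(n)
    for _ in 1:n
        NoEscape()
    end
    return nothing
end

churn(10)                      # force compilation first
@show @allocated churn(1000)   # expected: 0 on a build with this patch
```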
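
The legality analysis in `sroa_mutables!` boils down to a simple live-range computation. Here is a standalone mirror of that check under the simplifying assumptions the pass states (one `_add_finalizer` per allocation, everything in a single basic block); `finalizer_rewrite_range` is a name local to this sketch, not a function defined in the patch:

```julia
# Mirror of the check_in_range logic in sroa_mutables!: every def and use of
# the allocation must fall inside one basic block's statement range. If they
# do, the finalizer can be inserted right after the last such statement, and
# every statement in the returned range must additionally be nothrow.
function finalizer_rewrite_range(bb_stmts::UnitRange{Int}, positions::Vector{Int})
    minval, maxval = typemax(Int), 0
    for idx in positions
        idx in bb_stmts || return nothing  # escapes the block: rewrite is illegal
        minval = min(minval, idx)
        maxval = max(maxval, idx)
    end
    return minval:maxval
end

@assert finalizer_rewrite_range(1:10, [2, 5, 7]) == 2:7    # insert after stmt 7
@assert finalizer_rewrite_range(1:10, [2, 12]) === nothing # use outside the block
```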
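
Finally, since the patch widens the effects encoding, here is a standalone mirror of the new bit layout for reference: two bits per tristate property, one bit for `nonoverlayed`, and the new `notls` tristate in bits 9-10. The definitions below are local to this sketch, not `Core.Compiler` API:

```julia
# Local stand-ins for Core.Compiler's TriState lattice values.
struct TriState
    state::UInt8
end
const ALWAYS_FALSE     = TriState(0x00)
const ALWAYS_TRUE      = TriState(0x01)
const TRISTATE_UNKNOWN = TriState(0x02)

# Same shifts as encode_effects after this patch.
encode(consistent::TriState, effect_free::TriState, nothrow::TriState,
       terminates::TriState, nonoverlayed::Bool, notls::TriState) =
    UInt32(consistent.state)       |
    UInt32(effect_free.state) << 2 |
    UInt32(nothrow.state)     << 4 |
    UInt32(terminates.state)  << 6 |
    UInt32(nonoverlayed)      << 8 |
    UInt32(notls.state)       << 9

# Recover the notls tristate, matching decode_effects' `(e >> 9) & 0x03`.
decode_notls(e::UInt32) = TriState(UInt8((e >> 9) & 0x03))

let e = encode(ALWAYS_TRUE, ALWAYS_TRUE, TRISTATE_UNKNOWN, ALWAYS_TRUE, true, TRISTATE_UNKNOWN)
    @assert decode_notls(e) === TRISTATE_UNKNOWN
end
```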