From 8f4ed29524c07a061eb37108a886ce81b4d0b3e5 Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Wed, 11 May 2022 06:59:44 +0000
Subject: [PATCH] Eager finalizer insertion

This is a variant of the eager-finalization idea (e.g. as seen in
#44056), but with a focus on the mechanism of finalizer insertion,
since I need a similar pass downstream. Integration of EscapeAnalysis
is left to #44056.

My motivation for this change is somewhat different. In particular, I
want to be able to insert finalizer calls such that I can subsequently
SROA the mutable object. This requires a couple of design points that
are more stringent than the pass from #44056, so I decided to prototype
them as an independent PR. The primary things I need here that are not
seen in #44056 are:

- The ability to forgo finalizer registration with the runtime entirely
  (requires additional legality analysis)
- The ability to inline the registered finalizer at the deallocation
  point (to enable subsequent SROA)

To this end, adding a finalizer is promoted to a builtin that is
recognized by inference and inlining (such that inference can produce
an inferred version of the finalizer for inlining).

The current status is that this fixes the minimal example I wanted to
have work, but does not yet extend to the motivating case I had.
Nevertheless, I felt that this was a good checkpoint to synchronize
with other efforts along these lines.

Currently working demo:

```
julia> const total_deallocations = Ref{Int}(0)
Base.RefValue{Int64}(0)

julia> mutable struct DoAlloc
           function DoAlloc()
               this = new()
               Core._add_finalizer(this, function(this)
                   global total_deallocations[] += 1
               end)
               return this
           end
       end

julia> function foo()
           for i = 1:1000
               DoAlloc()
           end
       end
foo (generic function with 1 method)

julia> @code_llvm foo()
;  @ REPL[3]:1 within `foo`
define void @julia_foo_111() #0 {
top:
  %.promoted = load i64, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:2 within `foo`
  %0 = add i64 %.promoted, 1000
;  @ REPL[3] within `foo`
  store i64 %0, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:4 within `foo`
  ret void
}
```
---
 base/compiler/abstractinterpretation.jl |  11 ++
 base/compiler/optimize.jl               |   5 +-
 base/compiler/ssair/inlining.jl         | 133 +++++++++++++++------
 base/compiler/ssair/ir.jl               |  60 +++++-----
 base/compiler/ssair/passes.jl           | 149 ++++++++++++++++++++++--
 base/compiler/stmtinfo.jl               |  11 ++
 base/compiler/tfuncs.jl                 |   2 +-
 src/builtins.c                          |   2 +-
 test/compiler/inline.jl                 |  86 ++++++++++++++
 9 files changed, 381 insertions(+), 78 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 080a0e218cb1d1..6827c18c2e7c04 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1589,6 +1589,15 @@ function invoke_rewrite(xs::Vector{Any})
     return newxs
 end
 
+function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+    if length(argtypes) == 3
+        finalizer_argvec = Any[argtypes[2], argtypes[3]]
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1)
+        return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects))
+    end
+    return CallMeta(Nothing, Effects(), false)
+end
+
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         arginfo::ArgInfo, sv::InferenceState,
@@ -1603,6 +1612,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         return abstract_invoke(interp, arginfo, sv)
     elseif f === modifyfield!
         return abstract_modifyfield!(interp, argtypes, sv)
+    elseif f === Core.finalizer
+        return abstract_finalizer(interp, argtypes, sv)
     end
     rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods)
     return CallMeta(rt, builtin_effects(f, argtypes, rt), false)
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index af4ef61704c1db..701aca7e0d5511 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -27,6 +27,9 @@ const IR_FLAG_THROW_BLOCK = 0x01 << 3
 # This statement may be removed if its result is unused. In particular it must
 # thus be both pure and effect free.
 const IR_FLAG_EFFECT_FREE = 0x01 << 4
+# This statement was proven not to throw
+const IR_FLAG_NOTHROW = 0x01 << 5
+
 
 const TOP_TUPLE = GlobalRef(Core, :tuple)
 
@@ -564,7 +567,7 @@ function run_passes(
     @pass "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
     # @timeit "verify 2" verify_ir(ir)
     @pass "compact 2" ir = compact!(ir)
-    @pass "SROA" ir = sroa_pass!(ir)
+    @pass "SROA" ir = sroa_pass!(ir, sv.inlining)
     @pass "ADCE" ir = adce_pass!(ir)
     @pass "type lift" ir = type_lift_pass!(ir)
     @pass "compact 3" ir = compact!(ir)
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 8b9cdfe7cf21a5..9e71b56070e17c 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -308,21 +308,17 @@ function finish_cfg_inline!(state::CFGInliningState)
     end
 end
 
-function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
-                         linetable::Vector{LineInfoNode}, item::InliningTodo,
-                         boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
-    # Ok, do the inlining here
-    spec = item.spec::ResolvedInliningSpec
-    sparam_vals = item.mi.sparam_vals
-    def = item.mi.def::Method
+function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode,
+                              inlinee::Method,
+                              inlined_at::Int32)
+    coverage = coverage_enabled(inlinee.module)
     linetable_offset::Int32 = length(linetable)
     # Append the linetable of the inlined function to our line table
-    inlined_at = compact.result[idx][:line]
     topline::Int32 = linetable_offset + Int32(1)
-    coverage = coverage_enabled(def.module)
     coverage_by_path = JLOptions().code_coverage == 3
-    push!(linetable, LineInfoNode(def.module, def.name, def.file, def.line, inlined_at))
-    oldlinetable = spec.ir.linetable
+    push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at))
+    oldlinetable = inlinee_ir.linetable
+    extra_coverage_line = 0
     for oldline in 1:length(oldlinetable)
         entry = oldlinetable[oldline]
         if !coverage && coverage_by_path && is_file_tracked(entry.file)
@@ -341,8 +337,25 @@
         end
         push!(linetable, newentry)
     end
-    if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline
-        insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline))
+    if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline
+        extra_coverage_line = topline
+    end
+    return linetable_offset, extra_coverage_line
+end
+
+function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
+                         linetable::Vector{LineInfoNode}, item::InliningTodo,
+                         boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
+    # Ok, do the inlining here
+    spec = item.spec::ResolvedInliningSpec
+    sparam_vals = item.mi.sparam_vals
+    def = item.mi.def::Method
+    inlined_at = compact.result[idx][:line]
+    linetable_offset::Int32 = length(linetable)
+    topline::Int32 = linetable_offset + Int32(1)
+    linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, item.spec.ir, def, inlined_at)
+    if extra_coverage_line != 0
+        insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
     end
     if def.isva
         nargs_def = Int(def.nargs::Int32)
@@ -848,7 +861,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
     src === nothing && return compileable_specialization(et, match, effects)
 
     et !== nothing && push!(et, mi)
-    return InliningTodo(mi, src, effects)
+    return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects)
 end
 
 function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8)
@@ -870,7 +883,8 @@ function validate_sparams(sparams::SimpleVector)
 end
 
 function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
-                         flag::UInt8, state::InliningState)
+                         flag::UInt8, state::InliningState,
+                         do_resolve::Bool = true)
     method = match.method
     spec_types = match.spec_types
 
@@ -904,7 +918,7 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     todo = InliningTodo(mi, match, argtypes)
     # If we don't have caches here, delay resolving this MethodInstance
     # until the batch inlining step (or an external post-processing pass)
-    state.mi_cache === nothing && return todo
+    do_resolve && state.mi_cache === nothing && return todo
     return resolve_todo(todo, state, flag)
 end
 
@@ -913,17 +927,12 @@ function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects)
     return InliningTodo(mi, ResolvedInliningSpec(ir, effects))
 end
 
-function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Vector{UInt8}}, effects::Effects)
-    if !isa(src, CodeInfo)
-        src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
-    else
-        src = copy(src)
-    end
-    @timeit "inline IR inflation" begin
-        ir = inflate_ir!(src, mi)::IRCode
-        return InliningTodo(mi, ResolvedInliningSpec(ir, effects))
-    end
+function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1})
+    src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
+    return inflate_ir!(src, mi)
 end
+retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi)::IRCode
+retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir)
 
 function handle_single_case!(
     ir::IRCode, idx::Int, stmt::Expr,
@@ -1205,7 +1214,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto
         end
     end
 
-    if sig.f !== Core.invoke && is_builtin(sig)
+    if sig.f !== Core.invoke && sig.f !== Core.finalizer && is_builtin(sig)
         # No inlining for builtins (other invoke/apply/typeassert)
         return nothing
     end
@@ -1222,9 +1231,10 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto
 end
 
 # TODO inline non-`isdispatchtuple`, union-split callsites?
-function analyze_single_call!(
-    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
-    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+function compute_inlining_cases(
+    infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState,
+    do_resolve::Bool = true)
     argtypes = sig.argtypes
     cases = InliningCase[]
     local any_fully_covered = false
@@ -1241,7 +1251,7 @@ function analyze_single_call!(
             continue
         end
         for match in meth
-            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
+            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve)
             any_fully_covered |= match.fully_covers
         end
     end
@@ -1251,8 +1261,18 @@
         filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
 
-    handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
-        handled_all_cases & any_fully_covered, todo, state.params)
+    return cases, handled_all_cases & any_fully_covered
+end
+
+function analyze_single_call!(
+    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+
+    r = compute_inlining_cases(infos, flag, sig, state)
+    r === nothing && return nothing
+    cases, all_covered = r
+    handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
+        all_covered, todo, state.params)
 end
 
 # similar to `analyze_single_call!`, but with constant results
@@ -1304,14 +1324,15 @@ end
 
 function handle_match!(
     match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState,
-    cases::Vector{InliningCase}, allow_abstract::Bool = false)
+    cases::Vector{InliningCase}, allow_abstract::Bool = false,
+    do_resolve::Bool = true)
     spec_types = match.spec_types
     allow_abstract || isdispatchtuple(spec_types) || return false
     # we may see duplicated dispatch signatures here when a signature gets widened
    # during abstract interpretation: for the purpose of inlining, we can just skip
     # processing this dispatch candidate
     _any(case->case.sig === spec_types, cases) && return true
-    item = analyze_method!(match, argtypes, flag, state)
+    item = analyze_method!(match, argtypes, flag, state, do_resolve)
     item === nothing && return false
     push!(cases, InliningCase(spec_types, item))
     return true
@@ -1424,6 +1445,48 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
             continue
         end
 
+        # Handle finalizer
+        if sig.f === Core.finalizer
+            if isa(info, FinalizerInfo)
+                # Only inline finalizers that are known nothrow and notls.
+                # This avoids having to set up state for finalizer isolation
+                (is_nothrow(info.effects) && is_notaskstate(info.effects)) || continue
+
+                info = info.info
+                if isa(info, MethodMatchInfo)
+                    infos = MethodMatchInfo[info]
+                elseif isa(info, UnionSplitInfo)
+                    infos = info.matches
+                else
+                    continue
+                end
+
+                ft = argextype(stmt.args[2], ir)
+                has_free_typevars(ft) && return nothing
+                f = singleton_type(ft)
+                argtypes = Vector{Any}(undef, 2)
+                argtypes[1] = ft
+                argtypes[2] = argextype(stmt.args[3], ir)
+                sig = Signature(f, ft, argtypes)
+
+                cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false)
+                length(cases) == 0 && continue
+                if all_covered && length(cases) == 1
+                    if isa(cases[1], InliningCase)
+                        case1 = cases[1].item
+                        if isa(case1, InliningTodo)
+                            push!(stmt.args, true)
+                            push!(stmt.args, case1.mi)
+                        elseif isa(case1, InvokeCase)
+                            push!(stmt.args, false)
+                            push!(stmt.args, case1.invoke)
+                        end
+                    end
+                end
+                continue
+            end
+        end
+
         # if inference arrived here with constant-prop'ed result(s),
         # we can perform a specialized analysis for just this case
         if isa(info, ConstCallInfo)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 548c19eb031e7a..e96fe2e92d0c1b 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -163,36 +163,6 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
 
 # SSA-indexed nodes
 
-
-struct NewInstruction
-    stmt::Any
-    type::Any
-    info::Any
-    # If nothing, copy the line from previous statement
-    # in the insertion location
-    line::Union{Int32, Nothing}
-    flag::UInt8
-
-    ## Insertion options
-
-    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
-    # Don't bother redoing so on insertion.
-    effect_free_computed::Bool
-    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
-            line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
-        new(stmt, type, info, line, flag, effect_free_computed)
-end
-NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
-    NewInstruction(stmt, type, nothing)
-NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
-    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
-
-effect_free(inst::NewInstruction) =
-    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
-non_effect_free(inst::NewInstruction) =
-    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
-
-
 struct InstructionStream
     inst::Vector{Any}
     type::Vector{Any}
@@ -292,6 +262,36 @@ function add!(new::NewNodeStream, pos::Int, attach_after::Bool)
 end
 end
 copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info))
 
+struct NewInstruction
+    stmt::Any
+    type::Any
+    info::Any
+    # If nothing, copy the line from previous statement
+    # in the insertion location
+    line::Union{Int32, Nothing}
+    flag::UInt8
+
+    ## Insertion options
+
+    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
+    # Don't bother redoing so on insertion.
+    effect_free_computed::Bool
+    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
+            line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
+        new(stmt, type, info, line, flag, effect_free_computed)
+end
+NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
+    NewInstruction(stmt, type, nothing)
+NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
+    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
+NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) =
+    NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? meta[:line] : line, meta[:flag], true)
+
+effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
+non_effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
+
 struct IRCode
     stmts::InstructionStream
     argtypes::Vector{Any}
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 9d36e52fb9f86d..0e79404e9d3465 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -14,6 +14,7 @@ GetfieldUse(idx::Int) = SSAUse(:getfield, idx)
 PreserveUse(idx::Int) = SSAUse(:preserve, idx)
 NoPreserve() = SSAUse(:nopreserve, 0)
 IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx)
+AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx)
 
 """
     du::SSADefUse
@@ -735,7 +736,7 @@ its argument). In a case when all usages are fully eliminated, `struct`
 allocation may also be erased as a result of succeeding dead code elimination.
 """
-function sroa_pass!(ir::IRCode)
+function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing)
     compact = IncrementalCompact(ir)
     defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations
     lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
@@ -744,7 +745,7 @@
     for ((_, idx), stmt) in compact
         # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
         isa(stmt, Expr) || continue
-        is_setfield = is_isdefined = false
+        is_setfield = is_isdefined = is_finalizer = false
         field_ordering = :unspecified
         if is_known_call(stmt, setfield!, compact)
             4 <= length(stmt.args) <= 5 || continue
@@ -767,6 +768,13 @@
                 field_ordering = argextype(stmt.args[4], compact)
                 widenconst(field_ordering) === Bool && (field_ordering = :unspecified)
             end
+        elseif is_known_call(stmt, Core.finalizer, compact)
+            3 <= length(stmt.args) <= 5 || continue
+            # Inlining performs legality checks on the finalizer to determine
+            # whether or not we may inline it. If so, it appends extra arguments
+            # at the end of the intrinsic. Detect that here.
+            length(stmt.args) == 5 || continue
+            is_finalizer = true
         elseif isexpr(stmt, :foreigncall)
             nccallargs = length(stmt.args[3]::SimpleVector)
             preserved = Int[]
@@ -824,10 +832,13 @@
 
         # analyze this `getfield` / `isdefined` / `setfield!` call
 
-        field = try_compute_field_stmt(compact, stmt)
-        field === nothing && continue
-
-        val = stmt.args[2]
+        if !is_finalizer
+            field = try_compute_field_stmt(compact, stmt)
+            field === nothing && continue
+            val = stmt.args[2]
+        else
+            val = stmt.args[3]
+        end
 
         struct_typ = unwrap_unionall(widenconst(argextype(val, compact)))
         if isa(struct_typ, Union) && struct_typ <: Tuple
@@ -864,14 +875,16 @@
                     push!(defuse.defs, idx)
                 elseif is_isdefined
                     push!(defuse.uses, IsdefinedUse(idx))
+                elseif is_finalizer
+                    push!(defuse.uses, AddFinalizerUse(idx))
                 else
                     push!(defuse.uses, GetfieldUse(idx))
                 end
                 union!(mid, intermediaries)
             end
             continue
-        elseif is_setfield
-            continue # invalid `setfield!` call, but just ignore here
+        elseif is_setfield || is_finalizer
+            continue # invalid `setfield!` or `Core.finalizer` call, but just ignore here
         elseif is_isdefined
             continue # TODO?
         end
@@ -921,7 +934,7 @@
         used_ssas = copy(compact.used_ssas)
         simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1)
         ir = complete(compact)
-        sroa_mutables!(ir, defuses, used_ssas, lazydomtree)
+        sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining)
         return ir
     else
         simple_dce!(compact)
@@ -929,7 +942,60 @@
     end
 end
 
-function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree)
+function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState)
+    code = get(inlining.mi_cache, mi, nothing)
+    if code isa CodeInstance
+        if use_const_api(code)
+            # No code in the function - Nothing to do
+            inlining.et !== nothing && push!(inlining.et, mi)
+            return true
+        end
+        src = code.inferred
+    else
+        src = code
+    end
+
+    src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[])
+    src === nothing && return false
+    src = retrieve_ir_for_inlining(mi, src)
+
+    # For now: Require finalizer to only have one basic block
+    length(src.cfg.blocks) == 1 || return false
+
+    # Ok, we're committed to inlining the finalizer
+    inlining.et !== nothing && push!(inlining.et, mi)
+
+    linetable_offset, extra_coverage_line = ir_inline_linetable!(ir.linetable, src, mi.def, ir[SSAValue(idx)][:line])
+    if extra_coverage_line != 0
+        insert_node!(ir, idx, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
+    end
+
+    # TODO: Use the actual inliner here rather than open coding this special
+    # purpose inliner.
+    spvals = mi.sparam_vals
+    ssa_rename = Vector{Any}(undef, length(src.stmts))
+    for idx′ = 1:length(src.stmts)
+        urs = userefs(src[SSAValue(idx′)][:inst])
+        for ur in urs
+            if isa(ur[], SSAValue)
+                ur[] = ssa_rename[ur[].id]
+            elseif isa(ur[], Argument)
+                ur[] = argexprs[ur[].n]
+            elseif isexpr(ur[], :static_parameter)
+                ur[] = spvals[ur[].args[1]]
+            end
+        end
+        # TODO: Scan newly added statement into the sroa defuse struct
+        stmt = urs[]
+        isa(stmt, ReturnNode) && continue
+        inst = src[SSAValue(idx′)]
+        ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, inst; line = inst[:line] + linetable_offset), true)
+    end
+    return true
+end
+
+is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
         # Check if there are any uses we did not account for. If so, the variable
@@ -952,9 +1018,72 @@
         # error at runtime, but is not illegal to have in the IR.
         ismutabletype(typ) || continue
         typ = typ::DataType
+        # First check for any add_finalizer calls
+        add_finalizer_idx = nothing
+        for use in defuse.uses
+            if use.kind === :add_finalizer
+                # For now: Only allow one add_finalizer per allocation
+                add_finalizer_idx !== nothing && @goto skip
+                add_finalizer_idx = use.idx
+            end
+        end
+        if add_finalizer_idx !== nothing
+            # For now: Require that all uses and defs are in the same basic block,
+            # so that live range calculations are easy.
+            bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)]
+            minval = typemax(Int)
+            maxval = 0
+
+            check_in_range(defuse) = check_in_range(defuse.idx)
+            function check_in_range(didx::Int)
+                didx in bb.stmts || return false
+                if didx < minval
+                    minval = didx
+                end
+                if didx > maxval
+                    maxval = didx
+                end
+                return true
+            end
+
+            check_in_range(idx) || continue
+            _all(check_in_range, defuse.uses) || continue
+            _all(check_in_range, defuse.defs) || continue
+
+            # For now: Require all statements in the basic block range to be
+            # nothrow.
+            all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval)
+            all_nothrow || continue
+
+            # Ok, finalizer rewrite is legal.
+            add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst]
+            argexprs = Any[add_finalizer_stmt.args[2], add_finalizer_stmt.args[3]]
+            may_inline = add_finalizer_stmt.args[4]::Bool
+            mi = add_finalizer_stmt.args[5]::Union{MethodInstance, Nothing}
+            if may_inline && mi !== nothing
+                if try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[5], inlining)
+                    @goto done_finalizer
+                end
+                mi = compileable_specialization(inlining.et, mi, Effects()).invoke
+            end
+            if mi !== nothing
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:invoke, mi, argexprs...), Nothing),
+                    true)
+            else
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:call, argexprs...), Nothing),
+                    true)
+            end
+            @label done_finalizer
+            # Erase call to add_finalizer
+            ir[SSAValue(add_finalizer_idx)][:inst] = nothing
+            continue
+        end
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
         all_eliminated = all_forwarded = true
+        has_finalizer = false
         for use in defuse.uses
             if use.kind === :preserve
                 for du in fielddefuse
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 3f9a562061a12c..99f39563946159 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -183,4 +183,15 @@ struct ReturnTypeCallInfo
     info::Any
 end
 
+"""
+    info::FinalizerInfo
+
+Represents the information of a potential call to the finalizer on the given
+object type.
+"""
+struct FinalizerInfo
+    info::Any
+    effects::Effects
+end
+
 @specialize
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index c6c34a30fe229d..195d41c8312d5f 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -559,7 +559,7 @@ add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_elt
 add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
 add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0)
-add_tfunc(Core.finalizer, 2, 2, (@nospecialize args...)->Nothing, 5)
+add_tfunc(Core.finalizer, 2, 4, (@nospecialize args...)->Nothing, 5)
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
diff --git a/src/builtins.c b/src/builtins.c
index 26363491846f9c..2e93a752c3d29f 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -1602,7 +1602,7 @@ JL_CALLABLE(jl_f_donotdelete)
 
 JL_CALLABLE(jl_f_finalizer)
 {
-    JL_NARGS(finalizer, 2, 2);
+    JL_NARGS(finalizer, 2, 4);
     jl_task_t *ct = jl_current_task;
     jl_gc_add_finalizer_(ct->ptls, args[1], args[0]);
     return jl_nothing;
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 4f2e8f8783f584..75eed6dd772e5f 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -1279,3 +1279,89 @@ end
 # Test that inlining doesn't accidentally delete a bad return_type call
 f_bad_return_type() = Core.Compiler.return_type(+, 1, 2)
 @test_throws MethodError f_bad_return_type()
+
+# Test that we can inline a finalizer for a struct that does not otherwise escape
+global total_deallocations::Int = 0
+
+mutable struct DoAllocNoEscape
+    function DoAllocNoEscape()
+        finalizer(new()) do this
+            global total_deallocations += 1
+        end
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscape()
+        end
+    end
+    @test count(isnew, src.code) == 0
+end
+
+# Test that finalizer elision doesn't cause a throw to be inlined into a function
+# that shouldn't have it
+const finalizer_should_throw = Ref{Bool}(true)
+mutable struct DoAllocFinalizerThrows
+    function DoAllocFinalizerThrows()
+        finalizer(new()) do this
+            finalizer_should_throw[] && error("Unexpected finalizer throw")
+        end
+    end
+end
+
+function f_finalizer_throws()
+    prev = GC.enable(false)
+    for i = 1:100
+        DoAllocFinalizerThrows()
+    end
+    finalizer_should_throw[] = false
+    GC.enable(prev)
+    GC.gc()
+    return true
+end
+
+@test f_finalizer_throws()
+
+# Test finalizers with static parameters
+global last_finalizer_type::Type = Any
+mutable struct DoAllocNoEscapeSparam{T}
+    x::T
+    function finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T}
+        global total_deallocations += 1
+        global last_finalizer_type = T
+    end
+    function DoAllocNoEscapeSparam{T}(x::T) where {T}
+        finalizer(finalizer_sparam, new{T}(x))
+    end
+end
+DoAllocNoEscapeSparam(x::T) where {T} = DoAllocNoEscapeSparam{T}(x)
+
+let src = code_typed1(Tuple{Any}) do x
+        for i = 1:1000
+            DoAllocNoEscapeSparam(x)
+        end
+    end
+    # This requires more inlining enhancements. For now just make sure this
+    # doesn't error.
+    @test count(isnew, src.code) in (0, 1) # == 0
+end
+
+# Test noinline finalizer
+@noinline function noinline_finalizer(d)
+    global total_deallocations += 1
+end
+mutable struct DoAllocNoEscapeNoInline
+    function DoAllocNoEscapeNoInline()
+        finalizer(noinline_finalizer, new())
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscapeNoInline()
+        end
+    end
+    @test count(isnew, src.code) == 1
+    @test count(isinvoke(:noinline_finalizer), src.code) == 1
+end
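
Follow-up test idea (a sketch, not part of this patch): the legality analysis should refuse to forgo runtime registration when the object escapes, so the allocation must survive SROA. The names `escape_slot` and `DoAllocEscapes` are hypothetical; `total_deallocations`, `code_typed1`, and `isnew` are the helpers from the test file above, and the expected count is my assumption about the intended behavior:

```
# Sketch: an escaping allocation must keep its runtime finalizer
# registration, so SROA cannot eliminate the `new` here.
const escape_slot = Ref{Any}(nothing)  # hypothetical helper global
mutable struct DoAllocEscapes          # hypothetical test struct
    function DoAllocEscapes()
        obj = finalizer(new()) do this
            global total_deallocations += 1
        end
        escape_slot[] = obj  # the object escapes through a global
        return obj
    end
end

let src = code_typed1() do
        for i = 1:1000
            DoAllocEscapes()
        end
    end
    # Assumed expectation: the allocation remains because registration
    # with the runtime cannot be forgone for escaping objects.
    @test count(isnew, src.code) == 1
end
```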