From ff35e48493434a6d1c537ceb0227e5b8af2d91a4 Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Wed, 11 May 2022 06:59:44 +0000
Subject: [PATCH] Very WIP: Eager finalizer insertion

This is a variant of the eager-finalization idea (e.g. as seen in #44056),
but with a focus on the mechanism of finalizer insertion, since I need a
similar pass downstream. Integration of EscapeAnalysis is left to #44056.

My motivation for this change is somewhat different. In particular, I want
to be able to insert finalizer calls such that I can subsequently SROA the
mutable object. This requires a couple of design points that are more
stringent than the pass from #44056, so I decided to prototype them as an
independent PR. The primary things I need here that are not seen in #44056
are:

- The ability to forgo finalizer registration with the runtime entirely
  (requires additional legality analysis)
- The ability to inline the registered finalizer at the deallocation point
  (to enable subsequent SROA)

To this end, adding a finalizer is promoted to a builtin that is recognized
by inference and inlining (such that inference can produce an inferred
version of the finalizer for inlining).

The current status is that this fixes the minimal example I wanted to get
working, but does not yet extend to the motivating case I had. Nevertheless,
I felt that this was a good checkpoint to synchronize with other efforts
along these lines.

Currently working demo:

```
julia> const total_deallocations = Ref{Int}(0)
Base.RefValue{Int64}(0)

julia> mutable struct DoAlloc
           function DoAlloc()
               this = new()
               Core._add_finalizer(this, function(this)
                   global total_deallocations[] += 1
               end)
               return this
           end
       end

julia> function foo()
           for i = 1:1000
               DoAlloc()
           end
       end
foo (generic function with 1 method)

julia> @code_llvm foo()
;  @ REPL[3]:1 within `foo`
define void @julia_foo_111() #0 {
top:
  %.promoted = load i64, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:2 within `foo`
  %0 = add i64 %.promoted, 1000
;  @ REPL[3] within `foo`
  store i64 %0, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:4 within `foo`
  ret void
}
```
---
 base/compiler/abstractinterpretation.jl |  12 +++
 base/compiler/optimize.jl               |   5 +-
 base/compiler/ssair/inlining.jl         |  69 ++++++++++---
 base/compiler/ssair/ir.jl               |  60 ++++++------
 base/compiler/ssair/passes.jl           | 124 ++++++++++++++++++++++--
 base/compiler/stmtinfo.jl               |  10 ++
 base/compiler/tfuncs.jl                 |   2 +
 base/gcutils.jl                         |   3 +-
 src/builtin_proto.h                     |   2 +
 src/builtins.c                          |   9 ++
 src/codegen.cpp                         |   3 +-
 src/gc.c                                |   6 +-
 src/julia_internal.h                    |   1 +
 src/staticdata.c                        |   2 +-
 test/compiler/inline.jl                 |  21 ++++
 15 files changed, 271 insertions(+), 58 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 36ab6b81f47a06..417f4e31599c0c 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1590,6 +1590,16 @@ function invoke_rewrite(xs::Vector{Any})
     return newxs
 end
 
+function abstract_add_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+    if length(argtypes) == 3
+        tt = argtypes[3]
+        finalizer_argvec = Any[argtypes[3], argtypes[2]]
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1)
+        return CallMeta(Nothing, FinalizerInfo(call.info))
+    end
+    return CallMeta(Nothing, false)
+end
+
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         arginfo::ArgInfo, sv::InferenceState,
@@ -1607,6 +1617,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         elseif f === modifyfield!
             tristate_merge!(sv, Effects()) # TODO
             return abstract_modifyfield!(interp, argtypes, sv)
+        elseif f === Core._add_finalizer
+            return abstract_add_finalizer(interp, argtypes, sv)
         end
         rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods)
         tristate_merge!(sv, builtin_effects(f, argtypes, rt))
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index b00e24aec97348..a07990064b03b4 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -27,6 +27,9 @@ const IR_FLAG_THROW_BLOCK = 0x01 << 3
 # This statement may be removed if its result is unused. In particular it must
 # thus be both pure and effect free.
 const IR_FLAG_EFFECT_FREE = 0x01 << 4
+# This statement was proven not to throw
+const IR_FLAG_NOTHROW = 0x01 << 5
+
 
 const TOP_TUPLE = GlobalRef(Core, :tuple)
 
@@ -543,7 +546,7 @@ function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult
     @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
     # @timeit "verify 2" verify_ir(ir)
     @timeit "compact 2" ir = compact!(ir)
-    @timeit "SROA" ir = sroa_pass!(ir)
+    @timeit "SROA" ir = sroa_pass!(ir, sv.inlining)
     @timeit "ADCE" ir = adce_pass!(ir)
     @timeit "type lift" ir = type_lift_pass!(ir)
     @timeit "compact 3" ir = compact!(ir)
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 1738c05678211b..dde375796a5c79 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -874,7 +874,8 @@ function validate_sparams(sparams::SimpleVector)
 end
 
 function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
-                         flag::UInt8, state::InliningState)
+                         flag::UInt8, state::InliningState,
+                         do_resolve::Bool = true)
     method = match.method
     spec_types = match.spec_types
 
@@ -908,7 +909,7 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     todo = InliningTodo(mi, match, argtypes)
     # If we don't have caches here, delay resolving this MethodInstance
     # until the batch inlining step (or an external post-processing pass)
-    state.mi_cache === nothing && return todo
+    do_resolve && state.mi_cache === nothing && return todo
     return resolve_todo(todo, state, flag)
 end
 
@@ -1206,7 +1207,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto
         end
     end
 
-    if sig.f !== Core.invoke && is_builtin(sig)
+    if sig.f !== Core.invoke && sig.f !== Core._add_finalizer && is_builtin(sig)
         # No inlining for builtins (other invoke/apply/typeassert)
         return nothing
     end
@@ -1226,9 +1227,10 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto
 end
 
 # TODO inline non-`isdispatchtuple`, union-split callsites?
-function analyze_single_call!(
-    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
-    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+function compute_inlining_cases(
+    infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState,
+    do_resolve::Bool = true)
     argtypes = sig.argtypes
     cases = InliningCase[]
     local any_fully_covered = false
@@ -1245,7 +1247,7 @@ function analyze_single_call!(
             continue
         end
         for match in meth
-            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
+            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve)
             any_fully_covered |= match.fully_covers
         end
     end
@@ -1255,8 +1257,18 @@ function analyze_single_call!(
         filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
 
-    handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
-        handled_all_cases & any_fully_covered, todo, state.params)
+    return cases, handled_all_cases & any_fully_covered
+end
+
+function analyze_single_call!(
+    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+
+    r = compute_inlining_cases(infos, flag, sig, state)
+    r === nothing && return nothing
+    cases, all_covered = r
+    handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
+        all_covered, todo, state.params)
 end
 
 # similar to `analyze_single_call!`, but with constant results
@@ -1308,14 +1320,15 @@ end
 
 function handle_match!(
     match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState,
-    cases::Vector{InliningCase}, allow_abstract::Bool = false)
+    cases::Vector{InliningCase}, allow_abstract::Bool = false,
+    do_resolve::Bool = true)
     spec_types = match.spec_types
     allow_abstract || isdispatchtuple(spec_types) || return false
     # we may see duplicated dispatch signatures here when a signature gets widened
     # during abstract interpretation: for the purpose of inlining, we can just skip
     # processing this dispatch candidate
     _any(case->case.sig === spec_types, cases) && return true
-    item = analyze_method!(match, argtypes, flag, state)
+    item = analyze_method!(match, argtypes, flag, state, do_resolve)
     item === nothing && return false
     push!(cases, InliningCase(spec_types, item))
     return true
@@ -1430,6 +1443,40 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
             continue
         end
 
+        # Handle finalizer
+        if sig.f === Core._add_finalizer
+            if isa(info, FinalizerInfo)
+                info = info.info
+                if isa(info, MethodMatchInfo)
+                    infos = MethodMatchInfo[info]
+                elseif isa(info, UnionSplitInfo)
+                    infos = info.matches
+                else
+                    continue
+                end
+
+                ft = argextype(stmt.args[3], ir)
+                has_free_typevars(ft) && continue
+                f = singleton_type(ft)
+                argtypes = Vector{Any}(undef, 2)
+                argtypes[1] = ft
+                argtypes[2] = argextype(stmt.args[2], ir)
+                sig = Signature(f, ft, argtypes)
+
+                cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false)
+                length(cases) == 0 && continue
+                if all_covered && length(cases) == 1
+                    if isa(cases[1], InliningCase)
+                        case1 = cases[1].item
+                        if isa(case1, InliningTodo)
+                            push!(stmt.args, case1.mi)
+                        end
+                    end
+                end
+                continue
+            end
+        end
+
         # if inference arrived here with constant-prop'ed result(s),
         # we can perform a specialized analysis for just this case
         if isa(info, ConstCallInfo)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 2f1359e4002aea..a0fe8dbe26870d 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -166,36 +166,6 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
 
 # SSA-indexed nodes
-
-struct NewInstruction
-    stmt::Any
-    type::Any
-    info::Any
-    # If nothing, copy the line from previous statement
-    # in the insertion location
-    line::Union{Int32, Nothing}
-    flag::UInt8
-
-    ## Insertion options
-
-    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
-    # Don't bother redoing so on insertion.
-    effect_free_computed::Bool
-    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
-                   line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
-        new(stmt, type, info, line, flag, effect_free_computed)
-end
-NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
-    NewInstruction(stmt, type, nothing)
-NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
-    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
-
-effect_free(inst::NewInstruction) =
-    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
-non_effect_free(inst::NewInstruction) =
-    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
-
-
 struct InstructionStream
     inst::Vector{Any}
     type::Vector{Any}
@@ -295,6 +265,36 @@ function add!(new::NewNodeStream, pos::Int, attach_after::Bool)
 end
 copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info))
 
+struct NewInstruction
+    stmt::Any
+    type::Any
+    info::Any
+    # If nothing, copy the line from previous statement
+    # in the insertion location
+    line::Union{Int32, Nothing}
+    flag::UInt8
+
+    ## Insertion options
+
+    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
+    # Don't bother redoing so on insertion.
+    effect_free_computed::Bool
+    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
+                   line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
+        new(stmt, type, info, line, flag, effect_free_computed)
+end
+NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
+    NewInstruction(stmt, type, nothing)
+NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
+    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
+NewInstruction(@nospecialize(stmt), meta::Instruction) =
+    NewInstruction(stmt, meta[:type], meta[:info], meta[:line], meta[:flag], true)
+
+effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
+non_effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
+
 struct IRCode
     stmts::InstructionStream
     argtypes::Vector{Any}
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index c2597363df2824..93f79cbaf11acb 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -14,6 +14,7 @@ GetfieldUse(idx::Int)  = SSAUse(:getfield, idx)
 PreserveUse(idx::Int)  = SSAUse(:preserve, idx)
 NoPreserve()           = SSAUse(:nopreserve, 0)
 IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx)
+AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx)
 
 """
     du::SSADefUse
@@ -735,7 +736,7 @@ its argument).
 In a case when all usages are fully eliminated, `struct` allocation may also be erased as
 a result of succeeding dead code elimination.
""" -function sroa_pass!(ir::IRCode) +function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() @@ -744,7 +745,7 @@ function sroa_pass!(ir::IRCode) for ((_, idx), stmt) in compact # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement) isa(stmt, Expr) || continue - is_setfield = is_isdefined = false + is_setfield = is_isdefined = is_add_finalizer = false field_ordering = :unspecified if is_known_call(stmt, setfield!, compact) 4 <= length(stmt.args) <= 5 || continue @@ -767,6 +768,9 @@ function sroa_pass!(ir::IRCode) field_ordering = argextype(stmt.args[4], compact) widenconst(field_ordering) === Bool && (field_ordering = :unspecified) end + elseif is_known_call(stmt, Core._add_finalizer, compact) + 3 <= length(stmt.args) <= 4 || continue + is_add_finalizer = true elseif isexpr(stmt, :foreigncall) nccallargs = length(stmt.args[3]::SimpleVector) preserved = Int[] @@ -824,9 +828,10 @@ function sroa_pass!(ir::IRCode) # analyze this `getfield` / `isdefined` / `setfield!` call - field = try_compute_field_stmt(compact, stmt) - field === nothing && continue - + if !is_add_finalizer + field = try_compute_field_stmt(compact, stmt) + field === nothing && continue + end val = stmt.args[2] struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) @@ -864,14 +869,16 @@ function sroa_pass!(ir::IRCode) push!(defuse.defs, idx) elseif is_isdefined push!(defuse.uses, IsdefinedUse(idx)) + elseif is_add_finalizer + push!(defuse.uses, AddFinalizerUse(idx)) else push!(defuse.uses, GetfieldUse(idx)) end union!(mid, intermediaries) end continue - elseif is_setfield - continue # invalid `setfield!` call, but just ignore here + elseif is_setfield || is_add_finalizer + continue # invalid `setfield!` or `_add_finalizer` call, but just ignore here elseif is_isdefined continue # TODO? 
         end
@@ -921,7 +928,7 @@ function sroa_pass!(ir::IRCode)
         used_ssas = copy(compact.used_ssas)
         simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1)
         ir = complete(compact)
-        sroa_mutables!(ir, defuses, used_ssas, lazydomtree)
+        sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining)
         return ir
     else
         simple_dce!(compact)
@@ -929,7 +936,56 @@ end
         return complete(compact)
    end
 end
-function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree)
+function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState)
+    code = get(inlining.mi_cache, mi, nothing)
+    if code isa CodeInstance
+        if use_const_api(code)
+            # in this case the finalizer body folds to a constant - there is nothing to inline
+            inlining.et !== nothing && push!(inlining.et, mi)
+            return true
+        else
+            src = code.inferred
+        end
+        effects = decode_effects(code.ipo_purity_bits)
+    else
+        effects = Effects()
+        src = code
+    end
+    src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[])
+    src === nothing && return false
+    if isa(src, IRCode)
+        src = copy(src)
+    end
+
+    if !isa(src, IRCode) && !isa(src, CodeInfo)
+        src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
+    end
+
+    if isa(src, CodeInfo)
+        src = inflate_ir(src, mi)::IRCode
+    end
+
+    ssa_rename = Vector{Any}(undef, length(src.stmts))
+    for idx′ = 1:length(src.stmts)
+        urs = userefs(src[SSAValue(idx′)][:inst])
+        for ur in urs
+            if isa(ur[], SSAValue)
+                ur[] = ssa_rename[ur[].id]
+            elseif isa(ur[], Argument)
+                ur[] = argexprs[ur[].n]
+            end
+        end
+        # TODO: Scan newly added statement into the sroa defuse struct
+        stmt = urs[]
+        isa(stmt, ReturnNode) && continue
+        # TODO: Line table adjustment
+        ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, src[SSAValue(idx′)]), true)
+    end
+    return true
+end
+
+is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
         # Check if there are any uses we did not account for. If so, the variable
@@ -952,9 +1008,59 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         # error at runtime, but is not illegal to have in the IR.
         ismutabletype(typ) || continue
         typ = typ::DataType
+        # First check for any add_finalizer calls
+        add_finalizer_idx = nothing
+        for use in defuse.uses
+            if use.kind === :add_finalizer
+                # For now: Only allow one add_finalizer per allocation
+                add_finalizer_idx !== nothing && @goto skip
+                add_finalizer_idx = use.idx
+            end
+        end
+        if add_finalizer_idx !== nothing
+            # For now: Require that all uses and defs are in the same basic block,
+            # so that live range calculations are easy.
+            bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)]
+            minval = typemax(Int)
+            maxval = 0
+
+            check_in_range(defuse) = check_in_range(defuse.idx)
+            function check_in_range(didx::Int)
+                didx in bb.stmts || return false
+                if didx < minval
+                    minval = didx
+                end
+                if didx > maxval
+                    maxval = didx
+                end
+                return true
+            end
+
+            check_in_range(idx) || continue
+            _all(check_in_range, defuse.uses) || continue
+            _all(check_in_range, defuse.defs) || continue
+
+            # For now: Require all statements in the basic block range to be
+            # nothrow.
+            all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval)
+            all_nothrow || continue
+
+            # Ok, finalizer rewrite is legal.
+            add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst]
+            argexprs = Any[add_finalizer_stmt.args[3], add_finalizer_stmt.args[2]]
+            if inlining === nothing || length(add_finalizer_stmt.args) != 4 || !try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[4], inlining)
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:call, argexprs...), Nothing),
+                    true)
+            end
+            # Erase call to add_finalizer
+            ir[SSAValue(add_finalizer_idx)][:inst] = nothing
+            continue
+        end
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
         all_eliminated = all_forwarded = true
+        has_add_finalizer = false
         for use in defuse.uses
             if use.kind === :preserve
                 for du in fielddefuse
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 4832ce1af4a3aa..c03277ab09ddf8 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -182,4 +182,14 @@ struct ReturnTypeCallInfo
     info::Any
 end
 
+"""
+    info::FinalizerInfo
+
+Represents the information of a potential call to the finalizer on the given
+object type.
+"""
+struct FinalizerInfo
+    info::Any
+end
+
 @specialize
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index e6625e2d559259..996c6b6dfe00ce 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -559,6 +559,8 @@ add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_elt
 add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
 add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0)
+add_tfunc(Core._add_finalizer, 2, 2, (@nospecialize args...)->Nothing, 5)
+
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
diff --git a/base/gcutils.jl b/base/gcutils.jl
index d17301a1be9b07..7fb887aeb37df0 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -45,8 +45,7 @@ function finalizer(@nospecialize(f), @nospecialize(o))
     if !ismutable(o)
         error("objects of type ", typeof(o), " cannot be finalized")
     end
-    ccall(:jl_gc_add_finalizer_th, Cvoid, (Ptr{Cvoid}, Any, Any),
-          Core.getptls(), o, f)
+    Core._add_finalizer(o, f)
     return o
 end
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index c820751ab56e23..94f07a71642034 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -57,6 +57,7 @@ DECLARE_BUILTIN(_typevar);
 DECLARE_BUILTIN(donotdelete);
 DECLARE_BUILTIN(getglobal);
 DECLARE_BUILTIN(setglobal);
+DECLARE_BUILTIN(_add_finalizer);
 
 JL_CALLABLE(jl_f_invoke_kwsorter);
 #ifdef DEFINE_BUILTIN_GLOBALS
@@ -73,6 +74,7 @@ JL_CALLABLE(jl_f_get_binding_type);
 JL_CALLABLE(jl_f_set_binding_type);
 JL_CALLABLE(jl_f_donotdelete);
 JL_CALLABLE(jl_f_setglobal);
+JL_CALLABLE(jl_f__add_finalizer);
 
 #ifdef __cplusplus
 }
diff --git a/src/builtins.c b/src/builtins.c
index 90dc0ec6a0e5c4..8d9bc01c8416bc 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -1591,6 +1591,14 @@ JL_CALLABLE(jl_f_donotdelete)
     return jl_nothing;
 }
 
+JL_CALLABLE(jl_f__add_finalizer)
+{
+    JL_NARGS(_add_finalizer, 2, 3);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_add_finalizer_(ct->ptls, args[0], args[1]);
+    return jl_nothing;
+}
+
 static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
 {
     size_t nf = jl_svec_len(ft);
@@ -1961,6 +1969,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody);
     add_builtin_func("_equiv_typedef", jl_f__equiv_typedef);
     jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete);
+    add_builtin_func("_add_finalizer", jl_f__add_finalizer);
 
     // builtin types
     add_builtin("Any", (jl_value_t*)jl_any_type);
diff --git a/src/codegen.cpp b/src/codegen.cpp
index fa7485a8448af1..a605c08e85277f 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -1125,7 +1125,8 @@ static const auto &builtin_func_map() {
           { jl_f_arrayset_addr,           new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} },
           { jl_f_arraysize_addr,          new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} },
           { jl_f_apply_type_addr,         new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
-          { jl_f_donotdelete_addr,        new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }
+          { jl_f_donotdelete_addr,        new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
+          { jl_f__add_finalizer_addr,     new JuliaFunction{XSTR(jl_f__add_finalizer), get_func_sig, get_func_attrs} }
       };
     return builtins;
 }
diff --git a/src/gc.c b/src/gc.c
index e299661db87d42..2d34df0edcd88b 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -488,7 +488,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
     run_finalizers(ct);
 }
 
-static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
+void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
     assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
     arraylist_t *a = &ptls->finalizers;
@@ -518,7 +518,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
 {
-    gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
 }
 
 JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
@@ -527,7 +527,7 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct
         jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
     }
     else {
-        gc_add_finalizer_(ptls, v, f);
+        jl_gc_add_finalizer_(ptls, v, f);
     }
 }
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 02130ef963198f..be5716be997288 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -466,6 +466,7 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
 void jl_gc_run_all_finalizers(jl_task_t *ct);
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
+void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT;
 void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT;
 
diff --git a/src/staticdata.c b/src/staticdata.c
index 27fbb0fb336cf1..a6a6b3d8e62ee6 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -314,7 +314,7 @@ static const jl_fptr_args_t id_to_fptrs[] = {
     &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype, &jl_f__typebody,
     &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type, &jl_f_set_binding_type,
     &jl_f_opaque_closure_call, &jl_f_donotdelete,
-    &jl_f_getglobal, &jl_f_setglobal,
+    &jl_f_getglobal, &jl_f_setglobal, &jl_f__add_finalizer,
     NULL };
 
 typedef struct {
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 06cbfbb3ce2279..80941eb2227d88 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -1259,3 +1259,24 @@ end
 @test fully_eliminated() do
     return maybe_error_int(1)
 end
+
+# Test that we can inline a finalizer for a struct that does not otherwise escape
+global total_deallocations::Int = 0
+
+mutable struct DoAllocNoEscape
+    function DoAllocNoEscape()
+        this = new()
+        Core._add_finalizer(this, function(this)
+            global total_deallocations += 1
+        end)
+        return this
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscape()
+        end
+    end
+    @test count(isnew, src.code) == 0
+end
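
For reference, a sketch of the user-visible pattern this pass targets, mirroring the
commit-message demo (illustrative only: `finalized_count`, `CountedAlloc`, and `churn`
are hypothetical names, not part of the patch; `Core._add_finalizer` is the builtin
added above, which `Base.finalizer` now forwards to):

```
# Sketch of the target pattern. Assuming the pass applies (object proven
# non-escaping, single finalizer, all statements in range nothrow), each
# iteration's allocation should SROA away and the finalizer body should be
# inlined at the deallocation point, as in the demo above.
const finalized_count = Ref{Int}(0)

mutable struct CountedAlloc
    function CountedAlloc()
        this = new()
        # Register the finalizer via the new builtin, replacing the former
        # ccall(:jl_gc_add_finalizer_th, ...) path in Base.finalizer.
        Core._add_finalizer(this, function (this)
            finalized_count[] += 1
        end)
        return this
    end
end

function churn(n)
    for _ = 1:n
        CountedAlloc()
    end
end

churn(1000)
# @code_llvm churn(1000)   # should show only a counter update, no allocation
```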