From c4effda45688d82039525d6f9d364e0823b1d268 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Mon, 6 Jun 2022 18:21:17 -0700
Subject: [PATCH] Eager finalizer insertion (#45272)

* Eager finalizer insertion

This is a variant of the eager-finalization idea
(e.g. as seen in #44056), but with a focus on the mechanism
of finalizer insertion, since I need a similar pass downstream.
Integration of EscapeAnalysis is left to #44056.

My motivation for this change is somewhat different. In particular,
I want to be able to insert a `finalize` call such that I can
subsequently SROA the mutable object. This requires a couple of
design points that are more stringent than the pass from #44056,
so I decided to prototype them as an independent PR. The primary
things I need here that are not seen in #44056 are:

- The ability to forgo finalizer registration with the runtime
  entirely (requires additional legality analysis)
- The ability to inline the registered finalizer at the deallocation
  point (to enable subsequent SROA)

To this end, adding a finalizer is promoted to a builtin
that is recognized by inference and inlining (such that inference
can produce an inferred version of the finalizer for inlining).

The current status is that this fixes the minimal example I wanted
to have work, but does not yet extend to the motivating case I had.
Nevertheless, I felt that this was a good checkpoint to synchronize
with other efforts along these lines.

Currently working demo:

```
julia> const total_deallocations = Ref{Int}(0)
Base.RefValue{Int64}(0)

julia> mutable struct DoAlloc
               function DoAlloc()
                   this = new()
                       Core._add_finalizer(this, function(this)
                               global total_deallocations[] += 1
                       end)
                       return this
               end
       end

julia> function foo()
               for i = 1:1000
                       DoAlloc()
               end
       end
foo (generic function with 1 method)

julia> @code_llvm foo()
;  @ REPL[3]:1 within `foo`
define void @julia_foo_111() #0 {
top:
  %.promoted = load i64, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:2 within `foo`
  %0 = add i64 %.promoted, 1000
;  @ REPL[3] within `foo`
  store i64 %0, i64* inttoptr (i64 140370001753968 to i64*), align 16
;  @ REPL[3]:4 within `foo`
  ret void
}
```

* rm redundant copy

Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
---
 base/compiler/abstractinterpretation.jl |  11 ++
 base/compiler/optimize.jl               |   5 +-
 base/compiler/ssair/inlining.jl         | 134 +++++++++++++++------
 base/compiler/ssair/ir.jl               |  60 +++++-----
 base/compiler/ssair/passes.jl           | 149 ++++++++++++++++++++++--
 base/compiler/stmtinfo.jl               |  11 ++
 base/compiler/tfuncs.jl                 |   2 +-
 src/builtins.c                          |   2 +-
 test/compiler/inline.jl                 |  87 ++++++++++++++
 9 files changed, 382 insertions(+), 79 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index a254083d84fdd..d645240b52166 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1619,6 +1619,15 @@ function invoke_rewrite(xs::Vector{Any})
     return newxs
 end
 
+function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+    if length(argtypes) == 3
+        finalizer_argvec = Any[argtypes[2], argtypes[3]]
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1)
+        return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects))
+    end
+    return CallMeta(Nothing, Effects(), false)
+end
+
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         arginfo::ArgInfo, sv::InferenceState,
@@ -1633,6 +1642,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return abstract_invoke(interp, arginfo, sv)
         elseif f === modifyfield!
             return abstract_modifyfield!(interp, argtypes, sv)
+        elseif f === Core.finalizer
+            return abstract_finalizer(interp, argtypes, sv)
         end
         rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods)
         return CallMeta(rt, builtin_effects(f, argtypes, rt), false)
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index e80f5353823ca..580b307838110 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -27,6 +27,9 @@ const IR_FLAG_THROW_BLOCK = 0x01 << 3
 # This statement may be removed if its result is unused. In particular it must
 # thus be both pure and effect free.
 const IR_FLAG_EFFECT_FREE = 0x01 << 4
+# This statement was proven not to throw
+const IR_FLAG_NOTHROW     = 0x01 << 5
+
 
 const TOP_TUPLE = GlobalRef(Core, :tuple)
 
@@ -567,7 +570,7 @@ function run_passes(
     @pass "Inlining"  ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
     # @timeit "verify 2" verify_ir(ir)
     @pass "compact 2" ir = compact!(ir)
-    @pass "SROA"      ir = sroa_pass!(ir)
+    @pass "SROA"      ir = sroa_pass!(ir, sv.inlining)
     @pass "ADCE"      ir = adce_pass!(ir)
     @pass "type lift" ir = type_lift_pass!(ir)
     @pass "compact 3" ir = compact!(ir)
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index c1205cccb132e..6b3c5b2e44c34 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -308,21 +308,17 @@ function finish_cfg_inline!(state::CFGInliningState)
     end
 end
 
-function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
-                         linetable::Vector{LineInfoNode}, item::InliningTodo,
-                         boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
-    # Ok, do the inlining here
-    spec = item.spec::ResolvedInliningSpec
-    sparam_vals = item.mi.sparam_vals
-    def = item.mi.def::Method
+function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode,
+                              inlinee::Method,
+                              inlined_at::Int32)
+    coverage = coverage_enabled(inlinee.module)
     linetable_offset::Int32 = length(linetable)
     # Append the linetable of the inlined function to our line table
-    inlined_at = compact.result[idx][:line]
     topline::Int32 = linetable_offset + Int32(1)
-    coverage = coverage_enabled(def.module)
     coverage_by_path = JLOptions().code_coverage == 3
-    push!(linetable, LineInfoNode(def.module, def.name, def.file, def.line, inlined_at))
-    oldlinetable = spec.ir.linetable
+    push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at))
+    oldlinetable = inlinee_ir.linetable
+    extra_coverage_line = 0
     for oldline in 1:length(oldlinetable)
         entry = oldlinetable[oldline]
         if !coverage && coverage_by_path && is_file_tracked(entry.file)
@@ -341,8 +337,25 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         end
         push!(linetable, newentry)
     end
-    if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline
-        insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline))
+    if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline
+        extra_coverage_line = topline
+    end
+    return linetable_offset, extra_coverage_line
+end
+
+function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
+                         linetable::Vector{LineInfoNode}, item::InliningTodo,
+                         boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
+    # Ok, do the inlining here
+    spec = item.spec::ResolvedInliningSpec
+    sparam_vals = item.mi.sparam_vals
+    def = item.mi.def::Method
+    inlined_at = compact.result[idx][:line]
+    linetable_offset::Int32 = length(linetable)
+    topline::Int32 = linetable_offset + Int32(1)
+    linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, item.spec.ir, def, inlined_at)
+    if extra_coverage_line != 0
+        insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
     end
     if def.isva
         nargs_def = Int(def.nargs::Int32)
@@ -839,7 +852,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
     src === nothing && return compileable_specialization(et, match, effects)
 
     et !== nothing && push!(et, mi)
-    return InliningTodo(mi, src, effects)
+    return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects)
 end
 
 function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8)
@@ -861,7 +874,8 @@ function validate_sparams(sparams::SimpleVector)
 end
 
 function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
-                         flag::UInt8, state::InliningState)
+                         flag::UInt8, state::InliningState,
+                         do_resolve::Bool = true)
     method = match.method
     spec_types = match.spec_types
 
@@ -895,26 +909,20 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     todo = InliningTodo(mi, match, argtypes)
     # If we don't have caches here, delay resolving this MethodInstance
     # until the batch inlining step (or an external post-processing pass)
-    state.mi_cache === nothing && return todo
+    do_resolve && state.mi_cache === nothing && return todo
     return resolve_todo(todo, state, flag)
 end
 
 function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects)
-    ir = copy(ir)
     return InliningTodo(mi, ResolvedInliningSpec(ir, effects))
 end
 
-function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Vector{UInt8}}, effects::Effects)
-    if !isa(src, CodeInfo)
-        src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
-    else
-        src = copy(src)
-    end
-    @timeit "inline IR inflation" begin
-        ir = inflate_ir!(src, mi)::IRCode
-        return InliningTodo(mi, ResolvedInliningSpec(ir, effects))
-    end
+function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1})
+    src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
+    return inflate_ir!(src, mi)
 end
+retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi)::IRCode
+retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir)
 
 function handle_single_case!(
     ir::IRCode, idx::Int, stmt::Expr,
@@ -1196,7 +1204,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto
         end
     end
 
-    if sig.f !== Core.invoke && is_builtin(sig)
+    if sig.f !== Core.invoke && sig.f !== Core.finalizer && is_builtin(sig)
         # No inlining for builtins (other invoke/apply/typeassert)
         return nothing
     end
@@ -1213,9 +1221,10 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto
 end
 
 # TODO inline non-`isdispatchtuple`, union-split callsites?
-function analyze_single_call!(
-    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
-    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+function compute_inlining_cases(
+        infos::Vector{MethodMatchInfo}, flag::UInt8,
+        sig::Signature, state::InliningState,
+        do_resolve::Bool = true)
     argtypes = sig.argtypes
     cases = InliningCase[]
     local any_fully_covered = false
@@ -1232,7 +1241,7 @@ function analyze_single_call!(
             continue
         end
         for match in meth
-            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
+            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve)
             any_fully_covered |= match.fully_covers
         end
     end
@@ -1242,8 +1251,18 @@ function analyze_single_call!(
         filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
 
-    handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
-        handled_all_cases & any_fully_covered, todo, state.params)
+    return cases, handled_all_cases & any_fully_covered
+end
+
+function analyze_single_call!(
+    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+
+    r = compute_inlining_cases(infos, flag, sig, state)
+    r === nothing && return nothing
+    cases, all_covered = r
+    handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
+        all_covered, todo, state.params)
 end
 
 # similar to `analyze_single_call!`, but with constant results
@@ -1295,14 +1314,15 @@ end
 
 function handle_match!(
     match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState,
-    cases::Vector{InliningCase}, allow_abstract::Bool = false)
+    cases::Vector{InliningCase}, allow_abstract::Bool = false,
+    do_resolve::Bool = true)
     spec_types = match.spec_types
     allow_abstract || isdispatchtuple(spec_types) || return false
     # we may see duplicated dispatch signatures here when a signature gets widened
     # during abstract interpretation: for the purpose of inlining, we can just skip
     # processing this dispatch candidate
     _any(case->case.sig === spec_types, cases) && return true
-    item = analyze_method!(match, argtypes, flag, state)
+    item = analyze_method!(match, argtypes, flag, state, do_resolve)
     item === nothing && return false
     push!(cases, InliningCase(spec_types, item))
     return true
@@ -1417,6 +1437,48 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
             continue
         end
 
+        # Handle finalizer
+        if sig.f === Core.finalizer
+            if isa(info, FinalizerInfo)
+                # Only inline finalizers that are known nothrow and notls.
+                # This avoids having to set up state for finalizer isolation
+                (is_nothrow(info.effects) && is_notaskstate(info.effects)) || continue
+
+                info = info.info
+                if isa(info, MethodMatchInfo)
+                    infos = MethodMatchInfo[info]
+                elseif isa(info, UnionSplitInfo)
+                    infos = info.matches
+                else
+                    continue
+                end
+
+                ft = argextype(stmt.args[2], ir)
+                has_free_typevars(ft) && return nothing
+                f = singleton_type(ft)
+                argtypes = Vector{Any}(undef, 2)
+                argtypes[1] = ft
+                argtypes[2] = argextype(stmt.args[3], ir)
+                sig = Signature(f, ft, argtypes)
+
+                cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false)
+                length(cases) == 0 && continue
+                if all_covered && length(cases) == 1
+                    if isa(cases[1], InliningCase)
+                        case1 = cases[1].item
+                        if isa(case1, InliningTodo)
+                            push!(stmt.args, true)
+                            push!(stmt.args, case1.mi)
+                        elseif isa(case1, InvokeCase)
+                            push!(stmt.args, false)
+                            push!(stmt.args, case1.invoke)
+                        end
+                    end
+                end
+                continue
+            end
+        end
+
         # if inference arrived here with constant-prop'ed result(s),
         # we can perform a specialized analysis for just this case
         if isa(info, ConstCallInfo)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index bc38e61fac630..1054484c84cf0 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -163,36 +163,6 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
 
 
 # SSA-indexed nodes
-
-struct NewInstruction
-    stmt::Any
-    type::Any
-    info::Any
-    # If nothing, copy the line from previous statement
-    # in the insertion location
-    line::Union{Int32, Nothing}
-    flag::UInt8
-
-    ## Insertion options
-
-    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
-    # Don't bother redoing so on insertion.
-    effect_free_computed::Bool
-    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
-            line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
-        new(stmt, type, info, line, flag, effect_free_computed)
-end
-NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
-    NewInstruction(stmt, type, nothing)
-NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
-    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
-
-effect_free(inst::NewInstruction) =
-    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
-non_effect_free(inst::NewInstruction) =
-    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
-
-
 struct InstructionStream
     inst::Vector{Any}
     type::Vector{Any}
@@ -292,6 +262,36 @@ function add!(new::NewNodeStream, pos::Int, attach_after::Bool)
 end
 copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info))
 
+struct NewInstruction
+    stmt::Any
+    type::Any
+    info::Any
+    # If nothing, copy the line from previous statement
+    # in the insertion location
+    line::Union{Int32, Nothing}
+    flag::UInt8
+
+    ## Insertion options
+
+    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
+    # Don't bother redoing so on insertion.
+    effect_free_computed::Bool
+    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
+            line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
+        new(stmt, type, info, line, flag, effect_free_computed)
+end
+NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
+    NewInstruction(stmt, type, nothing)
+NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
+    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
+NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) =
+    NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? meta[:line] : line, meta[:flag], true)
+
+effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
+non_effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
+
 struct IRCode
     stmts::InstructionStream
     argtypes::Vector{Any}
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 20b276b5f3f3e..8b5dc71720001 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -14,6 +14,7 @@ GetfieldUse(idx::Int)  = SSAUse(:getfield, idx)
 PreserveUse(idx::Int)  = SSAUse(:preserve, idx)
 NoPreserve()           = SSAUse(:nopreserve, 0)
 IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx)
+AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx)
 
 """
     du::SSADefUse
@@ -735,7 +736,7 @@ its argument).
 In a case when all usages are fully eliminated, `struct` allocation may also be erased as
 a result of succeeding dead code elimination.
 """
-function sroa_pass!(ir::IRCode)
+function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing)
     compact = IncrementalCompact(ir)
     defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations
     lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
@@ -744,7 +745,7 @@ function sroa_pass!(ir::IRCode)
     for ((_, idx), stmt) in compact
         # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
         isa(stmt, Expr) || continue
-        is_setfield = is_isdefined = false
+        is_setfield = is_isdefined = is_finalizer = false
         field_ordering = :unspecified
         if is_known_call(stmt, setfield!, compact)
             4 <= length(stmt.args) <= 5 || continue
@@ -767,6 +768,13 @@ function sroa_pass!(ir::IRCode)
                 field_ordering = argextype(stmt.args[4], compact)
                 widenconst(field_ordering) === Bool && (field_ordering = :unspecified)
             end
+        elseif is_known_call(stmt, Core.finalizer, compact)
+            3 <= length(stmt.args) <= 5 || continue
+            # Inlining performs legality checks on the finalizer to determine
+            # whether or not we may inline it. If so, it appends extra arguments
+            # at the end of the intrinsic. Detect that here.
+            length(stmt.args) == 5 || continue
+            is_finalizer = true
         elseif isexpr(stmt, :foreigncall)
             nccallargs = length(stmt.args[3]::SimpleVector)
             preserved = Int[]
@@ -824,10 +832,13 @@ function sroa_pass!(ir::IRCode)
 
         # analyze this `getfield` / `isdefined` / `setfield!` call
 
-        field = try_compute_field_stmt(compact, stmt)
-        field === nothing && continue
-
-        val = stmt.args[2]
+        if !is_finalizer
+            field = try_compute_field_stmt(compact, stmt)
+            field === nothing && continue
+            val = stmt.args[2]
+        else
+            val = stmt.args[3]
+        end
 
         struct_typ = unwrap_unionall(widenconst(argextype(val, compact)))
         if isa(struct_typ, Union) && struct_typ <: Tuple
@@ -864,14 +875,16 @@ function sroa_pass!(ir::IRCode)
                     push!(defuse.defs, idx)
                 elseif is_isdefined
                     push!(defuse.uses, IsdefinedUse(idx))
+                elseif is_finalizer
+                    push!(defuse.uses, AddFinalizerUse(idx))
                 else
                     push!(defuse.uses, GetfieldUse(idx))
                 end
                 union!(mid, intermediaries)
             end
             continue
-        elseif is_setfield
-            continue # invalid `setfield!` call, but just ignore here
+        elseif is_setfield || is_finalizer
+            continue # invalid `setfield!` or `Core.finalizer` call, but just ignore here
         elseif is_isdefined
             continue # TODO?
         end
@@ -921,7 +934,7 @@ function sroa_pass!(ir::IRCode)
         used_ssas = copy(compact.used_ssas)
         simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1)
         ir = complete(compact)
-        sroa_mutables!(ir, defuses, used_ssas, lazydomtree)
+        sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining)
         return ir
     else
         simple_dce!(compact)
@@ -929,7 +942,60 @@ function sroa_pass!(ir::IRCode)
     end
 end
 
-function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree)
+function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState)
+    code = get(inlining.mi_cache, mi, nothing)
+    if code isa CodeInstance
+        if use_const_api(code)
+            # No code in the function - Nothing to do
+            inlining.et !== nothing && push!(inlining.et, mi)
+            return true
+        end
+        src = code.inferred
+    else
+        src = code
+    end
+
+    src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[])
+    src === nothing && return false
+    src = retrieve_ir_for_inlining(mi, src)
+
+    # For now: Require finalizer to only have one basic block
+    length(src.cfg.blocks) == 1 || return false
+
+    # Ok, we're committed to inlining the finalizer
+    inlining.et !== nothing && push!(inlining.et, mi)
+
+    linetable_offset, extra_coverage_line = ir_inline_linetable!(ir.linetable, src, mi.def, ir[SSAValue(idx)][:line])
+    if extra_coverage_line != 0
+        insert_node!(ir, idx, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
+    end
+
+    # TODO: Use the actual inliner here rather than open coding this special
+    # purpose inliner.
+    spvals = mi.sparam_vals
+    ssa_rename = Vector{Any}(undef, length(src.stmts))
+    for idx′ = 1:length(src.stmts)
+        urs = userefs(src[SSAValue(idx′)][:inst])
+        for ur in urs
+            if isa(ur[], SSAValue)
+                ur[] = ssa_rename[ur[].id]
+            elseif isa(ur[], Argument)
+                ur[] = argexprs[ur[].n]
+            elseif isexpr(ur[], :static_parameter)
+                ur[] = spvals[ur[].args[1]]
+            end
+        end
+        # TODO: Scan newly added statement into the sroa defuse struct
+        stmt = urs[]
+        isa(stmt, ReturnNode) && continue
+        inst = src[SSAValue(idx′)]
+        ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, inst; line = inst[:line] + linetable_offset), true)
+    end
+    return true
+end
+
+is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
         # Check if there are any uses we did not account for. If so, the variable
@@ -952,9 +1018,72 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         # error at runtime, but is not illegal to have in the IR.
         ismutabletype(typ) || continue
         typ = typ::DataType
+        # First check for any add_finalizer calls
+        add_finalizer_idx = nothing
+        for use in defuse.uses
+            if use.kind === :add_finalizer
+                # For now: Only allow one add_finalizer per allocation
+                add_finalizer_idx !== nothing && @goto skip
+                add_finalizer_idx = use.idx
+            end
+        end
+        if add_finalizer_idx !== nothing
+            # For now: Require that all uses and defs are in the same basic block,
+            # so that live range calculations are easy.
+            bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)]
+            minval::Int = typemax(Int)
+            maxval::Int = 0
+
+            check_in_range(defuse) = check_in_range(defuse.idx)
+            function check_in_range(didx::Int)
+                didx in bb.stmts || return false
+                if didx < minval
+                    minval = didx
+                end
+                if didx > maxval
+                    maxval = didx
+                end
+                return true
+            end
+
+            check_in_range(idx) || continue
+            _all(check_in_range, defuse.uses) || continue
+            _all(check_in_range, defuse.defs) || continue
+
+            # For now: Require all statements in the basic block range to be
+            # nothrow.
+            all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval)
+            all_nothrow || continue
+
+            # Ok, finalizer rewrite is legal.
+            add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst]
+            argexprs = Any[add_finalizer_stmt.args[2], add_finalizer_stmt.args[3]]
+            may_inline = add_finalizer_stmt.args[4]::Bool
+            mi = add_finalizer_stmt.args[5]::Union{MethodInstance, Nothing}
+            if may_inline && mi !== nothing
+                if try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[5], inlining)
+                    @goto done_finalizer
+                end
+                mi = compileable_specialization(inlining.et, mi, Effects()).invoke
+            end
+            if mi !== nothing
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:invoke, mi, argexprs...), Nothing),
+                    true)
+            else
+                insert_node!(ir, maxval,
+                    NewInstruction(Expr(:call, argexprs...), Nothing),
+                    true)
+            end
+            @label done_finalizer
+            # Erase call to add_finalizer
+            ir[SSAValue(add_finalizer_idx)][:inst] = nothing
+            continue
+        end
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
         all_eliminated = all_forwarded = true
+        has_finalizer = false
         for use in defuse.uses
             if use.kind === :preserve
                 for du in fielddefuse
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 3f9a562061a12..72b4c8b829c06 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -183,4 +183,15 @@ struct ReturnTypeCallInfo
     info::Any
 end
 
+"""
+    info::FinalizerInfo
+
+Represents the information of a potential (later) call to the finalizer on the given
+object type.
+"""
+struct FinalizerInfo
+    info::Any
+    effects::Effects
+end
+
 @specialize
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 7f22916048cf7..05fb8443437ac 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -559,7 +559,7 @@ add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_elt
 add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
 add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0)
-add_tfunc(Core.finalizer, 2, 2, (@nospecialize args...)->Nothing, 5)
+add_tfunc(Core.finalizer, 2, 4, (@nospecialize args...)->Nothing, 5)
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
diff --git a/src/builtins.c b/src/builtins.c
index 3e7b32e45b01a..8db1fa92ec783 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -1602,7 +1602,7 @@ JL_CALLABLE(jl_f_donotdelete)
 
 JL_CALLABLE(jl_f_finalizer)
 {
-    JL_NARGS(finalizer, 2, 2);
+    JL_NARGS(finalizer, 2, 4);
     jl_task_t *ct = jl_current_task;
     jl_gc_add_finalizer_(ct->ptls, args[1], args[0]);
     return jl_nothing;
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index a75372075da06..3dac08370c123 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -1288,3 +1288,90 @@ let src = code_typed1(Tuple{Int}) do x
     end
     @test count(x -> isa(x, Core.GlobalRef) && x.name === :nothing, src.code) == 0
 end
+
+# Test that we can inline a finalizer for a struct that does not otherwise escape
+@noinline nothrow_side_effect(x) =
+    @Base.assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid
+
+mutable struct DoAllocNoEscape
+    function DoAllocNoEscape()
+        finalizer(new()) do this
+            nothrow_side_effect(nothing)
+        end
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscape()
+        end
+    end
+    @test count(isnew, src.code) == 0
+end
+
+# Test that finalizer elision doesn't cause a throw to be inlined into a function
+# that shouldn't have it
+const finalizer_should_throw = Ref{Bool}(true)
+mutable struct DoAllocFinalizerThrows
+    function DoAllocFinalizerThrows()
+        finalizer(new()) do this
+            finalizer_should_throw[] && error("Unexpected finalizer throw")
+        end
+    end
+end
+
+function f_finalizer_throws()
+    prev = GC.enable(false)
+    for i = 1:100
+        DoAllocFinalizerThrows()
+    end
+    finalizer_should_throw[] = false
+    GC.enable(prev)
+    GC.gc()
+    return true
+end
+
+@test f_finalizer_throws()
+
+# Test finalizers with static parameters
+global last_finalizer_type::Type = Any
+mutable struct DoAllocNoEscapeSparam{T}
+    x::T
+    function finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T}
+        nothrow_side_effect(nothing)
+        nothrow_side_effect(T)
+    end
+    function DoAllocNoEscapeSparam{T}(x::T) where {T}
+        finalizer(finalizer_sparam, new{T}(x))
+    end
+end
+DoAllocNoEscapeSparam(x::T) where {T} = DoAllocNoEscapeSparam{T}(x)
+
+let src = code_typed1(Tuple{Any}) do x
+        for i = 1:1000
+            DoAllocNoEscapeSparam(x)
+        end
+    end
+    # This requires more inlining enhancements. For now just make sure this
+    # doesn't error.
+    @test count(isnew, src.code) in (0, 1) # == 0
+end
+
+# Test noinline finalizer
+@noinline function noinline_finalizer(d)
+    nothrow_side_effect(nothing)
+end
+mutable struct DoAllocNoEscapeNoInline
+    function DoAllocNoEscapeNoInline()
+        finalizer(noinline_finalizer, new())
+    end
+end
+
+let src = code_typed1() do
+        for i = 1:1000
+            DoAllocNoEscapeNoInline()
+        end
+    end
+    @test count(isnew, src.code) == 1
+    @test count(isinvoke(:noinline_finalizer), src.code) == 1
+end