From 1a1f3d70739020de705af7d611ee2c78d5630c7a Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 29 Nov 2021 11:21:14 +0900 Subject: [PATCH] optimizer: SROA `mutable(immutable(...))` case correctly (#43239) Our SROA should be able to handle `mutable(immutable(...))` case even without "proper" alias analysis, since we eliminate `immutable` first and then process `mutable` within our iterative approach. It turns out it didn't work because after the immutable handling we keep the reference count _before_ DCE, but didn't update dead reference count. This commit fixes it and now we should be able to eliminate more allocations, e.g.: ```julia julia> simple_sroa(s) = broadcast(identity, Ref(s)) simple_sroa (generic function with 1 method) julia> simple_sroa("compile"); @allocated(simple_sroa("julia")) 0 ``` --- base/compiler/ssair/ir.jl | 12 +++++++---- base/compiler/ssair/passes.jl | 10 ++++----- test/compiler/irpasses.jl | 38 +++++++++++++++++++++++++++++++++-- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index d8b4e0c084d50..3838f8d6ec6ab 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -1311,7 +1311,9 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= compact.result[old_result_idx][:inst]), (compact.idx, active_bb) end -function maybe_erase_unused!(extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, callback = x::SSAValue->nothing) +function maybe_erase_unused!( + extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, + callback = null_dce_callback) stmt = compact.result[idx][:inst] stmt === nothing && return false if compact_exprtype(compact, SSAValue(idx)) === Bottom @@ -1404,19 +1406,21 @@ function just_fixup!(compact::IncrementalCompact) end end -function simple_dce!(compact::IncrementalCompact) +function simple_dce!(compact::IncrementalCompact, callback = null_dce_callback) # Perform simple DCE for unused values extra_worklist = Int[] for (idx, nused) in Iterators.enumerate(compact.used_ssas) idx >= compact.result_idx && break nused == 0 || continue - maybe_erase_unused!(extra_worklist, compact, idx) + maybe_erase_unused!(extra_worklist, compact, idx, callback) end while !isempty(extra_worklist) - maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist)) + maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist), callback) end end +null_dce_callback(x::SSAValue) = return + function non_dce_finish!(compact::IncrementalCompact) result_idx = compact.result_idx resize!(compact.result, result_idx - 1) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 6bdb162e1f4cf..d1be326529112 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -790,9 +790,10 @@ function sroa_pass!(ir::IRCode) # now go through analyzed mutable structs and see which ones we can eliminate # NOTE copy the use count here, because `simple_dce!` may modify it and we need it # consistent with the state of the IR here (after tracking `PhiNode` arguments, - # but before the DCE) for our predicate within `sroa_mutables!` + # but before the DCE) for our predicate within `sroa_mutables!`, but we also + # try an extra effort using a callback so that reference counts are updated used_ssas = copy(compact.used_ssas) - simple_dce!(compact) + simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1) ir = complete(compact) sroa_mutables!(ir, defuses, used_ssas) return ir @@ -862,10 +863,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse isempty(du.uses) && continue push!(du.defs, newidx) ldu = compute_live_ins(ir.cfg, du) - phiblocks = Int[] - if !isempty(ldu.live_in_bbs) - phiblocks = iterated_dominance_frontier(ir.cfg, ldu, domtree) - end + phiblocks = isempty(ldu.live_in_bbs) ? Int[] : iterated_dominance_frontier(ir.cfg, ldu, domtree) allblocks = sort(vcat(phiblocks, ldu.def_bbs)) blocks[fidx] = phiblocks, allblocks if fidx + 1 > length(defexpr.args) diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 9c5e3a41770bb..2151d938b525f 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -227,14 +227,40 @@ let src = code_typed1((Any,Any,Any)) do x, y, z x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)] end end -# FIXME our analysis isn't yet so powerful at this moment, e.g. it can't handle nested mutable objects + +# FIXME our analysis isn't yet so powerful at this moment: may be unable to handle nested objects well +# OK: mutable(immutable(...)) case +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = MutableXYZ(x, y, z) + t = (xyz,) + v = t[1].x + v, v, v + end + @test !any(isnew, src.code) +end let src = code_typed1((Any,Any,Any)) do x, y, z xyz = MutableXYZ(x, y, z) outer = ImmutableOuter(xyz, xyz, xyz) outer.x.x, outer.y.y, outer.z.z end - @test_broken !any(isnew, src.code) + @test !any(isnew, src.code) + @test any(src.code) do @nospecialize x + iscall((src, tuple), x) && + x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)] + end +end +let # this is a simple end to end test case, which demonstrates allocation elimination + # by handling `mutable[RefValue{String}](immutable[Tuple](...))` case correctly + # NOTE this test case isn't so robust and might be subject to future changes of the broadcasting implementation, + # in that case you don't really need to stick to keeping this test case around + simple_sroa(s) = broadcast(identity, Ref(s)) + s = Base.inferencebarrier("julia")::String + simple_sroa(s) + # NOTE don't hard-code `"julia"` in `@allocated` clause and make sure to execute the + # compiled code for `simple_sroa`, otherwise everything can be folded even without SROA + @test @allocated(simple_sroa(s)) == 0 end +# FIXME: immutable(mutable(...)) case let src = code_typed1((Any,Any,Any)) do x, y, z xyz = ImmutableXYZ(x, y, z) outer = MutableOuter(xyz, xyz, xyz) @@ -242,6 +268,14 @@ let src = code_typed1((Any,Any,Any)) do x, y, z end @test_broken !any(isnew, src.code) end +# FIXME: mutable(mutable(...)) case +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = MutableXYZ(x, y, z) + outer = MutableOuter(xyz, xyz, xyz) + outer.x.x, outer.y.y, outer.z.z + end + @test_broken !any(isnew, src.code) +end # should work nicely with inlining to optimize away a complicated case # adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B