diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 4f7626d2f65ade..8c41ff9d0f0e45 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -325,7 +325,7 @@ function run_passes(ci::CodeInfo, sv::OptimizationState) @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) # @timeit "verify 2" verify_ir(ir) @timeit "compact 2" ir = compact!(ir) - @timeit "SROA" ir = getfield_elim_pass!(ir) + @timeit "SROA" ir = sroa_pass!(ir) @timeit "ADCE" ir = adce_pass!(ir) @timeit "type lift" ir = type_lift_pass!(ir) @timeit "compact 3" ir = compact!(ir) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 0ba1c38fc72bb7..e4e0d1939ed8c0 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -75,7 +75,22 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector end function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) - # Find the first dominating def + def, stmtblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use_idx) + if def == 0 + if !haskey(phinodes, curblock) + # If this happens, we need to search the predecessors for defs. Which + # one doesn't matter - if it did, we'd have had a phinode + return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) + end + # The use is the phinode + return phinodes[curblock] + else + return val_for_def_expr(ir, def, fidx) + end +end + +# find the first dominating def for the given use +function find_def_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use_idx::Int) stmtblock = block_for_inst(ir.cfg, use_idx) curblock = find_curblock(domtree, allblocks, stmtblock) local def = 0 @@ -90,17 +105,7 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I end end end - if def == 0 - if !haskey(phinodes, curblock) - # If this happens, we need to search the predecessors for defs. Which - # one doesn't matter - if it did, we'd have had a phinode - return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) - end - # The use is the phinode - return phinodes[curblock] - else - return val_for_def_expr(ir, def, fidx) - end + return def, stmtblock, curblock end function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), @@ -538,7 +543,7 @@ function perform_lifting!(compact::IncrementalCompact, end """ - getfield_elim_pass!(ir::IRCode) -> newir::IRCode + sroa_pass!(ir::IRCode) -> newir::IRCode `getfield` elimination pass, a.k.a. Scalar Replacements of Aggregates optimization. @@ -555,7 +560,7 @@ its argument). In a case when all usages are fully eliminated, `struct` allocation may also be erased as a result of dead code elimination. """ -function getfield_elim_pass!(ir::IRCode) +function sroa_pass!(ir::IRCode) compact = IncrementalCompact(ir) defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}() lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() @@ -784,7 +789,6 @@ function getfield_elim_pass!(ir::IRCode) typ = typ::DataType # Partition defuses by field fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)] - ok = true for use in defuse.uses stmt = ir[SSAValue(use)] # We may have discovered above that this use is dead @@ -793,39 +797,52 @@ function getfield_elim_pass!(ir::IRCode) # the use in that case. stmt === nothing && continue field = try_compute_fieldidx_stmt(compact, stmt::Expr, typ) - field === nothing && (ok = false; break) + field === nothing && @goto skip push!(fielddefuse[field].uses, use) end - ok || continue for use in defuse.defs field = try_compute_fieldidx_stmt(compact, ir[SSAValue(use)]::Expr, typ) - field === nothing && (ok = false; break) + field === nothing && @goto skip push!(fielddefuse[field].defs, use) end - ok || continue # Check that the defexpr has defined values for all the fields # we're accessing. In the future, we may want to relax this, # but we should come up with semantics for well defined semantics # for uninitialized fields first. - for (fidx, du) in pairs(fielddefuse) + ndefuse = length(fielddefuse) + blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse) + for fidx in 1:ndefuse + du = fielddefuse[fidx] isempty(du.uses) && continue + push!(du.defs, idx) + ldu = compute_live_ins(ir.cfg, du) + phiblocks = Int[] + if !isempty(ldu.live_in_bbs) + phiblocks = idf(ir.cfg, ldu, domtree) + end + allblocks = sort(vcat(phiblocks, ldu.def_bbs)) + blocks[fidx] = phiblocks, allblocks if fidx + 1 > length(defexpr.args) - ok = false - break + # even if the allocation contains an uninitialized field, we try an extra effort + # to check if all uses have any "solid" `setfield!` calls that define the field + # although we give up the cases below: + # `def == idx`: this field can only defined at the allocation site (thus this case will throw) + # `def == 0`: this field comes from `PhiNode` + # we may be able to traverse control flows of PhiNode values, but it sounds + # more complicated than beneficial under the current implementation + for use in du.uses + def = find_def_for_use(ir, domtree, allblocks, du, use)[1] + (def == 0 || def == idx) && @goto skip + end end end - ok || continue preserve_uses = IdDict{Int, Vector{Any}}((idx=>Any[] for idx in IdSet{Int}(defuse.ccall_preserve_uses))) # Everything accounted for. Go field by field and perform idf - for (fidx, du) in pairs(fielddefuse) + for fidx in 1:ndefuse + du = fielddefuse[fidx] ftyp = fieldtype(typ, fidx) if !isempty(du.uses) - push!(du.defs, idx) - ldu = compute_live_ins(ir.cfg, du) - phiblocks = Int[] - if !isempty(ldu.live_in_bbs) - phiblocks = idf(ir.cfg, ldu, domtree) - end + phiblocks, allblocks = blocks[fidx] phinodes = IdDict{Int, SSAValue}() for b in phiblocks n = PhiNode() @@ -833,7 +850,6 @@ function getfield_elim_pass!(ir::IRCode) NewInstruction(n, ftyp)) end # Now go through all uses and rewrite them - allblocks = sort(vcat(phiblocks, ldu.def_bbs)) for stmt in du.uses ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt) end @@ -855,7 +871,6 @@ function getfield_elim_pass!(ir::IRCode) stmt == idx && continue ir[SSAValue(stmt)] = nothing end - continue end isempty(defuse.ccall_preserve_uses) && continue push!(intermediaries, idx) @@ -870,6 +885,8 @@ function getfield_elim_pass!(ir::IRCode) old_preserves..., new_preserves...) ir[SSAValue(use)] = new_expr end + + @label skip end return ir @@ -919,14 +936,14 @@ In addition to a simple DCE for unused values and allocations, this pass also nullifies `typeassert` calls that can be proved to be no-op, in order to allow LLVM to emit simpler code down the road. -Note that this pass is more effective after SROA optimization (i.e. `getfield_elim_pass!`), +Note that this pass is more effective after SROA optimization (i.e. `sroa_pass!`), since SROA often allows this pass to: - eliminate allocation of object whose field references are all replaced with scalar values, and - nullify `typeassert` call whose first operand has been replaced with a scalar value (, which may have introduced new type information that inference did not understand) -Also note that currently this pass _needs_ to run after `getfield_elim_pass!`, because -the `typeassert` elimination depends on the transformation within `getfield_elim_pass!` +Also note that currently this pass _needs_ to run after `sroa_pass!`, because +the `typeassert` elimination depends on the transformation within `sroa_pass!` which redirects references of `typeassert`ed value to the corresponding `PiNode`. """ function adce_pass!(ir::IRCode) diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index c2f05ea4db297c..aac9da83deebe5 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -69,6 +69,171 @@ end # Tests for SROA +import Core.Compiler: argextype, singleton_type +const EMPTY_SPTYPES = Core.Compiler.EMPTY_SLOTTYPES + +code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::Core.CodeInfo +get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code + +# check if `x` is a statement with a given `head` +isnew(@nospecialize x) = Meta.isexpr(x, :new) + +# check if `x` is a dynamic call of a given function +iscall(y) = @nospecialize(x) -> iscall(y, x) +function iscall((src, f)::Tuple{Core.CodeInfo,Function}, @nospecialize(x)) + return iscall(x) do @nospecialize x + singleton_type(argextype(x, src, EMPTY_SPTYPES)) === f + end +end +iscall(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1]) + +struct ImmutableXYZ; x; y; z; end +mutable struct MutableXYZ; x; y; z; end + +# should optimize away very basic cases +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = ImmutableXYZ(x, y, z) + xyz.x, xyz.y, xyz.z + end + @test !any(isnew, src.code) +end +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = MutableXYZ(x, y, z) + xyz.x, xyz.y, xyz.z + end + @test !any(isnew, src.code) +end + +# should handle simple mutabilities +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = MutableXYZ(x, y, z) + xyz.y = 42 + xyz.x, xyz.y, xyz.z + end + @test !any(isnew, src.code) + @test any(src.code) do @nospecialize x + iscall((src, tuple), x) && + x.args[2:end] == Any[#=x=# Core.Argument(2), 42, #=x=# Core.Argument(4)] + end +end +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = MutableXYZ(x, y, z) + xyz.x, xyz.z = xyz.z, xyz.x + xyz.x, xyz.y, xyz.z + end + @test !any(isnew, src.code) + @test any(src.code) do @nospecialize x + iscall((src, tuple), x) && + x.args[2:end] == Any[#=z=# Core.Argument(4), #=y=# Core.Argument(3), #=x=# Core.Argument(2)] + end +end +# circumvent uninitialized fields as far as there is a solid `setfield!` definition +let src = code_typed1() do + r = Ref{Any}() + r[] = 42 + return r[] + end + @test !any(isnew, src.code) +end +let src = code_typed1((Bool,)) do cond + r = Ref{Any}() + if cond + r[] = 42 + return r[] + else + r[] = 32 + return r[] + end + end + @test !any(isnew, src.code) +end +# FIXME to handle this case, we need a more strong alias analysis +let src = code_typed1((Bool,)) do cond + r = Ref{Any}() + if cond + r[] = 42 + else + r[] = 32 + end + return r[] + end + @test_broken !any(isnew, src.code) +end +let src = code_typed1((Bool,)) do cond + r = Ref{Any}() + if cond + r[] = 42 + end + return r[] + end + # N.B. `r` should be allocated since `cond` might be `false` and then it will be thrown + @test any(isnew, src.code) +end + +# should include a simple alias analysis +struct ImmutableOuter{T}; x::T; y::T; z::T; end +mutable struct MutableOuter{T}; x::T; y::T; z::T; end +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = ImmutableXYZ(x, y, z) + outer = ImmutableOuter(xyz, xyz, xyz) + outer.x.x, outer.y.y, outer.z.z + end + @test !any(src.code) do @nospecialize x + Meta.isexpr(x, :new) + end + @test any(src.code) do @nospecialize x + iscall((src, tuple), x) && + x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)] + end +end +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = ImmutableXYZ(x, y, z) + # #42831 forms ::PartialStruct(ImmutableOuter{Any}, Any[ImmutableXYZ, ImmutableXYZ, ImmutableXYZ]) + # so the succeeding `getproperty`s are type stable and inlined + outer = ImmutableOuter{Any}(xyz, xyz, xyz) + outer.x.x, outer.y.y, outer.z.z + end + @test !any(isnew, src.code) + @test any(src.code) do @nospecialize x + iscall((src, tuple), x) && + x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)] + end +end +# FIXME our analysis isn't yet so powerful at this moment, e.g. it can't handle nested mutable objects +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = MutableXYZ(x, y, z) + outer = ImmutableOuter(xyz, xyz, xyz) + outer.x.x, outer.y.y, outer.z.z + end + @test_broken !any(isnew, src.code) +end +let src = code_typed1((Any,Any,Any)) do x, y, z + xyz = ImmutableXYZ(x, y, z) + outer = MutableOuter(xyz, xyz, xyz) + outer.x.x, outer.y.y, outer.z.z + end + @test_broken !any(isnew, src.code) +end + +# should work nicely with inlining to optimize away a complicated case +# adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B +struct Point + x::Float64 + y::Float64 +end +#=@inline=# add(a::Point, b::Point) = Point(a.x + b.x, a.y + b.y) +function compute() + a = Point(1.5, 2.5) + b = Point(2.25, 4.75) + for i in 0:(100000000-1) + a = add(add(a, b), b) + end + a.x, a.y +end +let src = code_typed1(compute) + @test !any(isnew, src.code) +end + mutable struct Foo30594; x::Float64; end Base.copy(x::Foo30594) = Foo30594(x.x) function add!(p::Foo30594, off::Foo30594) @@ -180,7 +345,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any]) @test Core.Compiler.verify_ir(ir) === nothing - ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir) + ir = @test_nowarn Core.Compiler.sroa_pass!(ir) @test Core.Compiler.verify_ir(ir) === nothing end @@ -384,7 +549,7 @@ exc39508 = ErrorException("expected") end @test test39508() === exc39508 -let # `getfield_elim_pass!` should work with constant globals +let # `sroa_pass!` should work with constant globals # immutable pass src = @eval Module() begin const REF_FLD = :x