From ce70402ca1cd1a42e9563ec54e4cd25463062049 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 24 Apr 2018 13:39:43 +0200 Subject: [PATCH] [NewOptimizer] Union Split during Inlining This adds union splitting optimizations to the inliner. This works a bit differently from the old optimizer, which did union splitting on the bail out path. Instead, we try to run the full inliner on any union split case that's a dispatch tuple (and record what to do in that case). Doing that allows an effective resolution of #23338, though this PR doesn't quite do that yet, since some further (minor) fixes are needed. --- base/compiler/compiler.jl | 9 - base/compiler/ssair/domtree.jl | 2 + base/compiler/ssair/driver.jl | 3 +- base/compiler/ssair/inlining2.jl | 1019 +++++++++++++++++++----------- base/compiler/ssair/show.jl | 22 +- base/compiler/ssair/verify.jl | 22 +- base/compiler/utilities.jl | 13 + 7 files changed, 702 insertions(+), 388 deletions(-) diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 80879dd66b5d8..faf95ce00544a 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -101,15 +101,6 @@ using .Sort # compiler # ############ -inlining_enabled() = (JLOptions().can_inline == 1) -coverage_enabled() = (JLOptions().code_coverage != 0) -function inbounds_option() - opt_check_bounds = JLOptions().check_bounds - opt_check_bounds == 0 && return :default - opt_check_bounds == 1 && return :on - return :off -end - include("compiler/utilities.jl") include("compiler/validation.jl") diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 38e1e29d0c6ec..6a65aac823cf7 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -23,6 +23,8 @@ function dominates(domtree::DomTree, bb1::Int, bb2::Int) return bb1 == bb2 end +bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1 + function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int) 
domtree[node] = DomTreeNode(level, domtree[node].children) foreach(domtree[node].children) do child diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index cb42408d6973b..18dfb82161653 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -169,6 +169,7 @@ function run_passes(ci::CodeInfo, nargs::Int, linetable::Vector{LineInfoNode}, s @timeit "compact 2" ir = compact!(ir) @timeit "type lift" ir = type_lift_pass!(ir) @timeit "compact 3" ir = compact!(ir) - #@timeit "verify 3" (verify_ir(ir); verify_linetable(linetable)) + #@Base.show ir + @timeit "verify 3" (verify_ir(ir); verify_linetable(linetable)) return ir end diff --git a/base/compiler/ssair/inlining2.jl b/base/compiler/ssair/inlining2.jl index 085cb2733fb83..39e673070b6a8 100644 --- a/base/compiler/ssair/inlining2.jl +++ b/base/compiler/ssair/inlining2.jl @@ -8,6 +8,7 @@ struct InliningTodo na::Int method::Method # The method being inlined sparams::Vector{Any} # The static parameters we computed for this call site + metharg::Any # The LineTable and IR of the inlinee linetable::Vector{LineInfoNode} ir::IRCode @@ -16,6 +17,29 @@ struct InliningTodo linear_inline_eligible::Bool end +struct ConstantCase + val::Any + method::Method + sparams::Vector{Any} + metharg::Any +end + +struct DynamicCase + method::Method + sparams::Vector{Any} + metharg::Any +end + +struct UnionSplit + idx::Int # The statement to replace + fully_covered::Bool + atype::Any + isinvoke::Bool + cases::Vector{Pair{Type, Any}} + bbs::Vector{Int} +end +UnionSplit(idx, fully_covered, atype, isinvoke, cases) = UnionSplit(idx, fully_covered, atype, isinvoke, cases, Int[]) + function ssa_inlining_pass!(ir::IRCode, linetable::Vector{LineInfoNode}, sv::OptimizationState) # Go through the function, perfoming simple ininlingin (e.g. replacing call by constants # and analyzing legality of inlining). 
@@ -26,169 +50,406 @@ function ssa_inlining_pass!(ir::IRCode, linetable::Vector{LineInfoNode}, sv::Opt return ir end -function batch_inline!(todo::Vector{InliningTodo}, ir::IRCode, linetable::Vector{LineInfoNode}, sv::OptimizationState) - # Compute the new CFG first (modulo statement ranges, which will be computed below) - new_cfg_blocks = BasicBlock[] - inserted_block_ranges = UnitRange{Int}[] - todo_bbs = Tuple{Int, Int}[] - first_bb = 0 - bb_rename = zeros(Int, length(ir.cfg.blocks)) - split_targets = BitSet() - merged_orig_blocks = BitSet() - boundscheck = inbounds_option() - if boundscheck === :default && sv.src.propagate_inbounds - boundscheck = :propagate +mutable struct CFGInliningState + new_cfg_blocks::Vector{BasicBlock} + inserted_block_ranges::Vector{UnitRange{Int}} + todo_bbs::Vector{Tuple{Int, Int}} + first_bb::Int + bb_rename::Vector{Int} + split_targets::BitSet + merged_orig_blocks::BitSet + cfg::CFG +end + +function CFGInliningState(ir::IRCode) + CFGInliningState( + BasicBlock[], + UnitRange{Int}[], + Tuple{Int, Int}[], + 0, + zeros(Int, length(ir.cfg.blocks)), + BitSet(), + BitSet(), + ir.cfg + ) +end + +# Tells the inliner that we're now inlining into block `block`, meaning +# all previous blocks have been proceesed and can be added to the new cfg +function inline_into_block!(state::CFGInliningState, block::Int) + if state.first_bb != block + new_range = state.first_bb+1:block + l = length(state.new_cfg_blocks) + state.bb_rename[new_range] = (l+1:l+length(new_range)) + append!(state.new_cfg_blocks, map(copy, state.cfg.blocks[new_range])) + push!(state.merged_orig_blocks, last(new_range)) end - for item in todo - local idx, ir2, lie - # A linear inline does not modify the CFG - item.linear_inline_eligible && continue - inlinee_cfg = item.ir.cfg - # Figure out if we need to split the BB - need_split_before = false - need_split = true - block = block_for_inst(ir.cfg, item.idx) - last_block_idx = last(ir.cfg.blocks[block].stmts) - - if 
!isempty(inlinee_cfg.blocks[1].preds) - need_split_before = true - end - - if first_bb != block - new_range = first_bb+1:block - bb_rename[new_range] = (1+length(new_cfg_blocks)):(length(new_range)+length(new_cfg_blocks)) - append!(new_cfg_blocks, map(copy, ir.cfg.blocks[new_range])) - push!(merged_orig_blocks, last(new_range)) - end - first_bb = block - if false # TODO: ((idx+1) == last_block_idx && isa(ir[SSAValue(last_block_idx)], GotoNode)) - need_split = false - post_bb_id = -ir[SSAValue(last_block_idx)].label - else - post_bb_id = length(new_cfg_blocks) + length(inlinee_cfg.blocks) + (need_split_before ? 1 : 0) - need_split = true #!(idx == last_block_idx) - end + state.first_bb = block + return +end - if !need_split - delete!(merged_orig_blocks, last(new_range)) - end +function cfg_inline_item!(item::InliningTodo, state::CFGInliningState, from_unionsplit::Bool=false) + inlinee_cfg = item.ir.cfg + # Figure out if we need to split the BB + need_split_before = false + need_split = true + block = block_for_inst(state.cfg, item.idx) + inline_into_block!(state, block) - push!(todo_bbs, (length(new_cfg_blocks) - 1 + (need_split_before ? 
1 : 0), post_bb_id)) + if !isempty(inlinee_cfg.blocks[1].preds) + need_split_before = true + end - delete!(split_targets, length(new_cfg_blocks)) - orig_succs = copy(new_cfg_blocks[end].succs) - empty!(new_cfg_blocks[end].succs) - if need_split_before - bb_rename_range = (1+length(new_cfg_blocks)):(length(inlinee_cfg.blocks)+length(new_cfg_blocks)) - push!(new_cfg_blocks[end].succs, length(new_cfg_blocks)+1) - append!(new_cfg_blocks, inlinee_cfg.blocks) - else - # Merge the last block that was already there with the first block we're adding - bb_rename_range = length(new_cfg_blocks):(length(inlinee_cfg.blocks)+length(new_cfg_blocks)-1) - append!(new_cfg_blocks[end].succs, inlinee_cfg.blocks[1].succs) - append!(new_cfg_blocks, inlinee_cfg.blocks[2:end]) - end - if need_split - push!(new_cfg_blocks, BasicBlock(ir.cfg.blocks[block].stmts, - Int[], orig_succs)) - push!(split_targets, length(new_cfg_blocks)) - end - new_block_range = (length(new_cfg_blocks)-length(inlinee_cfg.blocks)+1):length(new_cfg_blocks) - push!(inserted_block_ranges, new_block_range) - - # Fixup the edges of the newely added blocks - for (old_block, new_block) in enumerate(bb_rename_range) - if old_block != 1 || need_split_before - p = new_cfg_blocks[new_block].preds - map!(p, p) do old_pred_block - return bb_rename_range[old_pred_block] - end + last_block_idx = last(state.cfg.blocks[block].stmts) + if false # TODO: ((idx+1) == last_block_idx && isa(ir[SSAValue(last_block_idx)], GotoNode)) + need_split = false + post_bb_id = -ir[SSAValue(last_block_idx)].label + else + post_bb_id = length(state.new_cfg_blocks) + length(inlinee_cfg.blocks) + (need_split_before ? 1 : 0) + need_split = true #!(idx == last_block_idx) + end + + if !need_split + delete!(state.merged_orig_blocks, last(new_range)) + end + + push!(state.todo_bbs, (length(state.new_cfg_blocks) - 1 + (need_split_before ? 
1 : 0), post_bb_id)) + + from_unionsplit || delete!(state.split_targets, length(state.new_cfg_blocks)) + orig_succs = copy(state.new_cfg_blocks[end].succs) + empty!(state.new_cfg_blocks[end].succs) + if need_split_before + l = length(state.new_cfg_blocks) + bb_rename_range = (1+l:length(inlinee_cfg.blocks)+l) + push!(state.new_cfg_blocks[end].succs, length(state.new_cfg_blocks)+1) + append!(state.new_cfg_blocks, inlinee_cfg.blocks) + else + # Merge the last block that was already there with the first block we're adding + l = length(state.new_cfg_blocks) + bb_rename_range = (l:length(inlinee_cfg.blocks)+l-1) + append!(state.new_cfg_blocks[end].succs, inlinee_cfg.blocks[1].succs) + append!(state.new_cfg_blocks, inlinee_cfg.blocks[2:end]) + end + if need_split + push!(state.new_cfg_blocks, BasicBlock(state.cfg.blocks[block].stmts, + Int[], orig_succs)) + from_unionsplit || push!(state.split_targets, length(state.new_cfg_blocks)) + end + new_block_range = (length(state.new_cfg_blocks)-length(inlinee_cfg.blocks)+1):length(state.new_cfg_blocks) + push!(state.inserted_block_ranges, new_block_range) + + # Fixup the edges of the newely added blocks + for (old_block, new_block) in enumerate(bb_rename_range) + if old_block != 1 || need_split_before + p = state.new_cfg_blocks[new_block].preds + map!(p, p) do old_pred_block + return bb_rename_range[old_pred_block] end - if new_block != last(new_block_range) - s = new_cfg_blocks[new_block].succs - map!(s, s) do old_succ_block - return bb_rename_range[old_succ_block] - end + end + if new_block != last(new_block_range) + s = state.new_cfg_blocks[new_block].succs + map!(s, s) do old_succ_block + return bb_rename_range[old_succ_block] end end + end - if need_split_before - push!(new_cfg_blocks[first(bb_rename_range)].preds, first(bb_rename_range)-1) - end + if need_split_before + push!(state.new_cfg_blocks[first(bb_rename_range)].preds, first(bb_rename_range)-1) + end - for (old_block, new_block) in enumerate(bb_rename_range) - if 
(length(new_cfg_blocks[new_block].succs) == 0) - terminator_idx = last(inlinee_cfg.blocks[old_block].stmts) - terminator = item.ir[SSAValue(terminator_idx)] - if isa(terminator, ReturnNode) && isdefined(terminator, :val) - push!(new_cfg_blocks[new_block].succs, post_bb_id) - if need_split - push!(new_cfg_blocks[post_bb_id].preds, new_block) - end + for (old_block, new_block) in enumerate(bb_rename_range) + if (length(state.new_cfg_blocks[new_block].succs) == 0) + terminator_idx = last(inlinee_cfg.blocks[old_block].stmts) + terminator = item.ir[SSAValue(terminator_idx)] + if isa(terminator, ReturnNode) && isdefined(terminator, :val) + push!(state.new_cfg_blocks[new_block].succs, post_bb_id) + if need_split + push!(state.new_cfg_blocks[post_bb_id].preds, new_block) end end end end - new_range = (first_bb + 1):length(ir.cfg.blocks) - bb_rename[new_range] = (1+length(new_cfg_blocks)):(length(new_range)+length(new_cfg_blocks)) - append!(new_cfg_blocks, ir.cfg.blocks[new_range]) +end + +function cfg_inline_unionsplit!(item::UnionSplit, state::CFGInliningState) + block = block_for_inst(state.cfg, item.idx) + inline_into_block!(state, block) + from_bbs = Int[] + delete!(state.split_targets, length(state.new_cfg_blocks)) + orig_succs = copy(state.new_cfg_blocks[end].succs) + empty!(state.new_cfg_blocks[end].succs) + for (i, (_, case)) in enumerate(item.cases) + # The condition gets sunk into the previous block + # Add a block for the union-split body + push!(state.new_cfg_blocks, BasicBlock(StmtRange(item.idx, item.idx))) + cond_bb = length(state.new_cfg_blocks)-1 + push!(state.new_cfg_blocks[end].preds, cond_bb) + push!(state.new_cfg_blocks[cond_bb].succs, cond_bb+1) + if isa(case, InliningTodo) && !case.linear_inline_eligible + cfg_inline_item!(case, state, true) + end + bb = length(state.new_cfg_blocks) + push!(from_bbs, bb) + # TODO: Right now we unconditionally generate a fallback block + # in case of subtyping errors - This is probably unnecessary. 
+ if true # i != length(item.cases) || !item.fully_covered + # This block will have the next condition or the final else case + push!(state.new_cfg_blocks, BasicBlock(StmtRange(item.idx, item.idx))) + push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks)) + push!(state.new_cfg_blocks[end].preds, cond_bb) + push!(item.bbs, length(state.new_cfg_blocks)) + end + end + # The edge from the fallback block. + if !item.fully_covered + push!(from_bbs, length(state.new_cfg_blocks)) + end + # This block will be the block everyone returns to + push!(state.new_cfg_blocks, BasicBlock(StmtRange(item.idx, item.idx), from_bbs, orig_succs)) + join_bb = length(state.new_cfg_blocks) + push!(state.split_targets, join_bb) + push!(item.bbs, join_bb) + for bb in from_bbs + push!(state.new_cfg_blocks[bb].succs, join_bb) + end +end + +function finish_cfg_inline!(state::CFGInliningState) + new_range = (state.first_bb + 1):length(state.cfg.blocks) + l = length(state.new_cfg_blocks) + state.bb_rename[new_range] = (l+1:l+length(new_range)) + append!(state.new_cfg_blocks, state.cfg.blocks[new_range]) # Rename edges original bbs - for (orig_bb, bb) in pairs(bb_rename) - p, s = new_cfg_blocks[bb].preds, new_cfg_blocks[bb].succs + for (orig_bb, bb) in pairs(state.bb_rename) + p, s = state.new_cfg_blocks[bb].preds, state.new_cfg_blocks[bb].succs map!(p, p) do pred_bb - pred_bb == length(bb_rename) && return length(new_cfg_blocks) - return bb_rename[pred_bb + 1] - 1 + pred_bb == length(state.bb_rename) && return length(state.new_cfg_blocks) + return state.bb_rename[pred_bb + 1] - 1 end - if !(orig_bb in merged_orig_blocks) + if !(orig_bb in state.merged_orig_blocks) map!(s, s) do succ_bb - return bb_rename[succ_bb] + return state.bb_rename[succ_bb] end end end - for bb in collect(split_targets) - s = new_cfg_blocks[bb].succs + for bb in collect(state.split_targets) + s = state.new_cfg_blocks[bb].succs map!(s, s) do succ_bb - return bb_rename[succ_bb] + return 
state.bb_rename[succ_bb] end end # Rename any annotated original bb references - for bb in 1:length(new_cfg_blocks) - s = new_cfg_blocks[bb].succs + for bb in 1:length(state.new_cfg_blocks) + s = state.new_cfg_blocks[bb].succs map!(s, s) do succ_bb - return succ_bb < 0 ? bb_rename[-succ_bb] : succ_bb + return succ_bb < 0 ? state.bb_rename[-succ_bb] : succ_bb end end +end + +function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, + linetable::Vector{LineInfoNode}, item::InliningTodo, + boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + # Ok, do the inlining here + inline_cfg = item.ir.cfg + stmt = compact.result[idx] + linetable_offset = length(linetable) + # Append the linetable of the inlined function to our line table + inlined_at = compact.result_lines[idx] + for entry in item.linetable + push!(linetable, LineInfoNode(entry.mod, entry.method, entry.file, entry.line, + (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset : inlined_at))) + end + if item.isva + vararg = mk_tuplecall!(compact, argexprs[item.na:end], compact.result_lines[idx]) + argexprs = Any[argexprs[1:(item.na - 1)]..., vararg] + end + flag = compact.result_flags[idx] + boundscheck_idx = boundscheck + if boundscheck_idx === :default || boundscheck_idx === :propagate + if (flag & IR_FLAG_INBOUNDS) != 0 + boundscheck_idx = :off + end + end + # If the iterator already moved on to the next basic block, + # temorarily re-open in again. + local return_value + # Special case inlining that maintains the current basic block if there's only one BB in the target + if item.linear_inline_eligible + terminator = item.ir[SSAValue(last(inline_cfg.blocks[1].stmts))] + #compact[idx] = nothing + inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) + for (idx′, stmt′) in inline_compact + # This dance is done to maintain accurate usage counts in the + # face of rename_arguments! mutating in place - should figure out + # something better eventually. 
+ inline_compact[idx′] = nothing + stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.method.sig, item.sparams, linetable_offset, boundscheck_idx, compact) + if isa(stmt′, ReturnNode) + isa(stmt′.val, SSAValue) && (compact.used_ssas[stmt′.val.id] += 1) + return_value = stmt′.val + stmt′ = nothing + end + inline_compact[idx′] = stmt′ + end + just_fixup!(inline_compact) + compact.result_idx = inline_compact.result_idx + else + bb_offset, post_bb_id = popfirst!(todo_bbs) + # This implements the need_split_before flag above + need_split_before = !isempty(item.ir.cfg.blocks[1].preds) + if need_split_before + finish_current_bb!(compact) + end + pn = PhiNode() + #compact[idx] = nothing + inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) + for (idx′, stmt′) in inline_compact + inline_compact[idx′] = nothing + stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.method.sig, item.sparams, linetable_offset, boundscheck_idx, compact) + if isa(stmt′, ReturnNode) + if isdefined(stmt′, :val) + push!(pn.edges, inline_compact.active_result_bb-1) + push!(pn.values, stmt′.val) + stmt′ = GotoNode(post_bb_id) + end + elseif isa(stmt′, GotoNode) + stmt′ = GotoNode(stmt′.label + bb_offset) + elseif isa(stmt′, Expr) && stmt′.head == :enter + stmt′ = Expr(:enter, stmt′.args[1] + bb_offset) + elseif isa(stmt′, GotoIfNot) + stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset) + elseif isa(stmt′, PhiNode) + stmt′ = PhiNode(Any[edge+bb_offset for edge in stmt′.edges], stmt′.values) + end + inline_compact[idx′] = stmt′ + end + just_fixup!(inline_compact) + compact.result_idx = inline_compact.result_idx + compact.active_result_bb = inline_compact.active_result_bb + for i = 1:length(pn.values) + isassigned(pn.values, i) || continue + if isa(pn.values[i], SSAValue) + compact.used_ssas[pn.values[i].id] += 1 + end + end + if length(pn.edges) == 1 + return_value = pn.values[1] + else + return_value = insert_node_here!(compact, pn, stmt.typ, compact.result_lines[idx]) + end + 
end + return_value +end + +function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, + argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, + item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + stmt, typ, line = compact.result[idx], compact.result_types[idx], compact.result_lines[idx] + atype = item.atype + generic_bb = item.bbs[end-1] + join_bb = item.bbs[end] + bb = compact.active_result_bb + pn = PhiNode() + has_generic = false + @assert length(item.bbs) > length(item.cases) + for ((metharg, case), next_cond_bb) in zip(item.cases, item.bbs) + @assert !isa(metharg, UnionAll) + cond = true + @assert length(atype.parameters) == length(metharg.parameters) + for i in 2:length(atype.parameters) + a, m = atype.parameters[i], metharg.parameters[i] + # If this is always true, we don't need to check for it + a <: m && continue + # Generate isa check + isa_expr = Expr(:call, isa, argexprs[i], m) + isa_expr.typ = Bool + ssa = insert_node_here!(compact, isa_expr, Bool, line) + if cond === true + cond = ssa + else + and_expr = Expr(:call, and_int, cond, ssa) + cond = insert_node_here!(compact, and_expr, Bool, line) + end + end + insert_node_here!(compact, GotoIfNot(cond, next_cond_bb), Union{}, line) + bb = next_cond_bb - 1 + finish_current_bb!(compact) + # Insert Pi nodes here + if isa(case, InliningTodo) + val = ir_inline_item!(compact, idx, argexprs, linetable, case, boundscheck, todo_bbs) + elseif isa(case, MethodInstance) + val = insert_node_here!(compact, Expr(:invoke, case, argexprs...), typ, line) + else + case = case::ConstantCase + val = case.val + end + push!(pn.edges, bb) + push!(pn.values, val) + insert_node_here!(compact, GotoNode(join_bb), Union{}, line) + finish_current_bb!(compact) + end + bb += 1 + # We're now in the fall through block, decide what to do + if item.fully_covered + e = Expr(:call, :error, "fatal error in type inference (type bound)") + e.typ = Union{} + insert_node_here!(compact, e, Union{}, line) + 
insert_node_here!(compact, ReturnNode(), Union{}, line) + finish_current_bb!(compact) + else + ssa = insert_node_here!(compact, stmt, typ, line) + push!(pn.edges, bb) + push!(pn.values, ssa) + insert_node_here!(compact, GotoNode(join_bb), Union{}, line) + finish_current_bb!(compact) + end + + # We're now in the join block. + compact.ssa_rename[compact.idx-1] = insert_node_here!(compact, pn, typ, line) +end + +function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfoNode}, sv::OptimizationState) + # Compute the new CFG first (modulo statement ranges, which will be computed below) + state = CFGInliningState(ir) + for item in todo + if isa(item, UnionSplit) + cfg_inline_unionsplit!(item::UnionSplit, state) + else + item = item::InliningTodo + # A linear inline does not modify the CFG + item.linear_inline_eligible && continue + cfg_inline_item!(item, state) + end + end + finish_cfg_inline!(state) + + boundscheck = inbounds_option() + if boundscheck === :default && sv.src.propagate_inbounds + boundscheck = :propagate + end let compact = IncrementalCompact(ir) - compact.result_bbs = new_cfg_blocks + compact.result_bbs = state.new_cfg_blocks + # This needs to be a minimum and is more of a size hint nnewnodes = length(compact.result) + (sum(todo) do item - return length(item.ir.stmts) + length(item.ir.new_nodes) + return isa(item, InliningTodo) ? (length(item.ir.stmts) + length(item.ir.new_nodes)) : 0 end) resize!(compact, nnewnodes) item = popfirst!(todo) inline_idx = item.idx for (idx, stmt) in compact if compact.idx - 1 == inline_idx - # Ok, do the inlining here - inline_cfg = item.ir.cfg - linetable_offset = length(linetable) - # Append the linetable of the inlined function to our line table - inlined_at = compact.result_lines[idx] - for entry in item.linetable - push!(linetable, LineInfoNode(entry.mod, entry.method, entry.file, entry.line, - (entry.inlined_at > 0 ? 
entry.inlined_at + linetable_offset : inlined_at))) - end - # If the iterator already moved on to the next basic block, - # temorarily re-open in again. + argexprs = copy(stmt.args) refinish = false if compact.result_idx == first(compact.result_bbs[compact.active_result_bb].stmts) compact.active_result_bb -= 1 refinish = true end - argexprs = copy(stmt.args) # At the moment we will allow globalrefs in argument position, turn those into ssa values for aidx in 1:length(argexprs) aexpr = argexprs[aidx] @@ -196,87 +457,17 @@ function batch_inline!(todo::Vector{InliningTodo}, ir::IRCode, linetable::Vector argexprs[aidx] = insert_node_here!(compact, aexpr, compact_exprtype(compact, aexpr), compact.result_lines[idx]) end end - argexprs = rewrite_exprargs((node, typ)->insert_node_here!(compact, node, typ, compact.result_lines[idx]), - arg->compact_exprtype(compact, arg), item.isinvoke, item.isapply, argexprs) - if item.isva - vararg = mk_tuplecall!(compact, argexprs[item.na:end], compact.result_lines[idx]) - argexprs = Any[argexprs[1:(item.na - 1)]..., vararg] + if item.isinvoke + argexprs = rewrite_invoke_exprargs!((node, typ)->insert_node_here!(compact, node, typ, compact.result_lines[idx]), + arg->compact_exprtype(compact, arg), argexprs) end - flag = compact.result_flags[idx] - boundscheck_idx = boundscheck - if boundscheck_idx === :default || boundscheck_idx === :propagate - if (flag & IR_FLAG_INBOUNDS) != 0 - boundscheck_idx = :off - end - end - # Special case inlining that maintains the current basic block if there's only one BB in the target - if item.linear_inline_eligible - terminator = item.ir[SSAValue(last(inline_cfg.blocks[1].stmts))] - compact[idx] = nothing - inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) - for (idx′, stmt′) in inline_compact - # This dance is done to maintain accurate usage counts in the - # face of rename_arguments! mutating in place - should figure out - # something better eventually. 
- inline_compact[idx′] = nothing - stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.method.sig, item.sparams, linetable_offset, boundscheck_idx, compact) - if isa(stmt′, ReturnNode) - isa(stmt′.val, SSAValue) && (compact.used_ssas[stmt′.val.id] += 1) - compact.ssa_rename[compact.idx-1] = stmt′.val - stmt′ = nothing - end - inline_compact[idx′] = stmt′ - end - just_fixup!(inline_compact) - compact.result_idx = inline_compact.result_idx - refinish && finish_current_bb!(compact) - else - bb_offset, post_bb_id = popfirst!(todo_bbs) - # This implements the need_split_before flag above - need_split_before = !isempty(item.ir.cfg.blocks[1].preds) - if need_split_before - finish_current_bb!(compact) - end - pn = PhiNode() - compact[idx] = nothing - inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) - for (idx′, stmt′) in inline_compact - inline_compact[idx′] = nothing - stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.method.sig, item.sparams, linetable_offset, boundscheck_idx, compact) - if isa(stmt′, ReturnNode) - if isdefined(stmt′, :val) - push!(pn.edges, inline_compact.active_result_bb-1) - push!(pn.values, stmt′.val) - stmt′ = GotoNode(post_bb_id) - end - elseif isa(stmt′, GotoNode) - stmt′ = GotoNode(stmt′.label + bb_offset) - elseif isa(stmt′, Expr) && stmt′.head == :enter - stmt′ = Expr(:enter, stmt′.args[1] + bb_offset) - elseif isa(stmt′, GotoIfNot) - stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset) - elseif isa(stmt′, PhiNode) - stmt′ = PhiNode(Any[edge+bb_offset for edge in stmt′.edges], stmt′.values) - end - inline_compact[idx′] = stmt′ - end - just_fixup!(inline_compact) - compact.result_idx = inline_compact.result_idx - compact.active_result_bb = inline_compact.active_result_bb - for i = 1:length(pn.values) - isassigned(pn.values, i) || continue - if isa(pn.values[i], SSAValue) - compact.used_ssas[pn.values[i].id] += 1 - end - end - if length(pn.edges) == 1 - compact.ssa_rename[compact.idx-1] = pn.values[1] - else - pn_ssa = 
insert_node_here!(compact, pn, stmt.typ, compact.result_lines[idx]) - compact.ssa_rename[compact.idx-1] = pn_ssa - end - refinish && finish_current_bb!(compact) + if isa(item, InliningTodo) + compact.ssa_rename[compact.idx-1] = ir_inline_item!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs) + elseif isa(item, UnionSplit) + ir_inline_unionsplit!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs) end + compact[idx] = nothing + refinish && finish_current_bb!(compact) if !isempty(todo) item = popfirst!(todo) inline_idx = item.idx @@ -284,13 +475,13 @@ function batch_inline!(todo::Vector{InliningTodo}, ir::IRCode, linetable::Vector inline_idx = -1 end elseif isa(stmt, GotoNode) - compact[idx] = GotoNode(bb_rename[stmt.label]) + compact[idx] = GotoNode(state.bb_rename[stmt.label]) elseif isa(stmt, Expr) && stmt.head == :enter - compact[idx] = Expr(:enter, bb_rename[stmt.args[1]]) + compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]]) elseif isa(stmt, GotoIfNot) - compact[idx] = GotoIfNot(stmt.cond, bb_rename[stmt.dest]) + compact[idx] = GotoIfNot(stmt.cond, state.bb_rename[stmt.dest]) elseif isa(stmt, PhiNode) - compact[idx] = PhiNode(Any[edge == length(bb_rename) ? length(new_cfg_blocks) : bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values) + compact[idx] = PhiNode(Any[edge == length(state.bb_rename) ? 
length(state.new_cfg_blocks) : state.bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values) end end @@ -299,7 +490,7 @@ function batch_inline!(todo::Vector{InliningTodo}, ir::IRCode, linetable::Vector return ir end -function spec_lambda(@nospecialize(atype), sv::OptimizationState, @nospecialize(invoke_data)) +function _spec_lambda(@nospecialize(atype), sv::OptimizationState, @nospecialize(invoke_data)) if invoke_data === nothing return ccall(:jl_get_spec_lambda, Any, (Any, UInt), atype, sv.params.world) else @@ -310,54 +501,34 @@ function spec_lambda(@nospecialize(atype), sv::OptimizationState, @nospecialize( end end -function rewrite_exprargs(inserter, exprtype, isinvoke::Bool, isapply::Bool, argexprs::Vector{Any}) - if isapply - new_argexprs = Any[argexprs[2]] - # Flatten all tuples - for arg in argexprs[3:end] - tupT = exprtype(arg) - t = widenconst(tupT) - for i = 1:length(t.parameters) - # Insert a getfield call here - new_call = Expr(:call, Core.getfield, arg, i) - new_call.typ = getfield_tfunc(tupT, Const(i)) - push!(new_argexprs, inserter(new_call, new_call.typ)) - end +function spec_lambda(@nospecialize(atype), sv::OptimizationState, @nospecialize(invoke_data)) + linfo = _spec_lambda(atype, sv, invoke_data) + linfo !== nothing && add_backedge!(linfo, sv) + linfo +end + +function rewrite_apply_exprargs!(inserter, exprtype, argexprs::Vector{Any}) + new_argexprs = Any[argexprs[2]] + # Flatten all tuples + for arg in argexprs[3:end] + tupT = exprtype(arg) + t = widenconst(tupT) + for i = 1:length(t.parameters) + # Insert a getfield call here + new_call = Expr(:call, Core.getfield, arg, i) + new_call.typ = getfield_tfunc(tupT, Const(i)) + push!(new_argexprs, inserter(new_call, new_call.typ)) end - argexprs = new_argexprs - end - if isinvoke - argexpr0 = argexprs[2] - argexprs = argexprs[4:end] - pushfirst!(argexprs, argexpr0) end + argexprs = new_argexprs return argexprs end -function maybe_make_invoke!(ir::IRCode, idx::Int, @nospecialize(etype), 
atypes::Vector{Any}, sv::OptimizationState, - @nospecialize(atype_unlimited), isinvoke::Bool, isapply::Bool, @nospecialize(invoke_data)) - nu = countunionsplit(atypes) - nu > 1 && return # TODO: The old optimizer did union splitting here. Is this the right place? - linfo = spec_lambda(atype_unlimited, sv, invoke_data) - if linfo === nothing - if !isapply || isinvoke - return - end - # We might not have an linfo, but we can still rewrite the _apply into a regular call - # based on our analysis - ex = Expr(:call) - else - ex = Expr(:invoke) - add_backedge!(linfo, sv) - end - argexprs = ir[SSAValue(idx)].args - argexprs = rewrite_exprargs((node, typ)->insert_node!(ir, idx, typ, node), arg->exprtype(arg, ir, ir.mod), - isinvoke, isapply, argexprs) - linfo !== nothing && pushfirst!(argexprs, linfo) - ex.typ = etype - ex.args = argexprs - ir[SSAValue(idx)] = ex - nothing +function rewrite_invoke_exprargs!(inserter, exprtype, argexprs::Vector{Any}) + argexpr0 = argexprs[2] + argexprs = argexprs[4:end] + pushfirst!(argexprs, argexpr0) + return argexprs end function singleton_type(@nospecialize(ft)) @@ -367,9 +538,166 @@ function singleton_type(@nospecialize(ft)) return nothing end +function analyze_method!(idx, f, ft, metharg, methsp, method, stmt, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) + methsig = method.sig + + # Check whether this call just evaluates to a constant + if isa(f, widenconst(ft)) && !isdefined(method, :generator) && method.pure && + isa(stmt.typ, Const) && stmt.typ.actual && is_inlineable_constant(stmt.typ.val) + return ConstantCase(quoted(stmt.typ.val), method, Any[methsp...], metharg) + end + + # Check that we habe the correct number of arguments + na = Int(method.nargs) + npassedargs = length(atypes) + if na != npassedargs && !(na > 0 && method.isva) + # we have a method match only because an earlier + # inference step shortened our call args list, even + # though we have too many arguments to actually + # call this function + return 
nothing + end + + # Bail out if any static parameters are left as TypeVar + ok = true + for i = 1:length(methsp) + isa(methsp[i], TypeVar) && return nothing + end + + # Find the linfo for this method (Generated functions are expanded here if necessary) + linfo = code_for_method(method, metharg, methsp, sv.params.world, true) # Union{Nothing, MethodInstance} + if !isa(linfo, MethodInstance) + return spec_lambda(atype_unlimited, sv, invoke_data) + end + + if invoke_api(linfo) == 2 + # in this case function can be inlined to a constant + add_backedge!(linfo, sv) + return ConstantCase(quoted(linfo.inferred_const), method, Any[methsp...], metharg) + end + + # Handle vararg functions + isva = na > 0 && method.isva + if isva + @assert length(atypes) >= na - 1 + va_type = tuple_tfunc(Tuple{Any[widenconst(atypes[i]) for i in 1:length(atypes)]...}) + atypes = Any[atypes[1:(na - 1)]..., va_type] + end + + # Go see if we already have a pre-inferred result + res = find_inferred(linfo, atypes, sv) + res === nothing && return nothing + + if length(res::Tuple) == 1 + return ConstantCase(res[1], method, Any[methsp...], metharg) + end + (rettype, inferred) = res::Tuple + + if inferred === nothing + return spec_lambda(atype_unlimited, sv, invoke_data) + end + + src_inferred = ccall(:jl_ast_flag_inferred, Bool, (Any,), inferred) + src_inlineable = ccall(:jl_ast_flag_inlineable, Bool, (Any,), inferred) + + if !(src_inferred && src_inlineable) + return spec_lambda(atype_unlimited, sv, invoke_data) + end + + # At this point we're committed to performing the inlining, add the backedge + add_backedge!(linfo, sv) + + if isa(inferred, CodeInfo) + src = inferred + ast = copy_exprargs(inferred.code) + else + src = ccall(:jl_uncompress_ast, Any, (Any, Any), method, inferred::Vector{UInt8})::CodeInfo + # TODO: It seems like PhiNodes are shared between compressed codeinfos, making this copy necessary + ast = copy_exprargs(src.code) + end + + @timeit "inline IR inflation" if src.codelocs === 
nothing + topline = LineInfoNode(method.module, method.name, method.file, Int(method.line), 0) + inline_linetable = [topline] + push!(ast, LabelNode(length(ast) + 1)) + ir2 = just_construct_ssa(src, ast, na-1, inline_linetable) + else + ir2, inline_linetable = inflate_ir(src), src.linetable + end + #verify_ir(ir2) + + return InliningTodo(idx, + isva, isinvoke, isapply, na, + method, Any[methsp...], metharg, + inline_linetable, ir2, linear_inline_eligible(ir2)) +end + +# Neither the product iterator nor CartesianIndices are available +# here, so use this poor man's version +struct SimpleCartesian + ranges::Vector{UnitRange{Int}} +end +start(s::SimpleCartesian) = Int[1 for _ in 1:length(s.ranges)] +done(s::SimpleCartesian, state) = state[end] > last(s.ranges[end]) +function next(s::SimpleCartesian, state) + vals = copy(state) + any = false + for i = 1:length(s.ranges) + if state[i] < last(s.ranges[i]) + state[i] += 1 + any = true + break + end + end + if !any + state[end] += 1 + end + (vals, state) +end + +# Given a signature, iterate over the signatures to union split over +struct UnionSplitSignature + it::SimpleCartesian + typs::Vector{Any} +end + +function UnionSplitSignature(atypes::Vector{Any}) + typs = Any[uniontypes(widenconst(atypes[i])) for i = 1:length(atypes)] + ranges = UnitRange{Int}[1:length(typs[i]) for i = 1:length(typs)] + UnionSplitSignature(SimpleCartesian(ranges), typs) +end + +start(split::UnionSplitSignature) = start(split.it) +done(split::UnionSplitSignature, state) = done(split.it, state) +function next(split::UnionSplitSignature, state) + idxs, state = next(split.it, state) + sig = Any[split.typs[i][j] for (i,j) in enumerate(idxs)] + sig, state +end + +function handle_single_case!(ir, stmt, idx, case, isinvoke, todo) + if isa(case, ConstantCase) + ir[SSAValue(idx)] = case.val + elseif isa(case, MethodInstance) + if isinvoke + stmt.args = rewrite_invoke_exprargs!( + (node, typ)->insert_node!(ir, idx, typ, node), + arg->exprtype(arg, ir, ir.mod), 
+ stmt.args) + end + stmt.head = :invoke + pushfirst!(stmt.args, case) + elseif case === nothing + # Do, well, nothing + else + push!(todo, case::InliningTodo) + end +end + + function assemble_inline_todo!(ir::IRCode, linetable::Vector{LineInfoNode}, sv::OptimizationState) # todo = (inline_idx, (isva, isinvoke, isapply, na), method, spvals, inline_linetable, inline_ir, lie) - todo = InliningTodo[] + todo = Any[] for idx in 1:length(ir.stmts) stmt = ir.stmts[idx] isexpr(stmt, :call) || continue @@ -393,7 +721,7 @@ function assemble_inline_todo!(ir::IRCode, linetable::Vector{LineInfoNode}, sv:: ok || continue # Check if we match any of the early inliners - res = early_inline_special_case(ir, f, ft, stmt, atypes) + res = early_inline_special_case(ir, f, ft, stmt, atypes, sv.params) if res !== nothing ir.stmts[idx] = res[1] continue @@ -447,10 +775,13 @@ function assemble_inline_todo!(ir::IRCode, linetable::Vector{LineInfoNode}, sv:: isapply = true end - if isapply && f !== Core.invoke && (isa(f, IntrinsicFunction) || ft ⊑ IntrinsicFunction || isa(f, Builtin) || ft ⊑ Builtin) - # Even though we don't do inlining or :invoke, for intrinsic functions, we do want to eliminate apply if possible. - stmt.args = rewrite_exprargs((node, typ)->insert_node!(ir, idx, typ, node), arg->exprtype(arg, ir, ir.mod), - false, isapply, stmt.args) + # Independent of whether we can inline, the above analysis allows us to rewrite + # this apply call to a regular call + if isapply + stmt.args = rewrite_apply_exprargs!((node, typ)->insert_node!(ir, idx, typ, node), arg->exprtype(arg, ir, ir.mod), stmt.args) + end + + if f !== Core.invoke && (isa(f, IntrinsicFunction) || ft ⊑ IntrinsicFunction || isa(f, Builtin) || ft ⊑ Builtin) continue end @@ -469,7 +800,8 @@ function assemble_inline_todo!(ir::IRCode, linetable::Vector{LineInfoNode}, sv:: # signature we're invoking. 
(invoke_data === nothing || atype_unlimited <: invoke_data.types0) || continue - # TODO: Bail out here if inlining is disabled + # Bail out here if inlining is disabled + sv.params.inlining || continue # Special case inliners for regular functions if late_inline_special_case!(ir, idx, stmt, atypes, f, ft, _topmod(ir.mod)) @@ -484,123 +816,99 @@ function assemble_inline_todo!(ir::IRCode, linetable::Vector{LineInfoNode}, sv:: end # Ok, now figure out what method to call - @timeit "method matching" if invoke_data === nothing - min_valid = UInt[typemin(UInt)] - max_valid = UInt[typemax(UInt)] - meth = _methods_by_ftype(atype, 1, sv.params.world, min_valid, max_valid) - if meth === false || length(meth) != 1 - maybe_make_invoke!(ir, idx, stmt.typ, atypes, sv, atype_unlimited, false, isapply, nothing) - continue - end - meth = meth[1]::SimpleVector - metharg = meth[1]::Type - methsp = meth[2]::SimpleVector - method = meth[3]::Method - else + if invoke_data !== nothing method = invoke_data.entry.func (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), atype_unlimited, method.sig)::SimpleVector methsp = methsp::SimpleVector - end - - methsig = method.sig - if !(atype <: metharg) - maybe_make_invoke!(ir, idx, stmt.typ, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) + result = analyze_method!(idx, f, ft, metharg, methsp, method, stmt, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) + handle_single_case!(ir, stmt, idx, result, isinvoke, todo) continue end - # Check whether this call just evaluates to a constant - if isa(f, widenconst(ft)) && !isdefined(method, :generator) && method.pure && - isa(stmt.typ, Const) && stmt.typ.actual && is_inlineable_constant(stmt.typ.val) - ir[SSAValue(idx)] = quoted(stmt.typ.val) + # Regular case: Perform method matching + min_valid = UInt[typemin(UInt)] + max_valid = UInt[typemax(UInt)] + meth = _methods_by_ftype(atype, sv.params.MAX_METHODS, sv.params.world, min_valid, max_valid) + if meth 
=== false || length(meth) == 0 + # No applicable method, or too many applicable methods continue end - # Check that we habe the correct number of arguments - na = Int(method.nargs) - npassedargs = length(atypes) - if na != npassedargs && !(na > 0 && method.isva) - # we have a method match only because an earlier - # inference step shortened our call args list, even - # though we have too many arguments to actually - # call this function - continue - end - - # Bail out if any static parameters are left as TypeVar - ok = true - for i = 1:length(methsp) - isa(methsp[i], TypeVar) && (ok = false; break) - end - ok || continue - - # Find the linfo for this methods (Generated functions are expanded here if necessary) - linfo = code_for_method(method, metharg, methsp, sv.params.world, true) # Union{Nothing, MethodInstance} - if !isa(linfo, MethodInstance) - maybe_make_invoke!(ir, idx, stmt.typ, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) - continue - end + cases = Pair{Type, Any}[] + # TODO: This could be better + signature_union = Union{Any[match[1]::Type for match in meth]...} + signature_fully_covered = atype <: signature_union + fully_covered = signature_fully_covered + split_out_sigs = Any[] - if invoke_api(linfo) == 2 - # in this case function can be inlined to a constant - add_backedge!(linfo, sv) - ir[SSAValue(idx)] = linfo.inferred_const - continue + # For any method match that's a dispatch tuple, extract those cases first + for (i, match) in enumerate(meth) + (metharg, methsp, method) = (match[1]::Type, match[2]::SimpleVector, match[3]::Method) + if !isdispatchtuple(metharg) + fully_covered = false + continue + end + case = analyze_method!(idx, f, ft, metharg, methsp, method, stmt, atypes, sv, metharg, isinvoke, isapply, invoke_data) + if case === nothing + fully_covered = false + continue + end + push!(cases, Pair{Type,Any}(metharg, case)) + push!(split_out_sigs, metharg) end - # Handle vararg functions - isva = na > 0 && method.isva - if isva - 
@assert length(atypes) >= na - 1 - va_type = tuple_tfunc(Tuple{Any[widenconst(atypes[i]) for i in 1:length(atypes)]...}) - atypes = Any[atypes[1:(na - 1)]..., va_type] + # Now, if profitable union split the atypes into dispatch tuples and match the appropriate method + nu = countunionsplit(atypes) + if nu != 1 && nu <= sv.params.MAX_UNION_SPLITTING + for sig in UnionSplitSignature(atypes) + metharg′ = argtypes_to_type(sig) + if !isdispatchtuple(metharg′) + fully_covered = false + continue + elseif any(x->x === metharg′, split_out_sigs) + continue + end + # `meth` is in specificity order, so find the first applicable method + found_any = false + for (i, match) in enumerate(meth) + (metharg, methsp, method) = (match[1]::Type, match[2]::SimpleVector, match[3]::Method) + metharg′ <: method.sig || continue + case = analyze_method!(idx, f, ft, metharg′, methsp, method, stmt, atypes, sv, metharg′, isinvoke, isapply, invoke_data) + if case !== nothing + found_any = true + push!(cases, Pair{Type,Any}(metharg′, case)) + end + break + end + if !found_any + fully_covered = false + continue + end + end end - # Go see if we already have a pre-inferred result - res = find_inferred!(ir, idx, linfo, atypes, sv) - res === nothing && continue - - (rettype, inferred) = res - - if inferred === nothing - maybe_make_invoke!(ir, idx, stmt.typ, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) - continue + # If we're fully covered and there's only one applicable method, + # we inline, even if the signature is not a dispatch tuple + if signature_fully_covered && length(cases) == 0 && length(meth) == 1 + metharg = meth[1][1]::Type + methsp = meth[1][2]::SimpleVector + method = meth[1][3]::Method + fully_covered = true + case = analyze_method!(idx, f, ft, metharg, methsp, method, stmt, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) + case == nothing && continue + push!(cases, Pair{Type,Any}(metharg, case)) end - src_inferred = ccall(:jl_ast_flag_inferred, Bool, 
(Any,), inferred) - src_inlineable = ccall(:jl_ast_flag_inlineable, Bool, (Any,), inferred) - - if !(src_inferred && src_inlineable) - maybe_make_invoke!(ir, idx, stmt.typ, atypes, sv, atype_unlimited, isinvoke, isapply, invoke_data) + # If we only have one case and that case is fully covered, we may either + # be able to do the inlining now (for constant cases), or push it directly + # onto the todo list + if fully_covered && length(cases) == 1 + handle_single_case!(ir, stmt, idx, cases[1][2], isinvoke, todo) continue end - - # At this point we're committedd to performing the inlining, add the backedge - add_backedge!(linfo, sv) - - if isa(inferred, CodeInfo) - src = inferred - ast = copy_exprargs(inferred.code) - else - src = ccall(:jl_uncompress_ast, Any, (Any, Any), method, inferred::Vector{UInt8})::CodeInfo - # TODO: It seems like PhiNodes are shared between compressed codeinfos, making this copy necessary - ast = copy_exprargs(src.code) - end - - @timeit "inline IR inflation" if src.codelocs === nothing - topline = LineInfoNode(method.module, method.name, method.file, Int(method.line), 0) - inline_linetable = [topline] - push!(ast, LabelNode(length(ast) + 1)) - ir2 = just_construct_ssa(src, ast, na-1, inline_linetable) - else - ir2, inline_linetable = inflate_ir(src), src.linetable - end - #verify_ir(ir2) - - push!(todo, InliningTodo(idx, - isva, isinvoke, isapply, na, - method, Any[methsp...], - inline_linetable, ir2, linear_inline_eligible(ir2))) + length(cases) == 0 && continue + push!(todo, UnionSplit(idx, fully_covered, atype_unlimited, isinvoke, cases)) end todo end @@ -650,7 +958,7 @@ function compute_invoke_data(@nospecialize(atypes), argexprs::Vector{Any}, sv::O return svec(f, ft, atypes, argexprs, invoke_data) end -function early_inline_special_case(ir::IRCode, @nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector{Any}) +function early_inline_special_case(ir::IRCode, @nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector{Any}, params) 
if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes) == 3 # typeassert(x::S, T) => x, when S<:T a3 = atypes[3] @@ -661,7 +969,7 @@ function early_inline_special_case(ir::IRCode, @nospecialize(f), @nospecialize(f end topmod = _topmod(ir.mod) # special-case inliners for known pure functions that compute types - if true #sv.params.inlining + if params.inlining if isa(e.typ, Const) # || isconstType(e.typ) val = e.typ.val if (f === apply_type || f === fieldtype || f === typeof || f === (===) || @@ -769,7 +1077,7 @@ function ssa_substitute_op!(@nospecialize(val), arg_replacements::Vector{Any}, return urs[] end -function find_inferred!(ir::IRCode, idx::Int, linfo::MethodInstance, @nospecialize(atypes), sv::OptimizationState) +function find_inferred(linfo::MethodInstance, @nospecialize(atypes), sv::OptimizationState) # see if the method has a InferenceResult in the current cache # or an existing inferred code info store in `.inferred` haveconst = false @@ -797,8 +1105,7 @@ function find_inferred!(ir::IRCode, idx::Int, linfo::MethodInstance, @nospeciali end if @isdefined(inferred_const) && is_inlineable_constant(inferred_const) add_backedge!(linfo, sv) - ir[SSAValue(idx)] = quoted(inferred_const) - return nothing + return (quoted(inferred_const),) end end inferred = inf_result.src diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl index 5550404377777..26b734586e596 100644 --- a/base/compiler/ssair/show.jl +++ b/base/compiler/ssair/show.jl @@ -80,23 +80,29 @@ function Base.show(io::IO, code::IRCode) used = IdSet{Int}() Base.println(io, "Code") foreach(stmt->scan_ssa_use!(used, stmt), code.stmts) + cfg = code.cfg + max_bb_idx_size = length(string(length(cfg.blocks))) + bb_idx = 1 + if any(i->!isassigned(code.new_nodes, i), 1:length(code.new_nodes)) + printstyled(io, :red, "ERROR: New node array has unset entry\n") + end + new_nodes = code.new_nodes[filter(i->isassigned(code.new_nodes, i), 1:length(code.new_nodes))] foreach(nn -> 
scan_ssa_use!(used, nn.node), new_nodes) + perm = sortperm(new_nodes, by = x->x[1]) + new_nodes_perm = Iterators.Stateful(perm) + if isempty(used) maxsize = 0 else maxused = maximum(used) maxsize = length(string(maxused)) end - cfg = code.cfg - max_bb_idx_size = length(string(length(cfg.blocks))) - bb_idx = 1 - perm = sortperm(code.new_nodes, by = x->x[1]) - new_nodes_perm = Iterators.Stateful(perm) + for idx in eachindex(code.stmts) if !isassigned(code.stmts, idx) # This is invalid, but do something useful rather # than erroring, to make debugging easier - printstyled(:red, "UNDEF\n") + printstyled(io, :red, "UNDEF\n") continue end stmt = code.stmts[idx] @@ -116,9 +122,9 @@ function Base.show(io::IO, code::IRCode) print_sep = true end floop = true - while !isempty(new_nodes_perm) && code.new_nodes[peek(new_nodes_perm)][1] == idx + while !isempty(new_nodes_perm) && new_nodes[peek(new_nodes_perm)][1] == idx node_idx = popfirst!(new_nodes_perm) - new_node = code.new_nodes[node_idx] + new_node = new_nodes[node_idx] node_idx += length(code.stmts) if print_sep if floop diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 96638a649473e..36238b3530879 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -22,16 +22,14 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, @assert op.id < use_idx end else - if !dominates(domtree, def_bb, use_bb) - enable_new_optimizer[] = false - @show ir + if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) + #@Base.show ir @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))" error() end end elseif isa(op, Union{SlotNumber, TypedSlot}) - enable_new_optimizer[] = false - #@error "Left over slot detected in converted IR" + @verify_error "Left over slot detected in converted IR" error() end end @@ -43,10 +41,7 @@ function verify_ir(ir::IRCode) last_end = 0 
for (idx, block) in pairs(ir.cfg.blocks) if first(block.stmts) != last_end + 1 - enable_new_optimizer[] = false #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] - @show ranges - @show (first(block.stmts), last_end) @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)" error() end @@ -59,9 +54,9 @@ function verify_ir(ir::IRCode) end for s in block.succs if !(idx in ir.cfg.blocks[s].preds) - @show ir.cfg - @show ir - @show ir.argtypes + #@Base.show ir.cfg + #@Base.show ir + #@Base.show ir.argtypes @verify_error "Successor $s of block $idx not in predecessor list" error() end @@ -77,9 +72,8 @@ function verify_ir(ir::IRCode) for i = 1:length(stmt.edges) edge = stmt.edges[i] if !(edge == 0 && bb == 1) && !(edge in ir.cfg.blocks[bb].preds) - enable_new_optimizer[] = false - @show ir.argtypes - @show ir + #@Base.show ir.argtypes + #@Base.show ir @verify_error "Edge $edge of φ node $idx not in predecessor list" error() end diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 94a7d087a3bad..34aadcc7a9d7f 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -293,3 +293,16 @@ function get_label_map(body::Vector{Any}) end return labelmap end + +########### +# options # +########### + +inlining_enabled() = (JLOptions().can_inline == 1) +coverage_enabled() = (JLOptions().code_coverage != 0) +function inbounds_option() + opt_check_bounds = JLOptions().check_bounds + opt_check_bounds == 0 && return :default + opt_check_bounds == 1 && return :on + return :off +end