diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 67ad2c2ae1f48..623b3535e9ab9 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -71,7 +71,7 @@ const BBNumber = Int const PreNumber = Int const PostNumber = Int -struct DFSTree +mutable struct DFSTree # These map between BB number and pre- or postorder numbers to_pre::Vector{PreNumber} from_pre::Vector{BBNumber} @@ -258,9 +258,27 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # we wanted to. resize!(state, n_nodes) for w in 1:max_pre + # Only reset semidominators for nodes we want to recompute state[w] = SNCAData(typemax(PreNumber), w) end + # If we are only recomputing some of the semidominators, the remaining + # labels should be reset, because they may have become inapplicable to the + # node/semidominator we are currently processing/recomputing. They can + # become inapplicable because of path compressions that were triggered by + # nodes that should only be processed after the current one (but were + # processed the last time `SNCA!` was run). + # + # So, for every node that is not being reprocessed, we reset its label to + # its semidominator, which is the value that its label assumes once its + # semidominator is computed. If this was too conservative, i.e. if the + # label would have been updated before we process the current node in a + # situation where all semidominators were recomputed, then path compression + # will produce the correct label. + for w in max_pre+1:n_nodes + state[w].label = state[w].semi + end + # Calculate semidominators, but only for blocks with preorder number up to # max_pre ancestors = copy(D.to_parent_pre) @@ -372,6 +390,11 @@ end "Given updated blocks, update the given dominator tree with an inserted edge." function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + # Implements Section 3.1 of [GI16] dt = domtree.dfs_tree from_pre = dt.to_pre[from] @@ -393,6 +416,11 @@ end "Given updated blocks, update the given dominator tree with a deleted edge." function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + # Implements Section 3.1 of [GI16] if is_parent(domtree.dfs_tree, from, to) # The `from` block is the parent of the `to` block in the DFS tree, so @@ -445,6 +473,76 @@ function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber) return false end +""" +Rename basic block numbers in a dominator tree, removing the block if it is +renamed to -1. +""" +function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber}) + # Rename DFS tree + rename_nodes!(domtree.dfs_tree, rename_bb) + + # `snca_state` is indexed by preorder number, so should be unchanged + + # Rename `idoms_bb` and `nodes` + new_idoms_bb = zeros(BBNumber, length(domtree.idoms_bb)) + new_nodes = Vector{DomTreeNode}(undef, length(domtree.nodes)) + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + new_idoms_bb[new_bb] = (new_bb == 1) ? + 0 : rename_bb[domtree.idoms_bb[old_bb]] + new_nodes[new_bb] = domtree.nodes[old_bb] + map!(i -> rename_bb[i], + new_nodes[new_bb].children, + new_nodes[new_bb].children) + end + end + + # length of `to_pre` after renaming DFS tree is new number of basic blocks + resize!(new_idoms_bb, length(domtree.dfs_tree.to_pre)) # maybe? + resize!(new_nodes, length(domtree.dfs_tree.to_pre)) + + domtree.idoms_bb = new_idoms_bb + domtree.nodes = new_nodes + return domtree +end + +""" +Rename basic block numbers in a DFS tree, removing the block if it is renamed +to -1. +""" +function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) + n_blocks = length(D.to_pre) + n_reachable_blocks = length(D.from_pre) + + new_to_pre = zeros(PreNumber, n_blocks) + new_from_pre = Vector{BBNumber}(undef, n_reachable_blocks) + new_to_post = zeros(PostNumber, n_blocks) + new_from_post = Vector{BBNumber}(undef, n_reachable_blocks) + max_new_bb = 0 + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + new_to_pre[new_bb] = D.to_pre[old_bb] + new_from_pre[D.to_pre[old_bb]] = new_bb + new_to_post[new_bb] = D.to_post[old_bb] + new_from_post[D.to_post[old_bb]] = new_bb + + # Keep track of highest BB number to resize arrays with + if new_bb > max_new_bb + max_new_bb = new_bb + end + end + end + resize!(new_to_pre, max_new_bb) + resize!(new_to_post, max_new_bb) + + D.to_pre = new_to_pre + D.from_pre = new_from_pre + D.to_post = new_to_post + D.from_post = new_from_post + # `to_parent_pre` should be unchanged + return D +end + """ Checks if bb1 dominates bb2. bb1 and bb2 are indexes into the CFG blocks. diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 196e85aad27a9..5a8be98f8fab6 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -110,7 +110,6 @@ end function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState) ir = just_construct_ssa(ci, copy_exprargs(ci.code), nargs, sv) #@Base.show ("after_construct", ir) - # TODO: Domsorting can produce an updated domtree - no need to recompute here @timeit "compact 1" ir = compact!(ir) @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv) #@timeit "verify 2" verify_ir(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 7f1325ad84c09..77af5227e5432 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -295,6 +295,7 @@ function finish_cfg_inline!(state::CFGInliningState) end end +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -412,6 +413,7 @@ end const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)") +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -494,7 +496,9 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool) - # Compute the new CFG first (modulo statement ranges, which will be computed below) + # Compute the new CFG first (modulo statement ranges, which will be + # computed below, and the domtree, which will be updated below before we + # iterate through the statements) state = CFGInliningState(ir) for item in todo if isa(item, UnionSplit) @@ -515,6 +519,9 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo let compact = IncrementalCompact(ir, false) compact.result_bbs = state.new_cfg_blocks + # Recompute the domtree now that the CFG has been modified + compact.result_domtree = construct_domtree(compact.result_bbs) + # This needs to be a minimum and is more of a size hint nn = 0 for item in todo @@ -568,7 +575,6 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo compact[idx] = PhiNode(Any[edge == length(state.bb_rename) ? length(state.new_cfg_blocks) : state.bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values) end end - ir = finish(compact) end return ir diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 9eb0670418b4e..16fb36079c291 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -52,7 +52,7 @@ struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? - domtree + domtree # TODO type end function CFG(blocks::Vector{BasicBlock}, index::Vector{Int}) @@ -72,11 +72,7 @@ function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) end function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) - preds = cfg.blocks[to].preds - succs = cfg.blocks[from].succs - # Assumes that blocks appear at most once in preds and succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + kill_edge!(cfg.blocks, from, to) domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to) nothing end @@ -491,6 +487,7 @@ mutable struct IncrementalCompact result_lines::Vector{Int32} result_flags::Vector{UInt8} result_bbs::Vector{BasicBlock} + result_domtree # TODO type ssa_rename::Vector{Any} bb_rename_pred::Vector{Int} @@ -516,7 +513,7 @@ mutable struct IncrementalCompact cfg_transforms_enabled::Bool fold_constant_branches::Bool function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=true) - # Sort by position with attach after nodes affter regular ones + # Sort by position with attach after nodes after regular ones perm = my_sortperm(Int[(code.new_nodes[i].pos*2 + Int(code.new_nodes[i].attach_after)) for i in 1:length(code.new_nodes)]) new_len = length(code.stmts) + length(code.new_nodes) result = Array{Any}(undef, new_len) @@ -553,18 +550,23 @@ mutable struct IncrementalCompact let blocks=blocks result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] end + result_domtree = copy(code.cfg.domtree) + rename_nodes!(result_domtree, bb_rename) else bb_rename = Vector{Int}() result_bbs = code.cfg.blocks + result_domtree = code.cfg.domtree end ssa_rename = Any[SSAValue(i) for i = 1:new_len] late_fixup = Vector{Int}() new_new_nodes = NewNode[] pending_nodes = NewNode[] pending_perm = Int[] - return new(code, result, result_types, result_lines, result_flags, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) + return new(code, result, result_types, result_lines, result_flags, + result_bbs, result_domtree, ssa_rename, bb_rename, + bb_rename, used_ssas, late_fixup, perm, 1, new_new_nodes, + pending_nodes, pending_perm, 1, 1, 1, false, + allow_cfg_transforms, allow_cfg_transforms) end # For inlining @@ -578,11 +580,12 @@ mutable struct IncrementalCompact new_new_nodes = NewNode[] pending_nodes = NewNode[] pending_perm = Int[] - return new(code, parent.result, parent.result_types, parent.result_lines, parent.result_flags, - parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas, - late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, result_offset, parent.active_result_bb, false, false, false) + return new(code, parent.result, parent.result_types, + parent.result_lines, parent.result_flags, parent.result_bbs, + parent.result_domtree, ssa_rename, bb_rename, bb_rename, + parent.used_ssas, late_fixup, perm, 1, new_new_nodes, + pending_nodes, pending_perm, 1, result_offset, + parent.active_result_bb, false, false, false) end end @@ -860,60 +863,73 @@ function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int) preds, succs = bbs[to].preds, bbs[from].succs deleteat!(preds, findfirst(x->x === from, preds)::Int) deleteat!(succs, findfirst(x->x === to, succs)::Int) - if length(preds) == 0 - for succ in copy(bbs[to].succs) - kill_edge!(bbs, to, succ) - end - end + nothing end # N.B.: from and to are non-renamed indices function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int) - # Note: We recursively kill as many edges as are obviously dead. However, this - # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or - # worstcase during codegen). - preds = compact.result_bbs[compact.bb_rename_succ[to]].preds - succs = compact.result_bbs[compact.bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) - # Check if the block is now dead - if length(preds) == 0 - for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)) + renamed_from = compact.bb_rename_pred[from] + renamed_to = compact.bb_rename_succ[to] + + preds = compact.result_bbs[renamed_to].preds + succs = compact.result_bbs[renamed_from].succs + + deleteat!(preds, findfirst(x->x === renamed_from, preds)::Int) + deleteat!(succs, findfirst(x->x === renamed_to, succs)::Int) + + domtree_delete_edge!(compact.result_domtree, + compact.result_bbs, + renamed_from, + renamed_to) + + # Recursively kill edges to dead blocks. This is not necessary for removing + # dead blocks with a subsequent `IncrementalCompact`, but killing all the + # statements in dead blocks allows passes not to have to worry about + # whether statements have become invalid by becoming dead. + if bb_unreachable(compact.result_domtree, renamed_to) + for succ in copy(compact.result_bbs[renamed_to].succs) + # Make sure edge hasn't already been killed in a previous iteration + # of this loop + if succ in compact.result_bbs[renamed_to].succs + kill_edge!(compact, active_bb, to, + findfirst(x->x === succ, compact.bb_rename_pred)) + end end if to < active_bb # Kill all statements in the block - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + stmts = compact.result_bbs[renamed_to].stmts for stmt in stmts compact.result[stmt] = nothing end compact.result[last(stmts)] = ReturnNode() end - else - # We need to remove this edge from any phi nodes - if to < active_bb - idx = first(compact.result_bbs[compact.bb_rename_succ[to]].stmts) - while idx < length(compact.result) - stmt = compact.result[idx] - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x === compact.bb_rename_pred[from], stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end - idx += 1 + + # TODO: kill statements in blocks past `active_bb` + end + + # Remove this edge from any phi nodes + if to < active_bb + idx = first(compact.result_bbs[renamed_to].stmts) + while idx < length(compact.result) + stmt = compact.result[idx] + stmt === nothing && continue + isa(stmt, PhiNode) || break + i = findfirst(x-> x === renamed_from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) end - else - idx = first(compact.ir.cfg.blocks[to].stmts) - for stmt in CompactPeekIterator(compact, idx) - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x === from, stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end + idx += 1 + end + else + idx = first(compact.ir.cfg.blocks[to].stmts) + for stmt in CompactPeekIterator(compact, idx) + stmt === nothing && continue + isa(stmt, PhiNode) || break + i = findfirst(x-> x === from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) end end end @@ -1361,7 +1377,9 @@ end function complete(compact::IncrementalCompact) result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1) - cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) + cfg = CFG(result_bbs, + Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)], + compact.result_domtree) return IRCode(compact.ir, compact.result, compact.result_types, compact.result_lines, compact.result_flags, cfg, compact.new_new_nodes) end diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index b96225f045e12..969d38cb09a28 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -384,7 +384,6 @@ function domsort_ssa!(ir::IRCode) push!(result_order, node) cs = ir.cfg.domtree.nodes[node].children terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)] - iscondbr = isa(terminator, GotoIfNot) let old_node = node + 1 if length(cs) >= 1 # Adding the nodes in reverse sorted order attempts to retain @@ -493,7 +492,11 @@ function domsort_ssa!(ir::IRCode) new_bbs[new_bb] = BasicBlock(inst_range, new_preds, new_succs) end result_stmts = Any[renumber_ssa!(result_stmts[i], inst_rename, true) for i in 1:length(result_stmts)] - cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]]) + domtree = rename_nodes!( + ir.cfg.domtree, + Int[haskey(bb_rename, old_bb) ? bb_rename[old_bb] : -1 + for old_bb in 1:length(ir.cfg.blocks)]) + cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]], domtree) new_new_nodes = Vector{NewNode}(undef, length(ir.new_nodes)) for i = 1:length(ir.new_nodes) entry = ir.new_nodes[i]