Skip to content

Commit

Permalink
Rollup merge of rust-lang#70973 - ecstatic-morse:recursion-lint, r=jo…
Browse files Browse the repository at this point in the history
…nas-schievink

Use forward traversal for unconditional recursion lint

While reviewing rust-lang#70822, I noted that rust-lang#54444 could be solved without requiring the predecessor graph and without allocating a `Vec<Span>` for every basic block. The unconditional recursion lint is not a performance bottleneck however, so I approved rust-lang#70822 as it was.

Nevertheless, I wanted to try implementing my idea using `TriColorDepthFirstSearch`, which is a DFS that can differentiate between [forward/tree edges and backward ones](https://en.wikipedia.org/wiki/Depth-first_search#Output_of_a_depth-first_search). I found this approach more straightforward than the existing one, so I'm opening this PR to see if it is desirable.

The pass is now just a DFS across the control-flow graph. We ignore false edges and false unwinds, as well as the successors of recursive calls, just like existing pass does. If we see a back-edge (loop) or a terminator that would cause us to yield control-flow back to the caller (`Return`, `Resume`, etc.), we know that the function does not unconditionally recurse.

r? @jonas-schievink
  • Loading branch information
Centril authored Apr 10, 2020
2 parents 6b2df46 + 0fc0f34 commit 0d89287
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 138 deletions.
13 changes: 10 additions & 3 deletions src/librustc_data_structures/graph/iterate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ where
// schedule its successors for examination.
self.stack.push(Event { node, becomes: Settled });
for succ in self.graph.successors(node) {
self.stack.push(Event { node: succ, becomes: Visited });
if !visitor.ignore_edge(node, succ) {
self.stack.push(Event { node: succ, becomes: Visited });
}
}
}
}
Expand Down Expand Up @@ -255,16 +257,21 @@ where
/// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms
fn node_examined(
&mut self,
_target: G::Node,
_node: G::Node,
_prior_status: Option<NodeStatus>,
) -> ControlFlow<Self::BreakVal> {
ControlFlow::Continue
}

/// Called after all nodes reachable from this one have been examined.
fn node_settled(&mut self, _target: G::Node) -> ControlFlow<Self::BreakVal> {
fn node_settled(&mut self, _node: G::Node) -> ControlFlow<Self::BreakVal> {
ControlFlow::Continue
}

/// Behave as if no edges exist from `source` to `target`.
fn ignore_edge(&mut self, _source: G::Node, _target: G::Node) -> bool {
false
}
}

/// This `TriColorVisitor` looks for back edges in a graph, which indicate that a cycle exists.
Expand Down
4 changes: 2 additions & 2 deletions src/librustc_mir_build/build/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,11 @@ fn mir_build(tcx: TyCtxt<'_>, def_id: DefId) -> BodyAndCache<'_> {
build::construct_const(cx, body_id, return_ty, return_ty_span)
};

lints::check(tcx, &body, def_id);

let mut body = BodyAndCache::new(body);
body.ensure_predecessors();

lints::check(tcx, &body.unwrap_read_only(), def_id);

// The borrow checker will replace all the regions here with its own
// inference variables. There's no point having non-erased regions here.
// The exception is `body.user_type_annotations`, which is used unmodified
Expand Down
242 changes: 109 additions & 133 deletions src/librustc_mir_build/lints.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use rustc_data_structures::graph::iterate::{
ControlFlow, NodeStatus, TriColorDepthFirstSearch, TriColorVisitor,
};
use rustc_hir::def_id::DefId;
use rustc_hir::intravisit::FnKind;
use rustc_index::bit_set::BitSet;
use rustc_index::vec::IndexVec;
use rustc_middle::hir::map::blocks::FnLikeNode;
use rustc_middle::mir::{BasicBlock, Body, ReadOnlyBodyAndCache, TerminatorKind, START_BLOCK};
use rustc_middle::ty::subst::InternalSubsts;
use rustc_middle::mir::{BasicBlock, Body, Operand, TerminatorKind};
use rustc_middle::ty::subst::{GenericArg, InternalSubsts};
use rustc_middle::ty::{self, AssocItem, AssocItemContainer, Instance, TyCtxt};
use rustc_session::lint::builtin::UNCONDITIONAL_RECURSION;
use std::collections::VecDeque;
use rustc_span::Span;

crate fn check<'tcx>(tcx: TyCtxt<'tcx>, body: &ReadOnlyBodyAndCache<'_, 'tcx>, def_id: DefId) {
crate fn check<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx>, def_id: DefId) {
let hir_id = tcx.hir().as_local_hir_id(def_id).unwrap();

if let Some(fn_like_node) = FnLikeNode::from_node(tcx.hir().get(hir_id)) {
Expand All @@ -18,158 +19,133 @@ crate fn check<'tcx>(tcx: TyCtxt<'tcx>, body: &ReadOnlyBodyAndCache<'_, 'tcx>, d
return;
}

check_fn_for_unconditional_recursion(tcx, body, def_id);
}
}

fn check_fn_for_unconditional_recursion<'tcx>(
tcx: TyCtxt<'tcx>,
body: &ReadOnlyBodyAndCache<'_, 'tcx>,
def_id: DefId,
) {
let self_calls = find_blocks_calling_self(tcx, &body, def_id);

// Stores a list of `Span`s for every basic block. Those are the spans of self-calls where we
// know that one of them will definitely be reached. If the list is empty, the block either
// wasn't processed yet or will not always go to a self-call.
let mut results = IndexVec::from_elem_n(vec![], body.basic_blocks().len());

// We start the analysis at the self calls and work backwards.
let mut queue: VecDeque<_> = self_calls.iter().collect();

while let Some(bb) = queue.pop_front() {
if !results[bb].is_empty() {
// Already propagated.
continue;
}

let locations = if self_calls.contains(bb) {
// `bb` *is* a self-call.
// We don't look at successors here because they are irrelevant here and we don't want
// to lint them (eg. `f(); f()` should only lint the first call).
vec![bb]
} else {
// If *all* successors of `bb` lead to a self-call, emit notes at all of their
// locations.

// Determine all "relevant" successors. We ignore successors only reached via unwinding.
let terminator = body[bb].terminator();
let relevant_successors = match &terminator.kind {
TerminatorKind::Call { destination: None, .. }
| TerminatorKind::Yield { .. }
| TerminatorKind::GeneratorDrop => None.into_iter().chain(&[]),
TerminatorKind::SwitchInt { targets, .. } => None.into_iter().chain(targets),
TerminatorKind::Goto { target }
| TerminatorKind::Drop { target, .. }
| TerminatorKind::DropAndReplace { target, .. }
| TerminatorKind::Assert { target, .. }
| TerminatorKind::FalseEdges { real_target: target, .. }
| TerminatorKind::FalseUnwind { real_target: target, .. }
| TerminatorKind::Call { destination: Some((_, target)), .. } => {
Some(target).into_iter().chain(&[])
}
TerminatorKind::Resume
| TerminatorKind::Abort
| TerminatorKind::Return
| TerminatorKind::Unreachable => {
// We propagate backwards, so these should never be encountered here.
unreachable!("unexpected terminator {:?}", terminator.kind)
}
};

// If all our successors are known to lead to self-calls, then we do too.
let all_are_self_calls =
relevant_successors.clone().all(|&succ| !results[succ].is_empty());

if all_are_self_calls {
// We'll definitely lead to a self-call. Merge all call locations of the successors
// for linting them later.
relevant_successors.flat_map(|&succ| results[succ].iter().copied()).collect()
} else {
// At least 1 successor does not always lead to a self-call, so we also don't.
vec![]
// If this is trait/impl method, extract the trait's substs.
let trait_substs = match tcx.opt_associated_item(def_id) {
Some(AssocItem {
container: AssocItemContainer::TraitContainer(trait_def_id), ..
}) => {
let trait_substs_count = tcx.generics_of(trait_def_id).count();
&InternalSubsts::identity_for_item(tcx, def_id)[..trait_substs_count]
}
_ => &[],
};

if !locations.is_empty() {
// This is a newly confirmed-to-always-reach-self-call block.
results[bb] = locations;

// Propagate backwards through the CFG.
debug!("propagate loc={:?} in {:?} -> {:?}", results[bb], bb, body.predecessors()[bb]);
queue.extend(body.predecessors()[bb].iter().copied());
let mut vis = Search { tcx, body, def_id, reachable_recursive_calls: vec![], trait_substs };
if let Some(NonRecursive) = TriColorDepthFirstSearch::new(&body).run_from_start(&mut vis) {
return;
}
}

debug!("unconditional recursion results: {:?}", results);

let self_call_locations = &mut results[START_BLOCK];
self_call_locations.sort();
self_call_locations.dedup();
vis.reachable_recursive_calls.sort();

if !self_call_locations.is_empty() {
let hir_id = tcx.hir().as_local_hir_id(def_id).unwrap();
let sp = tcx.sess.source_map().guess_head_span(tcx.hir().span(hir_id));
tcx.struct_span_lint_hir(UNCONDITIONAL_RECURSION, hir_id, sp, |lint| {
let mut db = lint.build("function cannot return without recursing");
db.span_label(sp, "cannot return without recursing");
// offer some help to the programmer.
for bb in self_call_locations {
let span = body.basic_blocks()[*bb].terminator().source_info.span;
db.span_label(span, "recursive call site");
for call_span in vis.reachable_recursive_calls {
db.span_label(call_span, "recursive call site");
}
db.help("a `loop` may express intention better if this is on purpose");
db.emit();
});
}
}

/// Finds blocks with `Call` terminators that would end up calling back into the same method.
fn find_blocks_calling_self<'tcx>(
struct NonRecursive;

struct Search<'mir, 'tcx> {
tcx: TyCtxt<'tcx>,
body: &Body<'tcx>,
body: &'mir Body<'tcx>,
def_id: DefId,
) -> BitSet<BasicBlock> {
let param_env = tcx.param_env(def_id);
trait_substs: &'tcx [GenericArg<'tcx>],

// If this is trait/impl method, extract the trait's substs.
let trait_substs_count = match tcx.opt_associated_item(def_id) {
Some(AssocItem { container: AssocItemContainer::TraitContainer(trait_def_id), .. }) => {
tcx.generics_of(trait_def_id).count()
reachable_recursive_calls: Vec<Span>,
}

impl<'mir, 'tcx> Search<'mir, 'tcx> {
/// Returns `true` if `func` refers to the function we are searching in.
fn is_recursive_call(&self, func: &Operand<'tcx>) -> bool {
let Search { tcx, body, def_id, trait_substs, .. } = *self;
let param_env = tcx.param_env(def_id);

let func_ty = func.ty(body, tcx);
if let ty::FnDef(fn_def_id, substs) = func_ty.kind {
let (call_fn_id, call_substs) =
if let Some(instance) = Instance::resolve(tcx, param_env, fn_def_id, substs) {
(instance.def_id(), instance.substs)
} else {
(fn_def_id, substs)
};

// FIXME(#57965): Make this work across function boundaries

// If this is a trait fn, the substs on the trait have to match, or we might be
// calling into an entirely different method (for example, a call from the default
// method in the trait to `<A as Trait<B>>::method`, where `A` and/or `B` are
// specific types).
return call_fn_id == def_id && &call_substs[..trait_substs.len()] == trait_substs;
}

false
}
}

impl<'mir, 'tcx> TriColorVisitor<&'mir Body<'tcx>> for Search<'mir, 'tcx> {
type BreakVal = NonRecursive;

fn node_examined(
&mut self,
bb: BasicBlock,
prior_status: Option<NodeStatus>,
) -> ControlFlow<Self::BreakVal> {
// Back-edge in the CFG (loop).
if let Some(NodeStatus::Visited) = prior_status {
return ControlFlow::Break(NonRecursive);
}

match self.body[bb].terminator().kind {
// These terminators return control flow to the caller.
TerminatorKind::Abort
| TerminatorKind::GeneratorDrop
| TerminatorKind::Resume
| TerminatorKind::Return
| TerminatorKind::Unreachable
| TerminatorKind::Yield { .. } => ControlFlow::Break(NonRecursive),

// These do not.
TerminatorKind::Assert { .. }
| TerminatorKind::Call { .. }
| TerminatorKind::Drop { .. }
| TerminatorKind::DropAndReplace { .. }
| TerminatorKind::FalseEdges { .. }
| TerminatorKind::FalseUnwind { .. }
| TerminatorKind::Goto { .. }
| TerminatorKind::SwitchInt { .. } => ControlFlow::Continue,
}
_ => 0,
};
let trait_substs = &InternalSubsts::identity_for_item(tcx, def_id)[..trait_substs_count];

let mut self_calls = BitSet::new_empty(body.basic_blocks().len());

for (bb, data) in body.basic_blocks().iter_enumerated() {
if let TerminatorKind::Call { func, .. } = &data.terminator().kind {
let func_ty = func.ty(body, tcx);

if let ty::FnDef(fn_def_id, substs) = func_ty.kind {
let (call_fn_id, call_substs) =
if let Some(instance) = Instance::resolve(tcx, param_env, fn_def_id, substs) {
(instance.def_id(), instance.substs)
} else {
(fn_def_id, substs)
};

// FIXME(#57965): Make this work across function boundaries

// If this is a trait fn, the substs on the trait have to match, or we might be
// calling into an entirely different method (for example, a call from the default
// method in the trait to `<A as Trait<B>>::method`, where `A` and/or `B` are
// specific types).
let is_self_call =
call_fn_id == def_id && &call_substs[..trait_substs.len()] == trait_substs;

if is_self_call {
self_calls.insert(bb);
}
}

fn node_settled(&mut self, bb: BasicBlock) -> ControlFlow<Self::BreakVal> {
// When we examine a node for the last time, remember it if it is a recursive call.
let terminator = self.body[bb].terminator();
if let TerminatorKind::Call { func, .. } = &terminator.kind {
if self.is_recursive_call(func) {
self.reachable_recursive_calls.push(terminator.source_info.span);
}
}

ControlFlow::Continue
}

self_calls
fn ignore_edge(&mut self, bb: BasicBlock, target: BasicBlock) -> bool {
// Don't traverse successors of recursive calls or false CFG edges.
match self.body[bb].terminator().kind {
TerminatorKind::Call { ref func, .. } => self.is_recursive_call(func),

TerminatorKind::FalseUnwind { unwind: Some(imaginary_target), .. }
| TerminatorKind::FalseEdges { imaginary_target, .. } => imaginary_target == target,

_ => false,
}
}
}

0 comments on commit 0d89287

Please sign in to comment.